lib/Perl/Tidy/Formatter.pm

   1 #####################################################################
   2 #
   3 # The Perl::Tidy::Formatter package adds indentation, whitespace, and
   4 # line breaks to the token stream
   5 #
   6 #####################################################################
   7
   8 # Index...
   9 # CODE SECTION 1: Preliminary code, global definitions and sub new
  10 #                 sub new
  11 # CODE SECTION 2: Some Basic Utilities
  12 # CODE SECTION 3: Check and process options
  13 #                 sub check_options
  14 # CODE SECTION 4: Receive lines from the tokenizer
  15 #                 sub write_line
  16 # CODE SECTION 5: Pre-process the entire file
  17 #                 sub finish_formatting
  18 # CODE SECTION 6: Process line-by-line
  19 #                 sub process_all_lines
  20 # CODE SECTION 7: Process lines of code
  21 #                 process_line_of_CODE
  22 # CODE SECTION 8: Utilities for setting breakpoints
  23 #                 sub set_forced_breakpoint
  24 # CODE SECTION 9: Process batches of code
  25 #                 sub grind_batch_of_CODE
  26 # CODE SECTION 10: Code to break long statments
  27 #                  sub set_continuation_breaks
  28 # CODE SECTION 11: Code to break long lists
  29 #                  sub scan_list
  30 # CODE SECTION 12: Code for setting indentation
  31 # CODE SECTION 13: Preparing batches for vertical alignment
  32 #                  sub send_lines_to_vertical_aligner
  33 # CODE SECTION 14: Code for creating closing side comments
  34 #                  sub add_closing_side_comment
  35 # CODE SECTION 15: Summarize
  36 #                  sub wrapup
  37
  38 #######################################################################
  39 # CODE SECTION 1: Preliminary code and global definitions up to sub new
  40 #######################################################################
  41
  42 package Perl::Tidy::Formatter;
  43 use strict;
  44 use warnings;
  45
  46 # this can be turned on for extra checking during development
  47 use constant DEVEL_MODE => 0;
  48
  49 { #<<< A non-indenting brace to contain all lexical variables
  50
  51 use Carp;
  52 our $VERSION = '20210717';
  53
  54 # The Tokenizer will be loaded with the Formatter
  55 ##use Perl::Tidy::Tokenizer;    # for is_keyword()
  56
  57 sub AUTOLOAD {
  58
  59     # Catch any undefined sub calls so that we are sure to get
  60     # some diagnostic information.  This sub should never be called
  61     # except for a programming error.
  62     our $AUTOLOAD;
  63     return if ( $AUTOLOAD =~ /\bDESTROY$/ );
  64     my ( $pkg, $fname, $lno ) = caller();
  65     my $my_package = __PACKAGE__;
  66     print STDERR <<EOM;
  67 ======================================================================
  68 Error detected in package '$my_package', version $VERSION
  69 Received unexpected AUTOLOAD call for sub '$AUTOLOAD'
  70 Called from package: '$pkg'
  71 Called from File '$fname'  at line '$lno'
  72 This error is probably due to a recent programming change
  73 ======================================================================
  74 EOM
  75     exit 1;
  76 }
  77
  78 sub DESTROY {
  79     my $self = shift;
  80     $self->_decrement_count();
  81     return;
  82 }
  83
  84 sub Die {
  85     my ($msg) = @_;
  86     Perl::Tidy::Die($msg);
  87     croak "unexpected return from Perl::Tidy::Die";
  88 }
  89
  90 sub Warn {
  91     my ($msg) = @_;
  92     Perl::Tidy::Warn($msg);
  93     return;
  94 }
  95
  96 sub Fault {
  97     my ($msg) = @_;
  98
  99     # This routine is called for errors that really should not occur
 100     # except if there has been a bug introduced by a recent program change.
 101     # Please add comments at calls to Fault to explain why the call
 102     # should not occur, and where to look to fix it.
 103     my ( $package0, $filename0, $line0, $subroutine0 ) = caller(0);
 104     my ( $package1, $filename1, $line1, $subroutine1 ) = caller(1);
 105     my ( $package2, $filename2, $line2, $subroutine2 ) = caller(2);
 106     my $input_stream_name = get_input_stream_name();
 107
 108     Die(<<EOM);
 109 ==============================================================================
 110 While operating on input stream with name: '$input_stream_name'
 111 A fault was detected at line $line0 of sub '$subroutine1'
 112 in file '$filename1'
 113 which was called from line $line1 of sub '$subroutine2'
 114 Message: '$msg'
 115 This is probably an error introduced by a recent programming change.
 116 Perl::Tidy::Formatter.pm reports VERSION='$VERSION'.
 117 ==============================================================================
 118 EOM
 119
 120     # We shouldn't get here, but this return is to keep Perl-Critic from
 121     # complaining.
 122     return;
 123 }
 124
 125 sub Exit {
 126     my ($msg) = @_;
 127     Perl::Tidy::Exit($msg);
 128     croak "unexpected return from Perl::Tidy::Exit";
 129 }
 130
 131 # Global variables ...
 132 my (
 133
 134     ##################################################################
 135     # Section 1: Global variables which are either always constant or
 136     # are constant after being configured by user-supplied
 137     # parameters.  They remain constant as a file is being processed.
 138     ##################################################################
 139
 140     # user parameters and shortcuts
 141     $rOpts,
 142     $rOpts_closing_side_comment_maximum_text,
 143     $rOpts_continuation_indentation,
 144     $rOpts_indent_columns,
 145     $rOpts_line_up_parentheses,
 146     $rOpts_maximum_line_length,
 147     $rOpts_variable_maximum_line_length,
 148     $rOpts_block_brace_tightness,
 149     $rOpts_block_brace_vertical_tightness,
 150     $rOpts_stack_closing_block_brace,
 151     $rOpts_maximum_consecutive_blank_lines,
 152
 153     $rOpts_recombine,
 154     $rOpts_add_newlines,
 155     $rOpts_break_at_old_comma_breakpoints,
 156     $rOpts_ignore_old_breakpoints,
 157
 158     $rOpts_keep_interior_semicolons,
 159     $rOpts_comma_arrow_breakpoints,
 160     $rOpts_maximum_fields_per_table,
 161     $rOpts_one_line_block_semicolons,
 162     $rOpts_break_at_old_semicolon_breakpoints,
 163
 164     $rOpts_tee_side_comments,
 165     $rOpts_tee_block_comments,
 166     $rOpts_tee_pod,
 167     $rOpts_delete_side_comments,
 168     $rOpts_delete_closing_side_comments,
 169     $rOpts_format_skipping,
 170     $rOpts_indent_only,
 171     $rOpts_static_block_comments,
 172
 173     $rOpts_add_whitespace,
 174     $rOpts_delete_old_whitespace,
 175     $rOpts_freeze_whitespace,
 176     $rOpts_function_paren_vertical_alignment,
 177     $rOpts_whitespace_cycle,
 178     $rOpts_ignore_side_comment_lengths,
 179
 180     $rOpts_break_at_old_attribute_breakpoints,
 181     $rOpts_break_at_old_keyword_breakpoints,
 182     $rOpts_break_at_old_logical_breakpoints,
 183     $rOpts_break_at_old_ternary_breakpoints,
 184     $rOpts_short_concatenation_item_length,
 185     $rOpts_closing_side_comment_else_flag,
 186     $rOpts_fuzzy_line_length,
 187
 188     # Static hashes initialized in a BEGIN block
 189     %is_assignment,
 190     %is_keyword_returning_list,
 191     %is_if_unless_and_or_last_next_redo_return,
 192     %is_if_elsif_else_unless_while_until_for_foreach,
 193     %is_if_unless_while_until_for,
 194     %is_last_next_redo_return,
 195     %is_sort_map_grep,
 196     %is_sort_map_grep_eval,
 197     %is_if_unless,
 198     %is_and_or,
 199     %is_chain_operator,
 200     %is_block_without_semicolon,
 201     %ok_to_add_semicolon_for_block_type,
 202     %is_opening_type,
 203     %is_closing_type,
 204     %is_opening_token,
 205     %is_closing_token,
 206     %is_equal_or_fat_comma,
 207     %is_block_with_ci,
 208     %is_counted_type,
 209     %is_opening_sequence_token,
 210     %is_closing_sequence_token,
 211     %is_container_label_type,
 212
 213     @all_operators,
 214
 215     # Initialized in check_options. These are constants and could
 216     # just as well be initialized in a BEGIN block.
 217     %is_do_follower,
 218     %is_if_brace_follower,
 219     %is_else_brace_follower,
 220     %is_anon_sub_brace_follower,
 221     %is_anon_sub_1_brace_follower,
 222     %is_other_brace_follower,
 223
 224     # Initialized in sub initialize_whitespace_hashes;
 225     # Some can be modified according to user parameters.
 226     %binary_ws_rules,
 227     %want_left_space,
 228     %want_right_space,
 229
 230     # Configured in sub initialize_bond_strength_hashes
 231     %right_bond_strength,
 232     %left_bond_strength,
 233
 234     # Hashes for -kbb=s and -kba=s
 235     %keep_break_before_type,
 236     %keep_break_after_type,
 237
 238     # Initialized in check_options, modified by prepare_cuddled_block_types:
 239     %want_one_line_block,
 240     %is_braces_left_exclude_block,
 241
 242     # Initialized in sub prepare_cuddled_block_types
 243     $rcuddled_block_types,
 244
 245     # Initialized and configured in check_optioms
 246     %outdent_keyword,
 247     %keyword_paren_inner_tightness,
 248
 249     %want_break_before,
 250
 251     %break_before_container_types,
 252     %container_indentation_options,
 253
 254     %space_after_keyword,
 255
 256     %tightness,
 257     %matching_token,
 258
 259     %opening_vertical_tightness,
 260     %closing_vertical_tightness,
 261     %closing_token_indentation,
 262     $some_closing_token_indentation,
 263
 264     %opening_token_right,
 265     %stack_opening_token,
 266     %stack_closing_token,
 267
 268     %weld_nested_exclusion_rules,
 269     %line_up_parentheses_exclusion_rules,
 270
 271     # regex patterns for text identification.
 272     # Most are initialized in a sub make_**_pattern during configuration.
 273     # Most can be configured by user parameters.
 274     $SUB_PATTERN,
 275     $ASUB_PATTERN,
 276     $ANYSUB_PATTERN,
 277     $static_block_comment_pattern,
 278     $static_side_comment_pattern,
 279     $format_skipping_pattern_begin,
 280     $format_skipping_pattern_end,
 281     $non_indenting_brace_pattern,
 282     $bli_pattern,
 283     $block_brace_vertical_tightness_pattern,
 284     $blank_lines_after_opening_block_pattern,
 285     $blank_lines_before_closing_block_pattern,
 286     $keyword_group_list_pattern,
 287     $keyword_group_list_comment_pattern,
 288     $closing_side_comment_prefix_pattern,
 289     $closing_side_comment_list_pattern,
 290
 291     # Table to efficiently find indentation and max line length
 292     # from level.
 293     @maximum_line_length_at_level,
 294     @maximum_text_length_at_level,
 295
 296     # Total number of sequence items in a weld, for quick checks
 297     $total_weld_count,
 298
 299     #########################################################
 300     # Section 2: Work arrays for the current batch of tokens.
 301     #########################################################
 302
 303     # These are re-initialized for each batch of code
 304     # in sub initialize_batch_variables.
 305     $max_index_to_go,
 306     @block_type_to_go,
 307     @type_sequence_to_go,
 308     @bond_strength_to_go,
 309     @forced_breakpoint_to_go,
 310     @token_lengths_to_go,
 311     @summed_lengths_to_go,
 312     @levels_to_go,
 313     @leading_spaces_to_go,
 314     @reduced_spaces_to_go,
 315     @mate_index_to_go,
 316     @ci_levels_to_go,
 317     @nesting_depth_to_go,
 318     @nobreak_to_go,
 319     @old_breakpoint_to_go,
 320     @tokens_to_go,
 321     @K_to_go,
 322     @types_to_go,
 323     @inext_to_go,
 324     @iprev_to_go,
 325     @parent_seqno_to_go,
 326
 327 );
 328
 329 BEGIN {
 330
 331     # Initialize constants...
 332
 333     # Array index names for token variables
 334     my $i = 0;
 335     use constant {
 336         _BLOCK_TYPE_        => $i++,
 337         _CI_LEVEL_          => $i++,
 338         _CUMULATIVE_LENGTH_ => $i++,
 339         _LINE_INDEX_        => $i++,
 340         _KNEXT_SEQ_ITEM_    => $i++,
 341         _LEVEL_             => $i++,
 342         _SLEVEL_            => $i++,
 343         _TOKEN_             => $i++,
 344         _TOKEN_LENGTH_      => $i++,
 345         _TYPE_              => $i++,
 346         _TYPE_SEQUENCE_     => $i++,
 347
 348         # Number of token variables; must be last in list:
 349         _NVARS => $i++,
 350     };
 351
 352     # Array index names for $self (which is an array ref)
 353     $i = 0;
 354     use constant {
 355         _rlines_                    => $i++,
 356         _rlines_new_                => $i++,
 357         _rLL_                       => $i++,
 358         _Klimit_                    => $i++,
 359         _K_opening_container_       => $i++,
 360         _K_closing_container_       => $i++,
 361         _K_opening_ternary_         => $i++,
 362         _K_closing_ternary_         => $i++,
 363         _K_first_seq_item_          => $i++,
 364         _rK_phantom_semicolons_     => $i++,
 365         _rtype_count_by_seqno_      => $i++,
 366         _ris_function_call_paren_   => $i++,
 367         _rlec_count_by_seqno_       => $i++,
 368         _ris_broken_container_      => $i++,
 369         _ris_permanently_broken_    => $i++,
 370         _rhas_list_                 => $i++,
 371         _rhas_broken_list_          => $i++,
 372         _rhas_broken_list_with_lec_ => $i++,
 373         _rhas_code_block_           => $i++,
 374         _rhas_broken_code_block_    => $i++,
 375         _rhas_ternary_              => $i++,
 376         _ris_excluded_lp_container_ => $i++,
 377         _rwant_reduced_ci_          => $i++,
 378         _rno_xci_by_seqno_          => $i++,
 379         _ris_bli_container_         => $i++,
 380         _rparent_of_seqno_          => $i++,
 381         _rchildren_of_seqno_        => $i++,
 382         _ris_list_by_seqno_         => $i++,
 383         _rbreak_container_          => $i++,
 384         _rshort_nested_             => $i++,
 385         _length_function_           => $i++,
 386         _is_encoded_data_           => $i++,
 387         _fh_tee_                    => $i++,
 388         _sink_object_               => $i++,
 389         _file_writer_object_        => $i++,
 390         _vertical_aligner_object_   => $i++,
 391         _logger_object_             => $i++,
 392         _radjusted_levels_          => $i++,
 393         _this_batch_                => $i++,
 394
 395         _last_output_short_opening_token_ => $i++,
 396
 397         _last_line_leading_type_       => $i++,
 398         _last_line_leading_level_      => $i++,
 399         _last_last_line_leading_level_ => $i++,
 400
 401         _added_semicolon_count_    => $i++,
 402         _first_added_semicolon_at_ => $i++,
 403         _last_added_semicolon_at_  => $i++,
 404
 405         _deleted_semicolon_count_    => $i++,
 406         _first_deleted_semicolon_at_ => $i++,
 407         _last_deleted_semicolon_at_  => $i++,
 408
 409         _embedded_tab_count_    => $i++,
 410         _first_embedded_tab_at_ => $i++,
 411         _last_embedded_tab_at_  => $i++,
 412
 413         _first_tabbing_disagreement_       => $i++,
 414         _last_tabbing_disagreement_        => $i++,
 415         _tabbing_disagreement_count_       => $i++,
 416         _in_tabbing_disagreement_          => $i++,
 417         _first_brace_tabbing_disagreement_ => $i++,
 418         _in_brace_tabbing_disagreement_    => $i++,
 419
 420         _saw_VERSION_in_this_file_ => $i++,
 421         _saw_END_or_DATA_          => $i++,
 422
 423         _rK_weld_left_         => $i++,
 424         _rK_weld_right_        => $i++,
 425         _rweld_len_right_at_K_ => $i++,
 426
 427         _rspecial_side_comment_type_ => $i++,
 428
 429         _rseqno_controlling_my_ci_ => $i++,
 430         _ris_seqno_controlling_ci_ => $i++,
 431         _save_logfile_             => $i++,
 432         _maximum_level_            => $i++,
 433
 434         _rKrange_code_without_comments_ => $i++,
 435         _rbreak_before_Kfirst_          => $i++,
 436         _rbreak_after_Klast_            => $i++,
 437         _rwant_container_open_          => $i++,
 438         _converged_                     => $i++,
 439
 440         _rstarting_multiline_qw_seqno_by_K_ => $i++,
 441         _rending_multiline_qw_seqno_by_K_   => $i++,
 442         _rKrange_multiline_qw_by_seqno_     => $i++,
 443         _rmultiline_qw_has_extra_level_     => $i++,
 444         _rbreak_before_container_by_seqno_  => $i++,
 445         _ris_essential_old_breakpoint_      => $i++,
 446         _roverride_cab3_                    => $i++,
 447         _ris_assigned_structure_            => $i++,
 448     };
 449
 450     # Array index names for _this_batch_ (in above list)
 451     # So _this_batch_ is a sub-array of $self for
 452     # holding the batches of tokens being processed.
 453     $i = 0;
 454     use constant {
 455         _starting_in_quote_        => $i++,
 456         _ending_in_quote_          => $i++,
 457         _is_static_block_comment_  => $i++,
 458         _rlines_K_                 => $i++,
 459         _do_not_pad_               => $i++,
 460         _ibeg0_                    => $i++,
 461         _peak_batch_size_          => $i++,
 462         _max_index_to_go_          => $i++,
 463         _rK_to_go_                 => $i++,
 464         _batch_count_              => $i++,
 465         _rix_seqno_controlling_ci_ => $i++,
 466         _batch_CODE_type_          => $i++,
 467     };
 468
 469     # Sequence number assigned to the root of sequence tree.
 470     # The minimum of the actual sequences numbers is 4, so we can use 1
 471     use constant SEQ_ROOT => 1;
 472
 473     # Codes for insertion and deletion of blanks
 474     use constant DELETE => 0;
 475     use constant STABLE => 1;
 476     use constant INSERT => 2;
 477
 478     # whitespace codes
 479     use constant WS_YES      => 1;
 480     use constant WS_OPTIONAL => 0;
 481     use constant WS_NO       => -1;
 482
 483     # Token bond strengths.
 484     use constant NO_BREAK    => 10000;
 485     use constant VERY_STRONG => 100;
 486     use constant STRONG      => 2.1;
 487     use constant NOMINAL     => 1.1;
 488     use constant WEAK        => 0.8;
 489     use constant VERY_WEAK   => 0.55;
 490
 491     # values for testing indexes in output array
 492     use constant UNDEFINED_INDEX => -1;
 493
 494     # Maximum number of little messages; probably need not be changed.
 495     use constant MAX_NAG_MESSAGES => 6;
 496
 497     # increment between sequence numbers for each type
 498     # For example, ?: pairs might have numbers 7,11,15,...
 499     use constant TYPE_SEQUENCE_INCREMENT => 4;
 500
 501     # Initialize constant hashes ...
 502     my @q;
 503
 504     @q = qw(
 505       = **= += *= &= <<= &&=
 506       -= /= |= >>= ||= //=
 507       .= %= ^=
 508       x=
 509     );
 510     @is_assignment{@q} = (1) x scalar(@q);
 511
 512     @q = qw(
 513       grep
 514       keys
 515       map
 516       reverse
 517       sort
 518       split
 519     );
 520     @is_keyword_returning_list{@q} = (1) x scalar(@q);
 521
 522     @q = qw(is if unless and or err last next redo return);
 523     @is_if_unless_and_or_last_next_redo_return{@q} = (1) x scalar(@q);
 524
 525     # These block types may have text between the keyword and opening
 526     # curly.  Note: 'else' does not, but must be included to allow trailing
 527     # if/elsif text to be appended.
 528     # patch for SWITCH/CASE: added 'case' and 'when'
 529     @q = qw(if elsif else unless while until for foreach case when catch);
 530     @is_if_elsif_else_unless_while_until_for_foreach{@q} =
 531       (1) x scalar(@q);
 532
 533     @q = qw(if unless while until for);
 534     @is_if_unless_while_until_for{@q} =
 535       (1) x scalar(@q);
 536
 537     @q = qw(last next redo return);
 538     @is_last_next_redo_return{@q} = (1) x scalar(@q);
 539
 540     @q = qw(sort map grep);
 541     @is_sort_map_grep{@q} = (1) x scalar(@q);
 542
 543     @q = qw(sort map grep eval);
 544     @is_sort_map_grep_eval{@q} = (1) x scalar(@q);
 545
 546     @q = qw(if unless);
 547     @is_if_unless{@q} = (1) x scalar(@q);
 548
 549     @q = qw(and or err);
 550     @is_and_or{@q} = (1) x scalar(@q);
 551
 552     # Identify certain operators which often occur in chains.
 553     # Note: the minus (-) causes a side effect of padding of the first line in
 554     # something like this (by sub set_logical_padding):
 555     #    Checkbutton => 'Transmission checked',
 556     #   -variable    => \$TRANS
 557     # This usually improves appearance so it seems ok.
 558     @q = qw(&& || and or : ? . + - * /);
 559     @is_chain_operator{@q} = (1) x scalar(@q);
 560
 561     # Operators that the user can request break before or after.
 562     # Note that some are keywords
 563     @all_operators = qw(% + - * / x != == >= <= =~ !~ < > | &
 564       = **= += *= &= <<= &&= -= /= |= >>= ||= //= .= %= ^= x=
 565       . : ? && || and or err xor
 566     );
 567
 568     # We can remove semicolons after blocks preceded by these keywords
 569     @q =
 570       qw(BEGIN END CHECK INIT AUTOLOAD DESTROY UNITCHECK continue if elsif else
 571       unless while until for foreach given when default);
 572     @is_block_without_semicolon{@q} = (1) x scalar(@q);
 573
 574     # We will allow semicolons to be added within these block types
 575     # as well as sub and package blocks.
 576     # NOTES:
 577     # 1. Note that these keywords are omitted:
 578     #     switch case given when default sort map grep
 579     # 2. It is also ok to add for sub and package blocks and a labeled block
 580     # 3. But not okay for other perltidy types including:
 581     #     { } ; G t
 582     # 4. Test files: blktype.t, blktype1.t, semicolon.t
 583     @q =
 584       qw( BEGIN END CHECK INIT AUTOLOAD DESTROY UNITCHECK continue if elsif else
 585       unless do while until eval for foreach );
 586     @ok_to_add_semicolon_for_block_type{@q} = (1) x scalar(@q);
 587
 588     # 'L' is token for opening { at hash key
 589     @q = qw< L { ( [ >;
 590     @is_opening_type{@q} = (1) x scalar(@q);
 591
 592     # 'R' is token for closing } at hash key
 593     @q = qw< R } ) ] >;
 594     @is_closing_type{@q} = (1) x scalar(@q);
 595
 596     @q = qw< { ( [ >;
 597     @is_opening_token{@q} = (1) x scalar(@q);
 598
 599     @q = qw< } ) ] >;
 600     @is_closing_token{@q} = (1) x scalar(@q);
 601
 602     @q = qw< { ( [ ? >;
 603     @is_opening_sequence_token{@q} = (1) x scalar(@q);
 604
 605     @q = qw< } ) ] : >;
 606     @is_closing_sequence_token{@q} = (1) x scalar(@q);
 607
 608     # a hash needed by sub scan_list for labeling containers
 609     @q = qw( k => && || ? : . );
 610     @is_container_label_type{@q} = (1) x scalar(@q);
 611
 612     # Braces -bbht etc must follow these. Note: experimentation with
 613     # including a simple comma shows that it adds little and can lead
 614     # to poor formatting in complex lists.
 615     @q = qw( = => );
 616     @is_equal_or_fat_comma{@q} = (1) x scalar(@q);
 617
 618     @q = qw( => ; h f );
 619     push @q, ',';
 620     @is_counted_type{@q} = (1) x scalar(@q);
 621
 622     # These block types can take ci.  This is used by the -xci option.
 623     # Note that the 'sub' in this list is an anonymous sub.  To be more correct
 624     # we could remove sub and use ASUB pattern to also handle a
 625     # prototype/signature.  But that would slow things down and would probably
 626     # never be useful.
 627     @q = qw( do sub eval sort map grep );
 628     @is_block_with_ci{@q} = (1) x scalar(@q);
 629
 630 }
 631
 632 {    ## begin closure to count instanes
 633
 634     # methods to count instances
 635     my $_count = 0;
 636     sub get_count        { return $_count; }
 637     sub _increment_count { return ++$_count }
 638     sub _decrement_count { return --$_count }
 639 } ## end closure to count instanes
 640
 641 sub new {
 642
 643     my ( $class, @args ) = @_;
 644
 645     # we are given an object with a write_line() method to take lines
 646     my %defaults = (
 647         sink_object        => undef,
 648         diagnostics_object => undef,
 649         logger_object      => undef,
 650         length_function    => sub { return length( $_[0] ) },
 651         is_encoded_data    => "",
 652         fh_tee             => undef,
 653     );
 654     my %args = ( %defaults, @args );
 655
 656     my $length_function    = $args{length_function};
 657     my $is_encoded_data    = $args{is_encoded_data};
 658     my $fh_tee             = $args{fh_tee};
 659     my $logger_object      = $args{logger_object};
 660     my $diagnostics_object = $args{diagnostics_object};
 661
 662     # we create another object with a get_line() and peek_ahead() method
 663     my $sink_object = $args{sink_object};
 664     my $file_writer_object =
 665       Perl::Tidy::FileWriter->new( $sink_object, $rOpts, $logger_object );
 666
 667     # initialize closure variables...
 668     set_logger_object($logger_object);
 669     set_diagnostics_object($diagnostics_object);
 670     initialize_gnu_vars();
 671     initialize_csc_vars();
 672     initialize_scan_list();
 673     initialize_saved_opening_indentation();
 674     initialize_undo_ci();
 675     initialize_process_line_of_CODE();
 676     initialize_grind_batch_of_CODE();
 677     initialize_adjusted_indentation();
 678     initialize_postponed_breakpoint();
 679     initialize_batch_variables();
 680     initialize_forced_breakpoint_vars();
 681     initialize_gnu_batch_vars();
 682     initialize_write_line();
 683
 684     my $vertical_aligner_object = Perl::Tidy::VerticalAligner->new(
 685         rOpts              => $rOpts,
 686         file_writer_object => $file_writer_object,
 687         logger_object      => $logger_object,
 688         diagnostics_object => $diagnostics_object,
 689         length_function    => $length_function
 690     );
 691
 692     write_logfile_entry("\nStarting tokenization pass...\n");
 693
 694     if ( $rOpts->{'entab-leading-whitespace'} ) {
 695         write_logfile_entry(
 696 "Leading whitespace will be entabbed with $rOpts->{'entab-leading-whitespace'} spaces per tab\n"
 697         );
 698     }
 699     elsif ( $rOpts->{'tabs'} ) {
 700         write_logfile_entry("Indentation will be with a tab character\n");
 701     }
 702     else {
 703         write_logfile_entry(
 704             "Indentation will be with $rOpts->{'indent-columns'} spaces\n");
 705     }
 706
 707     # Initialize the $self array reference.
 708     # To add an item, first add a constant index in the BEGIN block above.
 709     my $self = [];
 710
 711     # Basic data structures...
 712     $self->[_rlines_]     = [];    # = ref to array of lines of the file
 713     $self->[_rlines_new_] = [];    # = ref to array of output lines
 714
 715     # 'rLL' = reference to the liner array of all tokens in the file.
 716     # 'LL' stands for 'Linked List'. Using a linked list was a disaster, but
 717     # 'LL' stuck because it is easy to type.
 718     $self->[_rLL_]    = [];
 719     $self->[_Klimit_] = undef;    # = maximum K index for rLL.
 720
 721     # Arrays for quickly traversing the structures
 722     $self->[_K_opening_container_] = {};
 723     $self->[_K_closing_container_] = {};
 724     $self->[_K_opening_ternary_]   = {};
 725     $self->[_K_closing_ternary_]   = {};
 726     $self->[_K_first_seq_item_]    = undef; # K of first token with a sequence #
 727
 728     # Array of phantom semicolons, in case we ever need to undo them
 729     $self->[_rK_phantom_semicolons_] = undef;
 730
 731     # Mostly list characteristics and processing flags
 732     $self->[_rtype_count_by_seqno_]      = {};
 733     $self->[_ris_function_call_paren_]   = {};
 734     $self->[_rlec_count_by_seqno_]       = {};
 735     $self->[_ris_broken_container_]      = {};
 736     $self->[_ris_permanently_broken_]    = {};
 737     $self->[_rhas_list_]                 = {};
 738     $self->[_rhas_broken_list_]          = {};
 739     $self->[_rhas_broken_list_with_lec_] = {};
 740     $self->[_rhas_code_block_]           = {};
 741     $self->[_rhas_broken_code_block_]    = {};
 742     $self->[_rhas_ternary_]              = {};
 743     $self->[_ris_excluded_lp_container_] = {};
 744     $self->[_rwant_reduced_ci_]          = {};
 745     $self->[_rno_xci_by_seqno_]          = {};
 746     $self->[_ris_bli_container_]         = {};
 747     $self->[_rparent_of_seqno_]          = {};
 748     $self->[_rchildren_of_seqno_]        = {};
 749     $self->[_ris_list_by_seqno_]         = {};
 750
 751     $self->[_rbreak_container_] = {};                 # prevent one-line blocks
 752     $self->[_rshort_nested_]    = {};                 # blocks not forced open
 753     $self->[_length_function_]  = $length_function;
 754     $self->[_is_encoded_data_]  = $is_encoded_data;
 755
 756     # Some objects...
 757     $self->[_fh_tee_]                  = $fh_tee;
 758     $self->[_sink_object_]             = $sink_object;
 759     $self->[_file_writer_object_]      = $file_writer_object;
 760     $self->[_vertical_aligner_object_] = $vertical_aligner_object;
 761     $self->[_logger_object_]           = $logger_object;
 762
 763     # Reference to the batch being processed
 764     $self->[_this_batch_] = [];
 765
 766     # Memory of processed text...
 767     $self->[_last_last_line_leading_level_]    = 0;
 768     $self->[_last_line_leading_level_]         = 0;
 769     $self->[_last_line_leading_type_]          = '#';
 770     $self->[_last_output_short_opening_token_] = 0;
 771     $self->[_added_semicolon_count_]           = 0;
 772     $self->[_first_added_semicolon_at_]        = 0;
 773     $self->[_last_added_semicolon_at_]         = 0;
 774     $self->[_deleted_semicolon_count_]         = 0;
 775     $self->[_first_deleted_semicolon_at_]      = 0;
 776     $self->[_last_deleted_semicolon_at_]       = 0;
 777     $self->[_embedded_tab_count_]              = 0;
 778     $self->[_first_embedded_tab_at_]           = 0;
 779     $self->[_last_embedded_tab_at_]            = 0;
 780     $self->[_first_tabbing_disagreement_]      = 0;
 781     $self->[_last_tabbing_disagreement_]       = 0;
 782     $self->[_tabbing_disagreement_count_]      = 0;
 783     $self->[_in_tabbing_disagreement_]         = 0;
 784     $self->[_saw_VERSION_in_this_file_]        = !$rOpts->{'pass-version-line'};
 785     $self->[_saw_END_or_DATA_]                 = 0;
 786
 787     # Hashes related to container welding...
 788     $self->[_radjusted_levels_] = [];
 789
 790     # Weld data structures
 791     $self->[_rK_weld_left_]         = {};
 792     $self->[_rK_weld_right_]        = {};
 793     $self->[_rweld_len_right_at_K_] = {};
 794
 795     # -xci stuff
 796     $self->[_rseqno_controlling_my_ci_] = {};
 797     $self->[_ris_seqno_controlling_ci_] = {};
 798
 799     $self->[_rspecial_side_comment_type_] = {};
 800     $self->[_maximum_level_]              = 0;
 801
 802     $self->[_rKrange_code_without_comments_] = [];
 803     $self->[_rbreak_before_Kfirst_]          = {};
 804     $self->[_rbreak_after_Klast_]            = {};
 805     $self->[_rwant_container_open_]          = {};
 806     $self->[_converged_]                     = 0;
 807
 808     # qw stuff
 809     $self->[_rstarting_multiline_qw_seqno_by_K_] = {};
 810     $self->[_rending_multiline_qw_seqno_by_K_]   = {};
 811     $self->[_rKrange_multiline_qw_by_seqno_]     = {};
 812     $self->[_rmultiline_qw_has_extra_level_]     = {};
 813
 814     $self->[_rbreak_before_container_by_seqno_] = {};
 815     $self->[_ris_essential_old_breakpoint_]     = {};
 816     $self->[_roverride_cab3_]                   = {};
 817     $self->[_ris_assigned_structure_]           = {};
 818
 819     # This flag will be updated later by a call to get_save_logfile()
 820     $self->[_save_logfile_] = defined($logger_object);
 821
 822     bless $self, $class;
 823
 824     # Safety check..this is not a class yet
 825     if ( _increment_count() > 1 ) {
 826         confess
 827 "Attempt to create more than 1 object in $class, which is not a true class yet\n";
 828     }
 829     return $self;
 830 }
 831
 832 ######################################
 833 # CODE SECTION 2: Some Basic Utilities
 834 ######################################
 835
 836 {    ## begin closure for logger routines
 837     my $logger_object;
 838
 839     # Called once per file to initialize the logger object
 840     sub set_logger_object {
 841         $logger_object = shift;
 842         return;
 843     }
 844
 845     sub get_logger_object {
 846         return $logger_object;
 847     }
 848
 849     sub get_input_stream_name {
 850         my $input_stream_name = "";
 851         if ($logger_object) {
 852             $input_stream_name = $logger_object->get_input_stream_name();
 853         }
 854         return $input_stream_name;
 855     }
 856
 857     # interface to Perl::Tidy::Logger routines
 858     sub warning {
 859         my ($msg) = @_;
 860         if ($logger_object) { $logger_object->warning($msg); }
 861         return;
 862     }
 863
 864     sub complain {
 865         my ($msg) = @_;
 866         if ($logger_object) {
 867             $logger_object->complain($msg);
 868         }
 869         return;
 870     }
 871
 872     sub write_logfile_entry {
 873         my @msg = @_;
 874         if ($logger_object) {
 875             $logger_object->write_logfile_entry(@msg);
 876         }
 877         return;
 878     }
 879
 880     sub report_definite_bug {
 881         if ($logger_object) {
 882             $logger_object->report_definite_bug();
 883         }
 884         return;
 885     }
 886
 887     sub get_saw_brace_error {
 888         if ($logger_object) {
 889             return $logger_object->get_saw_brace_error();
 890         }
 891         return;
 892     }
 893
 894     sub we_are_at_the_last_line {
 895         if ($logger_object) {
 896             $logger_object->we_are_at_the_last_line();
 897         }
 898         return;
 899     }
 900
 901 } ## end closure for logger routines
 902
 903 {    ## begin closure for diagnostics routines
 904     my $diagnostics_object;
 905
 906     # Called once per file to initialize the diagnostics object
 907     sub set_diagnostics_object {
 908         $diagnostics_object = shift;
 909         return;
 910     }
 911
 912     sub write_diagnostics {
 913         my ($msg) = @_;
 914         if ($diagnostics_object) {
 915             $diagnostics_object->write_diagnostics($msg);
 916         }
 917         return;
 918     }
 919 } ## end closure for diagnostics routines
 920
 921 sub get_convergence_check {
 922     my ($self) = @_;
 923     return $self->[_converged_];
 924 }
 925
 926 sub get_added_semicolon_count {
 927     my $self = shift;
 928     return $self->[_added_semicolon_count_];
 929 }
 930
 931 sub get_output_line_number {
 932     my ($self) = @_;
 933     my $vao = $self->[_vertical_aligner_object_];
 934     return $vao->get_output_line_number();
 935 }
 936
 937 sub check_token_array {
 938     my $self = shift;
 939
 940     # Check for errors in the array of tokens. This is only called now
 941     # when the DEVEL_MODE flag is set, so this Fault will only occur
 942     # during code development.
 943     my $rLL = $self->[_rLL_];
 944     for ( my $KK = 0 ; $KK < @{$rLL} ; $KK++ ) {
 945         my $nvars = @{ $rLL->[$KK] };
 946         if ( $nvars != _NVARS ) {
 947             my $NVARS = _NVARS;
 948             my $type  = $rLL->[$KK]->[_TYPE_];
 949             $type = '*' unless defined($type);
 950
 951             # The number of variables per token node is _NVARS and was set when
 952             # the array indexes were generated. So if the number of variables
 953             # is different we have done something wrong, like not store all of
 954             # them in sub 'write_line' when they were received from the
 955             # tokenizer.
 956             Fault(
 957 "number of vars for node $KK, type '$type', is $nvars but should be $NVARS"
 958             );
 959         }
 960         foreach my $var ( _TOKEN_, _TYPE_ ) {
 961             if ( !defined( $rLL->[$KK]->[$var] ) ) {
 962                 my $iline = $rLL->[$KK]->[_LINE_INDEX_];
 963
 964                 # This is a simple check that each token has some basic
 965                 # variables.  In other words, that there are no holes in the
 966                 # array of tokens.  Sub 'write_line' pushes tokens into the
 967                 # $rLL array, so this should guarantee no gaps.
 968                 Fault("Undefined variable $var for K=$KK, line=$iline\n");
 969             }
 970         }
 971     }
 972     return;
 973 }
 974
 975 sub want_blank_line {
 976     my $self = shift;
 977     $self->flush();
 978     my $file_writer_object = $self->[_file_writer_object_];
 979     $file_writer_object->want_blank_line();
 980     return;
 981 }
 982
 983 sub write_unindented_line {
 984     my ( $self, $line ) = @_;
 985     $self->flush();
 986     my $file_writer_object = $self->[_file_writer_object_];
 987     $file_writer_object->write_line($line);
 988     return;
 989 }
 990
 991 sub consecutive_nonblank_lines {
 992     my ($self)             = @_;
 993     my $file_writer_object = $self->[_file_writer_object_];
 994     my $vao                = $self->[_vertical_aligner_object_];
 995     return $file_writer_object->get_consecutive_nonblank_lines() +
 996       $vao->get_cached_line_count();
 997 }
 998
 999 sub trim {
1000
1001     # trim leading and trailing whitespace from a string
1002     my $str = shift;
1003     $str =~ s/\s+$//;
1004     $str =~ s/^\s+//;
1005     return $str;
1006 }
1007
1008 sub max {
1009     my (@vals) = @_;
1010     my $max = shift @vals;
1011     for (@vals) { $max = $_ > $max ? $_ : $max }
1012     return $max;
1013 }
1014
1015 sub min {
1016     my (@vals) = @_;
1017     my $min = shift @vals;
1018     for (@vals) { $min = $_ < $min ? $_ : $min }
1019     return $min;
1020 }
1021
1022 sub split_words {
1023
1024     # given a string containing words separated by whitespace,
1025     # return the list of words
1026     my ($str) = @_;
1027     return unless $str;
1028     $str =~ s/\s+$//;
1029     $str =~ s/^\s+//;
1030     return split( /\s+/, $str );
1031 }
1032
1033 ###########################################
1034 # CODE SECTION 3: Check and process options
1035 ###########################################
1036
1037 sub check_options {
1038
1039     # This routine is called to check the user-supplied run parameters
1040     # and to configure the control hashes to them.
1041     $rOpts = shift;
1042
1043     initialize_whitespace_hashes();
1044     initialize_bond_strength_hashes();
1045
1046     # Make needed regex patterns for matching text.
1047     # NOTE: sub_matching_patterns must be made first because later patterns use
1048     # them; see RT #133130.
1049     make_sub_matching_pattern();
1050     make_static_block_comment_pattern();
1051     make_static_side_comment_pattern();
1052     make_closing_side_comment_prefix();
1053     make_closing_side_comment_list_pattern();
1054     $format_skipping_pattern_begin =
1055       make_format_skipping_pattern( 'format-skipping-begin', '#<<<' );
1056     $format_skipping_pattern_end =
1057       make_format_skipping_pattern( 'format-skipping-end', '#>>>' );
1058     make_non_indenting_brace_pattern();
1059
1060     # If closing side comments ARE selected, then we can safely
1061     # delete old closing side comments unless closing side comment
1062     # warnings are requested.  This is a good idea because it will
1063     # eliminate any old csc's which fall below the line count threshold.
1064     # We cannot do this if warnings are turned on, though, because we
1065     # might delete some text which has been added.  So that must
1066     # be handled when comments are created.  And we cannot do this
1067     # with -io because -csc will be skipped altogether.
1068     if ( $rOpts->{'closing-side-comments'} ) {
1069         if (   !$rOpts->{'closing-side-comment-warnings'}
1070             && !$rOpts->{'indent-only'} )
1071         {
1072             $rOpts->{'delete-closing-side-comments'} = 1;
1073         }
1074     }
1075
1076     # If closing side comments ARE NOT selected, but warnings ARE
1077     # selected and we ARE DELETING csc's, then we will pretend to be
1078     # adding with a huge interval.  This will force the comments to be
1079     # generated for comparison with the old comments, but not added.
1080     elsif ( $rOpts->{'closing-side-comment-warnings'} ) {
1081         if ( $rOpts->{'delete-closing-side-comments'} ) {
1082             $rOpts->{'delete-closing-side-comments'}  = 0;
1083             $rOpts->{'closing-side-comments'}         = 1;
1084             $rOpts->{'closing-side-comment-interval'} = 100000000;
1085         }
1086     }
1087
1088     make_bli_pattern();
1089     make_block_brace_vertical_tightness_pattern();
1090     make_blank_line_pattern();
1091     make_keyword_group_list_pattern();
1092
1093     # Make initial list of desired one line block types
1094     # They will be modified by 'prepare_cuddled_block_types'
1095     %want_one_line_block = %is_sort_map_grep_eval;
1096
1097     # Default is to exclude one-line block types from -bl formatting
1098     # FIXME: Eventually a flag should be added to modify this.
1099     %is_braces_left_exclude_block = %is_sort_map_grep_eval;
1100
1101     prepare_cuddled_block_types();
1102     if ( $rOpts->{'dump-cuddled-block-list'} ) {
1103         dump_cuddled_block_list(*STDOUT);
1104         Exit(0);
1105     }
1106
1107     if ( $rOpts->{'line-up-parentheses'} ) {
1108
1109         if (   $rOpts->{'indent-only'}
1110             || !$rOpts->{'add-newlines'}
1111             || !$rOpts->{'delete-old-newlines'} )
1112         {
1113             Warn(<<EOM);
1114 -----------------------------------------------------------------------
1115 Conflict: -lp  conflicts with -io, -fnl, -nanl, or -ndnl; ignoring -lp
1116
1117 The -lp indentation logic requires that perltidy be able to coordinate
1118 arbitrarily large numbers of line breakpoints.  This isn't possible
1119 with these flags.
1120 -----------------------------------------------------------------------
1121 EOM
1122             $rOpts->{'line-up-parentheses'} = 0;
1123         }
1124
1125         if ( $rOpts->{'whitespace-cycle'} ) {
1126             Warn(<<EOM);
1127 Conflict: -wc cannot currently be used with the -lp option; ignoring -wc
1128 EOM
1129             $rOpts->{'whitespace-cycle'} = 0;
1130         }
1131     }
1132
1133     # At present, tabs are not compatible with the line-up-parentheses style
1134     # (it would be possible to entab the total leading whitespace
1135     # just prior to writing the line, if desired).
1136     if ( $rOpts->{'line-up-parentheses'} && $rOpts->{'tabs'} ) {
1137         Warn(<<EOM);
1138 Conflict: -t (tabs) cannot be used with the -lp  option; ignoring -t; see -et.
1139 EOM
1140         $rOpts->{'tabs'} = 0;
1141     }
1142
1143     # Likewise, tabs are not compatible with outdenting..
1144     if ( $rOpts->{'outdent-keywords'} && $rOpts->{'tabs'} ) {
1145         Warn(<<EOM);
1146 Conflict: -t (tabs) cannot be used with the -okw options; ignoring -t; see -et.
1147 EOM
1148         $rOpts->{'tabs'} = 0;
1149     }
1150
1151     if ( $rOpts->{'outdent-labels'} && $rOpts->{'tabs'} ) {
1152         Warn(<<EOM);
1153 Conflict: -t (tabs) cannot be used with the -ola  option; ignoring -t; see -et.
1154 EOM
1155         $rOpts->{'tabs'} = 0;
1156     }
1157
1158     if ( !$rOpts->{'space-for-semicolon'} ) {
1159         $want_left_space{'f'} = -1;
1160     }
1161
1162     if ( $rOpts->{'space-terminal-semicolon'} ) {
1163         $want_left_space{';'} = 1;
1164     }
1165
1166     # We should put an upper bound on any -sil=n value. Otherwise enormous
1167     # files could be created by mistake.
1168     for ( $rOpts->{'starting-indentation-level'} ) {
1169         if ( $_ && $_ > 100 ) {
1170             Warn(<<EOM);
1171 The value --starting-indentation-level=$_ is very large; a mistake? resetting to 0;
1172 EOM
1173             $_ = 0;
1174         }
1175     }
1176
1177     # implement outdenting preferences for keywords
1178     %outdent_keyword = ();
1179     my @okw = split_words( $rOpts->{'outdent-keyword-list'} );
1180     unless (@okw) {
1181         @okw = qw(next last redo goto return);    # defaults
1182     }
1183
1184     # FUTURE: if not a keyword, assume that it is an identifier
1185     foreach (@okw) {
1186         if ( $Perl::Tidy::Tokenizer::is_keyword{$_} ) {
1187             $outdent_keyword{$_} = 1;
1188         }
1189         else {
1190             Warn("ignoring '$_' in -okwl list; not a perl keyword");
1191         }
1192     }
1193
1194     # setup hash for -kpit option
1195     %keyword_paren_inner_tightness = ();
1196     my $kpit_value = $rOpts->{'keyword-paren-inner-tightness'};
1197     if ( defined($kpit_value) && $kpit_value != 1 ) {
1198         my @kpit =
1199           split_words( $rOpts->{'keyword-paren-inner-tightness-list'} );
1200         unless (@kpit) {
1201             @kpit = qw(if elsif unless while until for foreach);    # defaults
1202         }
1203
1204         # we will allow keywords and user-defined identifiers
1205         foreach (@kpit) {
1206             $keyword_paren_inner_tightness{$_} = $kpit_value;
1207         }
1208     }
1209
1210     # implement user whitespace preferences
1211     if ( my @q = split_words( $rOpts->{'want-left-space'} ) ) {
1212         @want_left_space{@q} = (1) x scalar(@q);
1213     }
1214
1215     if ( my @q = split_words( $rOpts->{'want-right-space'} ) ) {
1216         @want_right_space{@q} = (1) x scalar(@q);
1217     }
1218
1219     if ( my @q = split_words( $rOpts->{'nowant-left-space'} ) ) {
1220         @want_left_space{@q} = (-1) x scalar(@q);
1221     }
1222
1223     if ( my @q = split_words( $rOpts->{'nowant-right-space'} ) ) {
1224         @want_right_space{@q} = (-1) x scalar(@q);
1225     }
1226     if ( $rOpts->{'dump-want-left-space'} ) {
1227         dump_want_left_space(*STDOUT);
1228         Exit(0);
1229     }
1230
1231     if ( $rOpts->{'dump-want-right-space'} ) {
1232         dump_want_right_space(*STDOUT);
1233         Exit(0);
1234     }
1235
1236     # default keywords for which space is introduced before an opening paren
1237     # (at present, including them messes up vertical alignment)
1238     my @sak = qw(my local our and or xor err eq ne if else elsif until
1239       unless while for foreach return switch case given when catch);
1240     %space_after_keyword = map { $_ => 1 } @sak;
1241
1242     # first remove any or all of these if desired
1243     if ( my @q = split_words( $rOpts->{'nospace-after-keyword'} ) ) {
1244
1245         # -nsak='*' selects all the above keywords
1246         if ( @q == 1 && $q[0] eq '*' ) { @q = keys(%space_after_keyword) }
1247         @space_after_keyword{@q} = (0) x scalar(@q);
1248     }
1249
1250     # then allow user to add to these defaults
1251     if ( my @q = split_words( $rOpts->{'space-after-keyword'} ) ) {
1252         @space_after_keyword{@q} = (1) x scalar(@q);
1253     }
1254
1255     # implement user break preferences
1256     my $break_after = sub {
1257         my @toks = @_;
1258         foreach my $tok (@toks) {
1259             if ( $tok eq '?' ) { $tok = ':' }    # patch to coordinate ?/:
1260             my $lbs = $left_bond_strength{$tok};
1261             my $rbs = $right_bond_strength{$tok};
1262             if ( defined($lbs) && defined($rbs) && $lbs < $rbs ) {
1263                 ( $right_bond_strength{$tok}, $left_bond_strength{$tok} ) =
1264                   ( $lbs, $rbs );
1265             }
1266         }
1267     };
1268
1269     my $break_before = sub {
1270         my @toks = @_;
1271         foreach my $tok (@toks) {
1272             my $lbs = $left_bond_strength{$tok};
1273             my $rbs = $right_bond_strength{$tok};
1274             if ( defined($lbs) && defined($rbs) && $rbs < $lbs ) {
1275                 ( $right_bond_strength{$tok}, $left_bond_strength{$tok} ) =
1276                   ( $lbs, $rbs );
1277             }
1278         }
1279     };
1280
1281     $break_after->(@all_operators) if ( $rOpts->{'break-after-all-operators'} );
1282     $break_before->(@all_operators)
1283       if ( $rOpts->{'break-before-all-operators'} );
1284
1285     $break_after->( split_words( $rOpts->{'want-break-after'} ) );
1286     $break_before->( split_words( $rOpts->{'want-break-before'} ) );
1287
1288     # make note if breaks are before certain key types
1289     %want_break_before = ();
1290     foreach my $tok ( @all_operators, ',' ) {
1291         $want_break_before{$tok} =
1292           $left_bond_strength{$tok} < $right_bond_strength{$tok};
1293     }
1294
1295     # Coordinate ?/: breaks, which must be similar
1296     if ( !$want_break_before{':'} ) {
1297         $want_break_before{'?'}   = $want_break_before{':'};
1298         $right_bond_strength{'?'} = $right_bond_strength{':'} + 0.01;
1299         $left_bond_strength{'?'}  = NO_BREAK;
1300     }
1301
1302     # Only make a hash entry for the next parameters if values are defined.
1303     # That allows a quick check to be made later.
1304     %break_before_container_types = ();
1305     for ( $rOpts->{'break-before-hash-brace'} ) {
1306         $break_before_container_types{'{'} = $_ if $_ && $_ > 0;
1307     }
1308     for ( $rOpts->{'break-before-square-bracket'} ) {
1309         $break_before_container_types{'['} = $_ if $_ && $_ > 0;
1310     }
1311     for ( $rOpts->{'break-before-paren'} ) {
1312         $break_before_container_types{'('} = $_ if $_ && $_ > 0;
1313     }
1314
1315     %container_indentation_options = ();
1316     foreach my $pair (
1317         [ 'break-before-hash-brace-and-indent',     '{' ],
1318         [ 'break-before-square-bracket-and-indent', '[' ],
1319         [ 'break-before-paren-and-indent',          '(' ],
1320       )
1321     {
1322         my ( $key, $tok ) = @{$pair};
1323         my $opt = $rOpts->{$key};
1324         if ( defined($opt) && $opt > 0 && $break_before_container_types{$tok} )
1325         {
1326
1327             # (1) -lp is not compatable with opt=2, silently set to opt=0
1328             # (2) opt=0 and 2 give same result if -i=-ci; but opt=0 is faster
1329             if ( $opt == 2 ) {
1330                 if (   $rOpts->{'line-up-parentheses'}
1331                     || $rOpts->{'indent-columns'} ==
1332                     $rOpts->{'continuation-indentation'} )
1333                 {
1334                     $opt = 0;
1335                 }
1336             }
1337             $container_indentation_options{$tok} = $opt;
1338         }
1339     }
1340
1341     # Define here tokens which may follow the closing brace of a do statement
1342     # on the same line, as in:
1343     #   } while ( $something);
1344     my @dof = qw(until while unless if ; : );
1345     push @dof, ',';
1346     @is_do_follower{@dof} = (1) x scalar(@dof);
1347
1348     # What tokens may follow the closing brace of an if or elsif block?
1349     # Not used. Previously used for cuddled else, but no longer needed.
1350     %is_if_brace_follower = ();
1351
1352     # nothing can follow the closing curly of an else { } block:
1353     %is_else_brace_follower = ();
1354
1355     # what can follow a multi-line anonymous sub definition closing curly:
1356     my @asf = qw# ; : => or and  && || ~~ !~~ ) #;
1357     push @asf, ',';
1358     @is_anon_sub_brace_follower{@asf} = (1) x scalar(@asf);
1359
1360     # what can follow a one-line anonymous sub closing curly:
1361     # one-line anonymous subs also have ']' here...
1362     # see tk3.t and PP.pm
1363     my @asf1 = qw#  ; : => or and  && || ) ] ~~ !~~ #;
1364     push @asf1, ',';
1365     @is_anon_sub_1_brace_follower{@asf1} = (1) x scalar(@asf1);
1366
1367     # What can follow a closing curly of a block
1368     # which is not an if/elsif/else/do/sort/map/grep/eval/sub
1369     # Testfiles: 'Toolbar.pm', 'Menubar.pm', bless.t, '3rules.pl'
1370     my @obf = qw#  ; : => or and  && || ) #;
1371     push @obf, ',';
1372     @is_other_brace_follower{@obf} = (1) x scalar(@obf);
1373
1374     $right_bond_strength{'{'} = WEAK;
1375     $left_bond_strength{'{'}  = VERY_STRONG;
1376
1377     # make -l=0  equal to -l=infinite
1378     if ( !$rOpts->{'maximum-line-length'} ) {
1379         $rOpts->{'maximum-line-length'} = 1000000;
1380     }
1381
1382     # make -lbl=0  equal to -lbl=infinite
1383     if ( !$rOpts->{'long-block-line-count'} ) {
1384         $rOpts->{'long-block-line-count'} = 1000000;
1385     }
1386
1387     my $ole = $rOpts->{'output-line-ending'};
1388     if ($ole) {
1389         my %endings = (
1390             dos  => "\015\012",
1391             win  => "\015\012",
1392             mac  => "\015",
1393             unix => "\012",
1394         );
1395
1396         # Patch for RT #99514, a memoization issue.
1397         # Normally, the user enters one of 'dos', 'win', etc, and we change the
1398         # value in the options parameter to be the corresponding line ending
1399         # character.  But, if we are using memoization, on later passes through
1400         # here the option parameter will already have the desired ending
1401         # character rather than the keyword 'dos', 'win', etc.  So
1402         # we must check to see if conversion has already been done and, if so,
1403         # bypass the conversion step.
1404         my %endings_inverted = (
1405             "\015\012" => 'dos',
1406             "\015\012" => 'win',
1407             "\015"     => 'mac',
1408             "\012"     => 'unix',
1409         );
1410
1411         if ( defined( $endings_inverted{$ole} ) ) {
1412
1413             # we already have valid line ending, nothing more to do
1414         }
1415         else {
1416             $ole = lc $ole;
1417             unless ( $rOpts->{'output-line-ending'} = $endings{$ole} ) {
1418                 my $str = join " ", keys %endings;
1419                 Die(<<EOM);
1420 Unrecognized line ending '$ole'; expecting one of: $str
1421 EOM
1422             }
1423             if ( $rOpts->{'preserve-line-endings'} ) {
1424                 Warn("Ignoring -ple; conflicts with -ole\n");
1425                 $rOpts->{'preserve-line-endings'} = undef;
1426             }
1427         }
1428     }
1429
1430     # hashes used to simplify setting whitespace
1431     %tightness = (
1432         '{' => $rOpts->{'brace-tightness'},
1433         '}' => $rOpts->{'brace-tightness'},
1434         '(' => $rOpts->{'paren-tightness'},
1435         ')' => $rOpts->{'paren-tightness'},
1436         '[' => $rOpts->{'square-bracket-tightness'},
1437         ']' => $rOpts->{'square-bracket-tightness'},
1438     );
1439     %matching_token = (
1440         '{' => '}',
1441         '(' => ')',
1442         '[' => ']',
1443         '?' => ':',
1444     );
1445
1446     # note any requested old line breaks to keep
1447     %keep_break_before_type = ();
1448     %keep_break_after_type  = ();
1449     if ( !$rOpts->{'ignore-old-breakpoints'} ) {
1450
1451         # FIXME: could check for valid types here.
1452         # Invalid types are harmless but probably not intended.
1453         my @types;
1454         @types = ( split_words( $rOpts->{'keep-old-breakpoints-before'} ) );
1455         @keep_break_before_type{@types} = (1) x scalar(@types);
1456         @types = ( split_words( $rOpts->{'keep-old-breakpoints-after'} ) );
1457         @keep_break_after_type{@types} = (1) x scalar(@types);
1458     }
1459     else {
1460         if ( $rOpts->{'break-at-old-method-breakpoints'} ) {
1461             Warn("Conflicting parameters: -iob and -bom; -bom will be ignored\n"
1462             );
1463             $rOpts->{'break-at-old-method-breakpoints'} = 0;
1464         }
1465         if ( $rOpts->{'break-at-old-comma-breakpoints'} ) {
1466             Warn("Conflicting parameters: -iob and -boc; -boc will be ignored\n"
1467             );
1468             $rOpts->{'break-at-old-comma-breakpoints'} = 0;
1469         }
1470         if ( $rOpts->{'break-at-old-semicolon-breakpoints'} ) {
1471             Warn("Conflicting parameters: -iob and -bos; -bos will be ignored\n"
1472             );
1473             $rOpts->{'break-at-old-semicolon-breakpoints'} = 0;
1474         }
1475         if ( $rOpts->{'keep-old-breakpoints-before'} ) {
1476             Warn("Conflicting parameters: -iob and -kbb; -kbb will be ignored\n"
1477             );
1478             $rOpts->{'keep-old-breakpoints-before'} = "";
1479         }
1480         if ( $rOpts->{'keep-old-breakpoints-after'} ) {
1481             Warn("Conflicting parameters: -iob and -kba; -kba will be ignored\n"
1482             );
1483             $rOpts->{'keep-old-breakpoints-after'} = "";
1484         }
1485
1486         # Note: These additional parameters are made inactive by -iob.
1487         # They are silently turned off here because they are on by default.
1488         # We would generate unexpected warnings if we issued a warning.
1489         $rOpts->{'break-at-old-keyword-breakpoints'}   = 0;
1490         $rOpts->{'break-at-old-logical-breakpoints'}   = 0;
1491         $rOpts->{'break-at-old-ternary-breakpoints'}   = 0;
1492         $rOpts->{'break-at-old-attribute-breakpoints'} = 0;
1493     }
1494
1495     #############################################################
1496     # Make global vars for frequently used options for efficiency
1497     #############################################################
1498
1499     $rOpts_closing_side_comment_maximum_text =
1500       $rOpts->{'closing-side-comment-maximum-text'};
1501     $rOpts_continuation_indentation = $rOpts->{'continuation-indentation'};
1502     $rOpts_indent_columns           = $rOpts->{'indent-columns'};
1503     $rOpts_line_up_parentheses      = $rOpts->{'line-up-parentheses'};
1504     $rOpts_maximum_line_length      = $rOpts->{'maximum-line-length'};
1505     $rOpts_variable_maximum_line_length =
1506       $rOpts->{'variable-maximum-line-length'};
1507     $rOpts_block_brace_tightness = $rOpts->{'block-brace-tightness'};
1508     $rOpts_block_brace_vertical_tightness =
1509       $rOpts->{'block-brace-vertical-tightness'};
1510     $rOpts_stack_closing_block_brace = $rOpts->{'stack-closing-block-brace'};
1511     $rOpts_maximum_consecutive_blank_lines =
1512       $rOpts->{'maximum-consecutive-blank-lines'};
1513     $rOpts_recombine    = $rOpts->{'recombine'};
1514     $rOpts_add_newlines = $rOpts->{'add-newlines'};
1515     $rOpts_break_at_old_comma_breakpoints =
1516       $rOpts->{'break-at-old-comma-breakpoints'};
1517     $rOpts_ignore_old_breakpoints    = $rOpts->{'ignore-old-breakpoints'};
1518     $rOpts_keep_interior_semicolons  = $rOpts->{'keep-interior-semicolons'};
1519     $rOpts_comma_arrow_breakpoints   = $rOpts->{'comma-arrow-breakpoints'};
1520     $rOpts_maximum_fields_per_table  = $rOpts->{'maximum-fields-per-table'};
1521     $rOpts_one_line_block_semicolons = $rOpts->{'one-line-block-semicolons'};
1522     $rOpts_break_at_old_semicolon_breakpoints =
1523       $rOpts->{'break-at-old-semicolon-breakpoints'};
1524
1525     $rOpts_tee_side_comments    = $rOpts->{'tee-side-comments'};
1526     $rOpts_tee_block_comments   = $rOpts->{'tee-block-comments'};
1527     $rOpts_tee_pod              = $rOpts->{'tee-pod'};
1528     $rOpts_delete_side_comments = $rOpts->{'delete-side-comments'};
1529     $rOpts_delete_closing_side_comments =
1530       $rOpts->{'delete-closing-side-comments'};
1531     $rOpts_format_skipping       = $rOpts->{'format-skipping'};
1532     $rOpts_indent_only           = $rOpts->{'indent-only'};
1533     $rOpts_static_block_comments = $rOpts->{'static-block-comments'};
1534
1535     $rOpts_add_whitespace        = $rOpts->{'add-whitespace'};
1536     $rOpts_delete_old_whitespace = $rOpts->{'delete-old-whitespace'};
1537     $rOpts_freeze_whitespace     = $rOpts->{'freeze-whitespace'};
1538
1539     $rOpts_function_paren_vertical_alignment =
1540       $rOpts->{'function-paren-vertical-alignment'};
1541     $rOpts_ignore_side_comment_lengths =
1542       $rOpts->{'ignore-side-comment-lengths'};
1543
1544     $rOpts_break_at_old_attribute_breakpoints =
1545       $rOpts->{'break-at-old-attribute-breakpoints'};
1546     $rOpts_break_at_old_keyword_breakpoints =
1547       $rOpts->{'break-at-old-keyword-breakpoints'};
1548     $rOpts_break_at_old_logical_breakpoints =
1549       $rOpts->{'break-at-old-logical-breakpoints'};
1550     $rOpts_break_at_old_ternary_breakpoints =
1551       $rOpts->{'break-at-old-ternary-breakpoints'};
1552     $rOpts_short_concatenation_item_length =
1553       $rOpts->{'short-concatenation-item-length'};
1554     $rOpts_closing_side_comment_else_flag =
1555       $rOpts->{'closing-side-comment-else-flag'};
1556     $rOpts_fuzzy_line_length = $rOpts->{'fuzzy-line-length'};
1557
1558     # Note that both opening and closing tokens can access the opening
1559     # and closing flags of their container types.
1560     %opening_vertical_tightness = (
1561         '(' => $rOpts->{'paren-vertical-tightness'},
1562         '{' => $rOpts->{'brace-vertical-tightness'},
1563         '[' => $rOpts->{'square-bracket-vertical-tightness'},
1564         ')' => $rOpts->{'paren-vertical-tightness'},
1565         '}' => $rOpts->{'brace-vertical-tightness'},
1566         ']' => $rOpts->{'square-bracket-vertical-tightness'},
1567     );
1568
1569     %closing_vertical_tightness = (
1570         '(' => $rOpts->{'paren-vertical-tightness-closing'},
1571         '{' => $rOpts->{'brace-vertical-tightness-closing'},
1572         '[' => $rOpts->{'square-bracket-vertical-tightness-closing'},
1573         ')' => $rOpts->{'paren-vertical-tightness-closing'},
1574         '}' => $rOpts->{'brace-vertical-tightness-closing'},
1575         ']' => $rOpts->{'square-bracket-vertical-tightness-closing'},
1576     );
1577
1578     # assume flag for '>' same as ')' for closing qw quotes
1579     %closing_token_indentation = (
1580         ')' => $rOpts->{'closing-paren-indentation'},
1581         '}' => $rOpts->{'closing-brace-indentation'},
1582         ']' => $rOpts->{'closing-square-bracket-indentation'},
1583         '>' => $rOpts->{'closing-paren-indentation'},
1584     );
1585
1586     # flag indicating if any closing tokens are indented
1587     $some_closing_token_indentation =
1588          $rOpts->{'closing-paren-indentation'}
1589       || $rOpts->{'closing-brace-indentation'}
1590       || $rOpts->{'closing-square-bracket-indentation'}
1591       || $rOpts->{'indent-closing-brace'};
1592
1593     %opening_token_right = (
1594         '(' => $rOpts->{'opening-paren-right'},
1595         '{' => $rOpts->{'opening-hash-brace-right'},
1596         '[' => $rOpts->{'opening-square-bracket-right'},
1597     );
1598
1599     %stack_opening_token = (
1600         '(' => $rOpts->{'stack-opening-paren'},
1601         '{' => $rOpts->{'stack-opening-hash-brace'},
1602         '[' => $rOpts->{'stack-opening-square-bracket'},
1603     );
1604
1605     %stack_closing_token = (
1606         ')' => $rOpts->{'stack-closing-paren'},
1607         '}' => $rOpts->{'stack-closing-hash-brace'},
1608         ']' => $rOpts->{'stack-closing-square-bracket'},
1609     );
1610
1611     # Create a table of maximum line length vs level for later efficient use.
1612     # We will make the tables very long to be sure it will not be exceeded.
1613     # But we have to choose a fixed length.  A check will be made at the start
1614     # of sub 'finish_formatting' to be sure it is not exceeded.  Note, some of
1615     # my standard test problems have indentation levels of about 150, so this
1616     # should be fairly large.  If the choice of a maximum level ever becomes
1617     # an issue then these table values could be returned in a sub with a simple
1618     # memoization scheme.
1619
1620     # Also create a table of the maximum spaces available for text due to the
1621     # level only.  If a line has continuation indentation, then that space must
1622     # be subtracted from the table value.  This table is used for preliminary
1623     # estimates in welding, extended_ci, BBX, and marking short blocks.
1624     my $level_max = 1000;
1625
1626     # The basic scheme:
1627     foreach my $level ( 0 .. $level_max ) {
1628         my $indent = $level * $rOpts_indent_columns;
1629         $maximum_line_length_at_level[$level] = $rOpts_maximum_line_length;
1630         $maximum_text_length_at_level[$level] =
1631           $rOpts_maximum_line_length - $indent;
1632     }
1633
1634     # Correct the maximum_text_length table if the -wc=n flag is used
1635     $rOpts_whitespace_cycle = $rOpts->{'whitespace-cycle'};
1636     if ($rOpts_whitespace_cycle) {
1637         if ( $rOpts_whitespace_cycle > 0 ) {
1638             foreach my $level ( 0 .. $level_max ) {
1639                 my $level_mod = $level % $rOpts_whitespace_cycle;
1640                 my $indent    = $level_mod * $rOpts_indent_columns;
1641                 $maximum_text_length_at_level[$level] =
1642                   $rOpts_maximum_line_length - $indent;
1643             }
1644         }
1645         else {
1646             $rOpts_whitespace_cycle = $rOpts->{'whitespace-cycle'} = 0;
1647         }
1648     }
1649
1650     # Correct the tables if the -vmll flag is used.  These values override the
1651     # previous values.
1652     if ($rOpts_variable_maximum_line_length) {
1653         foreach my $level ( 0 .. $level_max ) {
1654             $maximum_text_length_at_level[$level] = $rOpts_maximum_line_length;
1655             $maximum_line_length_at_level[$level] =
1656               $rOpts_maximum_line_length + $level * $rOpts_indent_columns;
1657         }
1658     }
1659
1660     initialize_weld_nested_exclusion_rules($rOpts);
1661     initialize_line_up_parentheses_exclusion_rules($rOpts);
1662     return;
1663 }
1664
1665 sub initialize_weld_nested_exclusion_rules {
1666     my ($rOpts) = @_;
1667     %weld_nested_exclusion_rules = ();
1668
1669     my $opt_name = 'weld-nested-exclusion-list';
1670     my $str      = $rOpts->{$opt_name};
1671     return unless ($str);
1672     $str =~ s/^\s+//;
1673     $str =~ s/\s+$//;
1674     return unless ($str);
1675
1676     # There are four container tokens.
1677     my %token_keys = (
1678         '(' => '(',
1679         '[' => '[',
1680         '{' => '{',
1681         'q' => 'q',
1682     );
1683
1684     # We are parsing an exclusion list for nested welds. The list is a string
1685     # with spaces separating any number of items.  Each item consists of three
1686     # pieces of information:
1687     # <optional position> <optional type> <type of container>
1688     # <     ^ or .      > <    k or K   > <     ( [ {       >
1689
1690     # The last character is the required container type and must be one of:
1691     # ( = paren
1692     # [ = square bracket
1693     # { = brace
1694
1695     # An optional leading position indicator:
1696     # ^ means the leading token position in the weld
1697     # . means a secondary token position in the weld
1698     #   no position indicator means all positions match
1699
1700     # An optional alphanumeric character between the position and container
1701     # token selects to which the rule applies:
1702     # k = any keyword
1703     # K = any non-keyword
1704     # f = function call
1705     # F = not a function call
1706     # w = function or keyword
1707     # W = not a function or keyword
1708     #     no letter means any preceding type matches
1709
1710     # Examples:
1711     # ^(  - the weld must not start with a paren
1712     # .(  - the second and later tokens may not be parens
1713     # (   - no parens in weld
1714     # ^K(  - exclude a leading paren not preceded by a keyword
1715     # .k(  - exclude a secondary paren preceded by a keyword
1716     # [ {  - exclude all brackets and braces
1717
1718     my @items = split /\s+/, $str;
1719     my $msg1;
1720     my $msg2;
1721     foreach my $item (@items) {
1722         my $item_save = $item;
1723         my $tok       = chop($item);
1724         my $key       = $token_keys{$tok};
1725         if ( !defined($key) ) {
1726             $msg1 .= " '$item_save'";
1727             next;
1728         }
1729         if ( !defined( $weld_nested_exclusion_rules{$key} ) ) {
1730             $weld_nested_exclusion_rules{$key} = [];
1731         }
1732         my $rflags = $weld_nested_exclusion_rules{$key};
1733
1734         # A 'q' means do not weld quotes
1735         if ( $tok eq 'q' ) {
1736             $rflags->[0] = '*';
1737             $rflags->[1] = '*';
1738             next;
1739         }
1740
1741         my $pos    = '*';
1742         my $select = '*';
1743         if ($item) {
1744             if ( $item =~ /^([\^\.])?([kKfFwW])?$/ ) {
1745                 $pos    = $1 if ($1);
1746                 $select = $2 if ($2);
1747             }
1748             else {
1749                 $msg1 .= " '$item_save'";
1750                 next;
1751             }
1752         }
1753
1754         my $err;
1755         if ( $pos eq '^' || $pos eq '*' ) {
1756             if ( defined( $rflags->[0] ) && $rflags->[0] ne $select ) {
1757                 $err = 1;
1758             }
1759             $rflags->[0] = $select;
1760         }
1761         if ( $pos eq '.' || $pos eq '*' ) {
1762             if ( defined( $rflags->[1] ) && $rflags->[1] ne $select ) {
1763                 $err = 1;
1764             }
1765             $rflags->[1] = $select;
1766         }
1767         if ($err) { $msg2 .= " '$item_save'"; }
1768     }
1769     if ($msg1) {
1770         Warn(<<EOM);
1771 Unexpecting symbol(s) encountered in --$opt_name will be ignored:
1772 $msg1
1773 EOM
1774     }
1775     if ($msg2) {
1776         Warn(<<EOM);
1777 Multiple specifications were encountered in the --weld-nested-exclusion-list for:
1778 $msg2
1779 Only the last will be used.
1780 EOM
1781     }
1782     return;
1783 }
1784
1785 sub initialize_line_up_parentheses_exclusion_rules {
1786     my ($rOpts) = @_;
1787     %line_up_parentheses_exclusion_rules = ();
1788     my $opt_name = 'line-up-parentheses-exclusion-list';
1789     my $str      = $rOpts->{$opt_name};
1790     return unless ($str);
1791     $str =~ s/^\s+//;
1792     $str =~ s/\s+$//;
1793     return unless ($str);
1794
1795     # The format is space separated items, where each item must consist of a
1796     # string with a token type preceded by an optional text token and followed
1797     # by an integer:
1798     # For example:
1799     #    W(1
1800     #  = (flag1)(key)(flag2), where
1801     #    flag1 = 'W'
1802     #    key = '('
1803     #    flag2 = '1'
1804
1805     my @items = split /\s+/, $str;
1806     my $msg1;
1807     my $msg2;
1808     foreach my $item (@items) {
1809         my $item_save = $item;
1810         my ( $flag1, $key, $flag2 );
1811         if ( $item =~ /^([^\(\]\{]*)?([\(\{\[])(\d)?$/ ) {
1812             $flag1 = $1 if $1;
1813             $key   = $2 if $2;
1814             $flag2 = $3 if $3;
1815         }
1816         else {
1817             $msg1 .= " '$item_save'";
1818             next;
1819         }
1820
1821         if ( !defined($key) ) {
1822             $msg1 .= " '$item_save'";
1823             next;
1824         }
1825
1826         # Check for valid flag1
1827         if    ( !defined($flag1) ) { $flag1 = '*' }
1828         elsif ( $flag1 !~ /^[kKfFwW\*]$/ ) {
1829             $msg1 .= " '$item_save'";
1830             next;
1831         }
1832
1833         # Check for valid flag2
1834         # 0 or blank: ignore container contents
1835         # 1 all containers with sublists match
1836         # 2 all containers with sublists, code blocks or ternary operators match
1837         # ... this could be extended in the future
1838         if    ( !defined($flag2) ) { $flag2 = 0 }
1839         elsif ( $flag2 !~ /^[012]$/ ) {
1840             $msg1 .= " '$item_save'";
1841             next;
1842         }
1843
1844         if ( !defined( $line_up_parentheses_exclusion_rules{$key} ) ) {
1845             $line_up_parentheses_exclusion_rules{$key} = [ $flag1, $flag2 ];
1846             next;
1847         }
1848
1849         # check for multiple conflicting specifications
1850         my $rflags = $line_up_parentheses_exclusion_rules{$key};
1851         my $err;
1852         if ( defined( $rflags->[0] ) && $rflags->[0] ne $flag1 ) {
1853             $err = 1;
1854             $rflags->[0] = $flag1;
1855         }
1856         if ( defined( $rflags->[1] ) && $rflags->[1] ne $flag2 ) {
1857             $err = 1;
1858             $rflags->[1] = $flag2;
1859         }
1860         $msg2 .= " '$item_save'" if ($err);
1861         next;
1862     }
1863     if ($msg1) {
1864         Warn(<<EOM);
1865 Unexpecting symbol(s) encountered in --$opt_name will be ignored:
1866 $msg1
1867 EOM
1868     }
1869     if ($msg2) {
1870         Warn(<<EOM);
1871 Multiple specifications were encountered in the $opt_name at:
1872 $msg2
1873 Only the last will be used.
1874 EOM
1875     }
1876
1877     # Possible speedup: we could turn off -lp if it is not actually used
1878     my $all_off = 1;
1879     foreach my $key (qw# ( { [ #) {
1880         my $rflags = $line_up_parentheses_exclusion_rules{$key};
1881         if ( defined($rflags) ) {
1882             my ( $flag1, $flag2 ) = @{$rflags};
1883             if ( $flag1 && $flag1 ne '*' ) { $all_off = 0; last }
1884             if ($flag2)                    { $all_off = 0; last }
1885         }
1886     }
1887     if ($all_off) {
1888
1889         # FIXME: This speedup works but is currently deactivated because at
1890         # present users of -lp could see some discontinuities in formatting,
1891         # such as those involving the choice of breaks at '='.  Only if/when
1892         # these issues have been checked and resolved it should be reactivated
1893         # as a speedup.
1894         ## $rOpts->{'line-up-parentheses'} = "";
1895     }
1896
1897     return;
1898 }
1899
1900 sub initialize_whitespace_hashes {
1901
1902     # This is called once before formatting begins to initialize these global
1903     # hashes, which control the use of whitespace around tokens:
1904     #
1905     # %binary_ws_rules
1906     # %want_left_space
1907     # %want_right_space
1908     # %space_after_keyword
1909     #
1910     # Many token types are identical to the tokens themselves.
1911     # See the tokenizer for a complete list. Here are some special types:
1912     #   k = perl keyword
1913     #   f = semicolon in for statement
1914     #   m = unary minus
1915     #   p = unary plus
1916     # Note that :: is excluded since it should be contained in an identifier
1917     # Note that '->' is excluded because it never gets space
1918     # parentheses and brackets are excluded since they are handled specially
1919     # curly braces are included but may be overridden by logic, such as
1920     # newline logic.
1921
1922     # NEW_TOKENS: create a whitespace rule here.  This can be as
1923     # simple as adding your new letter to @spaces_both_sides, for
1924     # example.
1925
1926     my @opening_type = qw< L { ( [ >;
1927     @is_opening_type{@opening_type} = (1) x scalar(@opening_type);
1928
1929     my @closing_type = qw< R } ) ] >;
1930     @is_closing_type{@closing_type} = (1) x scalar(@closing_type);
1931
1932     my @spaces_both_sides = qw#
1933       + - * / % ? = . : x < > | & ^ .. << >> ** && .. || // => += -=
1934       .= %= x= &= |= ^= *= <> <= >= == =~ !~ /= != ... <<= >>= ~~ !~~
1935       &&= ||= //= <=> A k f w F n C Y U G v
1936       #;
1937
1938     my @spaces_left_side = qw<
1939       t ! ~ m p { \ h pp mm Z j
1940     >;
1941     push( @spaces_left_side, '#' );    # avoids warning message
1942
1943     my @spaces_right_side = qw<
1944       ; } ) ] R J ++ -- **=
1945     >;
1946     push( @spaces_right_side, ',' );    # avoids warning message
1947
1948     %want_left_space  = ();
1949     %want_right_space = ();
1950     %binary_ws_rules  = ();
1951
1952     # Note that we setting defaults here.  Later in processing
1953     # the values of %want_left_space and  %want_right_space
1954     # may be overridden by any user settings specified by the
1955     # -wls and -wrs parameters.  However the binary_whitespace_rules
1956     # are hardwired and have priority.
1957     @want_left_space{@spaces_both_sides} =
1958       (1) x scalar(@spaces_both_sides);
1959     @want_right_space{@spaces_both_sides} =
1960       (1) x scalar(@spaces_both_sides);
1961     @want_left_space{@spaces_left_side} =
1962       (1) x scalar(@spaces_left_side);
1963     @want_right_space{@spaces_left_side} =
1964       (-1) x scalar(@spaces_left_side);
1965     @want_left_space{@spaces_right_side} =
1966       (-1) x scalar(@spaces_right_side);
1967     @want_right_space{@spaces_right_side} =
1968       (1) x scalar(@spaces_right_side);
1969     $want_left_space{'->'}      = WS_NO;
1970     $want_right_space{'->'}     = WS_NO;
1971     $want_left_space{'**'}      = WS_NO;
1972     $want_right_space{'**'}     = WS_NO;
1973     $want_right_space{'CORE::'} = WS_NO;
1974
1975     # These binary_ws_rules are hardwired and have priority over the above
1976     # settings.  It would be nice to allow adjustment by the user,
1977     # but it would be complicated to specify.
1978     #
1979     # hash type information must stay tightly bound
1980     # as in :  ${xxxx}
1981     $binary_ws_rules{'i'}{'L'} = WS_NO;
1982     $binary_ws_rules{'i'}{'{'} = WS_YES;
1983     $binary_ws_rules{'k'}{'{'} = WS_YES;
1984     $binary_ws_rules{'U'}{'{'} = WS_YES;
1985     $binary_ws_rules{'i'}{'['} = WS_NO;
1986     $binary_ws_rules{'R'}{'L'} = WS_NO;
1987     $binary_ws_rules{'R'}{'{'} = WS_NO;
1988     $binary_ws_rules{'t'}{'L'} = WS_NO;
1989     $binary_ws_rules{'t'}{'{'} = WS_NO;
1990     $binary_ws_rules{'t'}{'='} = WS_OPTIONAL;    # for signatures; fixes b1123
1991     $binary_ws_rules{'}'}{'L'} = WS_NO;
1992     $binary_ws_rules{'}'}{'{'} = WS_OPTIONAL;    # RT#129850; was WS_NO
1993     $binary_ws_rules{'$'}{'L'} = WS_NO;
1994     $binary_ws_rules{'$'}{'{'} = WS_NO;
1995     $binary_ws_rules{'@'}{'L'} = WS_NO;
1996     $binary_ws_rules{'@'}{'{'} = WS_NO;
1997     $binary_ws_rules{'='}{'L'} = WS_YES;
1998     $binary_ws_rules{'J'}{'J'} = WS_YES;
1999
2000     # the following includes ') {'
2001     # as in :    if ( xxx ) { yyy }
2002     $binary_ws_rules{']'}{'L'} = WS_NO;
2003     $binary_ws_rules{']'}{'{'} = WS_NO;
2004     $binary_ws_rules{')'}{'{'} = WS_YES;
2005     $binary_ws_rules{')'}{'['} = WS_NO;
2006     $binary_ws_rules{']'}{'['} = WS_NO;
2007     $binary_ws_rules{']'}{'{'} = WS_NO;
2008     $binary_ws_rules{'}'}{'['} = WS_NO;
2009     $binary_ws_rules{'R'}{'['} = WS_NO;
2010
2011     $binary_ws_rules{']'}{'++'} = WS_NO;
2012     $binary_ws_rules{']'}{'--'} = WS_NO;
2013     $binary_ws_rules{')'}{'++'} = WS_NO;
2014     $binary_ws_rules{')'}{'--'} = WS_NO;
2015
2016     $binary_ws_rules{'R'}{'++'} = WS_NO;
2017     $binary_ws_rules{'R'}{'--'} = WS_NO;
2018
2019     $binary_ws_rules{'i'}{'Q'} = WS_YES;
2020     $binary_ws_rules{'n'}{'('} = WS_YES;    # occurs in 'use package n ()'
2021
2022     $binary_ws_rules{'i'}{'('} = WS_NO;
2023
2024     $binary_ws_rules{'w'}{'('} = WS_NO;
2025     $binary_ws_rules{'w'}{'{'} = WS_YES;
2026     return;
2027
2028 } ## end initialize_whitespace_hashes
2029
2030 sub set_whitespace_flags {
2031
2032     # This routine is called once per file to set whitespace flags for that
2033     # file.  This routine examines each pair of nonblank tokens and sets a flag
2034     # indicating if white space is needed.
2035     #
2036     # $rwhitespace_flags->[$j] is a flag indicating whether a white space
2037     # BEFORE token $j is needed, with the following values:
2038     #
2039     #             WS_NO      = -1 do not want a space BEFORE token $j
2040     #             WS_OPTIONAL=  0 optional space or $j is a whitespace
2041     #             WS_YES     =  1 want a space BEFORE token $j
2042     #
2043
2044     my $self = shift;
2045     my $rLL  = $self->[_rLL_];
2046     use constant DEBUG_WHITE => 0;
2047
2048     my $rOpts_space_keyword_paren   = $rOpts->{'space-keyword-paren'};
2049     my $rOpts_space_backslash_quote = $rOpts->{'space-backslash-quote'};
2050     my $rOpts_space_function_paren  = $rOpts->{'space-function-paren'};
2051
2052     my $rwhitespace_flags       = [];
2053     my $ris_function_call_paren = {};
2054
2055     my %is_for_foreach = ( 'for' => 1, 'foreach' => 1 );
2056
2057     my ( $token, $type, $block_type, $seqno, $input_line_no );
2058     my (
2059         $last_token, $last_type, $last_block_type,
2060         $last_seqno, $last_input_line_no
2061     );
2062
2063     my $j_tight_closing_paren = -1;
2064
2065     $token              = ' ';
2066     $type               = 'b';
2067     $block_type         = '';
2068     $seqno              = '';
2069     $input_line_no      = 0;
2070     $last_token         = ' ';
2071     $last_type          = 'b';
2072     $last_block_type    = '';
2073     $last_seqno         = '';
2074     $last_input_line_no = 0;
2075
2076     my $jmax = @{$rLL} - 1;
2077
2078     my ($ws);
2079
2080     # This is some logic moved to a sub to avoid deep nesting of if stmts
2081     my $ws_in_container = sub {
2082
2083         my ($j) = @_;
2084         my $ws = WS_YES;
2085         if ( $j + 1 > $jmax ) { return (WS_NO) }
2086
2087         # Patch to count '-foo' as single token so that
2088         # each of  $a{-foo} and $a{foo} and $a{'foo'} do
2089         # not get spaces with default formatting.
2090         my $j_here = $j;
2091         ++$j_here
2092           if ( $token eq '-'
2093             && $last_token eq '{'
2094             && $rLL->[ $j + 1 ]->[_TYPE_] eq 'w' );
2095
2096         # Patch to count a sign separated from a number as a single token, as
2097         # in the following line. Otherwise, it takes two steps to converge:
2098         #    deg2rad(-  0.5)
2099         if (   ( $type eq 'm' || $type eq 'p' )
2100             && $j < $jmax + 1
2101             && $rLL->[ $j + 1 ]->[_TYPE_] eq 'b'
2102             && $rLL->[ $j + 2 ]->[_TYPE_] eq 'n'
2103             && $rLL->[ $j + 2 ]->[_TOKEN_] =~ /^\d/ )
2104         {
2105             $j_here = $j + 2;
2106         }
2107
2108         # $j_next is where a closing token should be if
2109         # the container has a single token
2110         if ( $j_here + 1 > $jmax ) { return (WS_NO) }
2111         my $j_next =
2112           ( $rLL->[ $j_here + 1 ]->[_TYPE_] eq 'b' )
2113           ? $j_here + 2
2114           : $j_here + 1;
2115
2116         if ( $j_next > $jmax ) { return WS_NO }
2117         my $tok_next  = $rLL->[$j_next]->[_TOKEN_];
2118         my $type_next = $rLL->[$j_next]->[_TYPE_];
2119
2120         # for tightness = 1, if there is just one token
2121         # within the matching pair, we will keep it tight
2122         if (
2123             $tok_next eq $matching_token{$last_token}
2124
2125             # but watch out for this: [ [ ]    (misc.t)
2126             && $last_token ne $token
2127
2128             # double diamond is usually spaced
2129             && $token ne '<<>>'
2130
2131           )
2132         {
2133
2134             # remember where to put the space for the closing paren
2135             $j_tight_closing_paren = $j_next;
2136             return (WS_NO);
2137         }
2138         return (WS_YES);
2139     };
2140
2141     # Local hashes to set spaces around container tokens according to their
2142     # sequence numbers.  These are set as keywords are examined.
2143     # They are controlled by the -kpit and -kpitl flags.
2144     my %opening_container_inside_ws;
2145     my %closing_container_inside_ws;
2146     my $set_container_ws_by_keyword = sub {
2147
2148         return unless (%keyword_paren_inner_tightness);
2149
2150         my ( $word, $sequence_number ) = @_;
2151
2152         # We just saw a keyword (or other function name) followed by an opening
2153         # paren. Now check to see if the following paren should have special
2154         # treatment for its inside space.  If so we set a hash value using the
2155         # sequence number as key.
2156         if ( $word && $sequence_number ) {
2157             my $tightness = $keyword_paren_inner_tightness{$word};
2158             if ( defined($tightness) && $tightness != 1 ) {
2159                 my $ws_flag = $tightness == 0 ? WS_YES : WS_NO;
2160                 $opening_container_inside_ws{$sequence_number} = $ws_flag;
2161                 $closing_container_inside_ws{$sequence_number} = $ws_flag;
2162             }
2163         }
2164     };
2165
2166     my $ws_opening_container_override = sub {
2167         my ( $ws, $sequence_number ) = @_;
2168         return $ws unless (%opening_container_inside_ws);
2169         if ($sequence_number) {
2170             my $ws_override = $opening_container_inside_ws{$sequence_number};
2171             if ($ws_override) { $ws = $ws_override }
2172         }
2173         return $ws;
2174     };
2175
2176     my $ws_closing_container_override = sub {
2177         my ( $ws, $sequence_number ) = @_;
2178         return $ws unless (%closing_container_inside_ws);
2179         if ($sequence_number) {
2180             my $ws_override = $closing_container_inside_ws{$sequence_number};
2181             if ($ws_override) { $ws = $ws_override }
2182         }
2183         return $ws;
2184     };
2185
2186     # main loop over all tokens to define the whitespace flags
2187     for ( my $j = 0 ; $j <= $jmax ; $j++ ) {
2188
2189         my $rtokh = $rLL->[$j];
2190
2191         # Set a default
2192         $rwhitespace_flags->[$j] = WS_OPTIONAL;
2193
2194         if ( $rtokh->[_TYPE_] eq 'b' ) {
2195             next;
2196         }
2197
2198         # set a default value, to be changed as needed
2199         $ws                 = undef;
2200         $last_token         = $token;
2201         $last_type          = $type;
2202         $last_block_type    = $block_type;
2203         $last_seqno         = $seqno;
2204         $last_input_line_no = $input_line_no;
2205         $token              = $rtokh->[_TOKEN_];
2206         $type               = $rtokh->[_TYPE_];
2207         $block_type         = $rtokh->[_BLOCK_TYPE_];
2208         $seqno              = $rtokh->[_TYPE_SEQUENCE_];
2209         $input_line_no      = $rtokh->[_LINE_INDEX_];
2210
2211         #---------------------------------------------------------------
2212         # Whitespace Rules Section 1:
2213         # Handle space on the inside of opening braces.
2214         #---------------------------------------------------------------
2215
2216         #    /^[L\{\(\[]$/
2217         if ( $is_opening_type{$last_type} ) {
2218
2219             $j_tight_closing_paren = -1;
2220
2221             # let us keep empty matched braces together: () {} []
2222             # except for BLOCKS
2223             if ( $token eq $matching_token{$last_token} ) {
2224                 if ($block_type) {
2225                     $ws = WS_YES;
2226                 }
2227                 else {
2228                     $ws = WS_NO;
2229                 }
2230             }
2231             else {
2232
2233                 # we're considering the right of an opening brace
2234                 # tightness = 0 means always pad inside with space
2235                 # tightness = 1 means pad inside if "complex"
2236                 # tightness = 2 means never pad inside with space
2237
2238                 my $tightness;
2239                 if (   $last_type eq '{'
2240                     && $last_token eq '{'
2241                     && $last_block_type )
2242                 {
2243                     $tightness = $rOpts_block_brace_tightness;
2244                 }
2245                 else { $tightness = $tightness{$last_token} }
2246
2247                #=============================================================
2248                # Patch for test problem <<snippets/fabrice_bug.in>>
2249                # We must always avoid spaces around a bare word beginning
2250                # with ^ as in:
2251                #    my $before = ${^PREMATCH};
2252                # Because all of the following cause an error in perl:
2253                #    my $before = ${ ^PREMATCH };
2254                #    my $before = ${ ^PREMATCH};
2255                #    my $before = ${^PREMATCH };
2256                # So if brace tightness flag is -bt=0 we must temporarily reset
2257                # to bt=1.  Note that here we must set tightness=1 and not 2 so
2258                # that the closing space
2259                # is also avoided (via the $j_tight_closing_paren flag in coding)
2260                 if ( $type eq 'w' && $token =~ /^\^/ ) { $tightness = 1 }
2261
2262                 #=============================================================
2263
2264                 if ( $tightness <= 0 ) {
2265                     $ws = WS_YES;
2266                 }
2267                 elsif ( $tightness > 1 ) {
2268                     $ws = WS_NO;
2269                 }
2270                 else {
2271                     $ws = $ws_in_container->($j);
2272                 }
2273             }
2274
2275             # check for special cases which override the above rules
2276             $ws = $ws_opening_container_override->( $ws, $last_seqno );
2277
2278         }    # end setting space flag inside opening tokens
2279         my $ws_1;
2280         $ws_1 = $ws
2281           if DEBUG_WHITE;
2282
2283         #---------------------------------------------------------------
2284         # Whitespace Rules Section 2:
2285         # Handle space on inside of closing brace pairs.
2286         #---------------------------------------------------------------
2287
2288         #   /[\}\)\]R]/
2289         if ( $is_closing_type{$type} ) {
2290
2291             if ( $j == $j_tight_closing_paren ) {
2292
2293                 $j_tight_closing_paren = -1;
2294                 $ws                    = WS_NO;
2295             }
2296             else {
2297
2298                 if ( !defined($ws) ) {
2299
2300                     my $tightness;
2301                     if ( $type eq '}' && $token eq '}' && $block_type ) {
2302                         $tightness = $rOpts_block_brace_tightness;
2303                     }
2304                     else { $tightness = $tightness{$token} }
2305
2306                     $ws = ( $tightness > 1 ) ? WS_NO : WS_YES;
2307                 }
2308             }
2309
2310             # check for special cases which override the above rules
2311             $ws = $ws_closing_container_override->( $ws, $seqno );
2312
2313         }    # end setting space flag inside closing tokens
2314
2315         my $ws_2;
2316         $ws_2 = $ws
2317           if DEBUG_WHITE;
2318
2319         #---------------------------------------------------------------
2320         # Whitespace Rules Section 3:
2321         # Use the binary rule table.
2322         #---------------------------------------------------------------
2323         if ( !defined($ws) ) {
2324             $ws = $binary_ws_rules{$last_type}{$type};
2325         }
2326         my $ws_3;
2327         $ws_3 = $ws
2328           if DEBUG_WHITE;
2329
2330         #---------------------------------------------------------------
2331         # Whitespace Rules Section 4:
2332         # Handle some special cases.
2333         #---------------------------------------------------------------
2334         if ( $token eq '(' ) {
2335
2336             # This will have to be tweaked as tokenization changes.
2337             # We usually want a space at '} (', for example:
2338             # <<snippets/space1.in>>
2339             #     map { 1 * $_; } ( $y, $M, $w, $d, $h, $m, $s );
2340             #
2341             # But not others:
2342             #     &{ $_->[1] }( delete $_[$#_]{ $_->[0] } );
2343             # At present, the above & block is marked as type L/R so this case
2344             # won't go through here.
2345             if ( $last_type eq '}' && $last_token ne ')' ) { $ws = WS_YES }
2346
2347             # NOTE: some older versions of Perl had occasional problems if
2348             # spaces are introduced between keywords or functions and opening
2349             # parens.  So the default is not to do this except is certain
2350             # cases.  The current Perl seems to tolerate spaces.
2351
2352             # Space between keyword and '('
2353             elsif ( $last_type eq 'k' ) {
2354                 $ws = WS_NO
2355                   unless ( $rOpts_space_keyword_paren
2356                     || $space_after_keyword{$last_token} );
2357
2358                 # Set inside space flag if requested
2359                 $set_container_ws_by_keyword->( $last_token, $seqno );
2360             }
2361
2362             # Space between function and '('
2363             # -----------------------------------------------------
2364             # 'w' and 'i' checks for something like:
2365             #   myfun(    &myfun(   ->myfun(
2366             # -----------------------------------------------------
2367
2368             # Note that at this point an identifier may still have a leading
2369             # arrow, but the arrow will be split off during token respacing.
2370             # After that, the token may become a bare word without leading
2371             # arrow.  The point is, it is best to mark function call parens
2372             # right here before that happens.
2373             # Patch: added 'C' to prevent blinker, case b934, i.e. 'pi()'
2374             # NOTE: this would be the place to allow spaces between repeated
2375             # parens, like () () (), as in case c017, but I decided that would
2376             # not be a good idea.
2377             elsif (( $last_type =~ /^[wCUG]$/ )
2378                 || ( $last_type =~ /^[wi]$/ && $last_token =~ /^([\&]|->)/ ) )
2379             {
2380                 $ws = $rOpts_space_function_paren ? WS_YES : WS_NO;
2381                 $set_container_ws_by_keyword->( $last_token, $seqno );
2382                 $ris_function_call_paren->{$seqno} = 1;
2383             }
2384
2385             # space between something like $i and ( in <<snippets/space2.in>>
2386             # for $i ( 0 .. 20 ) {
2387             # FIXME: eventually, type 'i' could be split into multiple
2388             # token types so this can be a hardwired rule.
2389             elsif ( $last_type eq 'i' && $last_token =~ /^[\$\%\@]/ ) {
2390                 $ws = WS_YES;
2391             }
2392
2393             # allow constant function followed by '()' to retain no space
2394             elsif ($last_type eq 'C'
2395                 && $rLL->[ $j + 1 ]->[_TOKEN_] eq ')' )
2396             {
2397                 $ws = WS_NO;
2398             }
2399         }
2400
2401         # patch for SWITCH/CASE: make space at ']{' optional
2402         # since the '{' might begin a case or when block
2403         elsif ( ( $token eq '{' && $type ne 'L' ) && $last_token eq ']' ) {
2404             $ws = WS_OPTIONAL;
2405         }
2406
2407         # keep space between 'sub' and '{' for anonymous sub definition
2408         if ( $type eq '{' ) {
2409             if ( $last_token eq 'sub' ) {
2410                 $ws = WS_YES;
2411             }
2412
2413             # this is needed to avoid no space in '){'
2414             if ( $last_token eq ')' && $token eq '{' ) { $ws = WS_YES }
2415
2416             # avoid any space before the brace or bracket in something like
2417             #  @opts{'a','b',...}
2418             if ( $last_type eq 'i' && $last_token =~ /^\@/ ) {
2419                 $ws = WS_NO;
2420             }
2421         }
2422
2423         elsif ( $type eq 'i' ) {
2424
2425             # never a space before ->
2426             if ( substr( $token, 0, 2 ) eq '->' ) {
2427                 $ws = WS_NO;
2428             }
2429         }
2430
2431         # retain any space between '-' and bare word
2432         elsif ( $type eq 'w' || $type eq 'C' ) {
2433             $ws = WS_OPTIONAL if $last_type eq '-';
2434
2435             # never a space before ->
2436             if ( substr( $token, 0, 2 ) eq '->' ) {
2437                 $ws = WS_NO;
2438             }
2439         }
2440
2441         # retain any space between '-' and bare word; for example
2442         # avoid space between 'USER' and '-' here: <<snippets/space2.in>>
2443         #   $myhash{USER-NAME}='steve';
2444         elsif ( $type eq 'm' || $type eq '-' ) {
2445             $ws = WS_OPTIONAL if ( $last_type eq 'w' );
2446         }
2447
2448         # always space before side comment
2449         elsif ( $type eq '#' ) { $ws = WS_YES if $j > 0 }
2450
2451         # always preserver whatever space was used after a possible
2452         # filehandle (except _) or here doc operator
2453         if (
2454             $type ne '#'
2455             && ( ( $last_type eq 'Z' && $last_token ne '_' )
2456                 || $last_type eq 'h' )
2457           )
2458         {
2459             $ws = WS_OPTIONAL;
2460         }
2461
2462         # space_backslash_quote; RT #123774  <<snippets/rt123774.in>>
2463         # allow a space between a backslash and single or double quote
2464         # to avoid fooling html formatters
2465         elsif ( $last_type eq '\\' && $type eq 'Q' && $token =~ /^[\"\']/ ) {
2466             if ($rOpts_space_backslash_quote) {
2467                 if ( $rOpts_space_backslash_quote == 1 ) {
2468                     $ws = WS_OPTIONAL;
2469                 }
2470                 elsif ( $rOpts_space_backslash_quote == 2 ) { $ws = WS_YES }
2471                 else { }    # shouldnt happen
2472             }
2473             else {
2474                 $ws = WS_NO;
2475             }
2476         }
2477         elsif ( $type eq 'k' ) {
2478
2479             # Keywords 'for', 'foreach' are special cases for -kpit since the
2480             # opening paren does not always immediately follow the keyword. So
2481             # we have to search forward for the paren in this case.  I have
2482             # limited the search to 10 tokens ahead, just in case somebody
2483             # has a big file and no opening paren.  This should be enough for
2484             # all normal code.
2485             if (   $is_for_foreach{$token}
2486                 && %keyword_paren_inner_tightness
2487                 && defined( $keyword_paren_inner_tightness{$token} )
2488                 && $j < $jmax )
2489             {
2490                 my $jp = $j;
2491                 for ( my $inc = 1 ; $inc < 10 ; $inc++ ) {
2492                     $jp++;
2493                     last if ( $jp > $jmax );
2494                     next unless ( $rLL->[$jp]->[_TOKEN_] eq '(' );
2495                     my $seqno = $rLL->[$jp]->[_TYPE_SEQUENCE_];
2496                     $set_container_ws_by_keyword->( $token, $seqno );
2497                     last;
2498                 }
2499             }
2500         }
2501
2502         my $ws_4;
2503         $ws_4 = $ws
2504           if DEBUG_WHITE;
2505
2506         #---------------------------------------------------------------
2507         # Whitespace Rules Section 5:
2508         # Apply default rules not covered above.
2509         #---------------------------------------------------------------
2510
2511         # If we fall through to here, look at the pre-defined hash tables for
2512         # the two tokens, and:
2513         #  if (they are equal) use the common value
2514         #  if (either is zero or undef) use the other
2515         #  if (either is -1) use it
2516         # That is,
2517         # left  vs right
2518         #  1    vs    1     -->  1
2519         #  0    vs    0     -->  0
2520         # -1    vs   -1     --> -1
2521         #
2522         #  0    vs   -1     --> -1
2523         #  0    vs    1     -->  1
2524         #  1    vs    0     -->  1
2525         # -1    vs    0     --> -1
2526         #
2527         # -1    vs    1     --> -1
2528         #  1    vs   -1     --> -1
2529         if ( !defined($ws) ) {
2530             my $wl = $want_left_space{$type};
2531             my $wr = $want_right_space{$last_type};
2532             if ( !defined($wl) ) { $wl = 0 }
2533             if ( !defined($wr) ) { $wr = 0 }
2534             $ws = ( ( $wl == $wr ) || ( $wl == -1 ) || !$wr ) ? $wl : $wr;
2535         }
2536
2537         if ( !defined($ws) ) {
2538             $ws = 0;
2539             write_diagnostics(
2540                 "WS flag is undefined for tokens $last_token $token\n");
2541         }
2542
2543         # Treat newline as a whitespace. Otherwise, we might combine
2544         # 'Send' and '-recipients' here according to the above rules:
2545         # <<snippets/space3.in>>
2546         #    my $msg = new Fax::Send
2547         #      -recipients => $to,
2548         #      -data => $data;
2549         if ( $ws == 0 && $input_line_no != $last_input_line_no ) { $ws = 1 }
2550
2551         $rwhitespace_flags->[$j] = $ws;
2552
2553         DEBUG_WHITE && do {
2554             my $str = substr( $last_token, 0, 15 );
2555             $str .= ' ' x ( 16 - length($str) );
2556             if ( !defined($ws_1) ) { $ws_1 = "*" }
2557             if ( !defined($ws_2) ) { $ws_2 = "*" }
2558             if ( !defined($ws_3) ) { $ws_3 = "*" }
2559             if ( !defined($ws_4) ) { $ws_4 = "*" }
2560             print STDOUT
2561 "NEW WHITE:  i=$j $str $last_type $type $ws_1 : $ws_2 : $ws_3 : $ws_4 : $ws \n";
2562         };
2563     } ## end main loop
2564
2565     if ( $rOpts->{'tight-secret-operators'} ) {
2566         new_secret_operator_whitespace( $rLL, $rwhitespace_flags );
2567     }
2568     $self->[_ris_function_call_paren_] = $ris_function_call_paren;
2569     return $rwhitespace_flags;
2570
2571 } ## end sub set_whitespace_flags
2572
2573 sub dump_want_left_space {
2574     my $fh = shift;
2575     local $" = "\n";
2576     $fh->print(<<EOM);
2577 These values are the main control of whitespace to the left of a token type;
2578 They may be altered with the -wls parameter.
2579 For a list of token types, use perltidy --dump-token-types (-dtt)
2580  1 means the token wants a space to its left
2581 -1 means the token does not want a space to its left
2582 ------------------------------------------------------------------------
2583 EOM
2584     foreach my $key ( sort keys %want_left_space ) {
2585         $fh->print("$key\t$want_left_space{$key}\n");
2586     }
2587     return;
2588 }
2589
2590 sub dump_want_right_space {
2591     my $fh = shift;
2592     local $" = "\n";
2593     $fh->print(<<EOM);
2594 These values are the main control of whitespace to the right of a token type;
2595 They may be altered with the -wrs parameter.
2596 For a list of token types, use perltidy --dump-token-types (-dtt)
2597  1 means the token wants a space to its right
2598 -1 means the token does not want a space to its right
2599 ------------------------------------------------------------------------
2600 EOM
2601     foreach my $key ( sort keys %want_right_space ) {
2602         $fh->print("$key\t$want_right_space{$key}\n");
2603     }
2604     return;
2605 }
2606
2607 {    ## begin closure is_essential_whitespace
2608
2609     my %is_sort_grep_map;
2610     my %is_for_foreach;
2611     my %is_digraph;
2612     my %is_trigraph;
2613     my %essential_whitespace_filter_l1;
2614     my %essential_whitespace_filter_r1;
2615     my %essential_whitespace_filter_l2;
2616     my %essential_whitespace_filter_r2;
2617     my %is_type_with_space_before_bareword;
2618
2619     BEGIN {
2620
2621         my @q;
2622         @q = qw(sort grep map);
2623         @is_sort_grep_map{@q} = (1) x scalar(@q);
2624
2625         @q = qw(for foreach);
2626         @is_for_foreach{@q} = (1) x scalar(@q);
2627
2628         @q = qw(
2629           .. :: << >> ** && .. || // -> => += -= .= %= &= |= ^= *= <>
2630           <= >= == =~ !~ != ++ -- /= x= ~~ ~. |. &. ^.
2631         );
2632         @is_digraph{@q} = (1) x scalar(@q);
2633
2634         @q = qw( ... **= <<= >>= &&= ||= //= <=> !~~ &.= |.= ^.= <<~);
2635         @is_trigraph{@q} = (1) x scalar(@q);
2636
2637         # These are used as a speedup filters for sub is_essential_whitespace.
2638
2639         # Filter 1:
2640         # These left side token types USUALLY do not require a space:
2641         @q = qw( ; { } [ ] L R );
2642         push @q, ',';
2643         push @q, ')';
2644         push @q, '(';
2645         @essential_whitespace_filter_l1{@q} = (1) x scalar(@q);
2646
2647         # BUT some might if followed by these right token types
2648         @q = qw( pp mm << <<= h );
2649         @essential_whitespace_filter_r1{@q} = (1) x scalar(@q);
2650
2651         # Filter 2:
2652         # These right side filters usually do not require a space
2653         @q = qw( ; ] R } );
2654         push @q, ',';
2655         push @q, ')';
2656         @essential_whitespace_filter_r2{@q} = (1) x scalar(@q);
2657
2658         # BUT some might if followed by these left token types
2659         @q = qw( h Z );
2660         @essential_whitespace_filter_l2{@q} = (1) x scalar(@q);
2661
2662         # Keep a space between certain types and any bareword:
2663         # Q: keep a space between a quote and a bareword to prevent the
2664         #    bareword from becoming a quote modifier.
2665         # &: do not remove space between an '&' and a bare word because
2666         #    it may turn into a function evaluation, like here
2667         #    between '&' and 'O_ACCMODE', producing a syntax error [File.pm]
2668         #      $opts{rdonly} = (($opts{mode} & O_ACCMODE) == O_RDONLY);
2669         @q = qw( Q & );
2670         @is_type_with_space_before_bareword{@q} = (1) x scalar(@q);
2671
2672     }
2673
2674     sub is_essential_whitespace {
2675
2676         # Essential whitespace means whitespace which cannot be safely deleted
2677         # without risking the introduction of a syntax error.
2678         # We are given three tokens and their types:
2679         # ($tokenl, $typel) is the token to the left of the space in question
2680         # ($tokenr, $typer) is the token to the right of the space in question
2681         # ($tokenll, $typell) is previous nonblank token to the left of $tokenl
2682         #
2683         # Note1: This routine should almost never need to be changed.  It is
2684         # for avoiding syntax problems rather than for formatting.
2685
2686         # Note2: The -mangle option causes large numbers of calls to this
2687         # routine and therefore is a good test. So if a change is made, be sure
2688         # to run a large number of files with the -mangle option and check for
2689         # differences.
2690
2691         my ( $tokenll, $typell, $tokenl, $typel, $tokenr, $typer ) = @_;
2692
2693         # This is potentially a very slow routine but the following quick
2694         # filters typically catch and handle over 90% of the calls.
2695
2696         # Filter 1: usually no space required after common types ; , [ ] { } ( )
2697         return
2698           if ( $essential_whitespace_filter_l1{$typel}
2699             && !$essential_whitespace_filter_r1{$typer} );
2700
2701         # Filter 2: usually no space before common types ; ,
2702         return
2703           if ( $essential_whitespace_filter_r2{$typer}
2704             && !$essential_whitespace_filter_l2{$typel} );
2705
2706         # Filter 3: Handle side comments: a space is only essential if the left
2707         # token ends in '$' For example, we do not want to create $#foo below:
2708
2709         #   sub t086
2710         #       ( #foo)))
2711         #       $ #foo)))
2712         #       a #foo)))
2713         #       ) #foo)))
2714         #       { ... }
2715
2716         # Also, I prefer not to put a ? and # together because ? used to be
2717         # a pattern delmiter and spacing was used if guessing was needed.
2718
2719         if ( $typer eq '#' ) {
2720
2721             return 1
2722               if ( $tokenl
2723                 && ( $typel eq '?' || substr( $tokenl, -1 ) eq '$' ) );
2724             return;
2725         }
2726
2727         my $tokenr_is_bareword   = $tokenr =~ /^\w/ && $tokenr !~ /^\d/;
2728         my $tokenr_is_open_paren = $tokenr eq '(';
2729         my $token_joined         = $tokenl . $tokenr;
2730         my $tokenl_is_dash       = $tokenl eq '-';
2731
2732         my $result =
2733
2734           # never combine two bare words or numbers
2735           # examples:  and ::ok(1)
2736           #            return ::spw(...)
2737           #            for bla::bla:: abc
2738           # example is "%overload:: and" in files Dumpvalue.pm or colonbug.pl
2739           #            $input eq"quit" to make $inputeq"quit"
2740           #            my $size=-s::SINK if $file;  <==OK but we won't do it
2741           # don't join something like: for bla::bla:: abc
2742           # example is "%overload:: and" in files Dumpvalue.pm or colonbug.pl
2743           (      ( $tokenl =~ /([\'\w]|\:\:)$/ && $typel ne 'CORE::' )
2744               && ( $tokenr =~ /^([\'\w]|\:\:)/ ) )
2745
2746           # do not combine a number with a concatenation dot
2747           # example: pom.caputo:
2748           # $vt100_compatible ? "\e[0;0H" : ('-' x 78 . "\n");
2749           || $typel eq 'n' && $tokenr eq '.'
2750           || $typer eq 'n'
2751           && $tokenl eq '.'
2752
2753           # cases of a space before a bareword...
2754           || (
2755             $tokenr_is_bareword && (
2756
2757                 # do not join a minus with a bare word, because you might form
2758                 # a file test operator.  Example from Complex.pm:
2759                 # if (CORE::abs($z - i) < $eps);
2760                 # "z-i" would be taken as a file test.
2761                 $tokenl_is_dash && length($tokenr) == 1
2762
2763                 # and something like this could become ambiguous without space
2764                 # after the '-':
2765                 #   use constant III=>1;
2766                 #   $a = $b - III;
2767                 # and even this:
2768                 #   $a = - III;
2769                 || $tokenl_is_dash && $typer =~ /^[wC]$/
2770
2771                 # keep space between types Q & and a bareword
2772                 || $is_type_with_space_before_bareword{$typel}
2773
2774                 # +-: binary plus and minus before a bareword could get
2775                 # converted into unary plus and minus on next pass through the
2776                 # tokenizer. This can lead to blinkers: cases b660 b670 b780
2777                 # b781 b787 b788 b790 So we keep a space unless the +/- clearly
2778                 # follows an operator
2779                 || ( ( $typel eq '+' || $typel eq '-' )
2780                     && $typell !~ /^[niC\)\}\]R]$/ )
2781
2782                 # keep a space between a token ending in '$' and any word;
2783                 # this caused trouble:  "die @$ if $@"
2784                 || $typel eq 'i' && $tokenl =~ /\$$/
2785
2786                 # don't combine $$ or $# with any alphanumeric
2787                 # (testfile mangle.t with --mangle)
2788                 || $tokenl =~ /^\$[\$\#]$/
2789
2790             )
2791           )    ## end $tokenr_is_bareword
2792
2793           # OLD, not used
2794           # '= -' should not become =- or you will get a warning
2795           # about reversed -=
2796           # || ($tokenr eq '-')
2797
2798           # do not join a bare word with a minus, like between 'Send' and
2799           # '-recipients' here <<snippets/space3.in>>
2800           #   my $msg = new Fax::Send
2801           #     -recipients => $to,
2802           #     -data => $data;
2803           # This is the safest thing to do. If we had the token to the right of
2804           # the minus we could do a better check.
2805           #
2806           # And do not combine a bareword and a quote, like this:
2807           #    oops "Your login, $Bad_Login, is not valid";
2808           # It can cause a syntax error if oops is a sub
2809           || $typel eq 'w' && ( $tokenr eq '-' || $typer eq 'Q' )
2810
2811           # perl is very fussy about spaces before <<
2812           || $tokenr =~ /^\<\</
2813
2814           # avoid combining tokens to create new meanings. Example:
2815           #     $a+ +$b must not become $a++$b
2816           || ( $is_digraph{$token_joined} )
2817           || $is_trigraph{$token_joined}
2818
2819           # another example: do not combine these two &'s:
2820           #     allow_options & &OPT_EXECCGI
2821           || $is_digraph{ $tokenl . substr( $tokenr, 0, 1 ) }
2822
2823           # retain any space after possible filehandle
2824           # (testfiles prnterr1.t with --extrude and mangle.t with --mangle)
2825           || $typel eq 'Z'
2826
2827           # Added 'Y' here 16 Jan 2021 to prevent -mangle option from removing
2828           # space after type Y. Otherwise, it will get parsed as type 'Z' later
2829           # and any space would have to be added back manually if desired.
2830           || $typel eq 'Y'
2831
2832           # Perl is sensitive to whitespace after the + here:
2833           #  $b = xvals $a + 0.1 * yvals $a;
2834           || $typell eq 'Z' && $typel =~ /^[\/\?\+\-\*]$/
2835
2836           || (
2837             $tokenr_is_open_paren && (
2838
2839                 # keep paren separate in 'use Foo::Bar ()'
2840                 ( $typel eq 'w' && $typell eq 'k' && $tokenll eq 'use' )
2841
2842                 # OLD: keep any space between filehandle and paren:
2843                 # file mangle.t with --mangle:
2844                 # NEW: this test is no longer necessary here (moved above)
2845                 ## || $typel eq 'Y'
2846
2847                 # must have space between grep and left paren; "grep(" will fail
2848                 || $is_sort_grep_map{$tokenl}
2849
2850                 # don't stick numbers next to left parens, as in:
2851                 #use Mail::Internet 1.28 (); (see Entity.pm, Head.pm, Test.pm)
2852                 || $typel eq 'n'
2853             )
2854           )    ## end $tokenr_is_open_paren
2855
2856           # retain any space after here doc operator ( hereerr.t)
2857           || $typel eq 'h'
2858
2859           # be careful with a space around ++ and --, to avoid ambiguity as to
2860           # which token it applies
2861           || $typer  =~ /^(pp|mm)$/ && $tokenl !~ /^[\;\{\(\[]/
2862           || $typel  =~ /^(\+\+|\-\-)$/
2863           && $tokenr !~ /^[\;\}\)\]]/
2864
2865           # need space after foreach my; for example, this will fail in
2866           # older versions of Perl:
2867           # foreach my$ft(@filetypes)...
2868           || (
2869             $tokenl eq 'my'
2870
2871             #  /^(for|foreach)$/
2872             && $is_for_foreach{$tokenll}
2873             && $tokenr =~ /^\$/
2874           )
2875
2876           # We must be sure that a space between a ? and a quoted string
2877           # remains if the space before the ? remains.  [Loca.pm, lockarea]
2878           # ie,
2879           #    $b=join $comma ? ',' : ':', @_;  # ok
2880           #    $b=join $comma?',' : ':', @_;    # ok!
2881           #    $b=join $comma ?',' : ':', @_;   # error!
2882           # Not really required:
2883           ## || ( ( $typel eq '?' ) && ( $typer eq 'Q' ) )
2884
2885           # Space stacked labels...
2886           # Not really required: Perl seems to accept non-spaced labels.
2887           ## || $typel eq 'J' && $typer eq 'J'
2888
2889           ;    # the value of this long logic sequence is the result we want
2890         return $result;
2891     }
2892 } ## end closure is_essential_whitespace
2893
2894 {    ## begin closure new_secret_operator_whitespace
2895
2896     my %secret_operators;
2897     my %is_leading_secret_token;
2898
2899     BEGIN {
2900
2901         # token lists for perl secret operators as compiled by Philippe Bruhat
2902         # at: https://metacpan.org/module/perlsecret
2903         %secret_operators = (
2904             'Goatse'             => [qw#= ( ) =#],        #=( )=
2905             'Venus1'             => [qw#0 +#],            # 0+
2906             'Venus2'             => [qw#+ 0#],            # +0
2907             'Enterprise'         => [qw#) x ! !#],        # ()x!!
2908             'Kite1'              => [qw#~ ~ <>#],         # ~~<>
2909             'Kite2'              => [qw#~~ <>#],          # ~~<>
2910             'Winking Fat Comma'  => [ ( ',', '=>' ) ],    # ,=>
2911             'Bang bang         ' => [qw#! !#],            # !!
2912         );
2913
2914         # The following operators and constants are not included because they
2915         # are normally kept tight by perltidy:
2916         # ~~ <~>
2917         #
2918
2919         # Make a lookup table indexed by the first token of each operator:
2920         # first token => [list, list, ...]
2921         foreach my $value ( values(%secret_operators) ) {
2922             my $tok = $value->[0];
2923             push @{ $is_leading_secret_token{$tok} }, $value;
2924         }
2925     }
2926
2927     sub new_secret_operator_whitespace {
2928
2929         my ( $rlong_array, $rwhitespace_flags ) = @_;
2930
2931         # Loop over all tokens in this line
2932         my ( $token, $type );
2933         my $jmax = @{$rlong_array} - 1;
2934         foreach my $j ( 0 .. $jmax ) {
2935
2936             $token = $rlong_array->[$j]->[_TOKEN_];
2937             $type  = $rlong_array->[$j]->[_TYPE_];
2938
2939             # Skip unless this token might start a secret operator
2940             next if ( $type eq 'b' );
2941             next unless ( $is_leading_secret_token{$token} );
2942
2943             #      Loop over all secret operators with this leading token
2944             foreach my $rpattern ( @{ $is_leading_secret_token{$token} } ) {
2945                 my $jend = $j - 1;
2946                 foreach my $tok ( @{$rpattern} ) {
2947                     $jend++;
2948                     $jend++
2949
2950                       if ( $jend <= $jmax
2951                         && $rlong_array->[$jend]->[_TYPE_] eq 'b' );
2952                     if (   $jend > $jmax
2953                         || $tok ne $rlong_array->[$jend]->[_TOKEN_] )
2954                     {
2955                         $jend = undef;
2956                         last;
2957                     }
2958                 }
2959
2960                 if ($jend) {
2961
2962                     # set flags to prevent spaces within this operator
2963                     foreach my $jj ( $j + 1 .. $jend ) {
2964                         $rwhitespace_flags->[$jj] = WS_NO;
2965                     }
2966                     $j = $jend;
2967                     last;
2968                 }
2969             }    ##      End Loop over all operators
2970         }    ## End loop over all tokens
2971         return;
2972     }    # End sub
2973 } ## end closure new_secret_operator_whitespace
2974
2975 {    ## begin closure set_bond_strengths
2976
2977     # These routines and variables are involved in deciding where to break very
2978     # long lines.
2979
2980     my %is_good_keyword_breakpoint;
2981     my %is_lt_gt_le_ge;
2982     my %is_container_token;
2983
2984     my %binary_bond_strength_nospace;
2985     my %binary_bond_strength;
2986     my %nobreak_lhs;
2987     my %nobreak_rhs;
2988
2989     my @bias_tokens;
2990     my %bias_hash;
2991     my %bias;
2992     my $delta_bias;
2993
2994     sub initialize_bond_strength_hashes {
2995
2996         my @q;
2997         @q = qw(if unless while until for foreach);
2998         @is_good_keyword_breakpoint{@q} = (1) x scalar(@q);
2999
3000         @q = qw(lt gt le ge);
3001         @is_lt_gt_le_ge{@q} = (1) x scalar(@q);
3002
3003         @q = qw/ ( [ { } ] ) /;
3004         @is_container_token{@q} = (1) x scalar(@q);
3005
3006         # The decision about where to break a line depends upon a "bond
3007         # strength" between tokens.  The LOWER the bond strength, the MORE
3008         # likely a break.  A bond strength may be any value but to simplify
3009         # things there are several pre-defined strength levels:
3010
3011         #    NO_BREAK    => 10000;
3012         #    VERY_STRONG => 100;
3013         #    STRONG      => 2.1;
3014         #    NOMINAL     => 1.1;
3015         #    WEAK        => 0.8;
3016         #    VERY_WEAK   => 0.55;
3017
3018         # The strength values are based on trial-and-error, and need to be
3019         # tweaked occasionally to get desired results.  Some comments:
3020         #
3021         #   1. Only relative strengths are important.  small differences
3022         #      in strengths can make big formatting differences.
3023         #   2. Each indentation level adds one unit of bond strength.
3024         #   3. A value of NO_BREAK makes an unbreakable bond
3025         #   4. A value of VERY_WEAK is the strength of a ','
3026         #   5. Values below NOMINAL are considered ok break points.
3027         #   6. Values above NOMINAL are considered poor break points.
3028         #
3029         # The bond strengths should roughly follow precedence order where
3030         # possible.  If you make changes, please check the results very
3031         # carefully on a variety of scripts.  Testing with the -extrude
3032         # options is particularly helpful in exercising all of the rules.
3033
3034         # Wherever possible, bond strengths are defined in the following
3035         # tables.  There are two main stages to setting bond strengths and
3036         # two types of tables:
3037         #
3038         # The first stage involves looking at each token individually and
3039         # defining left and right bond strengths, according to if we want
3040         # to break to the left or right side, and how good a break point it
3041         # is.  For example tokens like =, ||, && make good break points and
3042         # will have low strengths, but one might want to break on either
3043         # side to put them at the end of one line or beginning of the next.
3044         #
3045         # The second stage involves looking at certain pairs of tokens and
3046         # defining a bond strength for that particular pair.  This second
3047         # stage has priority.
3048
3049         #---------------------------------------------------------------
3050         # Bond Strength BEGIN Section 1.
3051         # Set left and right bond strengths of individual tokens.
3052         #---------------------------------------------------------------
3053
3054         # NOTE: NO_BREAK's set in this section first are HINTS which will
3055         # probably not be honored. Essential NO_BREAKS's should be set in
3056         # BEGIN Section 2 or hardwired in the NO_BREAK coding near the end
3057         # of this subroutine.
3058
3059         # Note that we are setting defaults in this section.  The user
3060         # cannot change bond strengths but can cause the left and right
3061         # bond strengths of any token type to be swapped through the use of
3062         # the -wba and -wbb flags. In this way the user can determine if a
3063         # breakpoint token should appear at the end of one line or the
3064         # beginning of the next line.
3065
3066         %right_bond_strength          = ();
3067         %left_bond_strength           = ();
3068         %binary_bond_strength_nospace = ();
3069         %binary_bond_strength         = ();
3070         %nobreak_lhs                  = ();
3071         %nobreak_rhs                  = ();
3072
3073         # The hash keys in this section are token types, plus the text of
3074         # certain keywords like 'or', 'and'.
3075
3076         # no break around possible filehandle
3077         $left_bond_strength{'Z'}  = NO_BREAK;
3078         $right_bond_strength{'Z'} = NO_BREAK;
3079
3080         # never put a bare word on a new line:
3081         # example print (STDERR, "bla"); will fail with break after (
3082         $left_bond_strength{'w'} = NO_BREAK;
3083
3084         # blanks always have infinite strength to force breaks after
3085         # real tokens
3086         $right_bond_strength{'b'} = NO_BREAK;
3087
3088         # try not to break on exponentation
3089         @q                       = qw# ** .. ... <=> #;
3090         @left_bond_strength{@q}  = (STRONG) x scalar(@q);
3091         @right_bond_strength{@q} = (STRONG) x scalar(@q);
3092
3093         # The comma-arrow has very low precedence but not a good break point
3094         $left_bond_strength{'=>'}  = NO_BREAK;
3095         $right_bond_strength{'=>'} = NOMINAL;
3096
3097         # ok to break after label
3098         $left_bond_strength{'J'}  = NO_BREAK;
3099         $right_bond_strength{'J'} = NOMINAL;
3100         $left_bond_strength{'j'}  = STRONG;
3101         $right_bond_strength{'j'} = STRONG;
3102         $left_bond_strength{'A'}  = STRONG;
3103         $right_bond_strength{'A'} = STRONG;
3104
3105         $left_bond_strength{'->'}  = STRONG;
3106         $right_bond_strength{'->'} = VERY_STRONG;
3107
3108         $left_bond_strength{'CORE::'}  = NOMINAL;
3109         $right_bond_strength{'CORE::'} = NO_BREAK;
3110
3111         # breaking AFTER modulus operator is ok:
3112         @q = qw< % >;
3113         @left_bond_strength{@q} = (STRONG) x scalar(@q);
3114         @right_bond_strength{@q} =
3115           ( 0.1 * NOMINAL + 0.9 * STRONG ) x scalar(@q);
3116
3117         # Break AFTER math operators * and /
3118         @q                       = qw< * / x  >;
3119         @left_bond_strength{@q}  = (STRONG) x scalar(@q);
3120         @right_bond_strength{@q} = (NOMINAL) x scalar(@q);
3121
3122         # Break AFTER weakest math operators + and -
3123         # Make them weaker than * but a bit stronger than '.'
3124         @q = qw< + - >;
3125         @left_bond_strength{@q} = (STRONG) x scalar(@q);
3126         @right_bond_strength{@q} =
3127           ( 0.91 * NOMINAL + 0.09 * WEAK ) x scalar(@q);
3128
3129         # Define left strength of unary plus and minus (fixes case b511)
3130         $left_bond_strength{p} = $left_bond_strength{'+'};
3131         $left_bond_strength{m} = $left_bond_strength{'-'};
3132
3133         # And make right strength of unary plus and minus very high.
3134         # Fixes cases b670 b790
3135         $right_bond_strength{p} = NO_BREAK;
3136         $right_bond_strength{m} = NO_BREAK;
3137
3138         # breaking BEFORE these is just ok:
3139         @q                       = qw# >> << #;
3140         @right_bond_strength{@q} = (STRONG) x scalar(@q);
3141         @left_bond_strength{@q}  = (NOMINAL) x scalar(@q);
3142
3143         # breaking before the string concatenation operator seems best
3144         # because it can be hard to see at the end of a line
3145         $right_bond_strength{'.'} = STRONG;
3146         $left_bond_strength{'.'}  = 0.9 * NOMINAL + 0.1 * WEAK;
3147
3148         @q                       = qw< } ] ) R >;
3149         @left_bond_strength{@q}  = (STRONG) x scalar(@q);
3150         @right_bond_strength{@q} = (NOMINAL) x scalar(@q);
3151
3152         # make these a little weaker than nominal so that they get
3153         # favored for end-of-line characters
3154         @q = qw< != == =~ !~ ~~ !~~ >;
3155         @left_bond_strength{@q} = (STRONG) x scalar(@q);
3156         @right_bond_strength{@q} =
3157           ( 0.9 * NOMINAL + 0.1 * WEAK ) x scalar(@q);
3158
3159         # break AFTER these
3160         @q = qw# < >  | & >= <= #;
3161         @left_bond_strength{@q} = (VERY_STRONG) x scalar(@q);
3162         @right_bond_strength{@q} =
3163           ( 0.8 * NOMINAL + 0.2 * WEAK ) x scalar(@q);
3164
3165         # breaking either before or after a quote is ok
3166         # but bias for breaking before a quote
3167         $left_bond_strength{'Q'}  = NOMINAL;
3168         $right_bond_strength{'Q'} = NOMINAL + 0.02;
3169         $left_bond_strength{'q'}  = NOMINAL;
3170         $right_bond_strength{'q'} = NOMINAL;
3171
3172         # starting a line with a keyword is usually ok
3173         $left_bond_strength{'k'} = NOMINAL;
3174
3175         # we usually want to bond a keyword strongly to what immediately
3176         # follows, rather than leaving it stranded at the end of a line
3177         $right_bond_strength{'k'} = STRONG;
3178
3179         $left_bond_strength{'G'}  = NOMINAL;
3180         $right_bond_strength{'G'} = STRONG;
3181
3182         # assignment operators
3183         @q = qw(
3184           = **= += *= &= <<= &&=
3185           -= /= |= >>= ||= //=
3186           .= %= ^=
3187           x=
3188         );
3189
3190         # Default is to break AFTER various assignment operators
3191         @left_bond_strength{@q} = (STRONG) x scalar(@q);
3192         @right_bond_strength{@q} =
3193           ( 0.4 * WEAK + 0.6 * VERY_WEAK ) x scalar(@q);
3194
3195         # Default is to break BEFORE '&&' and '||' and '//'
3196         # set strength of '||' to same as '=' so that chains like
3197         # $a = $b || $c || $d   will break before the first '||'
3198         $right_bond_strength{'||'} = NOMINAL;
3199         $left_bond_strength{'||'}  = $right_bond_strength{'='};
3200
3201         # same thing for '//'
3202         $right_bond_strength{'//'} = NOMINAL;
3203         $left_bond_strength{'//'}  = $right_bond_strength{'='};
3204
3205         # set strength of && a little higher than ||
3206         $right_bond_strength{'&&'} = NOMINAL;
3207         $left_bond_strength{'&&'}  = $left_bond_strength{'||'} + 0.1;
3208
3209         $left_bond_strength{';'}  = VERY_STRONG;
3210         $right_bond_strength{';'} = VERY_WEAK;
3211         $left_bond_strength{'f'}  = VERY_STRONG;
3212
3213         # make right strength of for ';' a little less than '='
3214         # to make for contents break after the ';' to avoid this:
3215         #   for ( $j = $number_of_fields - 1 ; $j < $item_count ; $j +=
3216         #     $number_of_fields )
3217         # and make it weaker than ',' and 'and' too
3218         $right_bond_strength{'f'} = VERY_WEAK - 0.03;
3219
3220         # The strengths of ?/: should be somewhere between
3221         # an '=' and a quote (NOMINAL),
3222         # make strength of ':' slightly less than '?' to help
3223         # break long chains of ? : after the colons
3224         $left_bond_strength{':'}  = 0.4 * WEAK + 0.6 * NOMINAL;
3225         $right_bond_strength{':'} = NO_BREAK;
3226         $left_bond_strength{'?'}  = $left_bond_strength{':'} + 0.01;
3227         $right_bond_strength{'?'} = NO_BREAK;
3228
3229         $left_bond_strength{','}  = VERY_STRONG;
3230         $right_bond_strength{','} = VERY_WEAK;
3231
3232         # remaining digraphs and trigraphs not defined above
3233         @q                       = qw( :: <> ++ --);
3234         @left_bond_strength{@q}  = (WEAK) x scalar(@q);
3235         @right_bond_strength{@q} = (STRONG) x scalar(@q);
3236
3237         # Set bond strengths of certain keywords
3238         # make 'or', 'err', 'and' slightly weaker than a ','
3239         $left_bond_strength{'and'}  = VERY_WEAK - 0.01;
3240         $left_bond_strength{'or'}   = VERY_WEAK - 0.02;
3241         $left_bond_strength{'err'}  = VERY_WEAK - 0.02;
3242         $left_bond_strength{'xor'}  = VERY_WEAK - 0.01;
3243         $right_bond_strength{'and'} = NOMINAL;
3244         $right_bond_strength{'or'}  = NOMINAL;
3245         $right_bond_strength{'err'} = NOMINAL;
3246         $right_bond_strength{'xor'} = NOMINAL;
3247
3248         #---------------------------------------------------------------
3249         # Bond Strength BEGIN Section 2.
3250         # Set binary rules for bond strengths between certain token types.
3251         #---------------------------------------------------------------
3252
3253         #  We have a little problem making tables which apply to the
3254         #  container tokens.  Here is a list of container tokens and
3255         #  their types:
3256         #
3257         #   type    tokens // meaning
3258         #      {    {, [, ( // indent
3259         #      }    }, ], ) // outdent
3260         #      [    [ // left non-structural [ (enclosing an array index)
3261         #      ]    ] // right non-structural square bracket
3262         #      (    ( // left non-structural paren
3263         #      )    ) // right non-structural paren
3264         #      L    { // left non-structural curly brace (enclosing a key)
3265         #      R    } // right non-structural curly brace
3266         #
3267         #  Some rules apply to token types and some to just the token
3268         #  itself.  We solve the problem by combining type and token into a
3269         #  new hash key for the container types.
3270         #
3271         #  If a rule applies to a token 'type' then we need to make rules
3272         #  for each of these 'type.token' combinations:
3273         #  Type    Type.Token
3274         #  {       {{, {[, {(
3275         #  [       [[
3276         #  (       ((
3277         #  L       L{
3278         #  }       }}, }], })
3279         #  ]       ]]
3280         #  )       ))
3281         #  R       R}
3282         #
3283         #  If a rule applies to a token then we need to make rules for
3284         #  these 'type.token' combinations:
3285         #  Token   Type.Token
3286         #  {       {{, L{
3287         #  [       {[, [[
3288         #  (       {(, ((
3289         #  }       }}, R}
3290         #  ]       }], ]]
3291         #  )       }), ))
3292
3293         # allow long lines before final { in an if statement, as in:
3294         #    if (..........
3295         #      ..........)
3296         #    {
3297         #
3298         # Otherwise, the line before the { tends to be too short.
3299
3300         $binary_bond_strength{'))'}{'{{'} = VERY_WEAK + 0.03;
3301         $binary_bond_strength{'(('}{'{{'} = NOMINAL;
3302
3303         # break on something like '} (', but keep this stronger than a ','
3304         # example is in 'howe.pl'
3305         $binary_bond_strength{'R}'}{'(('} = 0.8 * VERY_WEAK + 0.2 * WEAK;
3306         $binary_bond_strength{'}}'}{'(('} = 0.8 * VERY_WEAK + 0.2 * WEAK;
3307
3308         # keep matrix and hash indices together
3309         # but make them a little below STRONG to allow breaking open
3310         # something like {'some-word'}{'some-very-long-word'} at the }{
3311         # (bracebrk.t)
3312         $binary_bond_strength{']]'}{'[['} = 0.9 * STRONG + 0.1 * NOMINAL;
3313         $binary_bond_strength{']]'}{'L{'} = 0.9 * STRONG + 0.1 * NOMINAL;
3314         $binary_bond_strength{'R}'}{'[['} = 0.9 * STRONG + 0.1 * NOMINAL;
3315         $binary_bond_strength{'R}'}{'L{'} = 0.9 * STRONG + 0.1 * NOMINAL;
3316
3317         # increase strength to the point where a break in the following
3318         # will be after the opening paren rather than at the arrow:
3319         #    $a->$b($c);
3320         $binary_bond_strength{'i'}{'->'} = 1.45 * STRONG;
3321
3322     # Note that the following alternative strength would make the break at the
3323     # '->' rather than opening the '('.  Both have advantages and disadvantages.
3324     # $binary_bond_strength{'i'}{'->'} = 0.5*STRONG + 0.5 * NOMINAL; #
3325
3326         $binary_bond_strength{'))'}{'->'} = 0.1 * STRONG + 0.9 * NOMINAL;
3327         $binary_bond_strength{']]'}{'->'} = 0.1 * STRONG + 0.9 * NOMINAL;
3328         $binary_bond_strength{'})'}{'->'} = 0.1 * STRONG + 0.9 * NOMINAL;
3329         $binary_bond_strength{'}]'}{'->'} = 0.1 * STRONG + 0.9 * NOMINAL;
3330         $binary_bond_strength{'}}'}{'->'} = 0.1 * STRONG + 0.9 * NOMINAL;
3331         $binary_bond_strength{'R}'}{'->'} = 0.1 * STRONG + 0.9 * NOMINAL;
3332
3333         $binary_bond_strength{'))'}{'[['} = 0.2 * STRONG + 0.8 * NOMINAL;
3334         $binary_bond_strength{'})'}{'[['} = 0.2 * STRONG + 0.8 * NOMINAL;
3335         $binary_bond_strength{'))'}{'{['} = 0.2 * STRONG + 0.8 * NOMINAL;
3336         $binary_bond_strength{'})'}{'{['} = 0.2 * STRONG + 0.8 * NOMINAL;
3337
3338         #---------------------------------------------------------------
3339         # Binary NO_BREAK rules
3340         #---------------------------------------------------------------
3341
3342         # use strict requires that bare word and => not be separated
3343         $binary_bond_strength{'C'}{'=>'} = NO_BREAK;
3344         $binary_bond_strength{'U'}{'=>'} = NO_BREAK;
3345
3346         # Never break between a bareword and a following paren because
3347         # perl may give an error.  For example, if a break is placed
3348         # between 'to_filehandle' and its '(' the following line will
3349         # give a syntax error [Carp.pm]: my( $no) =fileno(
3350         # to_filehandle( $in)) ;
3351         $binary_bond_strength{'C'}{'(('} = NO_BREAK;
3352         $binary_bond_strength{'C'}{'{('} = NO_BREAK;
3353         $binary_bond_strength{'U'}{'(('} = NO_BREAK;
3354         $binary_bond_strength{'U'}{'{('} = NO_BREAK;
3355
3356         # use strict requires that bare word within braces not start new
3357         # line
3358         $binary_bond_strength{'L{'}{'w'} = NO_BREAK;
3359
3360         $binary_bond_strength{'w'}{'R}'} = NO_BREAK;
3361
3362         # The following two rules prevent a syntax error caused by breaking up
3363         # a construction like '{-y}'.  The '-' quotes the 'y' and prevents
3364         # it from being taken as a transliteration. We have to keep
3365         # token types 'L m w' together to prevent this error.
3366         $binary_bond_strength{'L{'}{'m'}        = NO_BREAK;
3367         $binary_bond_strength_nospace{'m'}{'w'} = NO_BREAK;
3368
3369         # keep 'bareword-' together, but only if there is no space between
3370         # the word and dash. Do not keep together if there is a space.
3371         # example 'use perl6-alpha'
3372         $binary_bond_strength_nospace{'w'}{'m'} = NO_BREAK;
3373
3374         # use strict requires that bare word and => not be separated
3375         $binary_bond_strength{'w'}{'=>'} = NO_BREAK;
3376
3377         # use strict does not allow separating type info from trailing { }
3378         # testfile is readmail.pl
3379         $binary_bond_strength{'t'}{'L{'} = NO_BREAK;
3380         $binary_bond_strength{'i'}{'L{'} = NO_BREAK;
3381
3382         # As a defensive measure, do not break between a '(' and a
3383         # filehandle.  In some cases, this can cause an error.  For
3384         # example, the following program works:
3385         #    my $msg="hi!\n";
3386         #    print
3387         #    ( STDOUT
3388         #    $msg
3389         #    );
3390         #
3391         # But this program fails:
3392         #    my $msg="hi!\n";
3393         #    print
3394         #    (
3395         #    STDOUT
3396         #    $msg
3397         #    );
3398         #
3399         # This is normally only a problem with the 'extrude' option
3400         $binary_bond_strength{'(('}{'Y'} = NO_BREAK;
3401         $binary_bond_strength{'{('}{'Y'} = NO_BREAK;
3402
3403         # never break between sub name and opening paren
3404         $binary_bond_strength{'w'}{'(('} = NO_BREAK;
3405         $binary_bond_strength{'w'}{'{('} = NO_BREAK;
3406
3407         # keep '}' together with ';'
3408         $binary_bond_strength{'}}'}{';'} = NO_BREAK;
3409
3410         # Breaking before a ++ can cause perl to guess wrong. For
3411         # example the following line will cause a syntax error
3412         # with -extrude if we break between '$i' and '++' [fixstyle2]
3413         #   print( ( $i++ & 1 ) ? $_ : ( $change{$_} || $_ ) );
3414         $nobreak_lhs{'++'} = NO_BREAK;
3415
3416         # Do not break before a possible file handle
3417         $nobreak_lhs{'Z'} = NO_BREAK;
3418
3419         # use strict hates bare words on any new line.  For
3420         # example, a break before the underscore here provokes the
3421         # wrath of use strict:
3422         # if ( -r $fn && ( -s _ || $AllowZeroFilesize)) {
3423         $nobreak_rhs{'F'}      = NO_BREAK;
3424         $nobreak_rhs{'CORE::'} = NO_BREAK;
3425
3426         # To prevent the tokenizer from switching between types 'w' and 'G' we
3427         # need to avoid breaking between type 'G' and the following code block
3428         # brace. Fixes case b929.
3429         $nobreak_rhs{G} = NO_BREAK;
3430
3431         #---------------------------------------------------------------
3432         # Bond Strength BEGIN Section 3.
3433         # Define tables and values for applying a small bias to the above
3434         # values.
3435         #---------------------------------------------------------------
3436         # Adding a small 'bias' to strengths is a simple way to make a line
3437         # break at the first of a sequence of identical terms.  For
3438         # example, to force long string of conditional operators to break
3439         # with each line ending in a ':', we can add a small number to the
3440         # bond strength of each ':' (colon.t)
3441         @bias_tokens = qw( : && || f and or . );       # tokens which get bias
3442         %bias_hash   = map { $_ => 0 } @bias_tokens;
3443         $delta_bias  = 0.0001;    # a very small strength level
3444         return;
3445
3446     } ## end sub initialize_bond_strength_hashes
3447
3448     use constant DEBUG_BOND => 0;
3449
3450     sub set_bond_strengths {
3451
3452         my ($self) = @_;
3453
3454         my $rK_weld_right = $self->[_rK_weld_right_];
3455         my $rK_weld_left  = $self->[_rK_weld_left_];
3456
3457         # patch-its always ok to break at end of line
3458         $nobreak_to_go[$max_index_to_go] = 0;
3459
3460         # we start a new set of bias values for each line
3461         %bias = %bias_hash;
3462
3463         my $code_bias = -.01;    # bias for closing block braces
3464
3465         my $type         = 'b';
3466         my $token        = ' ';
3467         my $token_length = 1;
3468         my $last_type;
3469         my $last_nonblank_type  = $type;
3470         my $last_nonblank_token = $token;
3471         my $list_str            = $left_bond_strength{'?'};
3472
3473         my ( $block_type, $i_next, $i_next_nonblank, $next_nonblank_token,
3474             $next_nonblank_type, $next_token, $next_type,
3475             $total_nesting_depth, );
3476
3477         # main loop to compute bond strengths between each pair of tokens
3478         foreach my $i ( 0 .. $max_index_to_go ) {
3479             $last_type = $type;
3480             if ( $type ne 'b' ) {
3481                 $last_nonblank_type  = $type;
3482                 $last_nonblank_token = $token;
3483             }
3484             $type = $types_to_go[$i];
3485
3486             # strength on both sides of a blank is the same
3487             if ( $type eq 'b' && $last_type ne 'b' ) {
3488                 $bond_strength_to_go[$i] = $bond_strength_to_go[ $i - 1 ];
3489                 next;
3490             }
3491
3492             $token               = $tokens_to_go[$i];
3493             $token_length        = $token_lengths_to_go[$i];
3494             $block_type          = $block_type_to_go[$i];
3495             $i_next              = $i + 1;
3496             $next_type           = $types_to_go[$i_next];
3497             $next_token          = $tokens_to_go[$i_next];
3498             $total_nesting_depth = $nesting_depth_to_go[$i_next];
3499             $i_next_nonblank     = ( ( $next_type eq 'b' ) ? $i + 2 : $i + 1 );
3500             $next_nonblank_type  = $types_to_go[$i_next_nonblank];
3501             $next_nonblank_token = $tokens_to_go[$i_next_nonblank];
3502
3503             my $seqno               = $type_sequence_to_go[$i];
3504             my $next_nonblank_seqno = $type_sequence_to_go[$i_next_nonblank];
3505
3506             # We are computing the strength of the bond between the current
3507             # token and the NEXT token.
3508
3509             #---------------------------------------------------------------
3510             # Bond Strength Section 1:
3511             # First Approximation.
3512             # Use minimum of individual left and right tabulated bond
3513             # strengths.
3514             #---------------------------------------------------------------
3515             my $bsr = $right_bond_strength{$type};
3516             my $bsl = $left_bond_strength{$next_nonblank_type};
3517
3518             # define right bond strengths of certain keywords
3519             if ( $type eq 'k' && defined( $right_bond_strength{$token} ) ) {
3520                 $bsr = $right_bond_strength{$token};
3521             }
3522             elsif ( $token eq 'ne' or $token eq 'eq' ) {
3523                 $bsr = NOMINAL;
3524             }
3525
3526             # set terminal bond strength to the nominal value
3527             # this will cause good preceding breaks to be retained
3528             if ( $i_next_nonblank > $max_index_to_go ) {
3529                 $bsl = NOMINAL;
3530             }
3531
3532             # define right bond strengths of certain keywords
3533             if ( $next_nonblank_type eq 'k'
3534                 && defined( $left_bond_strength{$next_nonblank_token} ) )
3535             {
3536                 $bsl = $left_bond_strength{$next_nonblank_token};
3537             }
3538             elsif ($next_nonblank_token eq 'ne'
3539                 or $next_nonblank_token eq 'eq' )
3540             {
3541                 $bsl = NOMINAL;
3542             }
3543             elsif ( $is_lt_gt_le_ge{$next_nonblank_token} ) {
3544                 $bsl = 0.9 * NOMINAL + 0.1 * STRONG;
3545             }
3546
3547             # Use the minimum of the left and right strengths.  Note: it might
3548             # seem that we would want to keep a NO_BREAK if either token has
3549             # this value.  This didn't work, for example because in an arrow
3550             # list, it prevents the comma from separating from the following
3551             # bare word (which is probably quoted by its arrow).  So necessary
3552             # NO_BREAK's have to be handled as special cases in the final
3553             # section.
3554             if ( !defined($bsr) ) { $bsr = VERY_STRONG }
3555             if ( !defined($bsl) ) { $bsl = VERY_STRONG }
3556             my $bond_str   = ( $bsr < $bsl ) ? $bsr : $bsl;
3557             my $bond_str_1 = $bond_str;
3558
3559             #---------------------------------------------------------------
3560             # Bond Strength Section 2:
3561             # Apply hardwired rules..
3562             #---------------------------------------------------------------
3563
3564             # Patch to put terminal or clauses on a new line: Weaken the bond
3565             # at an || followed by die or similar keyword to make the terminal
3566             # or clause fall on a new line, like this:
3567             #
3568             #   my $class = shift
3569             #     || die "Cannot add broadcast:  No class identifier found";
3570             #
3571             # Otherwise the break will be at the previous '=' since the || and
3572             # = have the same starting strength and the or is biased, like
3573             # this:
3574             #
3575             # my $class =
3576             #   shift || die "Cannot add broadcast:  No class identifier found";
3577             #
3578             # In any case if the user places a break at either the = or the ||
3579             # it should remain there.
3580             if ( $type eq '||' || $type eq 'k' && $token eq 'or' ) {
3581                 if ( $next_nonblank_token =~ /^(die|confess|croak|warn)$/ ) {
3582                     if ( $want_break_before{$token} && $i > 0 ) {
3583                         $bond_strength_to_go[ $i - 1 ] -= $delta_bias;
3584
3585                         # keep bond strength of a token and its following blank
3586                         # the same
3587                         if ( $types_to_go[ $i - 1 ] eq 'b' && $i > 2 ) {
3588                             $bond_strength_to_go[ $i - 2 ] -= $delta_bias;
3589                         }
3590                     }
3591                     else {
3592                         $bond_str -= $delta_bias;
3593                     }
3594                 }
3595             }
3596
3597             # good to break after end of code blocks
3598             if ( $type eq '}' && $block_type && $next_nonblank_type ne ';' ) {
3599
3600                 $bond_str = 0.5 * WEAK + 0.5 * VERY_WEAK + $code_bias;
3601                 $code_bias += $delta_bias;
3602             }
3603
3604             if ( $type eq 'k' ) {
3605
3606                 # allow certain control keywords to stand out
3607                 if (   $next_nonblank_type eq 'k'
3608                     && $is_last_next_redo_return{$token} )
3609                 {
3610                     $bond_str = 0.45 * WEAK + 0.55 * VERY_WEAK;
3611                 }
3612
3613                 # Don't break after keyword my.  This is a quick fix for a
3614                 # rare problem with perl. An example is this line from file
3615                 # Container.pm:
3616
3617                 # foreach my $question( Debian::DebConf::ConfigDb::gettree(
3618                 # $this->{'question'} ) )
3619
3620                 if ( $token eq 'my' ) {
3621                     $bond_str = NO_BREAK;
3622                 }
3623
3624             }
3625
3626             # good to break before 'if', 'unless', etc
3627             if ( $is_if_brace_follower{$next_nonblank_token} ) {
3628                 $bond_str = VERY_WEAK;
3629             }
3630
3631             if ( $next_nonblank_type eq 'k' && $type ne 'CORE::' ) {
3632
3633                 if ( $is_keyword_returning_list{$next_nonblank_token} ) {
3634                     $bond_str = $list_str if ( $bond_str > $list_str );
3635                 }
3636
3637                 # keywords like 'unless', 'if', etc, within statements
3638                 # make good breaks
3639                 if ( $is_good_keyword_breakpoint{$next_nonblank_token} ) {
3640                     $bond_str = VERY_WEAK / 1.05;
3641                 }
3642             }
3643
3644             # try not to break before a comma-arrow
3645             elsif ( $next_nonblank_type eq '=>' ) {
3646                 if ( $bond_str < STRONG ) { $bond_str = STRONG }
3647             }
3648
3649             #---------------------------------------------------------------
3650             # Additional hardwired NOBREAK rules
3651             #---------------------------------------------------------------
3652
3653             # map1.t -- correct for a quirk in perl
3654             if (   $token eq '('
3655                 && $next_nonblank_type eq 'i'
3656                 && $last_nonblank_type eq 'k'
3657                 && $is_sort_map_grep{$last_nonblank_token} )
3658
3659               #     /^(sort|map|grep)$/ )
3660             {
3661                 $bond_str = NO_BREAK;
3662             }
3663
3664             # extrude.t: do not break before paren at:
3665             #    -l pid_filename(
3666             if ( $last_nonblank_type eq 'F' && $next_nonblank_token eq '(' ) {
3667                 $bond_str = NO_BREAK;
3668             }
3669
3670             # in older version of perl, use strict can cause problems with
3671             # breaks before bare words following opening parens.  For example,
3672             # this will fail under older versions if a break is made between
3673             # '(' and 'MAIL': use strict; open( MAIL, "a long filename or
3674             # command"); close MAIL;
3675             if ( $type eq '{' ) {
3676
3677                 if ( $token eq '(' && $next_nonblank_type eq 'w' ) {
3678
3679                     # but it's fine to break if the word is followed by a '=>'
3680                     # or if it is obviously a sub call
3681                     my $i_next_next_nonblank = $i_next_nonblank + 1;
3682                     my $next_next_type = $types_to_go[$i_next_next_nonblank];
3683                     if (   $next_next_type eq 'b'
3684                         && $i_next_nonblank < $max_index_to_go )
3685                     {
3686                         $i_next_next_nonblank++;
3687                         $next_next_type = $types_to_go[$i_next_next_nonblank];
3688                     }
3689
3690                     # We'll check for an old breakpoint and keep a leading
3691                     # bareword if it was that way in the input file.
3692                     # Presumably it was ok that way.  For example, the
3693                     # following would remain unchanged:
3694                     #
3695                     # @months = (
3696                     #   January,   February, March,    April,
3697                     #   May,       June,     July,     August,
3698                     #   September, October,  November, December,
3699                     # );
3700                     #
3701                     # This should be sufficient:
3702                     if (
3703                         !$old_breakpoint_to_go[$i]
3704                         && (   $next_next_type eq ','
3705                             || $next_next_type eq '}' )
3706                       )
3707                     {
3708                         $bond_str = NO_BREAK;
3709                     }
3710                 }
3711             }
3712
3713             # Do not break between a possible filehandle and a ? or / and do
3714             # not introduce a break after it if there is no blank
3715             # (extrude.t)
3716             elsif ( $type eq 'Z' ) {
3717
3718                 # don't break..
3719                 if (
3720
3721                     # if there is no blank and we do not want one. Examples:
3722                     #    print $x++    # do not break after $x
3723                     #    print HTML"HELLO"   # break ok after HTML
3724                     (
3725                            $next_type ne 'b'
3726                         && defined( $want_left_space{$next_type} )
3727                         && $want_left_space{$next_type} == WS_NO
3728                     )
3729
3730                     # or we might be followed by the start of a quote,
3731                     # and this is not an existing breakpoint; fixes c039.
3732                     || !$old_breakpoint_to_go[$i]
3733                     && substr( $next_nonblank_token, 0, 1 ) eq '/'
3734
3735                   )
3736                 {
3737                     $bond_str = NO_BREAK;
3738                 }
3739             }
3740
3741             # Breaking before a ? before a quote can cause trouble if
3742             # they are not separated by a blank.
3743             # Example: a syntax error occurs if you break before the ? here
3744             #  my$logic=join$all?' && ':' || ',@regexps;
3745             # From: Professional_Perl_Programming_Code/multifind.pl
3746             if ( $next_nonblank_type eq '?' ) {
3747                 $bond_str = NO_BREAK
3748                   if ( $types_to_go[ $i_next_nonblank + 1 ] eq 'Q' );
3749             }
3750
3751             # Breaking before a . followed by a number
3752             # can cause trouble if there is no intervening space
3753             # Example: a syntax error occurs if you break before the .2 here
3754             #  $str .= pack($endian.2, ensurrogate($ord));
3755             # From: perl58/Unicode.pm
3756             elsif ( $next_nonblank_type eq '.' ) {
3757                 $bond_str = NO_BREAK
3758                   if ( $types_to_go[ $i_next_nonblank + 1 ] eq 'n' );
3759             }
3760
3761             # Fix for c039
3762             elsif ( $type eq 'w' ) {
3763                 $bond_str = NO_BREAK
3764                   if ( !$old_breakpoint_to_go[$i]
3765                     && substr( $next_nonblank_token, 0, 1 ) eq '/' );
3766             }
3767
3768             my $bond_str_2 = $bond_str;
3769
3770             #---------------------------------------------------------------
3771             # End of hardwired rules
3772             #---------------------------------------------------------------
3773
3774             #---------------------------------------------------------------
3775             # Bond Strength Section 3:
3776             # Apply table rules. These have priority over the above
3777             # hardwired rules.
3778             #---------------------------------------------------------------
3779
3780             my $tabulated_bond_str;
3781             my $ltype = $type;
3782             my $rtype = $next_nonblank_type;
3783             if ( $seqno && $is_container_token{$token} ) {
3784                 $ltype = $type . $token;
3785             }
3786
3787             if (   $next_nonblank_seqno
3788                 && $is_container_token{$next_nonblank_token} )
3789             {
3790                 $rtype = $next_nonblank_type . $next_nonblank_token;
3791             }
3792
3793             # apply binary rules which apply regardless of space between tokens
3794             if ( $binary_bond_strength{$ltype}{$rtype} ) {
3795                 $bond_str           = $binary_bond_strength{$ltype}{$rtype};
3796                 $tabulated_bond_str = $bond_str;
3797             }
3798
3799             # apply binary rules which apply only if no space between tokens
3800             if ( $binary_bond_strength_nospace{$ltype}{$next_type} ) {
3801                 $bond_str           = $binary_bond_strength{$ltype}{$next_type};
3802                 $tabulated_bond_str = $bond_str;
3803             }
3804
3805             if ( $nobreak_rhs{$ltype} || $nobreak_lhs{$rtype} ) {
3806                 $bond_str           = NO_BREAK;
3807                 $tabulated_bond_str = $bond_str;
3808             }
3809             my $bond_str_3 = $bond_str;
3810
3811             # If the hardwired rules conflict with the tabulated bond
3812             # strength then there is an inconsistency that should be fixed
3813             DEBUG_BOND
3814               && $tabulated_bond_str
3815               && $bond_str_1
3816               && $bond_str_1 != $bond_str_2
3817               && $bond_str_2 != $tabulated_bond_str
3818               && do {
3819                 print STDERR
3820 "BOND_TABLES: ltype=$ltype rtype=$rtype $bond_str_1->$bond_str_2->$bond_str_3\n";
3821               };
3822
3823            #-----------------------------------------------------------------
3824            # Bond Strength Section 4:
3825            # Modify strengths of certain tokens which often occur in sequence
3826            # by adding a small bias to each one in turn so that the breaks
3827            # occur from left to right.
3828            #
3829            # Note that we only changing strengths by small amounts here,
3830            # and usually increasing, so we should not be altering any NO_BREAKs.
3831            # Other routines which check for NO_BREAKs will use a tolerance
3832            # of one to avoid any problem.
3833            #-----------------------------------------------------------------
3834
3835             # The bias tables use special keys:
3836             #   $type - if not keyword
3837             #   $token - if keyword, but map some keywords together
3838             my $left_key =
3839               $type eq 'k' ? $token eq 'err' ? 'or' : $token : $type;
3840             my $right_key =
3841                 $next_nonblank_type eq 'k'
3842               ? $next_nonblank_token eq 'err'
3843                   ? 'or'
3844                   : $next_nonblank_token
3845               : $next_nonblank_type;
3846
3847             if ( $type eq ',' ) {
3848
3849                 # add any bias set by sub scan_list at old comma break points
3850                 $bond_str += $bond_strength_to_go[$i];
3851
3852             }
3853
3854             # bias left token
3855             elsif ( defined( $bias{$left_key} ) ) {
3856                 if ( !$want_break_before{$left_key} ) {
3857                     $bias{$left_key} += $delta_bias;
3858                     $bond_str += $bias{$left_key};
3859                 }
3860             }
3861
3862             # bias right token
3863             if ( defined( $bias{$right_key} ) ) {
3864                 if ( $want_break_before{$right_key} ) {
3865
3866                     # for leading '.' align all but 'short' quotes; the idea
3867                     # is to not place something like "\n" on a single line.
3868                     if ( $right_key eq '.' ) {
3869                         unless (
3870                             $last_nonblank_type eq '.'
3871                             && ( $token_length <=
3872                                 $rOpts_short_concatenation_item_length )
3873                             && ( !$is_closing_token{$token} )
3874                           )
3875                         {
3876                             $bias{$right_key} += $delta_bias;
3877                         }
3878                     }
3879                     else {
3880                         $bias{$right_key} += $delta_bias;
3881                     }
3882                     $bond_str += $bias{$right_key};
3883                 }
3884             }
3885             my $bond_str_4 = $bond_str;
3886
3887             #---------------------------------------------------------------
3888             # Bond Strength Section 5:
3889             # Fifth Approximation.
3890             # Take nesting depth into account by adding the nesting depth
3891             # to the bond strength.
3892             #---------------------------------------------------------------
3893             my $strength;
3894
3895             if ( defined($bond_str) && !$nobreak_to_go[$i] ) {
3896                 if ( $total_nesting_depth > 0 ) {
3897                     $strength = $bond_str + $total_nesting_depth;
3898                 }
3899                 else {
3900                     $strength = $bond_str;
3901                 }
3902             }
3903             else {
3904                 $strength = NO_BREAK;
3905
3906                 # For critical code such as lines with here targets we must
3907                 # be absolutely sure that we do not allow a break.  So for
3908                 # these the nobreak flag exceeds 1 as a signal. Otherwise we
3909                 # can run into trouble when small tolerances are added.
3910                 $strength += 1 if ( $nobreak_to_go[$i] > 1 );
3911             }
3912
3913             #---------------------------------------------------------------
3914             # Bond Strength Section 6:
3915             # Sixth Approximation. Welds.
3916             #---------------------------------------------------------------
3917
3918             # Do not allow a break within welds
3919             if ( $total_weld_count && $seqno ) {
3920                 my $KK = $K_to_go[$i];
3921                 if ( $rK_weld_right->{$KK} ) {
3922                     $strength = NO_BREAK;
3923                 }
3924
3925                 # But encourage breaking after opening welded tokens
3926                 elsif ($rK_weld_left->{$KK}
3927                     && $is_opening_token{$token} )
3928                 {
3929                     $strength -= 1;
3930                 }
3931             }
3932
3933             # always break after side comment
3934             if ( $type eq '#' ) { $strength = 0 }
3935
3936             $bond_strength_to_go[$i] = $strength;
3937
3938             # Fix for case c001: be sure NO_BREAK's are enforced by later
3939             # routines, except at a '?' because '?' as quote delimiter is
3940             # deprecated.
3941             if ( $strength >= NO_BREAK && $next_nonblank_type ne '?' ) {
3942                 $nobreak_to_go[$i] ||= 1;
3943             }
3944
3945             DEBUG_BOND && do {
3946                 my $str = substr( $token, 0, 15 );
3947                 $str .= ' ' x ( 16 - length($str) );
3948                 print STDOUT
3949 "BOND:  i=$i $str $type $next_nonblank_type depth=$total_nesting_depth strength=$bond_str_1 -> $bond_str_2 -> $bond_str_3 -> $bond_str_4 $bond_str -> $strength \n";
3950             };
3951         } ## end main loop
3952         return;
3953     } ## end sub set_bond_strengths
3954 } ## end closure set_bond_strengths
3955
3956 sub bad_pattern {
3957
3958     # See if a pattern will compile. We have to use a string eval here,
3959     # but it should be safe because the pattern has been constructed
3960     # by this program.
3961     my ($pattern) = @_;
3962     eval "'##'=~/$pattern/";
3963     return $@;
3964 }
3965
3966 {    ## begin closure prepare_cuddled_block_types
3967
3968     my %no_cuddle;
3969
3970     # Add keywords here which really should not be cuddled
3971     BEGIN {
3972         my @q = qw(if unless for foreach while);
3973         @no_cuddle{@q} = (1) x scalar(@q);
3974     }
3975
3976     sub prepare_cuddled_block_types {
3977
3978         # the cuddled-else style, if used, is controlled by a hash that
3979         # we construct here
3980
3981         # Include keywords here which should not be cuddled
3982
3983         my $cuddled_string = "";
3984         if ( $rOpts->{'cuddled-else'} ) {
3985
3986             # set the default
3987             $cuddled_string = 'elsif else continue catch finally'
3988               unless ( $rOpts->{'cuddled-block-list-exclusive'} );
3989
3990             # This is the old equivalent but more complex version
3991             # $cuddled_string = 'if-elsif-else unless-elsif-else -continue ';
3992
3993             # Add users other blocks to be cuddled
3994             my $cuddled_block_list = $rOpts->{'cuddled-block-list'};
3995             if ($cuddled_block_list) {
3996                 $cuddled_string .= " " . $cuddled_block_list;
3997             }
3998
3999         }
4000
4001         # If we have a cuddled string of the form
4002         #  'try-catch-finally'
4003
4004         # we want to prepare a hash of the form
4005
4006         # $rcuddled_block_types = {
4007         #    'try' => {
4008         #        'catch'   => 1,
4009         #        'finally' => 1
4010         #    },
4011         # };
4012
4013         # use -dcbl to dump this hash
4014
4015         # Multiple such strings are input as a space or comma separated list
4016
4017         # If we get two lists with the same leading type, such as
4018         #   -cbl = "-try-catch-finally  -try-catch-otherwise"
4019         # then they will get merged as follows:
4020         # $rcuddled_block_types = {
4021         #    'try' => {
4022         #        'catch'     => 1,
4023         #        'finally'   => 2,
4024         #        'otherwise' => 1,
4025         #    },
4026         # };
4027         # This will allow either type of chain to be followed.
4028
4029         $cuddled_string =~ s/,/ /g;    # allow space or comma separated lists
4030         my @cuddled_strings = split /\s+/, $cuddled_string;
4031
4032         $rcuddled_block_types = {};
4033
4034         # process each dash-separated string...
4035         my $string_count = 0;
4036         foreach my $string (@cuddled_strings) {
4037             next unless $string;
4038             my @words = split /-+/, $string;    # allow multiple dashes
4039
4040             # we could look for and report possible errors here...
4041             next unless ( @words > 0 );
4042
4043            # allow either '-continue' or *-continue' for arbitrary starting type
4044             my $start = '*';
4045
4046             # a single word without dashes is a secondary block type
4047             if ( @words > 1 ) {
4048                 $start = shift @words;
4049             }
4050
4051             # always make an entry for the leading word. If none follow, this
4052             # will still prevent a wildcard from matching this word.
4053             if ( !defined( $rcuddled_block_types->{$start} ) ) {
4054                 $rcuddled_block_types->{$start} = {};
4055             }
4056
4057             # The count gives the original word order in case we ever want it.
4058             $string_count++;
4059             my $word_count = 0;
4060             foreach my $word (@words) {
4061                 next unless $word;
4062                 if ( $no_cuddle{$word} ) {
4063                     Warn(
4064 "## Ignoring keyword '$word' in -cbl; does not seem right\n"
4065                     );
4066                     next;
4067                 }
4068                 $word_count++;
4069                 $rcuddled_block_types->{$start}->{$word} =
4070                   1;    #"$string_count.$word_count";
4071
4072                 # git#9: Remove this word from the list of desired one-line
4073                 # blocks
4074                 $want_one_line_block{$word} = 0;
4075             }
4076         }
4077         return;
4078     }
4079 }    ## begin closure prepare_cuddled_block_types
4080
4081 sub dump_cuddled_block_list {
4082     my ($fh) = @_;
4083
4084     # ORIGINAL METHOD: Here is the format of the cuddled block type hash
4085     # which controls this routine
4086     #    my $rcuddled_block_types = {
4087     #        'if' => {
4088     #            'else'  => 1,
4089     #            'elsif' => 1
4090     #        },
4091     #        'try' => {
4092     #            'catch'   => 1,
4093     #            'finally' => 1
4094     #        },
4095     #    };
4096
4097     # SIMPLFIED METHOD: the simplified method uses a wildcard for
4098     # the starting block type and puts all cuddled blocks together:
4099     #    my $rcuddled_block_types = {
4100     #        '*' => {
4101     #            'else'  => 1,
4102     #            'elsif' => 1
4103     #            'catch'   => 1,
4104     #            'finally' => 1
4105     #        },
4106     #    };
4107
4108     # Both methods work, but the simplified method has proven to be adequate and
4109     # easier to manage.
4110
4111     my $cuddled_string = $rOpts->{'cuddled-block-list'};
4112     $cuddled_string = '' unless $cuddled_string;
4113
4114     my $flags = "";
4115     $flags .= "-ce" if ( $rOpts->{'cuddled-else'} );
4116     $flags .= " -cbl='$cuddled_string'";
4117
4118     unless ( $rOpts->{'cuddled-else'} ) {
4119         $flags .= "\nNote: You must specify -ce to generate a cuddled hash";
4120     }
4121
4122     $fh->print(<<EOM);
4123 ------------------------------------------------------------------------
4124 Hash of cuddled block types prepared for a run with these parameters:
4125   $flags
4126 ------------------------------------------------------------------------
4127 EOM
4128
4129     use Data::Dumper;
4130     $fh->print( Dumper($rcuddled_block_types) );
4131
4132     $fh->print(<<EOM);
4133 ------------------------------------------------------------------------
4134 EOM
4135     return;
4136 }
4137
4138 sub make_static_block_comment_pattern {
4139
4140     # create the pattern used to identify static block comments
4141     $static_block_comment_pattern = '^\s*##';
4142
4143     # allow the user to change it
4144     if ( $rOpts->{'static-block-comment-prefix'} ) {
4145         my $prefix = $rOpts->{'static-block-comment-prefix'};
4146         $prefix =~ s/^\s*//;
4147         my $pattern = $prefix;
4148
4149         # user may give leading caret to force matching left comments only
4150         if ( $prefix !~ /^\^#/ ) {
4151             if ( $prefix !~ /^#/ ) {
4152                 Die(
4153 "ERROR: the -sbcp prefix is '$prefix' but must begin with '#' or '^#'\n"
4154                 );
4155             }
4156             $pattern = '^\s*' . $prefix;
4157         }
4158         if ( bad_pattern($pattern) ) {
4159             Die(
4160 "ERROR: the -sbc prefix '$prefix' causes the invalid regex '$pattern'\n"
4161             );
4162         }
4163         $static_block_comment_pattern = $pattern;
4164     }
4165     return;
4166 }
4167
4168 sub make_format_skipping_pattern {
4169     my ( $opt_name, $default ) = @_;
4170     my $param = $rOpts->{$opt_name};
4171     unless ($param) { $param = $default }
4172     $param =~ s/^\s*//;
4173     if ( $param !~ /^#/ ) {
4174         Die("ERROR: the $opt_name parameter '$param' must begin with '#'\n");
4175     }
4176     my $pattern = '^' . $param . '\s';
4177     if ( bad_pattern($pattern) ) {
4178         Die(
4179 "ERROR: the $opt_name parameter '$param' causes the invalid regex '$pattern'\n"
4180         );
4181     }
4182     return $pattern;
4183 }
4184
4185 sub make_non_indenting_brace_pattern {
4186
4187     # Create the pattern used to identify static side comments.
4188     # Note that we are ending the pattern in a \s. This will allow
4189     # the pattern to be followed by a space and some text, or a newline.
4190     # The pattern is used in sub 'non_indenting_braces'
4191     $non_indenting_brace_pattern = '^#<<<\s';
4192
4193     # allow the user to change it
4194     if ( $rOpts->{'non-indenting-brace-prefix'} ) {
4195         my $prefix = $rOpts->{'non-indenting-brace-prefix'};
4196         $prefix =~ s/^\s*//;
4197         if ( $prefix !~ /^#/ ) {
4198             Die("ERROR: the -nibp parameter '$prefix' must begin with '#'\n");
4199         }
4200         my $pattern = '^' . $prefix . '\s';
4201         if ( bad_pattern($pattern) ) {
4202             Die(
4203 "ERROR: the -nibp prefix '$prefix' causes the invalid regex '$pattern'\n"
4204             );
4205         }
4206         $non_indenting_brace_pattern = $pattern;
4207     }
4208     return;
4209 }
4210
4211 sub make_closing_side_comment_list_pattern {
4212
4213     # turn any input list into a regex for recognizing selected block types
4214     $closing_side_comment_list_pattern = '^\w+';
4215     if ( defined( $rOpts->{'closing-side-comment-list'} )
4216         && $rOpts->{'closing-side-comment-list'} )
4217     {
4218         $closing_side_comment_list_pattern =
4219           make_block_pattern( '-cscl', $rOpts->{'closing-side-comment-list'} );
4220     }
4221     return;
4222 }
4223
4224 sub make_sub_matching_pattern {
4225
4226     # Patterns for standardizing matches to block types for regular subs and
4227     # anonymous subs. Examples
4228     #  'sub process' is a named sub
4229     #  'sub ::m' is a named sub
4230     #  'sub' is an anonymous sub
4231     #  'sub:' is a label, not a sub
4232     #  'substr' is a keyword
4233     $SUB_PATTERN    = '^sub\s+(::|\w)';    # match normal sub
4234     $ASUB_PATTERN   = '^sub$';             # match anonymous sub
4235     $ANYSUB_PATTERN = '^sub\b';            # match either type of sub
4236
4237     # Note (see also RT #133130): These patterns are used by
4238     # sub make_block_pattern, which is used for making most patterns.
4239     # So this sub needs to be called before other pattern-making routines.
4240
4241     if ( $rOpts->{'sub-alias-list'} ) {
4242
4243         # Note that any 'sub-alias-list' has been preprocessed to
4244         # be a trimmed, space-separated list which includes 'sub'
4245         # for example, it might be 'sub method fun'
4246         my $sub_alias_list = $rOpts->{'sub-alias-list'};
4247         $sub_alias_list =~ s/\s+/\|/g;
4248         $SUB_PATTERN    =~ s/sub/\($sub_alias_list\)/;
4249         $ASUB_PATTERN   =~ s/sub/\($sub_alias_list\)/;
4250         $ANYSUB_PATTERN =~ s/sub/\($sub_alias_list\)/;
4251     }
4252     return;
4253 }
4254
4255 sub make_bli_pattern {
4256
4257     # default list of block types for which -bli would apply
4258     my $bli_list_string = 'if else elsif unless while for foreach do : sub';
4259
4260     if ( defined( $rOpts->{'brace-left-and-indent-list'} )
4261         && $rOpts->{'brace-left-and-indent-list'} )
4262     {
4263         $bli_list_string = $rOpts->{'brace-left-and-indent-list'};
4264     }
4265
4266     $bli_pattern = make_block_pattern( '-blil', $bli_list_string );
4267     return;
4268 }
4269
4270 sub make_keyword_group_list_pattern {
4271
4272     # turn any input list into a regex for recognizing selected block types.
4273     # Here are the defaults:
4274     $keyword_group_list_pattern         = '^(our|local|my|use|require|)$';
4275     $keyword_group_list_comment_pattern = '';
4276     if ( defined( $rOpts->{'keyword-group-blanks-list'} )
4277         && $rOpts->{'keyword-group-blanks-list'} )
4278     {
4279         my @words = split /\s+/, $rOpts->{'keyword-group-blanks-list'};
4280         my @keyword_list;
4281         my @comment_list;
4282         foreach my $word (@words) {
4283             if ( $word =~ /^(BC|SBC)$/ ) {
4284                 push @comment_list, $word;
4285                 if ( $word eq 'SBC' ) { push @comment_list, 'SBCX' }
4286             }
4287             else {
4288                 push @keyword_list, $word;
4289             }
4290         }
4291         $keyword_group_list_pattern =
4292           make_block_pattern( '-kgbl', $rOpts->{'keyword-group-blanks-list'} );
4293         $keyword_group_list_comment_pattern =
4294           make_block_pattern( '-kgbl', join( ' ', @comment_list ) );
4295     }
4296     return;
4297 }
4298
4299 sub make_block_brace_vertical_tightness_pattern {
4300
4301     # turn any input list into a regex for recognizing selected block types
4302     $block_brace_vertical_tightness_pattern =
4303       '^((if|else|elsif|unless|while|for|foreach|do|\w+:)$|sub)';
4304     if ( defined( $rOpts->{'block-brace-vertical-tightness-list'} )
4305         && $rOpts->{'block-brace-vertical-tightness-list'} )
4306     {
4307         $block_brace_vertical_tightness_pattern =
4308           make_block_pattern( '-bbvtl',
4309             $rOpts->{'block-brace-vertical-tightness-list'} );
4310     }
4311     return;
4312 }
4313
4314 sub make_blank_line_pattern {
4315
4316     $blank_lines_before_closing_block_pattern = $SUB_PATTERN;
4317     my $key = 'blank-lines-before-closing-block-list';
4318     if ( defined( $rOpts->{$key} ) && $rOpts->{$key} ) {
4319         $blank_lines_before_closing_block_pattern =
4320           make_block_pattern( '-blbcl', $rOpts->{$key} );
4321     }
4322
4323     $blank_lines_after_opening_block_pattern = $SUB_PATTERN;
4324     $key = 'blank-lines-after-opening-block-list';
4325     if ( defined( $rOpts->{$key} ) && $rOpts->{$key} ) {
4326         $blank_lines_after_opening_block_pattern =
4327           make_block_pattern( '-blaol', $rOpts->{$key} );
4328     }
4329     return;
4330 }
4331
4332 sub make_block_pattern {
4333
4334     #  given a string of block-type keywords, return a regex to match them
4335     #  The only tricky part is that labels are indicated with a single ':'
4336     #  and the 'sub' token text may have additional text after it (name of
4337     #  sub).
4338     #
4339     #  Example:
4340     #
4341     #   input string: "if else elsif unless while for foreach do : sub";
4342     #   pattern:  '^((if|else|elsif|unless|while|for|foreach|do|\w+:)$|sub)';
4343
4344     #  Minor Update:
4345     #
4346     #  To distinguish between anonymous subs and named subs, use 'sub' to
4347     #   indicate a named sub, and 'asub' to indicate an anonymous sub
4348
4349     my ( $abbrev, $string ) = @_;
4350     my @list  = split_words($string);
4351     my @words = ();
4352     my %seen;
4353     for my $i (@list) {
4354         if ( $i eq '*' ) { my $pattern = '^.*'; return $pattern }
4355         next if $seen{$i};
4356         $seen{$i} = 1;
4357         if ( $i eq 'sub' ) {
4358         }
4359         elsif ( $i eq 'asub' ) {
4360         }
4361         elsif ( $i eq ';' ) {
4362             push @words, ';';
4363         }
4364         elsif ( $i eq '{' ) {
4365             push @words, '\{';
4366         }
4367         elsif ( $i eq ':' ) {
4368             push @words, '\w+:';
4369         }
4370         elsif ( $i =~ /^\w/ ) {
4371             push @words, $i;
4372         }
4373         else {
4374             Warn("unrecognized block type $i after $abbrev, ignoring\n");
4375         }
4376     }
4377     my $pattern      = '(' . join( '|', @words ) . ')$';
4378     my $sub_patterns = "";
4379     if ( $seen{'sub'} ) {
4380         $sub_patterns .= '|' . $SUB_PATTERN;
4381     }
4382     if ( $seen{'asub'} ) {
4383         $sub_patterns .= '|' . $ASUB_PATTERN;
4384     }
4385     if ($sub_patterns) {
4386         $pattern = '(' . $pattern . $sub_patterns . ')';
4387     }
4388     $pattern = '^' . $pattern;
4389     return $pattern;
4390 }
4391
4392 sub make_static_side_comment_pattern {
4393
4394     # create the pattern used to identify static side comments
4395     $static_side_comment_pattern = '^##';
4396
4397     # allow the user to change it
4398     if ( $rOpts->{'static-side-comment-prefix'} ) {
4399         my $prefix = $rOpts->{'static-side-comment-prefix'};
4400         $prefix =~ s/^\s*//;
4401         my $pattern = '^' . $prefix;
4402         if ( bad_pattern($pattern) ) {
4403             Die(
4404 "ERROR: the -sscp prefix '$prefix' causes the invalid regex '$pattern'\n"
4405             );
4406         }
4407         $static_side_comment_pattern = $pattern;
4408     }
4409     return;
4410 }
4411
4412 sub make_closing_side_comment_prefix {
4413
4414     # Be sure we have a valid closing side comment prefix
4415     my $csc_prefix = $rOpts->{'closing-side-comment-prefix'};
4416     my $csc_prefix_pattern;
4417     if ( !defined($csc_prefix) ) {
4418         $csc_prefix         = '## end';
4419         $csc_prefix_pattern = '^##\s+end';
4420     }
4421     else {
4422         my $test_csc_prefix = $csc_prefix;
4423         if ( $test_csc_prefix !~ /^#/ ) {
4424             $test_csc_prefix = '#' . $test_csc_prefix;
4425         }
4426
4427         # make a regex to recognize the prefix
4428         my $test_csc_prefix_pattern = $test_csc_prefix;
4429
4430         # escape any special characters
4431         $test_csc_prefix_pattern =~ s/([^#\s\w])/\\$1/g;
4432
4433         $test_csc_prefix_pattern = '^' . $test_csc_prefix_pattern;
4434
4435         # allow exact number of intermediate spaces to vary
4436         $test_csc_prefix_pattern =~ s/\s+/\\s\+/g;
4437
4438         # make sure we have a good pattern
4439         # if we fail this we probably have an error in escaping
4440         # characters.
4441
4442         if ( bad_pattern($test_csc_prefix_pattern) ) {
4443
4444             # shouldn't happen..must have screwed up escaping, above
4445             report_definite_bug();
4446             Warn(
4447 "Program Error: the -cscp prefix '$csc_prefix' caused the invalid regex '$csc_prefix_pattern'\n"
4448             );
4449
4450             # just warn and keep going with defaults
4451             Warn("Please consider using a simpler -cscp prefix\n");
4452             Warn("Using default -cscp instead; please check output\n");
4453         }
4454         else {
4455             $csc_prefix         = $test_csc_prefix;
4456             $csc_prefix_pattern = $test_csc_prefix_pattern;
4457         }
4458     }
4459     $rOpts->{'closing-side-comment-prefix'} = $csc_prefix;
4460     $closing_side_comment_prefix_pattern = $csc_prefix_pattern;
4461     return;
4462 }
4463
4464 ##################################################
4465 # CODE SECTION 4: receive lines from the tokenizer
4466 ##################################################
4467
4468 {    ## begin closure write_line
4469
4470     my $Last_line_had_side_comment;
4471     my $In_format_skipping_section;
4472     my $Saw_VERSION_in_this_file;
4473
4474     sub initialize_write_line {
4475
4476         $Last_line_had_side_comment = 0;
4477         $In_format_skipping_section = 0;
4478         $Saw_VERSION_in_this_file   = 0;
4479
4480         return;
4481     }
4482
4483     sub write_line {
4484
4485       # This routine originally received lines of code and immediately processed
4486       # them.  That was efficient when memory was limited, but now it just saves
4487       # the lines it receives.  They get processed all together after the last
4488       # line is received.
4489
4490        # As tokenized lines are received they are converted to the format needed
4491        # for the final formatting.
4492         my ( $self, $line_of_tokens_old ) = @_;
4493         my $rLL           = $self->[_rLL_];
4494         my $Klimit        = $self->[_Klimit_];
4495         my $rlines_new    = $self->[_rlines_];
4496         my $maximum_level = $self->[_maximum_level_];
4497
4498         my $Kfirst;
4499         my $line_of_tokens = {};
4500         foreach my $key (
4501             qw(
4502             _curly_brace_depth
4503             _ending_in_quote
4504             _guessed_indentation_level
4505             _line_number
4506             _line_text
4507             _line_type
4508             _paren_depth
4509             _quote_character
4510             _square_bracket_depth
4511             _starting_in_quote
4512             )
4513           )
4514         {
4515             $line_of_tokens->{$key} = $line_of_tokens_old->{$key};
4516         }
4517
4518         # Data needed by Logger
4519         $line_of_tokens->{_level_0}          = 0;
4520         $line_of_tokens->{_ci_level_0}       = 0;
4521         $line_of_tokens->{_nesting_blocks_0} = "";
4522         $line_of_tokens->{_nesting_tokens_0} = "";
4523
4524         # Needed to avoid trimming quotes
4525         $line_of_tokens->{_ended_in_blank_token} = undef;
4526
4527         my $line_type     = $line_of_tokens_old->{_line_type};
4528         my $input_line_no = $line_of_tokens_old->{_line_number};
4529         my $CODE_type     = "";
4530         my $tee_output;
4531
4532         # Handle line of non-code
4533         if ( $line_type ne 'CODE' ) {
4534             $tee_output ||= $rOpts_tee_pod
4535               && substr( $line_type, 0, 3 ) eq 'POD';
4536         }
4537
4538         # Handle line of code
4539         else {
4540
4541             my $rtokens         = $line_of_tokens_old->{_rtokens};
4542             my $rtoken_type     = $line_of_tokens_old->{_rtoken_type};
4543             my $rblock_type     = $line_of_tokens_old->{_rblock_type};
4544             my $rcontainer_type = $line_of_tokens_old->{_rcontainer_type};
4545             my $rcontainer_environment =
4546               $line_of_tokens_old->{_rcontainer_environment};
4547             my $rtype_sequence  = $line_of_tokens_old->{_rtype_sequence};
4548             my $rlevels         = $line_of_tokens_old->{_rlevels};
4549             my $rslevels        = $line_of_tokens_old->{_rslevels};
4550             my $rci_levels      = $line_of_tokens_old->{_rci_levels};
4551             my $rnesting_blocks = $line_of_tokens_old->{_rnesting_blocks};
4552             my $rnesting_tokens = $line_of_tokens_old->{_rnesting_tokens};
4553
4554             my $jmax = @{$rtokens} - 1;
4555             if ( $jmax >= 0 ) {
4556                 $Kfirst = defined($Klimit) ? $Klimit + 1 : 0;
4557                 foreach my $j ( 0 .. $jmax ) {
4558
4559                  # Clip negative nesting depths to zero to avoid problems.
4560                  # Negative values can occur in files with unbalanced containers
4561                     my $slevel = $rslevels->[$j];
4562                     if ( $slevel < 0 ) { $slevel = 0 }
4563
4564                     if ( $rlevels->[$j] > $maximum_level ) {
4565                         $maximum_level = $rlevels->[$j];
4566                     }
4567
4568                     # But do not clip the 'level' variable yet. We will do this
4569                     # later, in sub 'store_token_to_go'. The reason is that in
4570                     # files with level errors, the logic in 'weld_cuddled_else'
4571                     # uses a stack logic that will give bad welds if we clip
4572                     # levels here.
4573                     ## if ( $rlevels->[$j] < 0 ) { $rlevels->[$j] = 0 }
4574
4575                     my @tokary;
4576                     @tokary[
4577                       _TOKEN_,         _TYPE_,  _BLOCK_TYPE_,
4578                       _TYPE_SEQUENCE_, _LEVEL_, _SLEVEL_,
4579                       _CI_LEVEL_,      _LINE_INDEX_,
4580                       ]
4581                       = (
4582                         $rtokens->[$j],     $rtoken_type->[$j],
4583                         $rblock_type->[$j], $rtype_sequence->[$j],
4584                         $rlevels->[$j],     $slevel,
4585                         $rci_levels->[$j],  $input_line_no - 1,
4586                       );
4587                     push @{$rLL}, \@tokary;
4588                 } ## end foreach my $j ( 0 .. $jmax )
4589
4590                 $Klimit = @{$rLL} - 1;
4591
4592                 # Need to remember if we can trim the input line
4593                 $line_of_tokens->{_ended_in_blank_token} =
4594                   $rtoken_type->[$jmax] eq 'b';
4595
4596                 $line_of_tokens->{_level_0}          = $rlevels->[0];
4597                 $line_of_tokens->{_ci_level_0}       = $rci_levels->[0];
4598                 $line_of_tokens->{_nesting_blocks_0} = $rnesting_blocks->[0];
4599                 $line_of_tokens->{_nesting_tokens_0} = $rnesting_tokens->[0];
4600             } ## end if ( $jmax >= 0 )
4601
4602             $CODE_type =
4603               $self->get_CODE_type( $line_of_tokens, $Kfirst, $Klimit,
4604                 $input_line_no );
4605
4606             $tee_output ||=
4607                  $rOpts_tee_block_comments
4608               && $jmax == 0
4609               && $rLL->[$Kfirst]->[_TYPE_] eq '#';
4610
4611             $tee_output ||=
4612                  $rOpts_tee_side_comments
4613               && defined($Kfirst)
4614               && $Klimit > $Kfirst
4615               && $rLL->[$Klimit]->[_TYPE_] eq '#';
4616
4617             # Handle any requested side comment deletions. It is easier to get
4618             # this done here rather than farther down the pipeline because IO
4619             # lines take a different route, and because lines with deleted HSC
4620             # become BL lines.  An since we are deleting now, we have to also
4621             # handle any tee- requests before the side comments vanish.
4622             my $delete_side_comment =
4623                  $rOpts_delete_side_comments
4624               && defined($Kfirst)
4625               && $rLL->[$Klimit]->[_TYPE_] eq '#'
4626               && ( $Klimit > $Kfirst || $CODE_type eq 'HSC' )
4627               && (!$CODE_type
4628                 || $CODE_type eq 'HSC'
4629                 || $CODE_type eq 'IO'
4630                 || $CODE_type eq 'NIN' );
4631
4632             if (
4633                    $rOpts_delete_closing_side_comments
4634                 && !$delete_side_comment
4635                 && defined($Kfirst)
4636                 && $Klimit > $Kfirst
4637                 && $rLL->[$Klimit]->[_TYPE_] eq '#'
4638                 && (  !$CODE_type
4639                     || $CODE_type eq 'HSC'
4640                     || $CODE_type eq 'IO'
4641                     || $CODE_type eq 'NIN' )
4642               )
4643             {
4644                 my $token  = $rLL->[$Klimit]->[_TOKEN_];
4645                 my $K_m    = $Klimit - 1;
4646                 my $type_m = $rLL->[$K_m]->[_TYPE_];
4647                 if ( $type_m eq 'b' && $K_m > $Kfirst ) { $K_m-- }
4648                 my $last_nonblank_block_type = $rLL->[$K_m]->[_BLOCK_TYPE_];
4649                 if (   $token =~ /$closing_side_comment_prefix_pattern/
4650                     && $last_nonblank_block_type =~
4651                     /$closing_side_comment_list_pattern/ )
4652                 {
4653                     $delete_side_comment = 1;
4654                 }
4655             } ## end if ( $rOpts_delete_closing_side_comments...)
4656
4657             if ($delete_side_comment) {
4658                 pop @{$rLL};
4659                 $Klimit -= 1;
4660                 if (   $Klimit > $Kfirst
4661                     && $rLL->[$Klimit]->[_TYPE_] eq 'b' )
4662                 {
4663                     pop @{$rLL};
4664                     $Klimit -= 1;
4665                 }
4666
4667                 # The -io option outputs the line text, so we have to update
4668                 # the line text so that the comment does not reappear.
4669                 if ( $CODE_type eq 'IO' ) {
4670                     my $line = "";
4671                     foreach my $KK ( $Kfirst .. $Klimit ) {
4672                         $line .= $rLL->[$KK]->[_TOKEN_];
4673                     }
4674                     $line_of_tokens->{_line_text} = $line . "\n";
4675                 }
4676
4677                 # If we delete a hanging side comment the line becomes blank.
4678                 if ( $CODE_type eq 'HSC' ) { $CODE_type = 'BL' }
4679             }
4680
4681         } ## end if ( $line_type eq 'CODE')
4682
4683         # Finish storing line variables
4684         if ($tee_output) {
4685             my $fh_tee    = $self->[_fh_tee_];
4686             my $line_text = $line_of_tokens_old->{_line_text};
4687             $fh_tee->print($line_text) if ($fh_tee);
4688         }
4689
4690         $line_of_tokens->{_rK_range}  = [ $Kfirst, $Klimit ];
4691         $line_of_tokens->{_code_type} = $CODE_type;
4692         $self->[_Klimit_]             = $Klimit;
4693         $self->[_maximum_level_]      = $maximum_level;
4694
4695         push @{$rlines_new}, $line_of_tokens;
4696         return;
4697     }
4698
4699     sub get_CODE_type {
4700         my ( $self, $line_of_tokens, $Kfirst, $Klast, $input_line_no ) = @_;
4701
4702         # We are looking at a line of code and setting a flag to
4703         # describe any special processing that it requires
4704
4705         # Possible CODE_types
4706         # 'VB'  = Verbatim - line goes out verbatim (a quote)
4707         # 'FS'  = Format Skipping - line goes out verbatim
4708         # 'BL'  = Blank Line
4709         # 'HSC' = Hanging Side Comment - fix this hanging side comment
4710         # 'SBCX'= Static Block Comment Without Leading Space
4711         # 'SBC' = Static Block Comment
4712         # 'BC'  = Block Comment - an ordinary full line comment
4713         # 'IO'  = Indent Only - line goes out unchanged except for indentation
4714         # 'NIN' = No Internal Newlines - line does not get broken
4715         # 'VER' = VERSION statement
4716         # ''    = ordinary line of code with no restructions
4717
4718         my $rLL = $self->[_rLL_];
4719
4720         my $CODE_type  = "";
4721         my $input_line = $line_of_tokens->{_line_text};
4722         my $jmax       = defined($Kfirst) ? $Klast - $Kfirst : -1;
4723
4724         my $is_block_comment = 0;
4725         my $has_side_comment = 0;
4726
4727         if ( $jmax >= 0 && $rLL->[$Klast]->[_TYPE_] eq '#' ) {
4728             if   ( $jmax == 0 ) { $is_block_comment = 1; }
4729             else                { $has_side_comment = 1 }
4730         }
4731
4732         # Write line verbatim if we are in a formatting skip section
4733         if ($In_format_skipping_section) {
4734
4735             # Note: extra space appended to comment simplifies pattern matching
4736             if ( $is_block_comment
4737                 && ( $rLL->[$Kfirst]->[_TOKEN_] . " " ) =~
4738                 /$format_skipping_pattern_end/ )
4739             {
4740                 $In_format_skipping_section = 0;
4741                 write_logfile_entry(
4742                     "Line $input_line_no: Exiting format-skipping section\n");
4743             }
4744             $CODE_type = 'FS';
4745             goto RETURN;
4746         }
4747
4748         # Check for a continued quote..
4749         if ( $line_of_tokens->{_starting_in_quote} ) {
4750
4751             # A line which is entirely a quote or pattern must go out
4752             # verbatim.  Note: the \n is contained in $input_line.
4753             if ( $jmax <= 0 ) {
4754                 if ( ( $input_line =~ "\t" ) ) {
4755                     my $input_line_number = $line_of_tokens->{_line_number};
4756                     $self->note_embedded_tab($input_line_number);
4757                 }
4758                 $CODE_type = 'VB';
4759                 goto RETURN;
4760             }
4761         }
4762
4763         # See if we are entering a formatting skip section
4764         if (   $rOpts_format_skipping
4765             && $is_block_comment
4766             && ( $rLL->[$Kfirst]->[_TOKEN_] . " " ) =~
4767             /$format_skipping_pattern_begin/ )
4768         {
4769             $In_format_skipping_section = 1;
4770             write_logfile_entry(
4771                 "Line $input_line_no: Entering format-skipping section\n");
4772             $CODE_type = 'FS';
4773             goto RETURN;
4774         }
4775
4776         # ignore trailing blank tokens (they will get deleted later)
4777         if ( $jmax > 0 && $rLL->[$Klast]->[_TYPE_] eq 'b' ) {
4778             $jmax--;
4779         }
4780
4781         # blank line..
4782         if ( $jmax < 0 ) {
4783             $CODE_type = 'BL';
4784             goto RETURN;
4785         }
4786
4787         # see if this is a static block comment (starts with ## by default)
4788         my $is_static_block_comment                       = 0;
4789         my $is_static_block_comment_without_leading_space = 0;
4790         if (   $is_block_comment
4791             && $rOpts->{'static-block-comments'}
4792             && $input_line =~ /$static_block_comment_pattern/ )
4793         {
4794             $is_static_block_comment = 1;
4795             $is_static_block_comment_without_leading_space =
4796               substr( $input_line, 0, 1 ) eq '#';
4797         }
4798
4799         # Check for comments which are line directives
4800         # Treat exactly as static block comments without leading space
4801         # reference: perlsyn, near end, section Plain Old Comments (Not!)
4802         # example: '# line 42 "new_filename.plx"'
4803         if (
4804                $is_block_comment
4805             && $input_line =~ /^\#   \s*
4806                                line \s+ (\d+)   \s*
4807                                (?:\s("?)([^"]+)\2)? \s*
4808                                $/x
4809           )
4810         {
4811             $is_static_block_comment                       = 1;
4812             $is_static_block_comment_without_leading_space = 1;
4813         }
4814
4815         # look for hanging side comment
4816         if (
4817                $is_block_comment
4818             && $Last_line_had_side_comment  # last line had side comment
4819             && $input_line =~ /^\s/         # there is some leading space
4820             && !$is_static_block_comment    # do not make static comment hanging
4821             && $rOpts->{'hanging-side-comments'}    # user is allowing
4822                                                     # hanging side comments
4823                                                     # like this
4824           )
4825         {
4826             $has_side_comment = 1;
4827             $CODE_type        = 'HSC';
4828             goto RETURN;
4829         }
4830
4831         # Handle a block (full-line) comment..
4832         if ($is_block_comment) {
4833
4834             if ($is_static_block_comment_without_leading_space) {
4835                 $CODE_type = 'SBCX';
4836                 goto RETURN;
4837             }
4838             elsif ($is_static_block_comment) {
4839                 $CODE_type = 'SBC';
4840                 goto RETURN;
4841             }
4842             elsif ($Last_line_had_side_comment
4843                 && !$rOpts_maximum_consecutive_blank_lines
4844                 && $rLL->[$Kfirst]->[_LEVEL_] > 0 )
4845             {
4846                 # Emergency fix to keep a block comment from becoming a hanging
4847                 # side comment.  This fix is for the case that blank lines
4848                 # cannot be inserted.  There is related code in sub
4849                 # 'process_line_of_CODE'
4850                 $CODE_type = 'SBCX';
4851                 goto RETURN;
4852             }
4853             else {
4854                 $CODE_type = 'BC';
4855                 goto RETURN;
4856             }
4857         }
4858
4859         # End of comments. Handle a line of normal code:
4860
4861         if ($rOpts_indent_only) {
4862             $CODE_type = 'IO';
4863             goto RETURN;
4864         }
4865
4866         if ( !$rOpts_add_newlines ) {
4867             $CODE_type = 'NIN';
4868             goto RETURN;
4869         }
4870
4871         #   Patch needed for MakeMaker.  Do not break a statement
4872         #   in which $VERSION may be calculated.  See MakeMaker.pm;
4873         #   this is based on the coding in it.
4874         #   The first line of a file that matches this will be eval'd:
4875         #       /([\$*])(([\w\:\']*)\bVERSION)\b.*\=/
4876         #   Examples:
4877         #     *VERSION = \'1.01';
4878         #     ( $VERSION ) = '$Revision: 1.74 $ ' =~ /\$Revision:\s+([^\s]+)/;
4879         #   We will pass such a line straight through without breaking
4880         #   it unless -npvl is used.
4881
4882         #   Patch for problem reported in RT #81866, where files
4883         #   had been flattened into a single line and couldn't be
4884         #   tidied without -npvl.  There are two parts to this patch:
4885         #   First, it is not done for a really long line (80 tokens for now).
4886         #   Second, we will only allow up to one semicolon
4887         #   before the VERSION.  We need to allow at least one semicolon
4888         #   for statements like this:
4889         #      require Exporter;  our $VERSION = $Exporter::VERSION;
4890         #   where both statements must be on a single line for MakeMaker
4891
4892         my $is_VERSION_statement = 0;
4893         if (  !$Saw_VERSION_in_this_file
4894             && $jmax < 80
4895             && $input_line =~
4896             /^[^;]*;?[^;]*([\$*])(([\w\:\']*)\bVERSION)\b.*\=/ )
4897         {
4898             $Saw_VERSION_in_this_file = 1;
4899             write_logfile_entry("passing VERSION line; -npvl deactivates\n");
4900
4901             # This code type has lower priority than others
4902             $CODE_type = 'VER';
4903             goto RETURN;
4904         }
4905
4906       RETURN:
4907         $Last_line_had_side_comment = $has_side_comment;
4908         return $CODE_type;
4909     }
4910
4911 } ## end closure write_line
4912
4913 #############################################
4914 # CODE SECTION 5: Pre-process the entire file
4915 #############################################
4916
4917 sub finish_formatting {
4918
4919     my ( $self, $severe_error ) = @_;
4920
4921     # The file has been tokenized and is ready to be formatted.
4922     # All of the relevant data is stored in $self, ready to go.
4923
4924     # Check the maximum level. If it is extremely large we will
4925     # give up and output the file verbatim.
4926     my $maximum_level       = $self->[_maximum_level_];
4927     my $maximum_table_index = $#maximum_line_length_at_level;
4928     if ( !$severe_error && $maximum_level > $maximum_table_index ) {
4929         $severe_error ||= 1;
4930         Warn(<<EOM);
4931 The maximum indentation level, $maximum_level, exceeds the builtin limit of $maximum_table_index.
4932 Something may be wrong; formatting will be skipped.
4933 EOM
4934     }
4935
4936     # output file verbatim if severe error or no formatting requested
4937     if ( $severe_error || $rOpts->{notidy} ) {
4938         $self->dump_verbatim();
4939         $self->wrapup();
4940         return;
4941     }
4942
4943     # Update the 'save_logfile' flag based to include any tokenization errors.
4944     # We can save time by skipping logfile calls if it is not going to be saved.
4945     my $logger_object = $self->[_logger_object_];
4946     if ($logger_object) {
4947         $self->[_save_logfile_] = $logger_object->get_save_logfile();
4948     }
4949
4950     # Make a pass through all tokens, adding or deleting any whitespace as
4951     # required.  Also make any other changes, such as adding semicolons.
4952     # All token changes must be made here so that the token data structure
4953     # remains fixed for the rest of this iteration.
4954     $self->respace_tokens();
4955
4956     $self->find_multiline_qw();
4957
4958     $self->keep_old_line_breaks();
4959
4960     # Implement any welding needed for the -wn or -cb options
4961     $self->weld_containers();
4962
4963     # Locate small nested blocks which should not be broken
4964     $self->mark_short_nested_blocks();
4965
4966     $self->adjust_indentation_levels();
4967
4968     $self->set_excluded_lp_containers();
4969
4970     # Finishes formatting and write the result to the line sink.
4971     # Eventually this call should just change the 'rlines' data according to the
4972     # new line breaks and then return so that we can do an internal iteration
4973     # before continuing with the next stages of formatting.
4974     $self->process_all_lines();
4975
4976     # A final routine to tie up any loose ends
4977     $self->wrapup();
4978     return;
4979 }
4980
4981 sub dump_verbatim {
4982     my $self   = shift;
4983     my $rlines = $self->[_rlines_];
4984     foreach my $line ( @{$rlines} ) {
4985         my $input_line = $line->{_line_text};
4986         $self->write_unindented_line($input_line);
4987     }
4988     return;
4989 }
4990
4991 my %wU;
4992 my %wiq;
4993 my %is_nonlist_keyword;
4994 my %is_nonlist_type;
4995
4996 BEGIN {
4997
4998     # added 'U' to fix cases b1125 b1126 b1127
4999     my @q = qw(w U);
5000     @{wU}{@q} = (1) x scalar(@q);
5001
5002     @q = qw(w i q Q G C Z);
5003     @{wiq}{@q} = (1) x scalar(@q);
5004
5005     # Parens following these keywords will not be marked as lists. Note that
5006     # 'for' is not included and is handled separately, by including 'f' in the
5007     # hash %is_counted_type, since it may or may not be a c-style for loop.
5008     @q = qw( if elsif unless and or );
5009     @is_nonlist_keyword{@q} = (1) x scalar(@q);
5010
5011     # Parens following these types will not be marked as lists
5012     @q = qw( && || );
5013     @is_nonlist_type{@q} = (1) x scalar(@q);
5014
5015 }
5016
5017 sub respace_tokens {
5018
5019     my $self = shift;
5020     return if $rOpts->{'indent-only'};
5021
5022     # This routine is called once per file to do as much formatting as possible
5023     # before new line breaks are set.
5024
5025     # This routine makes all necessary and possible changes to the tokenization
5026     # after the initial tokenization of the file. This is a tedious routine,
5027     # but basically it consists of inserting and deleting whitespace between
5028     # nonblank tokens according to the selected parameters. In a few cases
5029     # non-space characters are added, deleted or modified.
5030
5031     # The goal of this routine is to create a new token array which only needs
5032     # the definition of new line breaks and padding to complete formatting.  In
5033     # a few cases we have to cheat a little to achieve this goal.  In
5034     # particular, we may not know if a semicolon will be needed, because it
5035     # depends on how the line breaks go.  To handle this, we include the
5036     # semicolon as a 'phantom' which can be displayed as normal or as an empty
5037     # string.
5038
5039     # Method: The old tokens are copied one-by-one, with changes, from the old
5040     # linear storage array $rLL to a new array $rLL_new.
5041
5042     my $rLL             = $self->[_rLL_];
5043     my $Klimit_old      = $self->[_Klimit_];
5044     my $rlines          = $self->[_rlines_];
5045     my $length_function = $self->[_length_function_];
5046     my $is_encoded_data = $self->[_is_encoded_data_];
5047
5048     my $rLL_new = [];    # This is the new array
5049     my $rtoken_vars;
5050     my $Ktoken_vars;                   # the old K value of $rtoken_vars
5051     my ( $Kfirst_old, $Klast_old );    # Range of old line
5052     my $Klast_old_code;                # K of last token if side comment
5053     my $Kmax = @{$rLL} - 1;
5054
5055     my $CODE_type = "";
5056     my $line_type = "";
5057
5058     # Set the whitespace flags, which indicate the token spacing preference.
5059     my $rwhitespace_flags = $self->set_whitespace_flags();
5060
5061     # we will be setting token lengths as we go
5062     my $cumulative_length = 0;
5063
5064     my %seqno_stack;
5065     my %K_old_opening_by_seqno = ();    # Note: old K index
5066     my $depth_next             = 0;
5067     my $depth_next_max         = 0;
5068
5069     my $K_closing_container       = $self->[_K_closing_container_];
5070     my $K_closing_ternary         = $self->[_K_closing_ternary_];
5071     my $K_opening_container       = $self->[_K_opening_container_];
5072     my $K_opening_ternary         = $self->[_K_opening_ternary_];
5073     my $rK_phantom_semicolons     = $self->[_rK_phantom_semicolons_];
5074     my $rchildren_of_seqno        = $self->[_rchildren_of_seqno_];
5075     my $rhas_broken_code_block    = $self->[_rhas_broken_code_block_];
5076     my $rhas_broken_list          = $self->[_rhas_broken_list_];
5077     my $rhas_broken_list_with_lec = $self->[_rhas_broken_list_with_lec_];
5078     my $rhas_code_block           = $self->[_rhas_code_block_];
5079     my $rhas_list                 = $self->[_rhas_list_];
5080     my $rhas_ternary              = $self->[_rhas_ternary_];
5081     my $ris_assigned_structure    = $self->[_ris_assigned_structure_];
5082     my $ris_broken_container      = $self->[_ris_broken_container_];
5083     my $ris_excluded_lp_container = $self->[_ris_excluded_lp_container_];
5084     my $ris_list_by_seqno         = $self->[_ris_list_by_seqno_];
5085     my $ris_permanently_broken    = $self->[_ris_permanently_broken_];
5086     my $rlec_count_by_seqno       = $self->[_rlec_count_by_seqno_];
5087     my $roverride_cab3            = $self->[_roverride_cab3_];
5088     my $rparent_of_seqno          = $self->[_rparent_of_seqno_];
5089     my $rtype_count_by_seqno      = $self->[_rtype_count_by_seqno_];
5090
5091     my $last_nonblank_type       = ';';
5092     my $last_nonblank_token      = ';';
5093     my $last_nonblank_block_type = '';
5094     my $nonblank_token_count     = 0;
5095     my $last_nonblank_token_lx   = 0;
5096
5097     my %K_first_here_doc_by_seqno;
5098
5099     my $set_permanently_broken = sub {
5100         my ($seqno) = @_;
5101         while ( defined($seqno) ) {
5102             $ris_permanently_broken->{$seqno} = 1;
5103             $seqno = $rparent_of_seqno->{$seqno};
5104         }
5105         return;
5106     };
5107     my $store_token = sub {
5108         my ($item) = @_;
5109
5110         # This will be the index of this item in the new array
5111         my $KK_new = @{$rLL_new};
5112
5113         my $type     = $item->[_TYPE_];
5114         my $is_blank = $type eq 'b';
5115
5116         # Do not output consecutive blanks. This should not happen, but
5117         # is worth checking because later routines make this assumption.
5118         if ( $is_blank && $KK_new && $rLL_new->[-1]->[_TYPE_] eq 'b' ) {
5119             return;
5120         }
5121
5122         # check for a sequenced item (i.e., container or ?/:)
5123         my $type_sequence = $item->[_TYPE_SEQUENCE_];
5124         if ($type_sequence) {
5125
5126             my $token = $item->[_TOKEN_];
5127             if ( $is_opening_token{$token} ) {
5128
5129                 $K_opening_container->{$type_sequence} = $KK_new;
5130
5131                 # Fix for case b1100: Count a line ending in ', [' as having
5132                 # a line-ending comma.  Otherwise, these commas can be hidden
5133                 # with something like --opening-square-bracket-right
5134                 if (   $last_nonblank_type eq ','
5135                     && $Ktoken_vars == $Klast_old_code
5136                     && $Ktoken_vars > $Kfirst_old )
5137                 {
5138                     $rlec_count_by_seqno->{$type_sequence}++;
5139                 }
5140
5141                 if (   $last_nonblank_type eq '='
5142                     || $last_nonblank_type eq '=>' )
5143                 {
5144                     $ris_assigned_structure->{$type_sequence} =
5145                       $last_nonblank_type;
5146                 }
5147
5148                 my $seqno_parent = $seqno_stack{ $depth_next - 1 };
5149                 $seqno_parent = SEQ_ROOT unless defined($seqno_parent);
5150                 push @{ $rchildren_of_seqno->{$seqno_parent} }, $type_sequence;
5151                 $rparent_of_seqno->{$type_sequence}     = $seqno_parent;
5152                 $seqno_stack{$depth_next}               = $type_sequence;
5153                 $K_old_opening_by_seqno{$type_sequence} = $Ktoken_vars;
5154                 $depth_next++;
5155
5156                 if ( $depth_next > $depth_next_max ) {
5157                     $depth_next_max = $depth_next;
5158                 }
5159             }
5160             elsif ( $is_closing_token{$token} ) {
5161
5162                 $K_closing_container->{$type_sequence} = $KK_new;
5163
5164                 # Do not include terminal commas in counts
5165                 if (   $last_nonblank_type eq ','
5166                     || $last_nonblank_type eq '=>' )
5167                 {
5168                     my $seqno = $seqno_stack{ $depth_next - 1 };
5169                     if ($seqno) {
5170                         $rtype_count_by_seqno->{$seqno}->{$last_nonblank_type}
5171                           --;
5172
5173                         if (   $Ktoken_vars == $Kfirst_old
5174                             && $last_nonblank_type eq ','
5175                             && $rlec_count_by_seqno->{$seqno} )
5176                         {
5177                             $rlec_count_by_seqno->{$seqno}--;
5178                         }
5179                     }
5180                 }
5181
5182                 # Update the stack...
5183                 $depth_next--;
5184             }
5185             else {
5186
5187                 # For ternary, note parent but do not include as child
5188                 my $seqno_parent = $seqno_stack{ $depth_next - 1 };
5189                 $seqno_parent = SEQ_ROOT unless defined($seqno_parent);
5190                 $rparent_of_seqno->{$type_sequence} = $seqno_parent;
5191
5192                 # These are not yet used but could be useful
5193                 if ( $token eq '?' ) {
5194                     $K_opening_ternary->{$type_sequence} = $KK_new;
5195                 }
5196                 elsif ( $token eq ':' ) {
5197                     $K_closing_ternary->{$type_sequence} = $KK_new;
5198                 }
5199                 else {
5200
5201                     # We really shouldn't arrive here, just being cautious:
5202                     # The only sequenced types output by the tokenizer are the
5203                     # opening & closing containers and the ternary types. Each
5204                     # of those was checked above. So we would only get here
5205                     # if the tokenizer has been changed to mark some other
5206                     # tokens with sequence numbers.
5207                     my $type = $item->[_TYPE_];
5208                     Fault(
5209 "Unexpected token type with sequence number: type='$type', seqno='$type_sequence'"
5210                     );
5211                 }
5212             }
5213         }
5214
5215         # Find the length of this token.  Later it may be adjusted if phantom
5216         # or ignoring side comment lengths.
5217         my $token_length =
5218             $is_encoded_data
5219           ? $length_function->( $item->[_TOKEN_] )
5220           : length( $item->[_TOKEN_] );
5221
5222         # handle comments
5223         my $is_comment = $type eq '#';
5224         if ($is_comment) {
5225
5226             # trim comments if necessary
5227             if ( $item->[_TOKEN_] =~ s/\s+$// ) {
5228                 $token_length = $length_function->( $item->[_TOKEN_] );
5229             }
5230
5231             # Mark length of side comments as just 1 if sc lengths are ignored
5232             if ( $rOpts_ignore_side_comment_lengths
5233                 && ( !$CODE_type || $CODE_type eq 'HSC' ) )
5234             {
5235                 $token_length = 1;
5236             }
5237             my $seqno = $seqno_stack{ $depth_next - 1 };
5238             if ( defined($seqno)
5239                 && !$ris_permanently_broken->{$seqno} )
5240             {
5241                 $set_permanently_broken->($seqno);
5242             }
5243
5244         }
5245
5246         $item->[_TOKEN_LENGTH_] = $token_length;
5247
5248         # and update the cumulative length
5249         $cumulative_length += $token_length;
5250
5251         # Save the length sum to just AFTER this token
5252         $item->[_CUMULATIVE_LENGTH_] = $cumulative_length;
5253
5254         if ( !$is_blank && !$is_comment ) {
5255             $last_nonblank_type       = $type;
5256             $last_nonblank_token      = $item->[_TOKEN_];
5257             $last_nonblank_block_type = $item->[_BLOCK_TYPE_];
5258             $last_nonblank_token_lx   = $item->[_LINE_INDEX_];
5259             $nonblank_token_count++;
5260
5261             # count selected types
5262             if ( $is_counted_type{$type} ) {
5263                 my $seqno = $seqno_stack{ $depth_next - 1 };
5264                 if ( defined($seqno) ) {
5265                     $rtype_count_by_seqno->{$seqno}->{$type}++;
5266
5267                     # Count line-ending commas for -bbx
5268                     if ( $type eq ',' && $Ktoken_vars == $Klast_old_code ) {
5269                         $rlec_count_by_seqno->{$seqno}++;
5270                     }
5271
5272                     # Remember index of first here doc target
5273                     if ( $type eq 'h' && !$K_first_here_doc_by_seqno{$seqno} ) {
5274                         $K_first_here_doc_by_seqno{$seqno} = $KK_new;
5275                     }
5276                 }
5277             }
5278         }
5279
5280         # For reference, here is how to get the parent sequence number.
5281         # This is not used because it is slower than finding it on the fly
5282         # in sub parent_seqno_by_K:
5283
5284         # my $seqno_parent =
5285         #     $type_sequence && $is_opening_token{$token}
5286         #   ? $seqno_stack{ $depth_next - 2 }
5287         #   : $seqno_stack{ $depth_next - 1 };
5288         # my $KK = @{$rLL_new};
5289         # $rseqno_of_parent_by_K->{$KK} = $seqno_parent;
5290
5291         # and finally, add this item to the new array
5292         push @{$rLL_new}, $item;
5293     };
5294
5295     my $store_token_and_space = sub {
5296         my ( $item, $want_space ) = @_;
5297
5298         # store a token with preceding space if requested and needed
5299
5300         # First store the space
5301         if (   $want_space
5302             && @{$rLL_new}
5303             && $rLL_new->[-1]->[_TYPE_] ne 'b'
5304             && $rOpts_add_whitespace )
5305         {
5306             my $rcopy = copy_token_as_type( $item, 'b', ' ' );
5307             $rcopy->[_LINE_INDEX_] =
5308               $rLL_new->[-1]->[_LINE_INDEX_];
5309
5310             # Patch 23-Jan-2021 to fix -lp blinkers:
5311             # The level and ci_level of newly created spaces should be the same
5312             # as the previous token.  Otherwise the coding for the -lp option,
5313             # in sub set_leading_whitespace, can create a blinking state in
5314             # some rare cases.
5315             $rcopy->[_LEVEL_] =
5316               $rLL_new->[-1]->[_LEVEL_];
5317             $rcopy->[_CI_LEVEL_] =
5318               $rLL_new->[-1]->[_CI_LEVEL_];
5319
5320             $store_token->($rcopy);
5321         }
5322
5323         # then the token
5324         $store_token->($item);
5325     };
5326
5327     my $K_end_q = sub {
5328         my ($KK) = @_;
5329         my $K_end = $KK;
5330
5331         my $Kn = $KK + 1;
5332         if ( $Kn <= $Kmax && $rLL->[$Kn]->[_TYPE_] eq 'b' ) { $Kn += 1 }
5333
5334         while ( $Kn <= $Kmax && $rLL->[$Kn]->[_TYPE_] eq 'q' ) {
5335             $K_end = $Kn;
5336
5337             $Kn += 1;
5338             if ( $Kn <= $Kmax && $rLL->[$Kn]->[_TYPE_] eq 'b' ) { $Kn += 1 }
5339         }
5340
5341         return $K_end;
5342     };
5343
5344     my $add_phantom_semicolon = sub {
5345
5346         my ($KK) = @_;
5347
5348         my $Kp = $self->K_previous_nonblank( undef, $rLL_new );
5349         return unless ( defined($Kp) );
5350
5351         # we are only adding semicolons for certain block types
5352         my $block_type = $rLL->[$KK]->[_BLOCK_TYPE_];
5353         return
5354           unless ( $ok_to_add_semicolon_for_block_type{$block_type}
5355             || $block_type =~ /^(sub|package)/
5356             || $block_type =~ /^\w+\:$/ );
5357
5358         my $type_sequence = $rLL->[$KK]->[_TYPE_SEQUENCE_];
5359
5360         my $previous_nonblank_type  = $rLL_new->[$Kp]->[_TYPE_];
5361         my $previous_nonblank_token = $rLL_new->[$Kp]->[_TOKEN_];
5362
5363         # Do not add a semicolon if...
5364         return
5365           if (
5366
5367             # it would follow a comment (and be isolated)
5368             $previous_nonblank_type eq '#'
5369
5370             # it follows a code block ( because they are not always wanted
5371             # there and may add clutter)
5372             || $rLL_new->[$Kp]->[_BLOCK_TYPE_]
5373
5374             # it would follow a label
5375             || $previous_nonblank_type eq 'J'
5376
5377             # it would be inside a 'format' statement (and cause syntax error)
5378             || (   $previous_nonblank_type eq 'k'
5379                 && $previous_nonblank_token =~ /format/ )
5380
5381           );
5382
5383         # Do not add a semicolon if it would impede a weld with an immediately
5384         # following closing token...like this
5385         #   { ( some code ) }
5386         #                  ^--No semicolon can go here
5387
5388         # look at the previous token... note use of the _NEW rLL array here,
5389         # but sequence numbers are invariant.
5390         my $seqno_inner = $rLL_new->[$Kp]->[_TYPE_SEQUENCE_];
5391
5392         # If it is also a CLOSING token we have to look closer...
5393         if (
5394                $seqno_inner
5395             && $is_closing_token{$previous_nonblank_token}
5396
5397             # we only need to look if there is just one inner container..
5398             && defined( $rchildren_of_seqno->{$type_sequence} )
5399             && @{ $rchildren_of_seqno->{$type_sequence} } == 1
5400           )
5401         {
5402
5403             # Go back and see if the corresponding two OPENING tokens are also
5404             # together.  Note that we are using the OLD K indexing here:
5405             my $K_outer_opening = $K_old_opening_by_seqno{$type_sequence};
5406             if ( defined($K_outer_opening) ) {
5407                 my $K_nxt = $self->K_next_nonblank($K_outer_opening);
5408                 if ( defined($K_nxt) ) {
5409                     my $seqno_nxt = $rLL->[$K_nxt]->[_TYPE_SEQUENCE_];
5410
5411                     # Is the next token after the outer opening the same as
5412                     # our inner closing (i.e. same sequence number)?
5413                     # If so, do not insert a semicolon here.
5414                     return if ( $seqno_nxt && $seqno_nxt == $seqno_inner );
5415                 }
5416             }
5417         }
5418
5419         # We will insert an empty semicolon here as a placeholder.  Later, if
5420         # it becomes the last token on a line, we will bring it to life.  The
5421         # advantage of doing this is that (1) we just have to check line
5422         # endings, and (2) the phantom semicolon has zero width and therefore
5423         # won't cause needless breaks of one-line blocks.
5424         my $Ktop = -1;
5425         if (   $rLL_new->[$Ktop]->[_TYPE_] eq 'b'
5426             && $want_left_space{';'} == WS_NO )
5427         {
5428
5429             # convert the blank into a semicolon..
5430             # be careful: we are working on the new stack top
5431             # on a token which has been stored.
5432             my $rcopy = copy_token_as_type( $rLL_new->[$Ktop], 'b', ' ' );
5433
5434             # Convert the existing blank to:
5435             #   a phantom semicolon for one_line_block option = 0 or 1
5436             #   a real semicolon    for one_line_block option = 2
5437             my $tok     = '';
5438             my $len_tok = 0;
5439             if ( $rOpts_one_line_block_semicolons == 2 ) {
5440                 $tok     = ';';
5441                 $len_tok = 1;
5442             }
5443
5444             $rLL_new->[$Ktop]->[_TOKEN_]        = $tok;
5445             $rLL_new->[$Ktop]->[_TOKEN_LENGTH_] = $len_tok;
5446             $rLL_new->[$Ktop]->[_TYPE_]         = ';';
5447             $rLL_new->[$Ktop]->[_SLEVEL_] =
5448               $rLL->[$KK]->[_SLEVEL_];
5449
5450             # Save list of new K indexes of phantom semicolons.
5451             # This will be needed if we want to undo them for iterations in
5452             # future coding.
5453             push @{$rK_phantom_semicolons}, @{$rLL_new} - 1;
5454
5455             # Then store a new blank
5456             $store_token->($rcopy);
5457         }
5458         else {
5459
5460             # insert a new token
5461             my $rcopy = copy_token_as_type( $rLL_new->[$Kp], ';', '' );
5462             $rcopy->[_SLEVEL_] = $rLL->[$KK]->[_SLEVEL_];
5463             $store_token->($rcopy);
5464             push @{$rK_phantom_semicolons}, @{$rLL_new} - 1;
5465         }
5466     };
5467
5468     my $check_Q = sub {
5469
5470         # Check that a quote looks okay
5471         # This sub works but needs to by sync'd with the log file output
5472         # before it can be used.
5473         my ( $KK, $Kfirst, $line_number ) = @_;
5474         my $token = $rLL->[$KK]->[_TOKEN_];
5475         $self->note_embedded_tab($line_number) if ( $token =~ "\t" );
5476
5477         my $Kp = $self->K_previous_nonblank( undef, $rLL_new );
5478         return unless ( defined($Kp) );
5479         my $previous_nonblank_type  = $rLL_new->[$Kp]->[_TYPE_];
5480         my $previous_nonblank_token = $rLL_new->[$Kp]->[_TOKEN_];
5481
5482         my $previous_nonblank_type_2  = 'b';
5483         my $previous_nonblank_token_2 = "";
5484         my $Kpp = $self->K_previous_nonblank( $Kp, $rLL_new );
5485         if ( defined($Kpp) ) {
5486             $previous_nonblank_type_2  = $rLL_new->[$Kpp]->[_TYPE_];
5487             $previous_nonblank_token_2 = $rLL_new->[$Kpp]->[_TOKEN_];
5488         }
5489
5490         my $next_nonblank_token = "";
5491         my $Kn                  = $KK + 1;
5492         if ( $Kn <= $Kmax && $rLL->[$Kn]->[_TYPE_] eq 'b' ) { $Kn += 1 }
5493         if ( $Kn <= $Kmax ) {
5494             $next_nonblank_token = $rLL->[$Kn]->[_TOKEN_];
5495         }
5496
5497         my $token_0 = $rLL->[$Kfirst]->[_TOKEN_];
5498         my $type_0  = $rLL->[$Kfirst]->[_TYPE_];
5499
5500         # make note of something like '$var = s/xxx/yyy/;'
5501         # in case it should have been '$var =~ s/xxx/yyy/;'
5502         if (
5503                $token =~ /^(s|tr|y|m|\/)/
5504             && $previous_nonblank_token =~ /^(=|==|!=)$/
5505
5506             # preceded by simple scalar
5507             && $previous_nonblank_type_2 eq 'i'
5508             && $previous_nonblank_token_2 =~ /^\$/
5509
5510             # followed by some kind of termination
5511             # (but give complaint if we can not see far enough ahead)
5512             && $next_nonblank_token =~ /^[; \)\}]$/
5513
5514             # scalar is not declared
5515             && !( $type_0 eq 'k' && $token_0 =~ /^(my|our|local)$/ )
5516           )
5517         {
5518             my $guess = substr( $last_nonblank_token, 0, 1 ) . '~';
5519             complain(
5520 "Note: be sure you want '$previous_nonblank_token' instead of '$guess' here\n"
5521             );
5522         }
5523     };
5524
5525     ############################################
5526     # Main loop to respace all lines of the file
5527     ############################################
5528     my $last_K_out;
5529
5530     # Testing option to break qw.  Do not use; it can make a mess.
5531     my $ALLOW_BREAK_MULTILINE_QW = 0;
5532     my $in_multiline_qw;
5533     foreach my $line_of_tokens ( @{$rlines} ) {
5534
5535         my $input_line_number = $line_of_tokens->{_line_number};
5536         my $last_line_type    = $line_type;
5537         $line_type = $line_of_tokens->{_line_type};
5538         next unless ( $line_type eq 'CODE' );
5539         my $last_CODE_type = $CODE_type;
5540         $CODE_type = $line_of_tokens->{_code_type};
5541         my $rK_range = $line_of_tokens->{_rK_range};
5542         my ( $Kfirst, $Klast ) = @{$rK_range};
5543         next unless defined($Kfirst);
5544         ( $Kfirst_old, $Klast_old ) = ( $Kfirst, $Klast );
5545         $Klast_old_code = $Klast_old;
5546
5547         # Be sure an old K value is defined for sub $store_token
5548         $Ktoken_vars = $Kfirst;
5549
5550         # Check for correct sequence of token indexes...
5551         # An error here means that sub write_line() did not correctly
5552         # package the tokenized lines as it received them.  If we
5553         # get a fault here it has not output a continuous sequence
5554         # of K values.  Or a line of CODE may have been mismarked as
5555         # something else.
5556         if ( defined($last_K_out) ) {
5557             if ( $Kfirst != $last_K_out + 1 ) {
5558                 Fault(
5559                     "Program Bug: last K out was $last_K_out but Kfirst=$Kfirst"
5560                 );
5561             }
5562         }
5563         else {
5564
5565             # The first token should always have been given index 0 by sub
5566             # write_line()
5567             if ( $Kfirst != 0 ) {
5568                 Fault("Program Bug: first K is $Kfirst but should be 0");
5569             }
5570         }
5571         $last_K_out = $Klast;
5572
5573         # Handle special lines of code
5574         if ( $CODE_type && $CODE_type ne 'NIN' && $CODE_type ne 'VER' ) {
5575
5576             # CODE_types are as follows.
5577             # 'BL' = Blank Line
5578             # 'VB' = Verbatim - line goes out verbatim
5579             # 'FS' = Format Skipping - line goes out verbatim, no blanks
5580             # 'IO' = Indent Only - only indentation may be changed
5581             # 'NIN' = No Internal Newlines - line does not get broken
5582             # 'HSC'=Hanging Side Comment - fix this hanging side comment
5583             # 'BC'=Block Comment - an ordinary full line comment
5584             # 'SBC'=Static Block Comment - a block comment which does not get
5585             #      indented
5586             # 'SBCX'=Static Block Comment Without Leading Space
5587             # 'VER'=VERSION statement
5588             # '' or (undefined) - no restructions
5589
5590             # For a hanging side comment we insert an empty quote before
5591             # the comment so that it becomes a normal side comment and
5592             # will be aligned by the vertical aligner
5593             if ( $CODE_type eq 'HSC' ) {
5594
5595                 # Safety Check: This must be a line with one token (a comment)
5596                 my $rtoken_vars = $rLL->[$Kfirst];
5597                 if ( $Kfirst == $Klast && $rtoken_vars->[_TYPE_] eq '#' ) {
5598
5599                     # Note that even if the flag 'noadd-whitespace' is set, we
5600                     # will make an exception here and allow a blank to be
5601                     # inserted to push the comment to the right.  We can think
5602                     # of this as an adjustment of indentation rather than
5603                     # whitespace between tokens. This will also prevent the
5604                     # hanging side comment from getting converted to a block
5605                     # comment if whitespace gets deleted, as for example with
5606                     # the -extrude and -mangle options.
5607                     my $rcopy = copy_token_as_type( $rtoken_vars, 'q', '' );
5608                     $store_token->($rcopy);
5609                     $rcopy = copy_token_as_type( $rtoken_vars, 'b', ' ' );
5610                     $store_token->($rcopy);
5611                     $store_token->($rtoken_vars);
5612                     next;
5613                 }
5614                 else {
5615
5616                     # This line was mis-marked by sub scan_comment
5617                     Fault(
5618                         "Program bug. A hanging side comment has been mismarked"
5619                     );
5620                 }
5621             }
5622
5623             if ( $CODE_type eq 'BL' ) {
5624                 my $seqno = $seqno_stack{ $depth_next - 1 };
5625                 if (   defined($seqno)
5626                     && !$ris_permanently_broken->{$seqno}
5627                     && $rOpts_maximum_consecutive_blank_lines )
5628                 {
5629                     $set_permanently_broken->($seqno);
5630                 }
5631             }
5632
5633             # Copy tokens unchanged
5634             foreach my $KK ( $Kfirst .. $Klast ) {
5635                 $Ktoken_vars = $KK;
5636                 $store_token->( $rLL->[$KK] );
5637             }
5638             next;
5639         }
5640
5641         # Handle normal line..
5642
5643         # Define index of last token before any side comment for comma counts
5644         my $type_end = $rLL->[$Klast_old_code]->[_TYPE_];
5645         if ( ( $type_end eq '#' || $type_end eq 'b' )
5646             && $Klast_old_code > $Kfirst_old )
5647         {
5648             $Klast_old_code--;
5649             if (   $rLL->[$Klast_old_code]->[_TYPE_] eq 'b'
5650                 && $Klast_old_code > $Kfirst_old )
5651             {
5652                 $Klast_old_code--;
5653             }
5654         }
5655
5656         # Insert any essential whitespace between lines
5657         # if last line was normal CODE.
5658         # Patch for rt #125012: use K_previous_code rather than '_nonblank'
5659         # because comments may disappear.
5660         my $type_next  = $rLL->[$Kfirst]->[_TYPE_];
5661         my $token_next = $rLL->[$Kfirst]->[_TOKEN_];
5662         my $Kp         = $self->K_previous_code( undef, $rLL_new );
5663         if (   $last_line_type eq 'CODE'
5664             && $type_next ne 'b'
5665             && defined($Kp) )
5666         {
5667             my $token_p = $rLL_new->[$Kp]->[_TOKEN_];
5668             my $type_p  = $rLL_new->[$Kp]->[_TYPE_];
5669
5670             my ( $token_pp, $type_pp );
5671             my $Kpp = $self->K_previous_code( $Kp, $rLL_new );
5672             if ( defined($Kpp) ) {
5673                 $token_pp = $rLL_new->[$Kpp]->[_TOKEN_];
5674                 $type_pp  = $rLL_new->[$Kpp]->[_TYPE_];
5675             }
5676             else {
5677                 $token_pp = ";";
5678                 $type_pp  = ';';
5679             }
5680
5681             if (
5682
5683                 is_essential_whitespace(
5684                     $token_pp, $type_pp,    $token_p,
5685                     $type_p,   $token_next, $type_next,
5686                 )
5687               )
5688             {
5689
5690                 # Copy this first token as blank, but use previous line number
5691                 my $rcopy = copy_token_as_type( $rLL->[$Kfirst], 'b', ' ' );
5692                 $rcopy->[_LINE_INDEX_] =
5693                   $rLL_new->[-1]->[_LINE_INDEX_];
5694
5695                 # The level and ci_level of newly created spaces should be the
5696                 # same as the previous token. Otherwise blinking states can
5697                 # be created if the -lp mode is used. See similar coding in
5698                 # sub 'store_token_and_space'.  Fixes cases b1109 b1110.
5699                 $rcopy->[_LEVEL_] =
5700                   $rLL_new->[-1]->[_LEVEL_];
5701                 $rcopy->[_CI_LEVEL_] =
5702                   $rLL_new->[-1]->[_CI_LEVEL_];
5703
5704                 $store_token->($rcopy);
5705             }
5706         }
5707
5708         ########################################################
5709         # Loop to copy all tokens on this line, with any changes
5710         ########################################################
5711         my $type_sequence;
5712         for ( my $KK = $Kfirst ; $KK <= $Klast ; $KK++ ) {
5713             $Ktoken_vars = $KK;
5714             $rtoken_vars = $rLL->[$KK];
5715             my $token              = $rtoken_vars->[_TOKEN_];
5716             my $type               = $rtoken_vars->[_TYPE_];
5717             my $last_type_sequence = $type_sequence;
5718             $type_sequence = $rtoken_vars->[_TYPE_SEQUENCE_];
5719
5720             # Handle a blank space ...
5721             if ( $type eq 'b' ) {
5722
5723                 # Delete it if not wanted by whitespace rules
5724                 # or we are deleting all whitespace
5725                 # Note that whitespace flag is a flag indicating whether a
5726                 # white space BEFORE the token is needed
5727                 next if ( $KK >= $Klast );    # skip terminal blank
5728                 my $Knext = $KK + 1;
5729
5730                 if ($rOpts_freeze_whitespace) {
5731                     $store_token->($rtoken_vars);
5732                     next;
5733                 }
5734
5735                 my $ws = $rwhitespace_flags->[$Knext];
5736                 if (   $ws == -1
5737                     || $rOpts_delete_old_whitespace )
5738                 {
5739
5740                     my $Kp = $self->K_previous_nonblank($KK);
5741                     next unless defined($Kp);
5742                     my $token_p = $rLL->[$Kp]->[_TOKEN_];
5743                     my $type_p  = $rLL->[$Kp]->[_TYPE_];
5744
5745                     my ( $token_pp, $type_pp );
5746
5747                     my $Kpp = $self->K_previous_nonblank($Kp);
5748                     if ( defined($Kpp) ) {
5749                         $token_pp = $rLL->[$Kpp]->[_TOKEN_];
5750                         $type_pp  = $rLL->[$Kpp]->[_TYPE_];
5751                     }
5752                     else {
5753                         $token_pp = ";";
5754                         $type_pp  = ';';
5755                     }
5756                     my $token_next = $rLL->[$Knext]->[_TOKEN_];
5757                     my $type_next  = $rLL->[$Knext]->[_TYPE_];
5758
5759                     my $do_not_delete = is_essential_whitespace(
5760                         $token_pp, $type_pp,    $token_p,
5761                         $type_p,   $token_next, $type_next,
5762                     );
5763
5764                     next unless ($do_not_delete);
5765                 }
5766
5767                 # make it just one character
5768                 $rtoken_vars->[_TOKEN_] = ' ';
5769                 $store_token->($rtoken_vars);
5770                 next;
5771             }
5772
5773             # Handle a nonblank token...
5774
5775             if ($type_sequence) {
5776
5777                 if ( $is_closing_token{$token} ) {
5778
5779                     # Insert a tentative missing semicolon if the next token is
5780                     # a closing block brace
5781                     if (
5782                            $type eq '}'
5783                         && $token eq '}'
5784
5785                         # not preceded by a ';'
5786                         && $last_nonblank_type ne ';'
5787
5788                         # and this is not a VERSION stmt (is all one line, we
5789                         # are not inserting semicolons on one-line blocks)
5790                         && $CODE_type ne 'VER'
5791
5792                         # and we are allowed to add semicolons
5793                         && $rOpts->{'add-semicolons'}
5794                       )
5795                     {
5796                         $add_phantom_semicolon->($KK);
5797                     }
5798                 }
5799             }
5800
5801             # Modify certain tokens here for whitespace
5802             # The following is not yet done, but could be:
5803             #   sub (x x x)
5804             elsif ( $type =~ /^[wit]$/ ) {
5805
5806                 # Examples: <<snippets/space1.in>>
5807                 # change '$  var'  to '$var' etc
5808                 # change '@    '   to '@'
5809                 my ( $sigil, $word ) = split /\s+/, $token, 2;
5810                 if ( length($sigil) == 1
5811                     && $sigil =~ /^[\$\&\%\*\@]$/ )
5812                 {
5813                     $token = $sigil;
5814                     $token .= $word if ($word);
5815                     $rtoken_vars->[_TOKEN_] = $token;
5816                 }
5817
5818                 # Split identifiers with leading arrows, inserting blanks if
5819                 # necessary.  It is easier and safer here than in the
5820                 # tokenizer.  For example '->new' becomes two tokens, '->' and
5821                 # 'new' with a possible blank between.
5822                 #
5823                 # Note: there is a related patch in sub set_whitespace_flags
5824                 if (   substr( $token, 0, 1 ) eq '-'
5825                     && $token =~ /^\-\>(.*)$/
5826                     && $1 )
5827                 {
5828
5829                     my $token_save = $1;
5830                     my $type_save  = $type;
5831
5832                     # Change '-> new'  to '->new'
5833                     $token_save =~ s/^\s+//g;
5834
5835                     # store a blank to left of arrow if necessary
5836                     my $Kprev = $self->K_previous_nonblank($KK);
5837                     if (   defined($Kprev)
5838                         && $rLL->[$Kprev]->[_TYPE_] ne 'b'
5839                         && $rOpts_add_whitespace
5840                         && $want_left_space{'->'} == WS_YES )
5841                     {
5842                         my $rcopy =
5843                           copy_token_as_type( $rtoken_vars, 'b', ' ' );
5844                         $store_token->($rcopy);
5845                     }
5846
5847                     # then store the arrow
5848                     my $rcopy = copy_token_as_type( $rtoken_vars, '->', '->' );
5849                     $store_token->($rcopy);
5850
5851                     # store a blank after the arrow if requested
5852                     # added for issue git #33
5853                     if ( $want_right_space{'->'} == WS_YES ) {
5854                         my $rcopy =
5855                           copy_token_as_type( $rtoken_vars, 'b', ' ' );
5856                         $store_token->($rcopy);
5857                     }
5858
5859                     # then reset the current token to be the remainder,
5860                     # and reset the whitespace flag according to the arrow
5861                     $token = $rtoken_vars->[_TOKEN_] = $token_save;
5862                     $type  = $rtoken_vars->[_TYPE_]  = $type_save;
5863                     $store_token->($rtoken_vars);
5864                     next;
5865                 }
5866
5867                 if ( $token =~ /$ANYSUB_PATTERN/ ) {
5868
5869                     # -spp = 0 : no space before opening prototype paren
5870                     # -spp = 1 : stable (follow input spacing)
5871                     # -spp = 2 : always space before opening prototype paren
5872                     my $spp = $rOpts->{'space-prototype-paren'};
5873                     if ( defined($spp) ) {
5874                         if    ( $spp == 0 ) { $token =~ s/\s+\(/\(/; }
5875                         elsif ( $spp == 2 ) { $token =~ s/\(/ (/; }
5876                     }
5877
5878                     # one space max, and no tabs
5879                     $token =~ s/\s+/ /g;
5880                     $rtoken_vars->[_TOKEN_] = $token;
5881                 }
5882
5883                 # clean up spaces in package identifiers, like
5884                 #   "package        Bob::Dog;"
5885                 if ( $token =~ /^package\s/ ) {
5886                     $token =~ s/\s+/ /g;
5887                     $rtoken_vars->[_TOKEN_] = $token;
5888                 }
5889
5890                 # trim identifiers of trailing blanks which can occur
5891                 # under some unusual circumstances, such as if the
5892                 # identifier 'witch' has trailing blanks on input here:
5893                 #
5894                 # sub
5895                 # witch
5896                 # ()   # prototype may be on new line ...
5897                 # ...
5898                 if ( $type eq 'i' ) {
5899                     $token =~ s/\s+$//g;
5900                     $rtoken_vars->[_TOKEN_] = $token;
5901                 }
5902             }
5903
5904             # handle semicolons
5905             elsif ( $type eq ';' ) {
5906
5907                 # Remove unnecessary semicolons, but not after bare
5908                 # blocks, where it could be unsafe if the brace is
5909                 # mistokenized.
5910                 if (
5911                     $rOpts->{'delete-semicolons'}
5912                     && (
5913                         (
5914                             $last_nonblank_type eq '}'
5915                             && (
5916                                 $is_block_without_semicolon{
5917                                     $last_nonblank_block_type}
5918                                 || $last_nonblank_block_type =~ /$SUB_PATTERN/
5919                                 || $last_nonblank_block_type =~ /^\w+:$/
5920                             )
5921                         )
5922                         || $last_nonblank_type eq ';'
5923                     )
5924                   )
5925                 {
5926
5927                     # This looks like a deletable semicolon, but even if a
5928                     # semicolon can be deleted it is necessarily best to do so.
5929                     # We apply these additional rules for deletion:
5930                     # - Always ok to delete a ';' at the end of a line
5931                     # - Never delete a ';' before a '#' because it would
5932                     #   promote it to a block comment.
5933                     # - If a semicolon is not at the end of line, then only
5934                     #   delete if it is followed by another semicolon or closing
5935                     #   token.  This includes the comment rule.  It may take
5936                     #   two passes to get to a final state, but it is a little
5937                     #   safer.  For example, keep the first semicolon here:
5938                     #      eval { sub bubba { ok(0) }; ok(0) } || ok(1);
5939                     #   It is not required but adds some clarity.
5940                     my $ok_to_delete = 1;
5941                     if ( $KK < $Klast ) {
5942                         my $Kn = $self->K_next_nonblank($KK);
5943                         if ( defined($Kn) && $Kn <= $Klast ) {
5944                             my $next_nonblank_token_type =
5945                               $rLL->[$Kn]->[_TYPE_];
5946                             $ok_to_delete = $next_nonblank_token_type eq ';'
5947                               || $next_nonblank_token_type eq '}';
5948                         }
5949                     }
5950
5951                     # do not delete only nonblank token in a file
5952                     else {
5953                         my $Kn = $self->K_next_nonblank($KK);
5954                         $ok_to_delete = defined($Kn) || $nonblank_token_count;
5955                     }
5956
5957                     if ($ok_to_delete) {
5958                         $self->note_deleted_semicolon($input_line_number);
5959                         next;
5960                     }
5961                     else {
5962                         write_logfile_entry("Extra ';'\n");
5963                     }
5964                 }
5965             }
5966
5967             # patch to add space to something like "x10"
5968             # This avoids having to split this token in the pre-tokenizer
5969             elsif ( $type eq 'n' ) {
5970                 if ( $token =~ /^x\d+/ ) {
5971                     $token =~ s/x/x /;
5972                     $rtoken_vars->[_TOKEN_] = $token;
5973                 }
5974             }
5975
5976             # check for a qw quote
5977             elsif ( $type eq 'q' ) {
5978
5979                 # trim blanks from right of qw quotes
5980                 # (To avoid trimming qw quotes use -ntqw; the tokenizer handles
5981                 # this)
5982                 $token =~ s/\s*$//;
5983                 $rtoken_vars->[_TOKEN_] = $token;
5984                 $self->note_embedded_tab($input_line_number)
5985                   if ( $token =~ "\t" );
5986
5987                 if ($in_multiline_qw) {
5988
5989                     # If we are at the end of a multiline qw ..
5990                     if ( $in_multiline_qw == $KK ) {
5991
5992                  # Split off the closing delimiter character
5993                  # so that the formatter can put a line break there if necessary
5994                         my $part1 = $token;
5995                         my $part2 = substr( $part1, -1, 1, "" );
5996
5997                         if ($part1) {
5998                             my $rcopy =
5999                               copy_token_as_type( $rtoken_vars, 'q', $part1 );
6000                             $store_token->($rcopy);
6001                             $token = $part2;
6002                             $rtoken_vars->[_TOKEN_] = $token;
6003
6004                         }
6005                         $in_multiline_qw = undef;
6006
6007                         # store without preceding blank
6008                         $store_token->($rtoken_vars);
6009                         next;
6010                     }
6011                     else {
6012                         # continuing a multiline qw
6013                         $store_token->($rtoken_vars);
6014                         next;
6015                     }
6016                 }
6017
6018                 else {
6019
6020                     # we are encountered new qw token...see if multiline
6021                     if ($ALLOW_BREAK_MULTILINE_QW) {
6022                         my $K_end = $K_end_q->($KK);
6023                         if ( $K_end != $KK ) {
6024
6025                             # Starting multiline qw...
6026                             # set flag equal to the ending K
6027                             $in_multiline_qw = $K_end;
6028
6029                           # Split off the leading part so that the formatter can
6030                           # put a line break there if necessary
6031                             if ( $token =~ /^(qw\s*.)(.*)$/ ) {
6032                                 my $part1 = $1;
6033                                 my $part2 = $2;
6034                                 if ($part2) {
6035                                     my $rcopy =
6036                                       copy_token_as_type( $rtoken_vars, 'q',
6037                                         $part1 );
6038                                     $store_token_and_space->(
6039                                         $rcopy,
6040                                         $rwhitespace_flags->[$KK] == WS_YES
6041                                     );
6042                                     $token = $part2;
6043                                     $rtoken_vars->[_TOKEN_] = $token;
6044
6045                                    # Second part goes without intermediate blank
6046                                     $store_token->($rtoken_vars);
6047                                     next;
6048                                 }
6049                             }
6050                         }
6051                     }
6052                     else {
6053
6054                         # this is a new single token qw -
6055                         # store with possible preceding blank
6056                         $store_token_and_space->(
6057                             $rtoken_vars, $rwhitespace_flags->[$KK] == WS_YES
6058                         );
6059                         next;
6060                     }
6061                 }
6062             } ## end if ( $type eq 'q' )
6063
6064             # change 'LABEL   :'   to 'LABEL:'
6065             elsif ( $type eq 'J' ) {
6066                 $token =~ s/\s+//g;
6067                 $rtoken_vars->[_TOKEN_] = $token;
6068             }
6069
6070             # check a quote for problems
6071             elsif ( $type eq 'Q' ) {
6072                 $check_Q->( $KK, $Kfirst, $input_line_number );
6073             }
6074
6075             # Store this token with possible previous blank
6076             $store_token_and_space->(
6077                 $rtoken_vars, $rwhitespace_flags->[$KK] == WS_YES
6078             );
6079
6080         }    # End token loop
6081     }    # End line loop
6082
6083     # Walk backwards through the tokens, making forward links to sequence items.
6084     if ( @{$rLL_new} ) {
6085         my $KNEXT;
6086         for ( my $KK = @{$rLL_new} - 1 ; $KK >= 0 ; $KK-- ) {
6087             $rLL_new->[$KK]->[_KNEXT_SEQ_ITEM_] = $KNEXT;
6088             if ( $rLL_new->[$KK]->[_TYPE_SEQUENCE_] ) { $KNEXT = $KK }
6089         }
6090         $self->[_K_first_seq_item_] = $KNEXT;
6091     }
6092
6093     # Find and remember lists by sequence number
6094     foreach my $seqno ( keys %{$K_opening_container} ) {
6095         my $K_opening = $K_opening_container->{$seqno};
6096         next unless defined($K_opening);
6097
6098         # code errors may leave undefined closing tokens
6099         my $K_closing = $K_closing_container->{$seqno};
6100         next unless defined($K_closing);
6101
6102         my $lx_open   = $rLL_new->[$K_opening]->[_LINE_INDEX_];
6103         my $lx_close  = $rLL_new->[$K_closing]->[_LINE_INDEX_];
6104         my $line_diff = $lx_close - $lx_open;
6105         $ris_broken_container->{$seqno} = $line_diff;
6106
6107         # See if this is a list
6108         my $is_list;
6109         my $rtype_count = $rtype_count_by_seqno->{$seqno};
6110         if ($rtype_count) {
6111             my $comma_count     = $rtype_count->{','};
6112             my $fat_comma_count = $rtype_count->{'=>'};
6113             my $semicolon_count = $rtype_count->{';'} || $rtype_count->{'f'};
6114
6115             # We will define a list to be a container with one or more commas
6116             # and no semicolons. Note that we have included the semicolons
6117             # in a 'for' container in the simicolon count to keep c-style for
6118             # statements from being formatted as lists.
6119             if ( ( $comma_count || $fat_comma_count ) && !$semicolon_count ) {
6120                 $is_list = 1;
6121
6122                 # We need to do one more check for a perenthesized list:
6123                 # At an opening paren following certain tokens, such as 'if',
6124                 # we do not want to format the contents as a list.
6125                 if ( $rLL_new->[$K_opening]->[_TOKEN_] eq '(' ) {
6126                     my $Kp = $self->K_previous_code( $K_opening, $rLL_new );
6127                     if ( defined($Kp) ) {
6128                         my $type_p = $rLL_new->[$Kp]->[_TYPE_];
6129                         if ( $type_p eq 'k' ) {
6130                             my $token_p = $rLL_new->[$Kp]->[_TOKEN_];
6131                             $is_list = 0 if ( $is_nonlist_keyword{$token_p} );
6132                         }
6133                         else {
6134                             $is_list = 0 if ( $is_nonlist_type{$type_p} );
6135                         }
6136                     }
6137                 }
6138             }
6139         }
6140
6141         # Look for a block brace marked as uncertain.  If the tokenizer thinks
6142         # its guess is uncertain for the type of a brace following an unknown
6143         # bareword then it adds a trailing space as a signal.  We can fix the
6144         # type here now that we have had a better look at the contents of the
6145         # container. This fixes case b1085. To find the corresponding code in
6146         # Tokenizer.pm search for 'b1085' with an editor.
6147         my $block_type = $rLL_new->[$K_opening]->[_BLOCK_TYPE_];
6148         if ( $block_type && substr( $block_type, -1, 1 ) eq ' ' ) {
6149
6150             # Always remove the trailing space
6151             $block_type =~ s/\s+$//;
6152
6153             # Try to filter out parenless sub calls
6154             my ( $Knn1, $Knn2 );
6155             my ( $type_nn1, $type_nn2 ) = ( 'b', 'b' );
6156             $Knn1 = $self->K_next_nonblank( $K_opening, $rLL_new );
6157             $Knn2 = $self->K_next_nonblank( $Knn1, $rLL_new ) if defined($Knn1);
6158             $type_nn1 = $rLL_new->[$Knn1]->[_TYPE_] if ( defined($Knn1) );
6159             $type_nn2 = $rLL_new->[$Knn2]->[_TYPE_] if ( defined($Knn2) );
6160
6161             #   if ( $type_nn1 =~ /^[wU]$/ && $type_nn2 =~ /^[wiqQGCZ]$/ ) {
6162             if ( $wU{$type_nn1} && $wiq{$type_nn2} ) {
6163                 $is_list = 0;
6164             }
6165
6166             # Convert to a hash brace if it looks like it holds a list
6167             if ($is_list) {
6168
6169                 $block_type = "";
6170
6171                 $rLL_new->[$K_opening]->[_CI_LEVEL_] = 1;
6172                 $rLL_new->[$K_closing]->[_CI_LEVEL_] = 1;
6173             }
6174
6175             $rLL_new->[$K_opening]->[_BLOCK_TYPE_] = $block_type;
6176             $rLL_new->[$K_closing]->[_BLOCK_TYPE_] = $block_type;
6177         }
6178
6179         # Handle a list container
6180         if ( $is_list && !$block_type ) {
6181             $ris_list_by_seqno->{$seqno} = $seqno;
6182             my $seqno_parent = $rparent_of_seqno->{$seqno};
6183             my $depth        = 0;
6184             while ( defined($seqno_parent) && $seqno_parent ne SEQ_ROOT ) {
6185                 $depth++;
6186
6187                 # for $rhas_list we need to save the minimum depth
6188                 if (  !$rhas_list->{$seqno_parent}
6189                     || $rhas_list->{$seqno_parent} > $depth )
6190                 {
6191                     $rhas_list->{$seqno_parent} = $depth;
6192                 }
6193
6194                 if ($line_diff) {
6195                     $rhas_broken_list->{$seqno_parent} = 1;
6196
6197                     # Patch1: We need to mark broken lists with non-terminal
6198                     # line-ending commas for the -bbx=2 parameter. This insures
6199                     # that the list will stay broken.  Otherwise the flag
6200                     # -bbx=2 can be unstable.  This fixes case b789 and b938.
6201
6202                     # Patch2: Updated to also require either one fat comma or
6203                     # one more line-ending comma.  Fixes cases b1069 b1070
6204                     # b1072 b1076.
6205                     if (
6206                         $rlec_count_by_seqno->{$seqno}
6207                         && (   $rlec_count_by_seqno->{$seqno} > 1
6208                             || $rtype_count_by_seqno->{$seqno}->{'=>'} )
6209                       )
6210                     {
6211                         $rhas_broken_list_with_lec->{$seqno_parent} = 1;
6212                     }
6213                 }
6214                 $seqno_parent = $rparent_of_seqno->{$seqno_parent};
6215             }
6216         }
6217
6218         # Handle code blocks ...
6219         # The -lp option needs to know if a container holds a code block
6220         elsif ( $block_type && $rOpts_line_up_parentheses ) {
6221             my $seqno_parent = $rparent_of_seqno->{$seqno};
6222             while ( defined($seqno_parent) && $seqno_parent ne SEQ_ROOT ) {
6223                 $rhas_code_block->{$seqno_parent}        = 1;
6224                 $rhas_broken_code_block->{$seqno_parent} = $line_diff;
6225                 $seqno_parent = $rparent_of_seqno->{$seqno_parent};
6226             }
6227         }
6228     }
6229
6230     # Find containers with ternaries, needed for -lp formatting.
6231     foreach my $seqno ( keys %{$K_opening_ternary} ) {
6232         my $seqno_parent = $rparent_of_seqno->{$seqno};
6233         while ( defined($seqno_parent) && $seqno_parent ne SEQ_ROOT ) {
6234             $rhas_ternary->{$seqno_parent} = 1;
6235             $seqno_parent = $rparent_of_seqno->{$seqno_parent};
6236         }
6237     }
6238
6239     # Turn off -lp for containers with here-docs with text within a container,
6240     # since they have their own fixed indentation.  Fixes case b1081.
6241     if ($rOpts_line_up_parentheses) {
6242         foreach my $seqno ( keys %K_first_here_doc_by_seqno ) {
6243             my $Kh      = $K_first_here_doc_by_seqno{$seqno};
6244             my $Kc      = $K_closing_container->{$seqno};
6245             my $line_Kh = $rLL_new->[$Kh]->[_LINE_INDEX_];
6246             my $line_Kc = $rLL_new->[$Kc]->[_LINE_INDEX_];
6247             next if ( $line_Kh == $line_Kc );
6248             $ris_excluded_lp_container->{$seqno} = 1;
6249         }
6250     }
6251
6252     # Set a flag to turn off -cab=3 in complex structures.  Otherwise,
6253     # instability can occur.  When it is overridden the behavior of the closest
6254     # match, -cab=2, will be used instead.  This fixes cases b1096 b1113.
6255     if ( $rOpts_comma_arrow_breakpoints == 3 ) {
6256         foreach my $seqno ( keys %{$K_opening_container} ) {
6257
6258             my $rtype_count = $rtype_count_by_seqno->{$seqno};
6259             next unless ( $rtype_count && $rtype_count->{'=>'} );
6260
6261             # override -cab=3 if this contains a sub-list
6262             if ( $rhas_list->{$seqno} ) {
6263                 $roverride_cab3->{$seqno} = 1;
6264             }
6265
6266             # or if this is a sub-list of its parent container
6267             else {
6268                 my $seqno_parent = $rparent_of_seqno->{$seqno};
6269                 if ( defined($seqno_parent)
6270                     && $ris_list_by_seqno->{$seqno_parent} )
6271                 {
6272                     $roverride_cab3->{$seqno} = 1;
6273                 }
6274             }
6275         }
6276     }
6277
6278     # Reset memory to be the new array
6279     $self->[_rLL_] = $rLL_new;
6280     my $Klimit;
6281     if ( @{$rLL_new} ) { $Klimit = @{$rLL_new} - 1 }
6282     $self->[_Klimit_] = $Klimit;
6283
6284     # DEBUG OPTION: make sure the new array looks okay.
6285     # This is no longer needed but should be retained for future development.
6286     DEVEL_MODE && $self->check_token_array();
6287
6288     # reset the token limits of each line
6289     $self->resync_lines_and_tokens();
6290
6291     return;
6292 }
6293
6294 sub copy_token_as_type {
6295
6296     # This provides a quick way to create a new token by
6297     # slightly modifying an existing token.
6298     my ( $rold_token, $type, $token ) = @_;
6299     if ( $type eq 'b' ) {
6300         $token = " " unless defined($token);
6301     }
6302     elsif ( $type eq 'q' ) {
6303         $token = '' unless defined($token);
6304     }
6305     elsif ( $type eq '->' ) {
6306         $token = '->' unless defined($token);
6307     }
6308     elsif ( $type eq ';' ) {
6309         $token = ';' unless defined($token);
6310     }
6311     else {
6312
6313         # This sub assumes it will be called with just two types, 'b' or 'q'
6314         Fault(
6315 "Programming error: copy_token_as has type $type but should be 'b' or 'q'"
6316         );
6317     }
6318
6319     my @rnew_token = @{$rold_token};
6320     $rnew_token[_TYPE_]          = $type;
6321     $rnew_token[_TOKEN_]         = $token;
6322     $rnew_token[_BLOCK_TYPE_]    = '';
6323     $rnew_token[_TYPE_SEQUENCE_] = '';
6324     return \@rnew_token;
6325 }
6326
6327 sub Debug_dump_tokens {
6328
6329     # a debug routine, not normally used
6330     my ( $self, $msg ) = @_;
6331     my $rLL   = $self->[_rLL_];
6332     my $nvars = @{$rLL};
6333     print STDERR "$msg\n";
6334     print STDERR "ntokens=$nvars\n";
6335     print STDERR "K\t_TOKEN_\t_TYPE_\n";
6336     my $K = 0;
6337
6338     foreach my $item ( @{$rLL} ) {
6339         print STDERR "$K\t$item->[_TOKEN_]\t$item->[_TYPE_]\n";
6340         $K++;
6341     }
6342     return;
6343 }
6344
6345 sub K_next_code {
6346     my ( $self, $KK, $rLL ) = @_;
6347
6348     # return the index K of the next nonblank, non-comment token
6349     return unless ( defined($KK) && $KK >= 0 );
6350
6351     # use the standard array unless given otherwise
6352     $rLL = $self->[_rLL_] unless ( defined($rLL) );
6353     my $Num  = @{$rLL};
6354     my $Knnb = $KK + 1;
6355     while ( $Knnb < $Num ) {
6356         if ( !defined( $rLL->[$Knnb] ) ) {
6357
6358             # We seem to have encountered a gap in our array.
6359             # This shouldn't happen because sub write_line() pushed
6360             # items into the $rLL array.
6361             Fault("Undefined entry for k=$Knnb");
6362         }
6363         if (   $rLL->[$Knnb]->[_TYPE_] ne 'b'
6364             && $rLL->[$Knnb]->[_TYPE_] ne '#' )
6365         {
6366             return $Knnb;
6367         }
6368         $Knnb++;
6369     }
6370     return;
6371 }
6372
6373 sub K_next_nonblank {
6374     my ( $self, $KK, $rLL ) = @_;
6375
6376     # return the index K of the next nonblank token, or
6377     # return undef if none
6378     return unless ( defined($KK) && $KK >= 0 );
6379
6380     # The third arg allows this routine to be used on any array.  This is
6381     # useful in sub respace_tokens when we are copying tokens from an old $rLL
6382     # to a new $rLL array.  But usually the third arg will not be given and we
6383     # will just use the $rLL array in $self.
6384     $rLL = $self->[_rLL_] unless ( defined($rLL) );
6385     my $Num  = @{$rLL};
6386     my $Knnb = $KK + 1;
6387     return unless ( $Knnb < $Num );
6388     return $Knnb if ( $rLL->[$Knnb]->[_TYPE_] ne 'b' );
6389     return unless ( ++$Knnb < $Num );
6390     return $Knnb if ( $rLL->[$Knnb]->[_TYPE_] ne 'b' );
6391
6392     # Backup loop. Very unlikely to get here; it means we have neighboring
6393     # blanks in the token stream.
6394     $Knnb++;
6395     while ( $Knnb < $Num ) {
6396
6397         # Safety check, this fault shouldn't happen:  The $rLL array is the
6398         # main array of tokens, so all entries should be used.  It is
6399         # initialized in sub write_line, and then re-initialized by sub
6400         # $store_token() within sub respace_tokens.  Tokens are pushed on
6401         # so there shouldn't be any gaps.
6402         if ( !defined( $rLL->[$Knnb] ) ) {
6403             Fault("Undefined entry for k=$Knnb");
6404         }
6405         if ( $rLL->[$Knnb]->[_TYPE_] ne 'b' ) { return $Knnb }
6406         $Knnb++;
6407     }
6408     return;
6409 }
6410
6411 sub K_previous_code {
6412
6413     # return the index K of the previous nonblank, non-comment token
6414     # Call with $KK=undef to start search at the top of the array
6415     my ( $self, $KK, $rLL ) = @_;
6416
6417     # use the standard array unless given otherwise
6418     $rLL = $self->[_rLL_] unless ( defined($rLL) );
6419     my $Num = @{$rLL};
6420     if    ( !defined($KK) ) { $KK = $Num }
6421     elsif ( $KK > $Num ) {
6422
6423         # This fault can be caused by a programming error in which a bad $KK is
6424         # given.  The caller should make the first call with KK_new=undef to
6425         # avoid this error.
6426         Fault(
6427 "Program Bug: K_previous_nonblank_new called with K=$KK which exceeds $Num"
6428         );
6429     }
6430     my $Kpnb = $KK - 1;
6431     while ( $Kpnb >= 0 ) {
6432         if (   $rLL->[$Kpnb]->[_TYPE_] ne 'b'
6433             && $rLL->[$Kpnb]->[_TYPE_] ne '#' )
6434         {
6435             return $Kpnb;
6436         }
6437         $Kpnb--;
6438     }
6439     return;
6440 }
6441
6442 sub K_previous_nonblank {
6443
6444     # return index of previous nonblank token before item K;
6445     # Call with $KK=undef to start search at the top of the array
6446     my ( $self, $KK, $rLL ) = @_;
6447
6448     # use the standard array unless given otherwise
6449     $rLL = $self->[_rLL_] unless ( defined($rLL) );
6450     my $Num = @{$rLL};
6451     if    ( !defined($KK) ) { $KK = $Num }
6452     elsif ( $KK > $Num ) {
6453
6454         # This fault can be caused by a programming error in which a bad $KK is
6455         # given.  The caller should make the first call with KK_new=undef to
6456         # avoid this error.
6457         Fault(
6458 "Program Bug: K_previous_nonblank_new called with K=$KK which exceeds $Num"
6459         );
6460     }
6461     my $Kpnb = $KK - 1;
6462     return unless ( $Kpnb >= 0 );
6463     return $Kpnb if ( $rLL->[$Kpnb]->[_TYPE_] ne 'b' );
6464     return unless ( --$Kpnb >= 0 );
6465     return $Kpnb if ( $rLL->[$Kpnb]->[_TYPE_] ne 'b' );
6466
6467     # Backup loop. We should not get here unless some routine
6468     # slipped repeated blanks into the token stream.
6469     return unless ( --$Kpnb >= 0 );
6470     while ( $Kpnb >= 0 ) {
6471         if ( $rLL->[$Kpnb]->[_TYPE_] ne 'b' ) { return $Kpnb }
6472         $Kpnb--;
6473     }
6474     return;
6475 }
6476
6477 sub get_old_line_index {
6478
6479     # return index of the original line that token K was on
6480     my ( $self, $K ) = @_;
6481     my $rLL = $self->[_rLL_];
6482     return 0 unless defined($K);
6483     return $rLL->[$K]->[_LINE_INDEX_];
6484 }
6485
6486 sub get_old_line_count {
6487
6488     # return number of input lines separating two tokens
6489     my ( $self, $Kbeg, $Kend ) = @_;
6490     my $rLL = $self->[_rLL_];
6491     return 0 unless defined($Kbeg);
6492     return 0 unless defined($Kend);
6493     return $rLL->[$Kend]->[_LINE_INDEX_] - $rLL->[$Kbeg]->[_LINE_INDEX_] + 1;
6494 }
6495
6496 sub parent_seqno_by_K {
6497
6498     # Return the sequence number of the parent container of token K, if any.
6499
6500     my ( $self, $KK ) = @_;
6501     return unless defined($KK);
6502
6503     # Note: This routine is relatively slow. I tried replacing it with a hash
6504     # which is easily created in sub respace_tokens. But the total time with a
6505     # hash was greater because this routine is called once per line whereas a
6506     # hash must be created token-by-token.
6507
6508     my $rLL   = $self->[_rLL_];
6509     my $KNEXT = $KK;
6510
6511     # For example, consider the following with seqno=5 of the '[' and ']'
6512     # being called with index K of the first token of each line:
6513
6514     #                                              # result
6515     #    push @tests,                              # -
6516     #      [                                       # -
6517     #        sub { 99 },   'do {&{%s} for 1,2}',   # 5
6518     #        '(&{})(&{})', undef,                  # 5
6519     #        [ 2, 2, 0 ],  0                       # 5
6520     #      ];                                      # -
6521
6522     # NOTE: The ending parent will be SEQ_ROOT for a balanced file.  For
6523     # unbalanced files, last sequence number will either be undefined or it may
6524     # be at a deeper level.  In either case we will just return SEQ_ROOT to
6525     # have a defined value and allow formatting to proceed.
6526     my $parent_seqno = SEQ_ROOT;
6527     while ( defined($KNEXT) ) {
6528         my $Kt = $KNEXT;
6529         $KNEXT = $rLL->[$KNEXT]->[_KNEXT_SEQ_ITEM_];
6530         my $rtoken_vars   = $rLL->[$Kt];
6531         my $type          = $rtoken_vars->[_TYPE_];
6532         my $type_sequence = $rtoken_vars->[_TYPE_SEQUENCE_];
6533
6534         # if next container token is closing, it is the parent seqno
6535         if ( $is_closing_type{$type} ) {
6536             if ( $Kt > $KK ) {
6537                 $parent_seqno = $type_sequence;
6538             }
6539             else {
6540                 $parent_seqno = $self->[_rparent_of_seqno_]->{$type_sequence};
6541             }
6542             last;
6543         }
6544
6545         # if next container token is opening, we want its parent container
6546         elsif ( $is_opening_type{$type} ) {
6547             $parent_seqno = $self->[_rparent_of_seqno_]->{$type_sequence};
6548             last;
6549         }
6550
6551         # not a container - must be ternary - keep going
6552     }
6553
6554     $parent_seqno = SEQ_ROOT unless ( defined($parent_seqno) );
6555     return $parent_seqno;
6556 }
6557
6558 sub is_in_block_by_i {
6559     my ( $self, $i ) = @_;
6560
6561     # returns true if
6562     #     token at i is contained in a BLOCK
6563     #     or is at root level
6564     #     or there is some kind of error (i.e. unbalanced file)
6565     # returns false otherwise
6566     my $seqno = $parent_seqno_to_go[$i];
6567     return 1 if ( !$seqno || $seqno eq SEQ_ROOT );
6568     my $Kopening = $self->[_K_opening_container_]->{$seqno};
6569     return 1 unless defined($Kopening);
6570     my $rLL = $self->[_rLL_];
6571     return 1 if $rLL->[$Kopening]->[_BLOCK_TYPE_];
6572     return;
6573 }
6574
6575 sub is_in_list_by_i {
6576     my ( $self, $i ) = @_;
6577
6578     # returns true if token at i is contained in a LIST
6579     # returns false otherwise
6580     my $seqno = $parent_seqno_to_go[$i];
6581     return unless ( $seqno && $seqno ne SEQ_ROOT );
6582     if ( $self->[_ris_list_by_seqno_]->{$seqno} ) {
6583         return 1;
6584     }
6585     return;
6586 }
6587
6588 sub is_list_by_K {
6589
6590     # Return true if token K is in a list
6591     my ( $self, $KK ) = @_;
6592
6593     my $parent_seqno = $self->parent_seqno_by_K($KK);
6594     return unless defined($parent_seqno);
6595     return $self->[_ris_list_by_seqno_]->{$parent_seqno};
6596 }
6597
6598 sub is_list_by_seqno {
6599
6600     # Return true if the immediate contents of a container appears to be a
6601     # list.
6602     my ( $self, $seqno ) = @_;
6603     return unless defined($seqno);
6604     return $self->[_ris_list_by_seqno_]->{$seqno};
6605 }
6606
6607 sub resync_lines_and_tokens {
6608
6609     my $self   = shift;
6610     my $rLL    = $self->[_rLL_];
6611     my $Klimit = $self->[_Klimit_];
6612     my $rlines = $self->[_rlines_];
6613     my @Krange_code_without_comments;
6614     my @Klast_valign_code;
6615
6616     # Re-construct the arrays of tokens associated with the original input lines
6617     # since they have probably changed due to inserting and deleting blanks
6618     # and a few other tokens.
6619
6620     my $Kmax = -1;
6621
6622     # This is the next token and its line index:
6623     my $Knext = 0;
6624     my $inext;
6625     if ( defined($rLL) && @{$rLL} ) {
6626         $Kmax  = @{$rLL} - 1;
6627         $inext = $rLL->[$Knext]->[_LINE_INDEX_];
6628     }
6629
6630     # Remember the most recently output token index
6631     my $Klast_out;
6632
6633     my $iline = -1;
6634     foreach my $line_of_tokens ( @{$rlines} ) {
6635         $iline++;
6636         my $line_type = $line_of_tokens->{_line_type};
6637         my $CODE_type = $line_of_tokens->{_code_type};
6638         if ( $line_type eq 'CODE' ) {
6639
6640             my @K_array;
6641             my $rK_range;
6642             if ( $Knext <= $Kmax ) {
6643                 $inext = $rLL->[$Knext]->[_LINE_INDEX_];
6644                 while ( $inext <= $iline ) {
6645                     push @K_array, $Knext;
6646                     $Knext += 1;
6647                     if ( $Knext > $Kmax ) {
6648                         $inext = undef;
6649                         last;
6650                     }
6651                     $inext = $rLL->[$Knext]->[_LINE_INDEX_];
6652                 }
6653             }
6654
6655             # Delete any terminal blank token
6656             if (@K_array) {
6657                 if ( $rLL->[ $K_array[-1] ]->[_TYPE_] eq 'b' ) {
6658                     pop @K_array;
6659                 }
6660             }
6661
6662             # Define the range of K indexes for the line:
6663             # $Kfirst = index of first token on line
6664             # $Klast_out = index of last token on line
6665             my ( $Kfirst, $Klast );
6666             if (@K_array) {
6667                 $Kfirst    = $K_array[0];
6668                 $Klast     = $K_array[-1];
6669                 $Klast_out = $Klast;
6670
6671                 if ( defined($Kfirst) ) {
6672
6673                     # Save ranges of non-comment code. This will be used by
6674                     # sub keep_old_line_breaks.
6675                     if ( $rLL->[$Kfirst]->[_TYPE_] ne '#' ) {
6676                         push @Krange_code_without_comments, [ $Kfirst, $Klast ];
6677                     }
6678
6679                     # Only save ending K indexes of code types which are blank
6680                     # or 'VER'.  These will be used for a convergence check.
6681                     # See related code in sub 'send_lines_to_vertical_aligner'.
6682                     if (  !$CODE_type
6683                         || $CODE_type eq 'VER' )
6684                     {
6685                         push @Klast_valign_code, $Klast;
6686                     }
6687                 }
6688             }
6689
6690             # It is only safe to trim the actual line text if the input
6691             # line had a terminal blank token. Otherwise, we may be
6692             # in a quote.
6693             if ( $line_of_tokens->{_ended_in_blank_token} ) {
6694                 $line_of_tokens->{_line_text} =~ s/\s+$//;
6695             }
6696             $line_of_tokens->{_rK_range} = [ $Kfirst, $Klast ];
6697
6698             # Deleting semicolons can create new empty code lines
6699             # which should be marked as blank
6700             if ( !defined($Kfirst) ) {
6701                 my $code_type = $line_of_tokens->{_code_type};
6702                 if ( !$code_type ) {
6703                     $line_of_tokens->{_code_type} = 'BL';
6704                 }
6705             }
6706         }
6707     }
6708
6709     # There shouldn't be any nodes beyond the last one.  This routine is
6710     # relinking lines and tokens after the tokens have been respaced.  A fault
6711     # here indicates some kind of bug has been introduced into the above loops.
6712     if ( defined($inext) ) {
6713
6714         Fault("unexpected tokens at end of file when reconstructing lines");
6715     }
6716     $self->[_rKrange_code_without_comments_] = \@Krange_code_without_comments;
6717
6718     # Setup the convergence test in the FileWriter based on line-ending indexes
6719     my $file_writer_object = $self->[_file_writer_object_];
6720     $file_writer_object->setup_convergence_test( \@Klast_valign_code );
6721
6722     # Mark essential old breakpoints if combination -iob -lp is used.  These
6723     # two options do not work well together, but we can avoid turning -iob off
6724     # by ignoring -iob at certain essential line breaks.
6725     # Fixes cases b1021 b1023 b1034 b1048 b1049 b1050 b1056 b1058
6726     if ( $rOpts_ignore_old_breakpoints && $rOpts_line_up_parentheses ) {
6727         my %is_assignment_or_fat_comma = %is_assignment;
6728         $is_assignment_or_fat_comma{'=>'} = 1;
6729         my $ris_essential_old_breakpoint =
6730           $self->[_ris_essential_old_breakpoint_];
6731         my $iline = -1;
6732         my ( $Kfirst, $Klast );
6733         foreach my $line_of_tokens ( @{$rlines} ) {
6734             $iline++;
6735             my $line_type = $line_of_tokens->{_line_type};
6736             if ( $line_type ne 'CODE' ) {
6737                 ( $Kfirst, $Klast ) = ( undef, undef );
6738                 next;
6739             }
6740             my ( $Kfirst_prev, $Klast_prev ) = ( $Kfirst, $Klast );
6741             ( $Kfirst, $Klast ) = @{ $line_of_tokens->{_rK_range} };
6742
6743             next unless defined($Klast_prev);
6744             next unless defined($Kfirst);
6745             my $type_last  = $rLL->[$Klast_prev]->[_TOKEN_];
6746             my $type_first = $rLL->[$Kfirst]->[_TOKEN_];
6747             next
6748               unless ( $is_assignment_or_fat_comma{$type_last}
6749                 || $is_assignment_or_fat_comma{$type_first} );
6750             $ris_essential_old_breakpoint->{$Klast_prev} = 1;
6751         }
6752     }
6753
6754     return;
6755 }
6756
6757 sub keep_old_line_breaks {
6758
6759     # Called once per file to find and mark any old line breaks which
6760     # should be kept.  We will be translating the input hashes into
6761     # token indexes.
6762
6763     # A flag is set as follows:
6764     # = 1 make a hard break (flush the current batch)
6765     #     best for something like leading commas (-kbb=',')
6766     # = 2 make a soft break (keep building current batch)
6767     #     best for something like leading ->
6768
6769     my ($self) = @_;
6770
6771     my $rLL = $self->[_rLL_];
6772     my $rKrange_code_without_comments =
6773       $self->[_rKrange_code_without_comments_];
6774     my $rbreak_before_Kfirst = $self->[_rbreak_before_Kfirst_];
6775     my $rbreak_after_Klast   = $self->[_rbreak_after_Klast_];
6776     my $rwant_container_open = $self->[_rwant_container_open_];
6777     my $K_opening_container  = $self->[_K_opening_container_];
6778     my $ris_broken_container = $self->[_ris_broken_container_];
6779     my $ris_list_by_seqno    = $self->[_ris_list_by_seqno_];
6780
6781     # This code moved here from sub scan_list to fix b1120
6782     if ( $rOpts->{'break-at-old-method-breakpoints'} ) {
6783         foreach my $item ( @{$rKrange_code_without_comments} ) {
6784             my ( $Kfirst, $Klast ) = @{$item};
6785             my $type  = $rLL->[$Kfirst]->[_TYPE_];
6786             my $token = $rLL->[$Kfirst]->[_TOKEN_];
6787
6788             # leading '->' use a value of 2 which causes a soft
6789             # break rather than a hard break
6790             if ( $type eq '->' ) {
6791                 $rbreak_before_Kfirst->{$Kfirst} = 2;
6792             }
6793
6794             # leading ')->' use a special flag to insure that both
6795             # opening and closing parens get opened
6796             # Fix for b1120: only for parens, not braces
6797             elsif ( $token eq ')' ) {
6798                 my $Kn = $self->K_next_nonblank($Kfirst);
6799                 next
6800                   unless ( defined($Kn)
6801                     && $Kn <= $Klast
6802                     && $rLL->[$Kn]->[_TYPE_] eq '->' );
6803                 my $seqno = $rLL->[$Kfirst]->[_TYPE_SEQUENCE_];
6804                 next unless ($seqno);
6805
6806                 # Patch to avoid blinkers: but do not do this unless the
6807                 # container holds a list, or the opening and closing parens are
6808                 # separated by more than one line.
6809                 # Fixes case b977.
6810                 next
6811                   if (
6812                     !$ris_list_by_seqno->{$seqno}
6813                     && (  !$ris_broken_container->{$seqno}
6814                         || $ris_broken_container->{$seqno} <= 1 )
6815                   );
6816                 $rwant_container_open->{$seqno} = 1;
6817             }
6818         }
6819     }
6820
6821     return unless ( %keep_break_before_type || %keep_break_after_type );
6822
6823     foreach my $item ( @{$rKrange_code_without_comments} ) {
6824         my ( $Kfirst, $Klast ) = @{$item};
6825
6826         my $type_first = $rLL->[$Kfirst]->[_TYPE_];
6827         if ( $keep_break_before_type{$type_first} ) {
6828             $rbreak_before_Kfirst->{$Kfirst} = 1;
6829         }
6830
6831         my $type_last = $rLL->[$Klast]->[_TYPE_];
6832         if ( $keep_break_after_type{$type_last} ) {
6833             $rbreak_after_Klast->{$Klast} = 1;
6834         }
6835     }
6836     return;
6837 }
6838
6839 sub weld_containers {
6840
6841     # Called once per file to do any welding operations requested by --weld*
6842     # flags.
6843     my ($self) = @_;
6844
6845     # This count is used to eliminate needless calls for weld checks elsewere
6846     $total_weld_count = 0;
6847
6848     return if ( $rOpts->{'indent-only'} );
6849     return unless ($rOpts_add_newlines);
6850
6851     # Important: sub 'weld_cuddled_blocks' must be called before
6852     # sub 'weld_nested_containers'. This is because the cuddled option needs to
6853     # use the original _LEVEL_ values of containers, but the weld nested
6854     # containers changes _LEVEL_ of welded containers.
6855
6856     # Here is a good test case to be sure that both cuddling and welding
6857     # are working and not interfering with each other: <<snippets/ce_wn1.in>>
6858
6859     #   perltidy -wn -ce
6860
6861    # if ($BOLD_MATH) { (
6862    #     $labels, $comment,
6863    #     join( '', '<B>', &make_math( $mode, '', '', $_ ), '</B>' )
6864    # ) } else { (
6865    #     &process_math_in_latex( $mode, $math_style, $slevel, "\\mbox{$text}" ),
6866    #     $after
6867    # ) }
6868
6869     $self->weld_cuddled_blocks() if ( %{$rcuddled_block_types} );
6870
6871     if ( $rOpts->{'weld-nested-containers'} ) {
6872
6873         $self->weld_nested_containers();
6874
6875         $self->weld_nested_quotes();
6876     }
6877
6878     ##############################################################
6879     # All welding is done. Finish setting up weld data structures.
6880     ##############################################################
6881
6882     my $rLL                  = $self->[_rLL_];
6883     my $rK_weld_left         = $self->[_rK_weld_left_];
6884     my $rK_weld_right        = $self->[_rK_weld_right_];
6885     my $rweld_len_right_at_K = $self->[_rweld_len_right_at_K_];
6886
6887     my @K_multi_weld;
6888     my @keys = keys %{$rK_weld_right};
6889     $total_weld_count = @keys;
6890
6891     # Note that this loop is processed in unsorted order for efficiency
6892     foreach my $Kstart (@keys) {
6893         my $Kend = $rK_weld_right->{$Kstart};
6894
6895         # An error here would be due to an incorrect initialization introduced
6896         # in one of the above weld routines, like sub weld_nested.
6897         if ( $Kend <= $Kstart ) {
6898             Fault("Bad weld link: Kend=$Kend <= Kstart=$Kstart\n");
6899         }
6900
6901         $rweld_len_right_at_K->{$Kstart} =
6902           $rLL->[$Kend]->[_CUMULATIVE_LENGTH_] -
6903           $rLL->[$Kstart]->[_CUMULATIVE_LENGTH_];
6904
6905         $rK_weld_left->{$Kend} = $Kstart;    # fix in case of missing left link
6906
6907         # Remember the leftmost index of welds which continue to the right
6908         if ( defined( $rK_weld_right->{$Kend} )
6909             && !defined( $rK_weld_left->{$Kstart} ) )
6910         {
6911             push @K_multi_weld, $Kstart;
6912         }
6913     }
6914
6915     # Update the end index and lengths of any long welds to extend to the far
6916     # end.  This has to be processed in sorted order.
6917     # Left links added for b1173.
6918     my $Kend = -1;
6919     foreach my $Kstart ( sort { $a <=> $b } @K_multi_weld ) {
6920
6921         # skip any interior K which was originally missing a left link
6922         next if ( $Kstart <= $Kend );
6923
6924         my @Klist;
6925         push @Klist, $Kstart;
6926         $Kend = $rK_weld_right->{$Kstart};
6927         $rK_weld_left->{$Kend} = $Kstart;
6928         my $Knext = $rK_weld_right->{$Kend};
6929         while ( defined($Knext) ) {
6930             push @Klist, $Kend;
6931             $Kend                  = $Knext;
6932             $rK_weld_left->{$Kend} = $Kstart;
6933             $Knext                 = $rK_weld_right->{$Kend};
6934         }
6935         pop @Klist;    #  values for last entry are already correct
6936         foreach my $KK (@Klist) {
6937
6938             # Ending indexes must only be shifted to the right for long welds.
6939             # An error here would be due to a programming error introduced in
6940             # the code immediately above.
6941             my $Kend_old = $rK_weld_right->{$KK};
6942             if ( !defined($Kend_old) || $Kend < $Kend_old ) {
6943                 Fault(
6944 "Bad weld link at K=$KK, old end is K=$Kend_old, new end is $Kend\n"
6945                 );
6946             }
6947
6948             $rK_weld_right->{$KK} = $Kend;
6949             $rweld_len_right_at_K->{$KK} =
6950               $rLL->[$Kend]->[_CUMULATIVE_LENGTH_] -
6951               $rLL->[$KK]->[_CUMULATIVE_LENGTH_];
6952         }
6953     }
6954
6955     return;
6956 }
6957
6958 sub cumulative_length_before_K {
6959     my ( $self, $KK ) = @_;
6960     my $rLL = $self->[_rLL_];
6961     return ( $KK <= 0 ) ? 0 : $rLL->[ $KK - 1 ]->[_CUMULATIVE_LENGTH_];
6962 }
6963
6964 sub weld_cuddled_blocks {
6965     my ($self) = @_;
6966
6967     # Called once per file to handle cuddled formatting
6968
6969     my $rK_weld_left  = $self->[_rK_weld_left_];
6970     my $rK_weld_right = $self->[_rK_weld_right_];
6971
6972     # This routine implements the -cb flag by finding the appropriate
6973     # closing and opening block braces and welding them together.
6974     return unless ( %{$rcuddled_block_types} );
6975
6976     my $rLL = $self->[_rLL_];
6977     return unless ( defined($rLL) && @{$rLL} );
6978     my $rbreak_container = $self->[_rbreak_container_];
6979
6980     my $K_opening_container = $self->[_K_opening_container_];
6981     my $K_closing_container = $self->[_K_closing_container_];
6982
6983     my $length_to_opening_seqno = sub {
6984         my ($seqno) = @_;
6985         my $KK      = $K_opening_container->{$seqno};
6986         my $lentot  = $KK <= 0 ? 0 : $rLL->[ $KK - 1 ]->[_CUMULATIVE_LENGTH_];
6987         return $lentot;
6988     };
6989     my $length_to_closing_seqno = sub {
6990         my ($seqno) = @_;
6991         my $KK      = $K_closing_container->{$seqno};
6992         my $lentot  = $KK <= 0 ? 0 : $rLL->[ $KK - 1 ]->[_CUMULATIVE_LENGTH_];
6993         return $lentot;
6994     };
6995
6996     my $is_broken_block = sub {
6997
6998         # a block is broken if the input line numbers of the braces differ
6999         # we can only cuddle between broken blocks
7000         my ($seqno) = @_;
7001         my $K_opening = $K_opening_container->{$seqno};
7002         return unless ( defined($K_opening) );
7003         my $K_closing = $K_closing_container->{$seqno};
7004         return unless ( defined($K_closing) );
7005         return $rbreak_container->{$seqno}
7006           || $rLL->[$K_closing]->[_LINE_INDEX_] !=
7007           $rLL->[$K_opening]->[_LINE_INDEX_];
7008     };
7009
7010     # A stack to remember open chains at all levels: This is a hash rather than
7011     # an array for safety because negative levels can occur in files with
7012     # errors.  This allows us to keep processing with negative levels.
7013     # $in_chain{$level} = [$chain_type, $type_sequence];
7014     my %in_chain;
7015     my $CBO = $rOpts->{'cuddled-break-option'};
7016
7017     # loop over structure items to find cuddled pairs
7018     my $level = 0;
7019     my $KNEXT = $self->[_K_first_seq_item_];
7020     while ( defined($KNEXT) ) {
7021         my $KK = $KNEXT;
7022         $KNEXT = $rLL->[$KNEXT]->[_KNEXT_SEQ_ITEM_];
7023         my $rtoken_vars   = $rLL->[$KK];
7024         my $type_sequence = $rtoken_vars->[_TYPE_SEQUENCE_];
7025         if ( !$type_sequence ) {
7026             next if ( $KK == 0 );    # first token in file may not be container
7027
7028             # A fault here implies that an error was made in the little loop at
7029             # the bottom of sub 'respace_tokens' which set the values of
7030             # _KNEXT_SEQ_ITEM_.  Or an error has been introduced in the
7031             # loop control lines above.
7032             Fault("sequence = $type_sequence not defined at K=$KK");
7033         }
7034
7035         # NOTE: we must use the original levels here. They can get changed
7036         # by sub 'weld_nested_containers', so this routine must be called
7037         # before sub 'weld_nested_containers'.
7038         my $last_level = $level;
7039         $level = $rtoken_vars->[_LEVEL_];
7040
7041         if    ( $level < $last_level ) { $in_chain{$last_level} = undef }
7042         elsif ( $level > $last_level ) { $in_chain{$level}      = undef }
7043
7044         # We are only looking at code blocks
7045         my $token = $rtoken_vars->[_TOKEN_];
7046         my $type  = $rtoken_vars->[_TYPE_];
7047         next unless ( $type eq $token );
7048
7049         if ( $token eq '{' ) {
7050
7051             my $block_type = $rtoken_vars->[_BLOCK_TYPE_];
7052             if ( !$block_type ) {
7053
7054                 # patch for unrecognized block types which may not be labeled
7055                 my $Kp = $self->K_previous_nonblank($KK);
7056                 while ( $Kp && $rLL->[$Kp]->[_TYPE_] eq '#' ) {
7057                     $Kp = $self->K_previous_nonblank($Kp);
7058                 }
7059                 next unless $Kp;
7060                 $block_type = $rLL->[$Kp]->[_TOKEN_];
7061
7062             }
7063             if ( $in_chain{$level} ) {
7064
7065                 # we are in a chain and are at an opening block brace.
7066                 # See if we are welding this opening brace with the previous
7067                 # block brace.  Get their identification numbers:
7068                 my $closing_seqno = $in_chain{$level}->[1];
7069                 my $opening_seqno = $type_sequence;
7070
7071                 # The preceding block must be on multiple lines so that its
7072                 # closing brace will start a new line.
7073                 if ( !$is_broken_block->($closing_seqno) ) {
7074                     next unless ( $CBO == 2 );
7075                     $rbreak_container->{$closing_seqno} = 1;
7076                 }
7077
7078                 # we will let the trailing block be either broken or intact
7079                 ## && $is_broken_block->($opening_seqno);
7080
7081                 # We can weld the closing brace to its following word ..
7082                 my $Ko = $K_closing_container->{$closing_seqno};
7083                 my $Kon;
7084                 if ( defined($Ko) ) {
7085                     $Kon = $self->K_next_nonblank($Ko);
7086                 }
7087
7088                 # ..unless it is a comment
7089                 if ( defined($Kon) && $rLL->[$Kon]->[_TYPE_] ne '#' ) {
7090
7091                     # OK to weld these two tokens...
7092                     $rK_weld_right->{$Ko} = $Kon;
7093                     $rK_weld_left->{$Kon} = $Ko;
7094
7095                     # Set flag that we want to break the next container
7096                     # so that the cuddled line is balanced.
7097                     $rbreak_container->{$opening_seqno} = 1
7098                       if ($CBO);
7099                 }
7100
7101             }
7102             else {
7103
7104                 # We are not in a chain. Start a new chain if we see the
7105                 # starting block type.
7106                 if ( $rcuddled_block_types->{$block_type} ) {
7107                     $in_chain{$level} = [ $block_type, $type_sequence ];
7108                 }
7109                 else {
7110                     $block_type = '*';
7111                     $in_chain{$level} = [ $block_type, $type_sequence ];
7112                 }
7113             }
7114         }
7115         elsif ( $token eq '}' ) {
7116             if ( $in_chain{$level} ) {
7117
7118                 # We are in a chain at a closing brace.  See if this chain
7119                 # continues..
7120                 my $Knn = $self->K_next_code($KK);
7121                 next unless $Knn;
7122
7123                 my $chain_type          = $in_chain{$level}->[0];
7124                 my $next_nonblank_token = $rLL->[$Knn]->[_TOKEN_];
7125                 if (
7126                     $rcuddled_block_types->{$chain_type}->{$next_nonblank_token}
7127                   )
7128                 {
7129
7130                     # Note that we do not weld yet because we must wait until
7131                     # we we are sure that an opening brace for this follows.
7132                     $in_chain{$level}->[1] = $type_sequence;
7133                 }
7134                 else { $in_chain{$level} = undef }
7135             }
7136         }
7137     }
7138     return;
7139 }
7140
7141 sub find_nested_pairs {
7142     my $self = shift;
7143
7144     # This routine is called once per file to do preliminary work needed for
7145     # the --weld-nested option.  This information is also needed for adding
7146     # semicolons.
7147
7148     my $rLL = $self->[_rLL_];
7149     return unless ( defined($rLL) && @{$rLL} );
7150     my $Num = @{$rLL};
7151
7152     my $K_opening_container = $self->[_K_opening_container_];
7153     my $K_closing_container = $self->[_K_closing_container_];
7154
7155     # We define an array of pairs of nested containers
7156     my @nested_pairs;
7157
7158     # Names of calling routines can either be marked as 'i' or 'w',
7159     # and they may invoke a sub call with an '->'. We will consider
7160     # any consecutive string of such types as a single unit when making
7161     # weld decisions.  We also allow a leading !
7162     my $is_name_type = {
7163         'i'  => 1,
7164         'w'  => 1,
7165         'U'  => 1,
7166         '->' => 1,
7167         '!'  => 1,
7168     };
7169
7170     # Loop over all closing container tokens
7171     foreach my $inner_seqno ( keys %{$K_closing_container} ) {
7172         my $K_inner_closing = $K_closing_container->{$inner_seqno};
7173
7174         # See if it is immediately followed by another, outer closing token
7175         my $K_outer_closing = $K_inner_closing + 1;
7176         $K_outer_closing += 1
7177           if ( $K_outer_closing < $Num
7178             && $rLL->[$K_outer_closing]->[_TYPE_] eq 'b' );
7179
7180         next unless ( $K_outer_closing < $Num );
7181         my $outer_seqno = $rLL->[$K_outer_closing]->[_TYPE_SEQUENCE_];
7182         next unless ($outer_seqno);
7183         my $token_outer_closing = $rLL->[$K_outer_closing]->[_TOKEN_];
7184         next unless ( $is_closing_token{$token_outer_closing} );
7185
7186         # Now we have to check the opening tokens.
7187         my $K_outer_opening = $K_opening_container->{$outer_seqno};
7188         my $K_inner_opening = $K_opening_container->{$inner_seqno};
7189         next unless defined($K_outer_opening) && defined($K_inner_opening);
7190
7191         # Verify that the inner opening token is the next container after the
7192         # outer opening token.
7193         my $K_io_check = $rLL->[$K_outer_opening]->[_KNEXT_SEQ_ITEM_];
7194         next unless defined($K_io_check);
7195         if ( $K_io_check != $K_inner_opening ) {
7196
7197             # The inner opening container does not immediately follow the outer
7198             # opening container, but we may still allow a weld if they are
7199             # separated by a sub signature.  For example, we may have something
7200             # like this, where $K_io_check may be at the first 'x' instead of
7201             # 'io'.  So we need to hop over the signature and see if we arrive
7202             # at 'io'.
7203
7204             #            oo               io
7205             #             |     x       x |
7206             #   $obj->then( sub ( $code ) {
7207             #       ...
7208             #       return $c->render(text => '', status => $code);
7209             #   } );
7210             #   | |
7211             #  ic oc
7212
7213             next if $rLL->[$K_inner_opening]->[_BLOCK_TYPE_] ne 'sub';
7214             next if $rLL->[$K_io_check]->[_TOKEN_] ne '(';
7215             my $seqno_signature = $rLL->[$K_io_check]->[_TYPE_SEQUENCE_];
7216             next unless defined($seqno_signature);
7217             my $K_signature_closing = $K_closing_container->{$seqno_signature};
7218             next unless defined($K_signature_closing);
7219             my $K_test = $rLL->[$K_signature_closing]->[_KNEXT_SEQ_ITEM_];
7220             next
7221               unless ( defined($K_test) && $K_test == $K_inner_opening );
7222
7223             # OK, we have arrived at 'io' in the above diagram.  We should put
7224             # a limit on the length or complexity of the signature here.  There
7225             # is no perfect way to do this, one way is to put a limit on token
7226             # count.  For consistency with older versions, we should allow a
7227             # signature with a single variable to weld, but not with
7228             # multiple variables.  A single variable as in 'sub ($code) {' can
7229             # have a $Kdiff of 2 to 4, depending on spacing.
7230
7231             # But two variables like 'sub ($v1,$v2) {' can have a diff of 4 to
7232             # 7, depending on spacing. So to keep formatting consistent with
7233             # previous versions, we will also avoid welding if there is a comma
7234             # in the signature.
7235
7236             my $Kdiff = $K_signature_closing - $K_io_check;
7237             next if ( $Kdiff > 4 );
7238
7239             my $saw_comma;
7240             foreach my $KK ( $K_io_check + 1 .. $K_signature_closing - 1 ) {
7241                 if ( $rLL->[$KK]->[_TYPE_] eq ',' ) { $saw_comma = 1; last }
7242             }
7243             next if ($saw_comma);
7244         }
7245
7246         # Yes .. this is a possible nesting pair.
7247         # They can be separated by a small amount.
7248         my $K_diff = $K_inner_opening - $K_outer_opening;
7249
7250         # Count nonblank characters separating them.
7251         if ( $K_diff < 0 ) { next }    # Shouldn't happen
7252         my $Kn             = $K_outer_opening;
7253         my $nonblank_count = 0;
7254         my $type;
7255         my $is_name;
7256
7257         # Here is an example of a long identifier chain which counts as a
7258         # single nonblank here (this spans about 10 K indexes):
7259         #     if ( !Boucherot::SetOfConnections->new->handler->execute(
7260         #        ^--K_o_o                                             ^--K_i_o
7261         #       @array) )
7262         my $Kn_first = $K_outer_opening;
7263         my $Kn_last_nonblank;
7264         for (
7265             my $Kn = $K_outer_opening + 1 ;
7266             $Kn <= $K_inner_opening ;
7267             $Kn += 1
7268           )
7269         {
7270             next if ( $rLL->[$Kn]->[_TYPE_] eq 'b' );
7271             if ( !$nonblank_count )        { $Kn_first = $Kn }
7272             if ( $Kn eq $K_inner_opening ) { $nonblank_count++; last; }
7273             $Kn_last_nonblank = $Kn;
7274
7275             # skip chain of identifier tokens
7276             my $last_type    = $type;
7277             my $last_is_name = $is_name;
7278             $type    = $rLL->[$Kn]->[_TYPE_];
7279             $is_name = $is_name_type->{$type};
7280             next if ( $is_name && $last_is_name );
7281
7282             $nonblank_count++;
7283             last if ( $nonblank_count > 2 );
7284         }
7285
7286         # Patch for b1104: do not weld to a paren preceded by sort/map/grep
7287         # because the special line break rules may cause a blinking state
7288         if (   defined($Kn_last_nonblank)
7289             && $rLL->[$K_inner_opening]->[_TOKEN_] eq '('
7290             && $rLL->[$Kn_last_nonblank]->[_TYPE_] eq 'k' )
7291         {
7292             my $token = $rLL->[$Kn_last_nonblank]->[_TOKEN_];
7293
7294             # Turn off welding at sort/map/grep (
7295             if ( $is_sort_map_grep{$token} ) { $nonblank_count = 10 }
7296         }
7297
7298         if (
7299
7300             # adjacent opening containers, like: do {{
7301             $nonblank_count == 1
7302
7303             # short item following opening paren, like:  fun( yyy (
7304             || (   $nonblank_count == 2
7305                 && $rLL->[$K_outer_opening]->[_TOKEN_] eq '(' )
7306
7307             # anonymous sub + prototype or sig:  )->then( sub ($code) {
7308             # ... but it seems best not to stack two structural blocks, like
7309             # this
7310             #    sub make_anon_with_my_sub { sub {
7311             # because it probably hides the structure a little too much.
7312             || (   $rLL->[$K_inner_opening]->[_BLOCK_TYPE_] eq 'sub'
7313                 && $rLL->[$Kn_first]->[_TOKEN_] eq 'sub'
7314                 && !$rLL->[$K_outer_opening]->[_BLOCK_TYPE_] )
7315           )
7316         {
7317             push @nested_pairs,
7318               [ $inner_seqno, $outer_seqno, $K_inner_closing ];
7319         }
7320         next;
7321     }
7322
7323     # The weld routine expects the pairs in order in the form
7324     #   [$seqno_inner, $seqno_outer]
7325     # And they must be in the same order as the inner closing tokens
7326     # (otherwise, welds of three or more adjacent tokens will not work).  The K
7327     # value of this inner closing token has temporarily been stored for
7328     # sorting.
7329     @nested_pairs =
7330
7331       # Drop the K index after sorting (it would cause trouble downstream)
7332       map { [ $_->[0], $_->[1] ] }
7333
7334       # Sort on the K values
7335       sort { $a->[2] <=> $b->[2] } @nested_pairs;
7336
7337     return \@nested_pairs;
7338 }
7339
7340 sub is_excluded_weld {
7341
7342     # decide if this weld is excluded by user request
7343     my ( $self, $KK, $is_leading ) = @_;
7344     my $rLL         = $self->[_rLL_];
7345     my $rtoken_vars = $rLL->[$KK];
7346     my $token       = $rtoken_vars->[_TOKEN_];
7347     my $rflags      = $weld_nested_exclusion_rules{$token};
7348     return 0 unless ( defined($rflags) );
7349     my $flag = $is_leading ? $rflags->[0] : $rflags->[1];
7350     return 0 unless ( defined($flag) );
7351     return 1 if $flag eq '*';
7352
7353     my ( $is_f, $is_k, $is_w );
7354     my $Kp = $self->K_previous_nonblank($KK);
7355     if ( defined($Kp) ) {
7356         my $seqno  = $rtoken_vars->[_TYPE_SEQUENCE_];
7357         my $type_p = $rLL->[$Kp]->[_TYPE_];
7358
7359         # keyword?
7360         $is_k = $type_p eq 'k';
7361
7362         # function call?
7363         $is_f = $self->[_ris_function_call_paren_]->{$seqno};
7364
7365         # either keyword or function call?
7366         $is_w = $is_k || $is_f;
7367     }
7368
7369     my $match;
7370     if    ( $flag eq 'k' ) { $match = $is_k }
7371     elsif ( $flag eq 'K' ) { $match = !$is_k }
7372     elsif ( $flag eq 'f' ) { $match = $is_f }
7373     elsif ( $flag eq 'F' ) { $match = !$is_f }
7374     elsif ( $flag eq 'w' ) { $match = $is_w }
7375     elsif ( $flag eq 'W' ) { $match = !$is_w }
7376     return $match;
7377 }
7378
7379 # types needed for welding RULE 6
7380 my %type_ok_after_bareword;
7381
7382 BEGIN {
7383
7384     my @q = qw# => -> { ( [ #;
7385     @type_ok_after_bareword{@q} = (1) x scalar(@q);
7386 }
7387
7388 use constant DEBUG_WELD => 0;
7389
7390 sub setup_new_weld_measurements {
7391
7392     # Define quantities to check for excess line lengths when welded.
7393     # Called by sub 'weld_nested_containers' and sub 'weld_nested_quotes'
7394
7395     my ( $self, $Kouter_opening, $Kinner_opening ) = @_;
7396
7397     # Given indexes of outer and inner opening containers to be welded:
7398     #   $Kouter_opening, $Kinner_opening
7399
7400     # Returns these variables:
7401     #   $new_weld_ok = true (new weld ok) or false (do not start new weld)
7402     #   $starting_indent = starting indentation
7403     #   $starting_lentot = starting cumulative length
7404     #   $msg = diagnostic message for debugging
7405
7406     my $rLL    = $self->[_rLL_];
7407     my $rlines = $self->[_rlines_];
7408
7409     my $starting_level;
7410     my $starting_ci;
7411     my $starting_lentot;
7412     my $maximum_text_length;
7413     my $msg = "";
7414
7415     my $iline_oo = $rLL->[$Kouter_opening]->[_LINE_INDEX_];
7416     my $rK_range = $rlines->[$iline_oo]->{_rK_range};
7417     my ( $Kfirst, $Klast ) = @{$rK_range};
7418
7419     # Define a reference index from which to start measuring
7420     my $Kref  = $Kfirst;
7421     my $Kprev = $self->K_previous_nonblank($Kfirst);
7422     if ( defined($Kprev) ) {
7423
7424         # The -iob and -wn flags do not work well together. To avoid
7425         # blinking states we have to override -iob at certain key line
7426         # breaks.
7427         $self->[_ris_essential_old_breakpoint_]->{$Kprev} = 1;
7428
7429         # Back up and count length from a token like '=' or '=>' if -lp
7430         # is used (this fixes b520)
7431         # ...or if a break is wanted before there
7432         my $type_prev = $rLL->[$Kprev]->[_TYPE_];
7433         if (   $rOpts_line_up_parentheses
7434             || $want_break_before{$type_prev} )
7435         {
7436             if ( substr( $type_prev, 0, 1 ) eq '=' ) {
7437                 $Kref = $Kprev;
7438
7439                 # Fix for b1144 and b1112: backup to the first nonblank
7440                 # character before the =>, or to the start of its line.
7441                 if ( $type_prev eq '=>' ) {
7442                     my $iline_prev = $rLL->[$Kprev]->[_LINE_INDEX_];
7443                     my $rK_range   = $rlines->[$iline_prev]->{_rK_range};
7444                     my ( $Kfirst, $Klast ) = @{$rK_range};
7445                     for ( my $KK = $Kref - 1 ; $KK >= $Kfirst ; $KK-- ) {
7446                         next if ( $rLL->[$KK]->[_TYPE_] eq 'b' );
7447                         $Kref = $KK;
7448                         last;
7449                     }
7450                 }
7451             }
7452         }
7453     }
7454
7455     # Define the starting measurements we will need
7456     $starting_lentot =
7457       $Kref <= 0 ? 0 : $rLL->[ $Kref - 1 ]->[_CUMULATIVE_LENGTH_];
7458     $starting_level = $rLL->[$Kref]->[_LEVEL_];
7459     $starting_ci    = $rLL->[$Kref]->[_CI_LEVEL_];
7460
7461     $maximum_text_length = $maximum_text_length_at_level[$starting_level] -
7462       $starting_ci * $rOpts_continuation_indentation;
7463
7464     # Now fix these if necessary to avoid known problems...
7465
7466     # FIX1: Switch to using the outer opening token as the reference
7467     # point if a line break before it would make a longer line.
7468     # Fixes case b1055 and is also an alternate fix for b1065.
7469     my $starting_level_oo = $rLL->[$Kouter_opening]->[_LEVEL_];
7470     if ( $Kref < $Kouter_opening ) {
7471         my $starting_ci_oo = $rLL->[$Kouter_opening]->[_CI_LEVEL_];
7472         my $lentot_oo = $rLL->[ $Kouter_opening - 1 ]->[_CUMULATIVE_LENGTH_];
7473         my $maximum_text_length_oo =
7474           $maximum_text_length_at_level[$starting_level_oo] -
7475           $starting_ci_oo * $rOpts_continuation_indentation;
7476
7477         # The excess length to any cumulative length K = lenK is either
7478         #     $excess = $lenk - ($lentot    + $maximum_text_length),     or
7479         #     $excess = $lenk - ($lentot_oo + $maximum_text_length_oo),
7480         # so the worst case (maximum excess) corresponds to the configuration
7481         # with minimum value of the sum: $lentot + $maximum_text_length
7482         if ( $lentot_oo + $maximum_text_length_oo <
7483             $starting_lentot + $maximum_text_length )
7484         {
7485             $Kref                = $Kouter_opening;
7486             $starting_level      = $starting_level_oo;
7487             $starting_ci         = $starting_ci_oo;
7488             $starting_lentot     = $lentot_oo;
7489             $maximum_text_length = $maximum_text_length_oo;
7490         }
7491     }
7492
7493     my $new_weld_ok = 1;
7494
7495     # FIX2 for b1020: Avoid problem areas with the -wn -lp combination.  The
7496     # combination -wn -lp -dws -naws does not work well and can cause blinkers.
7497     # It will probably only occur in stress testing.  For this situation we
7498     # will only start a new weld if we start at a 'good' location.
7499     # - Added 'if' to fix case b1032.
7500     # - Require blank before certain previous characters to fix b1111.
7501     # - Add ';' to fix case b1139
7502     # - Convert from '$ok_to_weld' to '$new_weld_ok' to fix b1162.
7503     if (   $starting_ci
7504         && $rOpts_line_up_parentheses
7505         && $rOpts_delete_old_whitespace
7506         && !$rOpts_add_whitespace
7507         && defined($Kprev) )
7508     {
7509         my $type_first  = $rLL->[$Kfirst]->[_TYPE_];
7510         my $token_first = $rLL->[$Kfirst]->[_TOKEN_];
7511         my $type_prev   = $rLL->[$Kprev]->[_TYPE_];
7512         my $type_pp     = 'b';
7513         if ( $Kprev >= 0 ) { $type_pp = $rLL->[ $Kprev - 1 ]->[_TYPE_] }
7514         unless (
7515                $type_prev  =~ /^[\,\.\;]/
7516             || $type_prev  =~ /^[=\{\[\(\L]/ && $type_pp eq 'b'
7517             || $type_first =~ /^[=\,\.\;\{\[\(\L]/
7518             || $type_first eq '||'
7519             || (   $type_first eq 'k' && $token_first eq 'if'
7520                 || $token_first eq 'or' )
7521           )
7522         {
7523             $msg =
7524 "Skipping weld: poor break with -lp and ci at type_first='$type_first' type_prev='$type_prev'\n";
7525             $new_weld_ok = 0;
7526         }
7527     }
7528
7529     return ( $new_weld_ok, $maximum_text_length, $starting_lentot, $msg );
7530 }
7531
7532 sub excess_line_length_for_Krange {
7533     my ( $self, $Kfirst, $Klast ) = @_;
7534
7535     # returns $excess_length =
7536     #   by how many characters a line composed of tokens $Kfirst .. $Klast will
7537     #   exceed the allowed line length
7538
7539     my $rLL = $self->[_rLL_];
7540     my $length_before_Kfirst =
7541       $Kfirst <= 0
7542       ? 0
7543       : $rLL->[ $Kfirst - 1 ]->[_CUMULATIVE_LENGTH_];
7544
7545     # backup before a side comment if necessary
7546     my $Kend = $Klast;
7547     if (   $rOpts_ignore_side_comment_lengths
7548         && $rLL->[$Klast]->[_TYPE_] eq '#' )
7549     {
7550         my $Kprev = $self->K_previous_nonblank($Klast);
7551         if ( defined($Kprev) && $Kprev >= $Kfirst ) { $Kend = $Kprev }
7552     }
7553
7554     # get the length of the text
7555     my $length = $rLL->[$Kend]->[_CUMULATIVE_LENGTH_] - $length_before_Kfirst;
7556
7557     # get the size of the text window
7558     my $level           = $rLL->[$Kfirst]->[_LEVEL_];
7559     my $ci_level        = $rLL->[$Kfirst]->[_CI_LEVEL_];
7560     my $max_text_length = $maximum_text_length_at_level[$level] -
7561       $ci_level * $rOpts_continuation_indentation;
7562
7563     my $excess_length = $length - $max_text_length;
7564
7565     DEBUG_WELD
7566       && print
7567 "Kfirst=$Kfirst, Klast=$Klast, Kend=$Kend, level=$level, ci=$ci_level, max_text_length=$max_text_length, length=$length\n";
7568     return ($excess_length);
7569 }
7570
7571 sub weld_nested_containers {
7572     my ($self) = @_;
7573
7574     # Called once per file for option '--weld-nested-containers'
7575
7576     my $rK_weld_left  = $self->[_rK_weld_left_];
7577     my $rK_weld_right = $self->[_rK_weld_right_];
7578
7579     # This routine implements the -wn flag by "welding together"
7580     # the nested closing and opening tokens which were previously
7581     # identified by sub 'find_nested_pairs'.  "welding" simply
7582     # involves setting certain hash values which will be checked
7583     # later during formatting.
7584
7585     my $rLL                 = $self->[_rLL_];
7586     my $rlines              = $self->[_rlines_];
7587     my $K_opening_container = $self->[_K_opening_container_];
7588     my $K_closing_container = $self->[_K_closing_container_];
7589
7590     # Find nested pairs of container tokens for any welding.
7591     my $rnested_pairs = $self->find_nested_pairs();
7592
7593     # Return unless there are nested pairs to weld
7594     return unless defined($rnested_pairs) && @{$rnested_pairs};
7595
7596     my $rOpts_break_at_old_method_breakpoints =
7597       $rOpts->{'break-at-old-method-breakpoints'};
7598
7599     # This array will hold the sequence numbers of the tokens to be welded.
7600     my @welds;
7601
7602     # Variables needed for estimating line lengths
7603     my $maximum_text_length;    # maximum spaces available for text
7604     my $starting_lentot;        # cumulative text to start of current line
7605
7606     my $iline_outer_opening   = -1;
7607     my $weld_count_this_start = 0;
7608
7609     my $multiline_tol =
7610       1 + max( $rOpts_indent_columns, $rOpts_continuation_indentation );
7611
7612     my $length_to_opening_seqno = sub {
7613         my ($seqno) = @_;
7614         my $KK      = $K_opening_container->{$seqno};
7615         my $lentot  = defined($KK)
7616           && $KK > 0 ? $rLL->[ $KK - 1 ]->[_CUMULATIVE_LENGTH_] : 0;
7617         return $lentot;
7618     };
7619
7620     my $length_to_closing_seqno = sub {
7621         my ($seqno) = @_;
7622         my $KK      = $K_closing_container->{$seqno};
7623         my $lentot  = defined($KK)
7624           && $KK > 0 ? $rLL->[ $KK - 1 ]->[_CUMULATIVE_LENGTH_] : 0;
7625         return $lentot;
7626     };
7627
7628     # Abbreviations:
7629     #  _oo=outer opening, i.e. first of  { {
7630     #  _io=inner opening, i.e. second of { {
7631     #  _oc=outer closing, i.e. second of } {
7632     #  _ic=inner closing, i.e. first of  } }
7633
7634     my $previous_pair;
7635
7636     # Main loop over nested pairs...
7637     # We are working from outermost to innermost pairs so that
7638     # level changes will be complete when we arrive at the inner pairs.
7639     while ( my $item = pop( @{$rnested_pairs} ) ) {
7640         my ( $inner_seqno, $outer_seqno ) = @{$item};
7641
7642         my $Kouter_opening = $K_opening_container->{$outer_seqno};
7643         my $Kinner_opening = $K_opening_container->{$inner_seqno};
7644         my $Kouter_closing = $K_closing_container->{$outer_seqno};
7645         my $Kinner_closing = $K_closing_container->{$inner_seqno};
7646
7647         # RULE: do not weld if inner container has <= 3 tokens unless the next
7648         # token is a heredoc (so we know there will be multiple lines)
7649         if ( $Kinner_closing - $Kinner_opening <= 4 ) {
7650             my $Knext_nonblank = $self->K_next_nonblank($Kinner_opening);
7651             next unless defined($Knext_nonblank);
7652             my $type = $rLL->[$Knext_nonblank]->[_TYPE_];
7653             next unless ( $type eq 'h' );
7654         }
7655
7656         my $outer_opening = $rLL->[$Kouter_opening];
7657         my $inner_opening = $rLL->[$Kinner_opening];
7658         my $outer_closing = $rLL->[$Kouter_closing];
7659         my $inner_closing = $rLL->[$Kinner_closing];
7660
7661         # RULE: do not weld to a hash brace.  The reason is that it has a very
7662         # strong bond strength to the next token, so a line break after it
7663         # may not work.  Previously we allowed welding to something like @{
7664         # but that caused blinking states (cases b751, b779).
7665         if ( $inner_opening->[_TYPE_] eq 'L' ) {
7666             next;
7667         }
7668
7669         # RULE: do not weld to a square bracket which does not contain commas
7670         if ( $inner_opening->[_TYPE_] eq '[' ) {
7671             my $rtype_count = $self->[_rtype_count_by_seqno_]->{$inner_seqno};
7672             next unless ($rtype_count);
7673             my $comma_count = $rtype_count->{','};
7674             next unless ($comma_count);
7675
7676             # Do not weld if there is text before a '[' such as here:
7677             #      curr_opt ( @beg [2,5] )
7678             # It will not break into the desired sandwich structure.
7679             # This fixes case b109, 110.
7680             my $Kdiff = $Kinner_opening - $Kouter_opening;
7681             next if ( $Kdiff > 2 );
7682             next
7683               if ( $Kdiff == 2
7684                 && $rLL->[ $Kouter_opening + 1 ]->[_TYPE_] ne 'b' );
7685
7686         }
7687
7688         # Set flag saying if this pair starts a new weld
7689         my $starting_new_weld = !( @welds && $outer_seqno == $welds[-1]->[0] );
7690
7691         # Set flag saying if this pair is adjacent to the previous nesting pair
7692         # (even if previous pair was rejected as a weld)
7693         my $touch_previous_pair =
7694           defined($previous_pair) && $outer_seqno == $previous_pair->[0];
7695         $previous_pair = $item;
7696
7697         my $do_not_weld_rule = 0;
7698         my $Msg              = "";
7699         my $is_one_line_weld;
7700
7701         my $iline_oo = $outer_opening->[_LINE_INDEX_];
7702         my $iline_io = $inner_opening->[_LINE_INDEX_];
7703         my $iline_ic = $inner_closing->[_LINE_INDEX_];
7704         my $iline_oc = $outer_closing->[_LINE_INDEX_];
7705         my $token_oo = $outer_opening->[_TOKEN_];
7706
7707         my $is_multiline_weld =
7708              $iline_oo == $iline_io
7709           && $iline_ic == $iline_oc
7710           && $iline_io != $iline_ic;
7711
7712         if (DEBUG_WELD) {
7713             my $token_io = $rLL->[$Kinner_opening]->[_TOKEN_];
7714             my $len_oo   = $rLL->[$Kouter_opening]->[_CUMULATIVE_LENGTH_];
7715             my $len_io   = $rLL->[$Kinner_opening]->[_CUMULATIVE_LENGTH_];
7716             $Msg .= <<EOM;
7717 Pair seqo=$outer_seqno seqi=$inner_seqno  lines: loo=$iline_oo lio=$iline_io lic=$iline_ic loc=$iline_oc
7718 Koo=$Kouter_opening Kio=$Kinner_opening Kic=$Kinner_closing Koc=$Kouter_closing lenoo=$len_oo lenio=$len_io
7719 tokens '$token_oo' .. '$token_io'
7720 EOM
7721         }
7722
7723         # If this pair is not adjacent to the previous pair (skipped or not),
7724         # then measure lengths from the start of line of oo.
7725         if (
7726             !$touch_previous_pair
7727
7728             # Also do this if restarting at a new line; fixes case b965, s001
7729             || ( !$weld_count_this_start && $iline_oo > $iline_outer_opening )
7730           )
7731         {
7732
7733             # Remember the line we are using as a reference
7734             $iline_outer_opening   = $iline_oo;
7735             $weld_count_this_start = 0;
7736
7737             ( my $new_weld_ok, $maximum_text_length, $starting_lentot, my $msg )
7738               = $self->setup_new_weld_measurements( $Kouter_opening,
7739                 $Kinner_opening );
7740
7741             if (
7742                 !$new_weld_ok
7743                 && (   $iline_oo != $iline_io
7744                     || $iline_ic != $iline_oc )
7745               )
7746             {
7747                 if (DEBUG_WELD) { print $msg}
7748                 next;
7749             }
7750
7751             my $rK_range = $rlines->[$iline_oo]->{_rK_range};
7752             my ( $Kfirst, $Klast ) = @{$rK_range};
7753
7754             # An existing one-line weld is a line in which
7755             # (1) the containers are all on one line, and
7756             # (2) the line does not exceed the allowable length, and
7757             # This flag is used to avoid creating blinkers.
7758             # FIX1: Changed 'excess_length_to_K' to 'excess_length_of_line'
7759             # to get exact lengths and fix b604 b605.
7760             if ( $iline_oo == $iline_oc ) {
7761
7762                 # All the tokens are on one line, now check their length
7763                 my $excess =
7764                   $self->excess_line_length_for_Krange( $Kfirst, $Klast );
7765                 if ( $excess <= 0 ) {
7766
7767                     # All tokens are on one line and fit. This is a valid
7768                     # existing one-line weld except for some edge cases
7769                     # involving -lp:
7770
7771                     # FIX2: Patch for b1114: add a tolerance of one level if
7772                     # this line has an unbalanced start.  This helps prevent
7773                     # blinkers in unusual cases for lines near the length limit
7774                     # by making it more likely that RULE 2 will prevent a weld.
7775                     # FIX3: for b1131: only use level difference in -lp mode.
7776                     # FIX4: for b1141, b1142: reduce the tolerance for longer
7777                     # leading tokens
7778                     if (   $rOpts_line_up_parentheses
7779                         && $outer_opening->[_LEVEL_] -
7780                         $rLL->[$Kfirst]->[_LEVEL_] )
7781                     {
7782
7783                         # We only need a tolerance if the leading text before
7784                         # the first opening token is shorter than the
7785                         # indentation length.  For simplicity we just use the
7786                         # length of the first token here.  If necessary, we
7787                         # could be more exact in the future and find the
7788                         # total length up to the first opening token.
7789                         # See cases b1114, b1141, b1142.
7790                         my $tolx = max( 0,
7791                             $rOpts_indent_columns -
7792                               $rLL->[$Kfirst]->[_TOKEN_LENGTH_] );
7793
7794                         if ( $excess + $tolx <= 0 ) {
7795                             $is_one_line_weld = 1;
7796                         }
7797                     }
7798                     else {
7799                         $is_one_line_weld = 1;
7800                     }
7801                 }
7802             }
7803
7804             # DO-NOT-WELD RULE 1:
7805             # Do not weld something that looks like the start of a two-line
7806             # function call, like this: <<snippets/wn6.in>>
7807             #    $trans->add_transformation(
7808             #        PDL::Graphics::TriD::Scale->new( $sx, $sy, $sz ) );
7809             # We will look for a semicolon after the closing paren.
7810
7811             # We want to weld something complex, like this though
7812             # my $compass = uc( opposite_direction( line_to_canvas_direction(
7813             #     @{ $coords[0] }, @{ $coords[1] } ) ) );
7814             # Otherwise we will get a 'blinker'. For example, the following
7815             # would become a blinker without this rule:
7816             #        $Self->_Add( $SortOrderDisplay{ $Field
7817             #              ->GenerateFieldForSelectSQL() } );
7818             # But it is okay to weld a two-line statement if it looks like
7819             # it was already welded, meaning that the two opening containers are
7820             # on a different line that the two closing containers.  This is
7821             # necessary to prevent blinking of something like this with
7822             # perltidy -wn -pbp (starting indentation two levels deep):
7823
7824             # $top_label->set_text( gettext(
7825             #    "Unable to create personal directory - check permissions.") );
7826
7827             if (   $iline_oc == $iline_oo + 1
7828                 && $iline_io == $iline_ic
7829                 && $token_oo eq '(' )
7830             {
7831
7832                 # Look for following semicolon...
7833                 my $Knext_nonblank = $self->K_next_nonblank($Kouter_closing);
7834                 my $next_nonblank_type =
7835                   defined($Knext_nonblank)
7836                   ? $rLL->[$Knext_nonblank]->[_TYPE_]
7837                   : 'b';
7838                 if ( $next_nonblank_type eq ';' ) {
7839
7840                     # Then do not weld if no other containers between inner
7841                     # opening and closing.
7842                     my $Knext_seq_item = $inner_opening->[_KNEXT_SEQ_ITEM_];
7843                     if ( $Knext_seq_item == $Kinner_closing ) {
7844                         $do_not_weld_rule = 1;
7845                     }
7846                 }
7847             }
7848         } ## end starting new weld sequence
7849
7850         # DO-NOT-WELD RULE 2:
7851         # Do not weld an opening paren to an inner one line brace block
7852         # We will just use old line numbers for this test and require
7853         # iterations if necessary for convergence
7854
7855         # For example, otherwise we could cause the opening paren
7856         # in the following example to separate from the caller name
7857         # as here:
7858
7859         #    $_[0]->code_handler
7860         #       ( sub { $more .= $_[1] . ":" . $_[0] . "\n" } );
7861
7862         # Here is another example where we do not want to weld:
7863         #  $wrapped->add_around_modifier(
7864         #    sub { push @tracelog => 'around 1'; $_[0]->(); } );
7865
7866         # If the one line sub block gets broken due to length or by the
7867         # user, then we can weld.  The result will then be:
7868         # $wrapped->add_around_modifier( sub {
7869         #    push @tracelog => 'around 1';
7870         #    $_[0]->();
7871         # } );
7872
7873         # Updated to fix cases b1082 b1102 b1106 b1115:
7874         # Also, do not weld to an intact inner block if the outer opening token
7875         # is on a different line. For example, this prevents oscillation
7876         # between these two states in case b1106:
7877
7878         #    return map{
7879         #        ($_,[$self->$_(@_[1..$#_])])
7880         #    }@every;
7881
7882         #    return map { (
7883         #        $_, [ $self->$_( @_[ 1 .. $#_ ] ) ]
7884         #    ) } @every;
7885
7886         # The effect of this change on typical code is very minimal.  Sometimes
7887         # it may take a second iteration to converge, but this gives protection
7888         # against blinking.
7889
7890         if (   !$do_not_weld_rule
7891             && !$is_one_line_weld
7892             && $iline_ic == $iline_io )
7893         {
7894             $do_not_weld_rule = 2
7895               if ( $token_oo eq '(' || $iline_oo != $iline_io );
7896         }
7897
7898         # DO-NOT-WELD RULE 3:
7899         # Do not weld if this makes our line too long.
7900         # Use a tolerance which depends on if the old tokens were welded
7901         # (fixes cases b746 b748 b749 b750 b752 b753 b754 b755 b756 b758 b759)
7902         if ( !$do_not_weld_rule ) {
7903
7904             # Measure to a little beyond the inner opening token if it is
7905             # followed by a bare word, which may have unusual line break rules.
7906
7907             # NOTE: Originally this was OLD RULE 6: do not weld to a container
7908             # which is followed on the same line by an unknown bareword token.
7909             # This can cause blinkers (cases b626, b611).  But OK to weld one
7910             # line welds to fix cases b1057 b1064.  For generality, OLD RULE 6
7911             # has been merged into RULE 3 here to also fix cases b1078 b1091.
7912
7913             my $K_for_length = $Kinner_opening;
7914             my $Knext_io     = $self->K_next_nonblank($Kinner_opening);
7915             next unless ( defined($Knext_io) );    # shouldn't happen
7916             my $type_io_next = $rLL->[$Knext_io]->[_TYPE_];
7917
7918             # Note: may need to eventually also include other types here,
7919             # such as 'Z' and 'Y':   if ($type_io_next =~ /^[ZYw]$/) {
7920             if ( $type_io_next eq 'w' ) {
7921                 my $Knext_io2 = $self->K_next_nonblank($Knext_io);
7922                 next unless ( defined($Knext_io2) );
7923                 my $type_io_next2 = $rLL->[$Knext_io2]->[_TYPE_];
7924                 if ( !$type_ok_after_bareword{$type_io_next2} ) {
7925                     $K_for_length = $Knext_io2;
7926                 }
7927             }
7928
7929             # Use a tolerance for welds over multiple lines to avoid blinkers.
7930             # We can use zero tolerance if it looks like we are working on an
7931             # existing weld.
7932             my $tol =
7933               $is_one_line_weld || $is_multiline_weld
7934               ? 0
7935               : $multiline_tol;
7936
7937             # By how many characters does this exceed the text window?
7938             my $excess =
7939               $self->cumulative_length_before_K($K_for_length) -
7940               $starting_lentot + 1 + $tol -
7941               $maximum_text_length;
7942
7943             # Old patch: Use '>=0' instead of '> 0' here to fix cases b995 b998
7944             # b1000 b1001 b1007 b1008 b1009 b1010 b1011 b1012 b1016 b1017 b1018
7945             # Revised patch: New tolerance definition allows going back to '> 0'
7946             # here.  This fixes case b1124.  See also cases b1087 and b1087a.
7947             if ( $excess > 0 ) { $do_not_weld_rule = 3 }
7948
7949             if (DEBUG_WELD) {
7950                 $Msg .=
7951 "RULE 3 test: excess length to K=$Kinner_opening is $excess > 0 with tol= $tol ?) \n";
7952             }
7953         }
7954
7955         # DO-NOT-WELD RULE 4; implemented for git#10:
7956         # Do not weld an opening -ce brace if the next container is on a single
7957         # line, different from the opening brace. (This is very rare).  For
7958         # example, given the following with -ce, we will avoid joining the {
7959         # and [
7960
7961         #  } else {
7962         #      [ $_, length($_) ]
7963         #  }
7964
7965         # because this would produce a terminal one-line block:
7966
7967         #  } else { [ $_, length($_) ]  }
7968
7969         # which may not be what is desired. But given this input:
7970
7971         #  } else { [ $_, length($_) ]  }
7972
7973         # then we will do the weld and retain the one-line block
7974         if ( !$do_not_weld_rule && $rOpts->{'cuddled-else'} ) {
7975             my $block_type = $rLL->[$Kouter_opening]->[_BLOCK_TYPE_];
7976             if ( $block_type && $rcuddled_block_types->{'*'}->{$block_type} ) {
7977                 my $io_line = $inner_opening->[_LINE_INDEX_];
7978                 my $ic_line = $inner_closing->[_LINE_INDEX_];
7979                 my $oo_line = $outer_opening->[_LINE_INDEX_];
7980                 if ( $oo_line < $io_line && $ic_line == $io_line ) {
7981                     $do_not_weld_rule = 4;
7982                 }
7983             }
7984         }
7985
7986         # DO-NOT-WELD RULE 5: do not include welds excluded by user
7987         if (
7988               !$do_not_weld_rule
7989             && %weld_nested_exclusion_rules
7990             && ( $self->is_excluded_weld( $Kouter_opening, $starting_new_weld )
7991                 || $self->is_excluded_weld( $Kinner_opening, 0 ) )
7992           )
7993         {
7994             $do_not_weld_rule = 5;
7995         }
7996
7997         # DO-NOT-WELD RULE 6: This has been merged into RULE 3 above.
7998
7999         # DO-NOT-WELD RULE 7: Do not weld if this conflicts with -bom
8000         # (case b973)
8001         if (  !$do_not_weld_rule
8002             && $rOpts_break_at_old_method_breakpoints
8003             && $iline_io > $iline_oo )
8004         {
8005
8006             foreach my $iline ( $iline_oo + 1 .. $iline_io ) {
8007                 my $rK_range = $rlines->[$iline]->{_rK_range};
8008                 next unless defined($rK_range);
8009                 my ( $Kfirst, $Klast ) = @{$rK_range};
8010                 next unless defined($Kfirst);
8011                 if ( $rLL->[$Kfirst]->[_TYPE_] eq '->' ) {
8012                     $do_not_weld_rule = 7;
8013                     last;
8014                 }
8015             }
8016         }
8017
8018         if ($do_not_weld_rule) {
8019
8020             # After neglecting a pair, we start measuring from start of point io
8021             my $starting_level    = $inner_opening->[_LEVEL_];
8022             my $starting_ci_level = $inner_opening->[_CI_LEVEL_];
8023             $starting_lentot =
8024               $self->cumulative_length_before_K($Kinner_opening);
8025             $maximum_text_length =
8026               $maximum_text_length_at_level[$starting_level] -
8027               $starting_ci_level * $rOpts_continuation_indentation;
8028
8029             if (DEBUG_WELD) {
8030                 $Msg .= "Not welding due to RULE $do_not_weld_rule\n";
8031                 print $Msg;
8032             }
8033
8034             # Normally, a broken pair should not decrease indentation of
8035             # intermediate tokens:
8036             ##      if ( $last_pair_broken ) { next }
8037             # However, for long strings of welded tokens, such as '{{{{{{...'
8038             # we will allow broken pairs to also remove indentation.
8039             # This will keep very long strings of opening and closing
8040             # braces from marching off to the right.  We will do this if the
8041             # number of tokens in a weld before the broken weld is 4 or more.
8042             # This rule will mainly be needed for test scripts, since typical
8043             # welds have fewer than about 4 welded tokens.
8044             if ( !@welds || @{ $welds[-1] } < 4 ) { next }
8045         }
8046
8047         # otherwise start new weld ...
8048         elsif ($starting_new_weld) {
8049             $weld_count_this_start++;
8050             if (DEBUG_WELD) {
8051                 $Msg .= "Starting new weld\n";
8052                 print $Msg;
8053             }
8054             push @welds, $item;
8055
8056             $rK_weld_right->{$Kouter_opening} = $Kinner_opening;
8057             $rK_weld_left->{$Kinner_opening}  = $Kouter_opening;
8058
8059             $rK_weld_right->{$Kinner_closing} = $Kouter_closing;
8060             $rK_weld_left->{$Kouter_closing}  = $Kinner_closing;
8061         }
8062
8063         # ... or extend current weld
8064         else {
8065             $weld_count_this_start++;
8066             if (DEBUG_WELD) {
8067                 $Msg .= "Extending current weld\n";
8068                 print $Msg;
8069             }
8070             unshift @{ $welds[-1] }, $inner_seqno;
8071             $rK_weld_right->{$Kouter_opening} = $Kinner_opening;
8072             $rK_weld_left->{$Kinner_opening}  = $Kouter_opening;
8073
8074             $rK_weld_right->{$Kinner_closing} = $Kouter_closing;
8075             $rK_weld_left->{$Kouter_closing}  = $Kinner_closing;
8076         }
8077
8078         # After welding, reduce the indentation level if all intermediate tokens
8079         my $dlevel = $outer_opening->[_LEVEL_] - $inner_opening->[_LEVEL_];
8080         if ( $dlevel != 0 ) {
8081             my $Kstart = $Kinner_opening;
8082             my $Kstop  = $Kinner_closing;
8083             for ( my $KK = $Kstart ; $KK <= $Kstop ; $KK++ ) {
8084                 $rLL->[$KK]->[_LEVEL_] += $dlevel;
8085             }
8086
8087             # Copy opening ci level to help break at = for -lp mode (case b1124)
8088             $rLL->[$Kinner_opening]->[_CI_LEVEL_] =
8089               $rLL->[$Kouter_opening]->[_CI_LEVEL_];
8090
8091             # But do not copy the closing ci level ... it can give poor results
8092             ## $rLL->[$Kinner_closing]->[_CI_LEVEL_] =
8093             ##  $rLL->[$Kouter_closing]->[_CI_LEVEL_];
8094         }
8095     }
8096
8097     return;
8098 }
8099
8100 sub weld_nested_quotes {
8101
8102     # Called once per file for option '--weld-nested-containers'. This
8103     # does welding on qw quotes.
8104
8105     my $self = shift;
8106
8107     # See if quotes are excluded from welding
8108     my $rflags = $weld_nested_exclusion_rules{'q'};
8109     return if ( defined($rflags) && defined( $rflags->[1] ) );
8110
8111     my $rK_weld_left  = $self->[_rK_weld_left_];
8112     my $rK_weld_right = $self->[_rK_weld_right_];
8113
8114     my $rLL = $self->[_rLL_];
8115     return unless ( defined($rLL) && @{$rLL} );
8116     my $Num = @{$rLL};
8117
8118     my $K_opening_container = $self->[_K_opening_container_];
8119     my $K_closing_container = $self->[_K_closing_container_];
8120     my $rlines              = $self->[_rlines_];
8121
8122     my $starting_lentot;
8123     my $maximum_text_length;
8124
8125     my $is_single_quote = sub {
8126         my ( $Kbeg, $Kend, $quote_type ) = @_;
8127         foreach my $K ( $Kbeg .. $Kend ) {
8128             my $test_type = $rLL->[$K]->[_TYPE_];
8129             next   if ( $test_type eq 'b' );
8130             return if ( $test_type ne $quote_type );
8131         }
8132         return 1;
8133     };
8134
8135     # Length tolerance - same as previously used for sub weld_nested
8136     my $multiline_tol =
8137       1 + max( $rOpts_indent_columns, $rOpts_continuation_indentation );
8138
8139     # look for single qw quotes nested in containers
8140     my $KNEXT = $self->[_K_first_seq_item_];
8141     while ( defined($KNEXT) ) {
8142         my $KK = $KNEXT;
8143         $KNEXT = $rLL->[$KNEXT]->[_KNEXT_SEQ_ITEM_];
8144         my $rtoken_vars = $rLL->[$KK];
8145         my $outer_seqno = $rtoken_vars->[_TYPE_SEQUENCE_];
8146         if ( !$outer_seqno ) {
8147             next if ( $KK == 0 );    # first token in file may not be container
8148
8149             # A fault here implies that an error was made in the little loop at
8150             # the bottom of sub 'respace_tokens' which set the values of
8151             # _KNEXT_SEQ_ITEM_.  Or an error has been introduced in the
8152             # loop control lines above.
8153             Fault("sequence = $outer_seqno not defined at K=$KK");
8154         }
8155
8156         my $token = $rtoken_vars->[_TOKEN_];
8157         if ( $is_opening_token{$token} ) {
8158
8159             # see if the next token is a quote of some type
8160             my $Kn = $KK + 1;
8161             $Kn += 1
8162               if ( $Kn < $Num && $rLL->[$Kn]->[_TYPE_] eq 'b' );
8163             next unless ( $Kn < $Num );
8164
8165             my $next_token = $rLL->[$Kn]->[_TOKEN_];
8166             my $next_type  = $rLL->[$Kn]->[_TYPE_];
8167             next
8168               unless ( ( $next_type eq 'q' || $next_type eq 'Q' )
8169                 && $next_token =~ /^q/ );
8170
8171             # The token before the closing container must also be a quote
8172             my $Kouter_closing = $K_closing_container->{$outer_seqno};
8173             my $Kinner_closing = $self->K_previous_nonblank($Kouter_closing);
8174             next unless $rLL->[$Kinner_closing]->[_TYPE_] eq $next_type;
8175
8176             # This is an inner opening container
8177             my $Kinner_opening = $Kn;
8178
8179             # Do not weld to single-line quotes. Nothing is gained, and it may
8180             # look bad.
8181             next if ( $Kinner_closing == $Kinner_opening );
8182
8183             # Only weld to quotes delimited with container tokens. This is
8184             # because welding to arbitrary quote delimiters can produce code
8185             # which is less readable than without welding.
8186             my $closing_delimiter =
8187               substr( $rLL->[$Kinner_closing]->[_TOKEN_], -1, 1 );
8188             next
8189               unless ( $is_closing_token{$closing_delimiter}
8190                 || $closing_delimiter eq '>' );
8191
8192             # Now make sure that there is just a single quote in the container
8193             next
8194               unless (
8195                 $is_single_quote->(
8196                     $Kinner_opening + 1,
8197                     $Kinner_closing - 1,
8198                     $next_type
8199                 )
8200               );
8201
8202             # OK: This is a candidate for welding
8203             my $Msg = "";
8204             my $do_not_weld;
8205
8206             my $Kouter_opening = $K_opening_container->{$outer_seqno};
8207             my $iline_oo       = $rLL->[$Kouter_opening]->[_LINE_INDEX_];
8208             my $iline_io       = $rLL->[$Kinner_opening]->[_LINE_INDEX_];
8209             my $iline_oc       = $rLL->[$Kouter_closing]->[_LINE_INDEX_];
8210             my $iline_ic       = $rLL->[$Kinner_closing]->[_LINE_INDEX_];
8211             my $is_old_weld =
8212               ( $iline_oo == $iline_io && $iline_ic == $iline_oc );
8213
8214             # If welded, the line must not exceed allowed line length
8215             ( my $ok_to_weld, $maximum_text_length, $starting_lentot, my $msg )
8216               = $self->setup_new_weld_measurements( $Kouter_opening,
8217                 $Kinner_opening );
8218             if ( !$ok_to_weld ) {
8219                 if (DEBUG_WELD) { print $msg}
8220                 next;
8221             }
8222
8223             my $length =
8224               $rLL->[$Kinner_opening]->[_CUMULATIVE_LENGTH_] - $starting_lentot;
8225             my $excess = $length + $multiline_tol - $maximum_text_length;
8226
8227             my $excess_max = ( $is_old_weld ? $multiline_tol : 0 );
8228             if ( $excess >= $excess_max ) {
8229                 $do_not_weld = 1;
8230             }
8231
8232             if (DEBUG_WELD) {
8233                 if ( !$is_old_weld ) { $is_old_weld = "" }
8234                 $Msg .=
8235 "excess=$excess>=$excess_max, multiline_tol=$multiline_tol, is_old_weld='$is_old_weld'\n";
8236             }
8237
8238             # Check weld exclusion rules for outer container
8239             if ( !$do_not_weld ) {
8240                 my $is_leading = !defined( $rK_weld_left->{$Kouter_opening} );
8241                 if ( $self->is_excluded_weld( $KK, $is_leading ) ) {
8242                     if (DEBUG_WELD) {
8243                         $Msg .=
8244 "No qw weld due to weld exclusion rules for outer container\n";
8245                     }
8246                     $do_not_weld = 1;
8247                 }
8248             }
8249
8250             # Check the length of the last line (fixes case b1039)
8251             if ( !$do_not_weld ) {
8252                 my $rK_range_ic = $rlines->[$iline_ic]->{_rK_range};
8253                 my ( $Kfirst_ic, $Klast_ic ) = @{$rK_range_ic};
8254                 my $excess_ic =
8255                   $self->excess_line_length_for_Krange( $Kfirst_ic,
8256                     $Kouter_closing );
8257
8258                 # Allow extra space for additional welded closing container(s)
8259                 # and a space and comma or semicolon.
8260                 # NOTE: weld len has not been computed yet. Use 2 spaces
8261                 # for now, correct for a single weld. This estimate could
8262                 # be made more accurate if necessary.
8263                 my $weld_len =
8264                   defined( $rK_weld_right->{$Kouter_closing} ) ? 2 : 0;
8265                 if ( $excess_ic + $weld_len + 2 > 0 ) {
8266                     if (DEBUG_WELD) {
8267                         $Msg .=
8268 "No qw weld due to excess ending line length=$excess_ic + $weld_len + 2 > 0\n";
8269                     }
8270                     $do_not_weld = 1;
8271                 }
8272             }
8273
8274             if ($do_not_weld) {
8275                 if (DEBUG_WELD) {
8276                     $Msg .= "Not Welding QW\n";
8277                     print $Msg;
8278                 }
8279                 next;
8280             }
8281
8282             # OK to weld
8283             if (DEBUG_WELD) {
8284                 $Msg .= "Welding QW\n";
8285                 print $Msg;
8286             }
8287
8288             $rK_weld_right->{$Kouter_opening} = $Kinner_opening;
8289             $rK_weld_left->{$Kinner_opening}  = $Kouter_opening;
8290
8291             $rK_weld_right->{$Kinner_closing} = $Kouter_closing;
8292             $rK_weld_left->{$Kouter_closing}  = $Kinner_closing;
8293
8294             # Undo one indentation level if an extra level was added to this
8295             # multiline quote
8296             my $qw_seqno =
8297               $self->[_rstarting_multiline_qw_seqno_by_K_]->{$Kinner_opening};
8298             if (   $qw_seqno
8299                 && $self->[_rmultiline_qw_has_extra_level_]->{$qw_seqno} )
8300             {
8301                 foreach my $K ( $Kinner_opening + 1 .. $Kinner_closing - 1 ) {
8302                     $rLL->[$K]->[_LEVEL_] -= 1;
8303                 }
8304                 $rLL->[$Kinner_opening]->[_CI_LEVEL_] = 0;
8305                 $rLL->[$Kinner_closing]->[_CI_LEVEL_] = 0;
8306             }
8307
8308             # undo CI for other welded quotes
8309             else {
8310
8311                 foreach my $K ( $Kinner_opening .. $Kinner_closing ) {
8312                     $rLL->[$K]->[_CI_LEVEL_] = 0;
8313                 }
8314             }
8315
8316             # Change the level of a closing qw token to be that of the outer
8317             # containing token. This will allow -lp indentation to function
8318             # correctly in the vertical aligner.
8319             # Patch to fix c002: but not if it contains text
8320             if ( length( $rLL->[$Kinner_closing]->[_TOKEN_] ) == 1 ) {
8321                 $rLL->[$Kinner_closing]->[_LEVEL_] =
8322                   $rLL->[$Kouter_closing]->[_LEVEL_];
8323             }
8324         }
8325     }
8326     return;
8327 }
8328
8329 sub is_welded_right_at_i {
8330     my ( $self, $i ) = @_;
8331     return unless ( $total_weld_count && $i >= 0 );
8332
8333     # Back up at a blank.  This routine is sometimes called at blanks.
8334     # TODO: this routine can eventually be eliminated by setting the weld flags
8335     # for all K indexes between the start and end of a weld, not just at
8336     # sequenced items.
8337     if ( $i > 0 && $types_to_go[$i] eq 'b' ) { $i-- }
8338     return defined( $self->[_rK_weld_right_]->{ $K_to_go[$i] } );
8339 }
8340
8341 sub is_welded_at_seqno {
8342
8343     my ( $self, $seqno ) = @_;
8344
8345     # given a sequence number:
8346     #   return true if it is welded either left or right
8347     #   return false otherwise
8348     return unless ( $total_weld_count && defined($seqno) );
8349     my $KK_o = $self->[_K_opening_container_]->{$seqno};
8350     return unless defined($KK_o);
8351     return defined( $self->[_rK_weld_left_]->{$KK_o} )
8352       || defined( $self->[_rK_weld_right_]->{$KK_o} );
8353 }
8354
8355 sub mark_short_nested_blocks {
8356
8357     # This routine looks at the entire file and marks any short nested blocks
8358     # which should not be broken.  The results are stored in the hash
8359     #     $rshort_nested->{$type_sequence}
8360     # which will be true if the container should remain intact.
8361     #
8362     # For example, consider the following line:
8363
8364     #   sub cxt_two { sort { $a <=> $b } test_if_list() }
8365
8366     # The 'sort' block is short and nested within an outer sub block.
8367     # Normally, the existence of the 'sort' block will force the sub block to
8368     # break open, but this is not always desirable. Here we will set a flag for
8369     # the sort block to prevent this.  To give the user control, we will
8370     # follow the input file formatting.  If either of the blocks is broken in
8371     # the input file then we will allow it to remain broken. Otherwise we will
8372     # set a flag to keep it together in later formatting steps.
8373
8374     # The flag which is set here will be checked in two places:
8375     # 'sub process_line_of_CODE' and 'sub starting_one_line_block'
8376
8377     my $self = shift;
8378     return if $rOpts->{'indent-only'};
8379
8380     my $rLL = $self->[_rLL_];
8381     return unless ( defined($rLL) && @{$rLL} );
8382
8383     return unless ( $rOpts->{'one-line-block-nesting'} );
8384
8385     my $K_opening_container = $self->[_K_opening_container_];
8386     my $K_closing_container = $self->[_K_closing_container_];
8387     my $rbreak_container    = $self->[_rbreak_container_];
8388     my $rshort_nested       = $self->[_rshort_nested_];
8389     my $rlines              = $self->[_rlines_];
8390
8391     # Variables needed for estimating line lengths
8392     my $maximum_text_length;
8393     my $starting_lentot;
8394     my $length_tol = 1;
8395
8396     my $excess_length_to_K = sub {
8397         my ($K) = @_;
8398
8399         # Estimate the length from the line start to a given token
8400         my $length = $self->cumulative_length_before_K($K) - $starting_lentot;
8401         my $excess_length = $length + $length_tol - $maximum_text_length;
8402         return ($excess_length);
8403     };
8404
8405     my $is_broken_block = sub {
8406
8407         # a block is broken if the input line numbers of the braces differ
8408         my ($seqno) = @_;
8409         my $K_opening = $K_opening_container->{$seqno};
8410         return unless ( defined($K_opening) );
8411         my $K_closing = $K_closing_container->{$seqno};
8412         return unless ( defined($K_closing) );
8413         return $rbreak_container->{$seqno}
8414           || $rLL->[$K_closing]->[_LINE_INDEX_] !=
8415           $rLL->[$K_opening]->[_LINE_INDEX_];
8416     };
8417
8418     # loop over all containers
8419     my @open_block_stack;
8420     my $iline = -1;
8421     my $KNEXT = $self->[_K_first_seq_item_];
8422     while ( defined($KNEXT) ) {
8423         my $KK = $KNEXT;
8424         $KNEXT = $rLL->[$KNEXT]->[_KNEXT_SEQ_ITEM_];
8425         my $rtoken_vars   = $rLL->[$KK];
8426         my $type_sequence = $rtoken_vars->[_TYPE_SEQUENCE_];
8427         if ( !$type_sequence ) {
8428             next if ( $KK == 0 );    # first token in file may not be container
8429
8430             # A fault here implies that an error was made in the little loop at
8431             # the bottom of sub 'respace_tokens' which set the values of
8432             # _KNEXT_SEQ_ITEM_.  Or an error has been introduced in the
8433             # loop control lines above.
8434             Fault("sequence = $type_sequence not defined at K=$KK");
8435         }
8436
8437         # Patch: do not mark short blocks with welds.
8438         # In some cases blinkers can form (case b690).
8439         if ( $total_weld_count && $self->is_welded_at_seqno($type_sequence) ) {
8440             next;
8441         }
8442
8443         # We are just looking at code blocks
8444         my $token = $rtoken_vars->[_TOKEN_];
8445         my $type  = $rtoken_vars->[_TYPE_];
8446         next unless ( $type eq $token );
8447         my $block_type = $rtoken_vars->[_BLOCK_TYPE_];
8448         next unless ($block_type);
8449
8450         # Keep a stack of all acceptable block braces seen.
8451         # Only consider blocks entirely on one line so dump the stack when line
8452         # changes.
8453         my $iline_last = $iline;
8454         $iline = $rLL->[$KK]->[_LINE_INDEX_];
8455         if ( $iline != $iline_last ) { @open_block_stack = () }
8456
8457         if ( $token eq '}' ) {
8458             if (@open_block_stack) { pop @open_block_stack }
8459         }
8460         next unless ( $token eq '{' );
8461
8462         # block must be balanced (bad scripts may be unbalanced)
8463         my $K_opening = $K_opening_container->{$type_sequence};
8464         my $K_closing = $K_closing_container->{$type_sequence};
8465         next unless ( defined($K_opening) && defined($K_closing) );
8466
8467         # require that this block be entirely on one line
8468         next if ( $is_broken_block->($type_sequence) );
8469
8470         # See if this block fits on one line of allowed length (which may
8471         # be different from the input script)
8472         $starting_lentot =
8473           $KK <= 0 ? 0 : $rLL->[ $KK - 1 ]->[_CUMULATIVE_LENGTH_];
8474         my $level    = $rLL->[$KK]->[_LEVEL_];
8475         my $ci_level = $rLL->[$KK]->[_CI_LEVEL_];
8476         $maximum_text_length =
8477           $maximum_text_length_at_level[$level] -
8478           $ci_level * $rOpts_continuation_indentation;
8479
8480         # Dump the stack if block is too long and skip this block
8481         if ( $excess_length_to_K->($K_closing) > 0 ) {
8482             @open_block_stack = ();
8483             next;
8484         }
8485
8486         # OK, Block passes tests, remember it
8487         push @open_block_stack, $type_sequence;
8488
8489         # We are only marking nested code blocks,
8490         # so check for a previous block on the stack
8491         next unless ( @open_block_stack > 1 );
8492
8493         # Looks OK, mark this as a short nested block
8494         $rshort_nested->{$type_sequence} = 1;
8495
8496     }
8497     return;
8498 }
8499
8500 sub adjust_indentation_levels {
8501
8502     my ($self) = @_;
8503
8504     # Called once per file to do special indentation adjustments.
8505     # These routines adjust levels either by changing _CI_LEVEL_ directly or
8506     # by setting modified levels in the array $self->[_radjusted_levels_].
8507
8508     # Initialize the adjusted levels. These will be the levels actually used
8509     # for computing indentation.
8510
8511     # NOTE: This routine is called after the weld routines, which may have
8512     # already adjusted _LEVEL_, so we are making adjustments on top of those
8513     # levels.  It would be much nicer to have the weld routines also use this
8514     # adjustment, but that gets complicated when we combine -gnu -wn and have
8515     # some welded quotes.
8516     my $radjusted_levels = $self->[_radjusted_levels_];
8517     my $rLL              = $self->[_rLL_];
8518     foreach my $KK ( 0 .. @{$rLL} - 1 ) {
8519         $radjusted_levels->[$KK] = $rLL->[$KK]->[_LEVEL_];
8520     }
8521
8522     # First set adjusted levels for any non-indenting braces.
8523     $self->non_indenting_braces();
8524
8525     # Adjust breaks and indentation list containers
8526     $self->break_before_list_opening_containers();
8527
8528     # Set adjusted levels for the whitespace cycle option.
8529     $self->whitespace_cycle_adjustment();
8530
8531     # Adjust continuation indentation if -bli is set
8532     $self->bli_adjustment();
8533
8534     $self->extended_ci()
8535       if ( $rOpts->{'extended-continuation-indentation'} );
8536
8537     # Now clip any adjusted levels to be non-negative
8538     $self->clip_adjusted_levels();
8539
8540     return;
8541 }
8542
8543 sub clip_adjusted_levels {
8544
8545     # Replace any negative adjusted levels with zero.
8546     # Negative levels can occur in files with brace errors.
8547     my ($self) = @_;
8548     my $radjusted_levels = $self->[_radjusted_levels_];
8549     return unless defined($radjusted_levels) && @{$radjusted_levels};
8550     foreach ( @{$radjusted_levels} ) { $_ = 0 if ( $_ < 0 ) }
8551     return;
8552 }
8553
8554 sub non_indenting_braces {
8555
8556     # Called once per file to handle the --non-indenting-braces parameter.
8557     # Remove indentation within marked braces if requested
8558     my ($self) = @_;
8559     return unless ( $rOpts->{'non-indenting-braces'} );
8560
8561     my $rLL = $self->[_rLL_];
8562     return unless ( defined($rLL) && @{$rLL} );
8563
8564     my $rspecial_side_comment_type = $self->[_rspecial_side_comment_type_];
8565
8566     my $radjusted_levels = $self->[_radjusted_levels_];
8567     my $Kmax             = @{$rLL} - 1;
8568     my @seqno_stack;
8569
8570     my $is_non_indenting_brace = sub {
8571         my ($KK) = @_;
8572
8573         # looking for an opening block brace
8574         my $token      = $rLL->[$KK]->[_TOKEN_];
8575         my $block_type = $rLL->[$KK]->[_BLOCK_TYPE_];
8576         return unless ( $token eq '{' && $block_type );
8577
8578         # followed by a comment
8579         my $K_sc = $KK + 1;
8580         $K_sc += 1
8581           if ( $K_sc <= $Kmax && $rLL->[$K_sc]->[_TYPE_] eq 'b' );
8582         return unless ( $K_sc <= $Kmax );
8583         my $type_sc = $rLL->[$K_sc]->[_TYPE_];
8584         return unless ( $type_sc eq '#' );
8585
8586         # on the same line
8587         my $line_index    = $rLL->[$KK]->[_LINE_INDEX_];
8588         my $line_index_sc = $rLL->[$K_sc]->[_LINE_INDEX_];
8589         return unless ( $line_index_sc == $line_index );
8590
8591         # get the side comment text
8592         my $token_sc = $rLL->[$K_sc]->[_TOKEN_];
8593
8594         # The pattern ends in \s but we have removed the newline, so
8595         # we added it back for the match. That way we require an exact
8596         # match to the special string and also allow additional text.
8597         $token_sc .= "\n";
8598         my $is_nib = ( $token_sc =~ /$non_indenting_brace_pattern/ );
8599         if ($is_nib) { $rspecial_side_comment_type->{$K_sc} = 'NIB' }
8600         return $is_nib;
8601     };
8602
8603     foreach my $KK ( 0 .. $Kmax ) {
8604         my $num   = @seqno_stack;
8605         my $seqno = $rLL->[$KK]->[_TYPE_SEQUENCE_];
8606         if ($seqno) {
8607             my $token = $rLL->[$KK]->[_TOKEN_];
8608             if ( $token eq '{' && $is_non_indenting_brace->($KK) ) {
8609                 push @seqno_stack, $seqno;
8610             }
8611             if ( $token eq '}' && @seqno_stack && $seqno_stack[-1] == $seqno ) {
8612                 pop @seqno_stack;
8613                 $num -= 1;
8614             }
8615         }
8616         next unless $num;
8617         $radjusted_levels->[$KK] -= $num;
8618     }
8619     return;
8620 }
8621
8622 sub whitespace_cycle_adjustment {
8623
8624     my $self = shift;
8625
8626     # Called once per file to implement the --whitespace-cycle option
8627     my $rLL = $self->[_rLL_];
8628     return unless ( defined($rLL) && @{$rLL} );
8629     my $radjusted_levels = $self->[_radjusted_levels_];
8630
8631     my $rOpts_whitespace_cycle = $rOpts->{'whitespace-cycle'};
8632     if ( $rOpts_whitespace_cycle && $rOpts_whitespace_cycle > 0 ) {
8633
8634         my $Kmax = @{$rLL} - 1;
8635
8636         my $whitespace_last_level  = -1;
8637         my @whitespace_level_stack = ();
8638         my $last_nonblank_type     = 'b';
8639         my $last_nonblank_token    = '';
8640         foreach my $KK ( 0 .. $Kmax ) {
8641             my $level_abs = $radjusted_levels->[$KK];
8642             my $level     = $level_abs;
8643             if ( $level_abs < $whitespace_last_level ) {
8644                 pop(@whitespace_level_stack);
8645             }
8646             if ( !@whitespace_level_stack ) {
8647                 push @whitespace_level_stack, $level_abs;
8648             }
8649             elsif ( $level_abs > $whitespace_last_level ) {
8650                 $level = $whitespace_level_stack[-1] +
8651                   ( $level_abs - $whitespace_last_level );
8652
8653                 if (
8654                     # 1 Try to break at a block brace
8655                     (
8656                            $level > $rOpts_whitespace_cycle
8657                         && $last_nonblank_type eq '{'
8658                         && $last_nonblank_token eq '{'
8659                     )
8660
8661                     # 2 Then either a brace or bracket
8662                     || (   $level > $rOpts_whitespace_cycle + 1
8663                         && $last_nonblank_token =~ /^[\{\[]$/ )
8664
8665                     # 3 Then a paren too
8666                     || $level > $rOpts_whitespace_cycle + 2
8667                   )
8668                 {
8669                     $level = 1;
8670                 }
8671                 push @whitespace_level_stack, $level;
8672             }
8673             $level = $whitespace_level_stack[-1];
8674             $radjusted_levels->[$KK] = $level;
8675
8676             $whitespace_last_level = $level_abs;
8677             my $type  = $rLL->[$KK]->[_TYPE_];
8678             my $token = $rLL->[$KK]->[_TOKEN_];
8679             if ( $type ne 'b' ) {
8680                 $last_nonblank_type  = $type;
8681                 $last_nonblank_token = $token;
8682             }
8683         }
8684     }
8685     return;
8686 }
8687
8688 use constant DEBUG_BBX => 0;
8689
8690 sub break_before_list_opening_containers {
8691
8692     my ($self) = @_;
8693
8694     # This routine is called once per batch to implement parameters
8695     # --break-before-hash-brace=n and similar -bbx=n flags
8696     #    and their associated indentation flags:
8697     # --break-before-hash-brace-and-indent and similar -bbxi=n
8698
8699     # Nothing to do if none of the -bbx=n parameters has been set
8700     return unless %break_before_container_types;
8701
8702     my $rLL = $self->[_rLL_];
8703     return unless ( defined($rLL) && @{$rLL} );
8704
8705     # Loop over all opening container tokens
8706     my $K_opening_container       = $self->[_K_opening_container_];
8707     my $K_closing_container       = $self->[_K_closing_container_];
8708     my $ris_broken_container      = $self->[_ris_broken_container_];
8709     my $ris_permanently_broken    = $self->[_ris_permanently_broken_];
8710     my $rhas_list                 = $self->[_rhas_list_];
8711     my $rhas_broken_list          = $self->[_rhas_broken_list_];
8712     my $rhas_broken_list_with_lec = $self->[_rhas_broken_list_with_lec_];
8713     my $radjusted_levels          = $self->[_radjusted_levels_];
8714     my $rparent_of_seqno          = $self->[_rparent_of_seqno_];
8715     my $rlines                    = $self->[_rlines_];
8716     my $rtype_count_by_seqno      = $self->[_rtype_count_by_seqno_];
8717     my $rlec_count_by_seqno       = $self->[_rlec_count_by_seqno_];
8718     my $rno_xci_by_seqno          = $self->[_rno_xci_by_seqno_];
8719     my $rK_weld_right             = $self->[_rK_weld_right_];
8720
8721     my $length_tol =
8722       max( 1, $rOpts_continuation_indentation, $rOpts_indent_columns );
8723     if ($rOpts_ignore_old_breakpoints) {
8724         $length_tol += $rOpts_maximum_line_length;
8725     }
8726
8727     my $rbreak_before_container_by_seqno = {};
8728     my $rwant_reduced_ci                 = {};
8729     foreach my $seqno ( keys %{$K_opening_container} ) {
8730
8731         #################################################################
8732         # Part 1: Examine any -bbx=n flags
8733         #################################################################
8734
8735         my $KK = $K_opening_container->{$seqno};
8736         next if ( $rLL->[$KK]->[_BLOCK_TYPE_] );
8737
8738         # This must be a list or contain a list.
8739         # Note1: switched from 'has_broken_list' to 'has_list' to fix b1024.
8740         # Note2: 'has_list' holds the depth to the sub-list.  We will require
8741         #  a depth of just 1
8742         my $is_list  = $self->is_list_by_seqno($seqno);
8743         my $has_list = $rhas_list->{$seqno};
8744
8745         # Fix for b1173: if welded opening container, use flag of innermost
8746         # seqno.  Otherwise, the restriction $has_list==1 prevents triple and
8747         # higher welds from following the -BBX parameters.
8748         if ($total_weld_count) {
8749             my $KK_test = $rK_weld_right->{$KK};
8750             if ( defined($KK_test) ) {
8751                 my $seqno_inner = $rLL->[$KK_test]->[_TYPE_SEQUENCE_];
8752                 $is_list ||= $self->is_list_by_seqno($seqno_inner);
8753                 $has_list = $rhas_list->{$seqno_inner};
8754             }
8755         }
8756
8757         next unless ( $is_list || $has_list && $has_list == 1 );
8758
8759         my $has_broken_list   = $rhas_broken_list->{$seqno};
8760         my $has_list_with_lec = $rhas_broken_list_with_lec->{$seqno};
8761
8762         # Only for types of container tokens with a non-default break option
8763         my $token        = $rLL->[$KK]->[_TOKEN_];
8764         my $break_option = $break_before_container_types{$token};
8765         next unless ($break_option);
8766
8767         # Require previous nonblank to be '=' or '=>'
8768         my $Kprev = $KK - 1;
8769         next if ( $Kprev < 0 );
8770         my $prev_type = $rLL->[$Kprev]->[_TYPE_];
8771         if ( $prev_type eq 'b' ) {
8772             $Kprev--;
8773             next if ( $Kprev < 0 );
8774             $prev_type = $rLL->[$Kprev]->[_TYPE_];
8775         }
8776         next unless ( $is_equal_or_fat_comma{$prev_type} );
8777
8778         my $ci = $rLL->[$KK]->[_CI_LEVEL_];
8779
8780         DEBUG_BBX
8781           && print STDOUT
8782 "BBX: Looking at seqno=$seqno, token = $token with option=$break_option\n";
8783
8784         # -bbx=1 = stable, try to follow input
8785         if ( $break_option == 1 ) {
8786
8787             my $iline    = $rLL->[$KK]->[_LINE_INDEX_];
8788             my $rK_range = $rlines->[$iline]->{_rK_range};
8789             my ( $Kfirst, $Klast ) = @{$rK_range};
8790             next unless ( $KK == $Kfirst );
8791         }
8792
8793         # -bbx=2 => apply this style only for a 'complex' list
8794         elsif ( $break_option == 2 ) {
8795
8796             #  break if this list contains a broken list with line-ending comma
8797             my $ok_to_break;
8798             my $Msg = "";
8799             if ($has_list_with_lec) {
8800                 $ok_to_break = 1;
8801                 DEBUG_BBX && do { $Msg = "has list with lec;" };
8802             }
8803
8804             if ( !$ok_to_break ) {
8805
8806                 # Turn off -xci if -bbx=2 and this container has a sublist but
8807                 # not a broken sublist. This avoids creating blinkers.  The
8808                 # problem is that -xci can cause one-line lists to break open,
8809                 # and thereby creating formatting instability.
8810                 # This fixes cases b1033 b1036 b1037 b1038 b1042 b1043 b1044
8811                 # b1045 b1046 b1047 b1051 b1052 b1061.
8812                 if ($has_list) { $rno_xci_by_seqno->{$seqno} = 1 }
8813
8814                 my $parent = $rparent_of_seqno->{$seqno};
8815                 if ( $self->is_list_by_seqno($parent) ) {
8816                     DEBUG_BBX && do { $Msg = "parent is list" };
8817                     $ok_to_break = 1;
8818                 }
8819             }
8820
8821             # Patch to fix b1099 for -lp
8822             #  ok in -lp mode if this is a list which contains a list
8823             if ( !$ok_to_break && $rOpts_line_up_parentheses ) {
8824                 if ( $is_list && $has_list ) {
8825                     $ok_to_break = 1;
8826                     DEBUG_BBX && do { $Msg = "is list or has list" };
8827                 }
8828             }
8829
8830             if ( !$ok_to_break ) {
8831                 DEBUG_BBX
8832                   && print STDOUT "Not breaking at seqno=$seqno: $Msg\n";
8833                 next;
8834             }
8835
8836             DEBUG_BBX
8837               && print STDOUT "OK to break at seqno=$seqno: $Msg\n";
8838
8839             # Patch: turn off -xci if -bbx=2 and -lp
8840             # This fixes cases b1090 b1095 b1101 b1116 b1118 b1121 b1122
8841             $rno_xci_by_seqno->{$seqno} = 1 if ($rOpts_line_up_parentheses);
8842         }
8843
8844         # -bbx=3 = always break
8845         elsif ( $break_option == 3 ) {
8846
8847             # ok to break
8848         }
8849
8850         # Shouldn't happen! Bad flag, but make behavior same as 3
8851         else {
8852             # ok to break
8853         }
8854
8855         # Set a flag for actual implementation later in
8856         # sub insert_breaks_before_list_opening_containers
8857         $rbreak_before_container_by_seqno->{$seqno} = 1;
8858         DEBUG_BBX
8859           && print STDOUT "BBX: ok to break at seqno=$seqno\n";
8860
8861         # -bbxi=0: Nothing more to do if the ci value remains unchanged
8862         my $ci_flag = $container_indentation_options{$token};
8863         next unless ($ci_flag);
8864
8865         # -bbxi=1: This option removes ci and is handled in
8866         # later sub set_adjusted_indentation
8867         if ( $ci_flag == 1 ) {
8868             $rwant_reduced_ci->{$seqno} = 1;
8869             next;
8870         }
8871
8872         # -bbxi=2 ...
8873
8874         #################################################################
8875         # Part 2: Perform tests before committing to changing ci and level
8876         #################################################################
8877
8878         # Before changing the ci level of the opening container, we need
8879         # to be sure that the container will be broken in the later stages of
8880         # formatting.  We have to do this because we are working early in the
8881         # formatting pipeline.  A problem can occur if we change the ci or
8882         # level of the opening token but do not actually break the container
8883         # open as expected.  In most cases it wouldn't make any difference if
8884         # we changed ci or not, but there are some edge cases where this
8885         # can cause blinking states, so we need to try to only change ci if
8886         # the container will really be broken.
8887
8888         # Only consider containers already broken
8889         next if ( !$ris_broken_container->{$seqno} );
8890
8891         # Always ok to change ci for permanently broken containers
8892         if ( $ris_permanently_broken->{$seqno} ) {
8893             goto OK;
8894         }
8895
8896         # Always OK if this list contains a broken sub-container with
8897         # a non-terminal line-ending comma
8898         if ($has_list_with_lec) { goto OK }
8899
8900         # From here on we are considering a single container...
8901
8902         # A single container must have at least 1 line-ending comma:
8903         next unless ( $rlec_count_by_seqno->{$seqno} );
8904
8905         # Since it has a line-ending comma, it will stay broken if the -boc
8906         # flag is set
8907         if ($rOpts_break_at_old_comma_breakpoints) { goto OK }
8908
8909         # OK if the container contains multiple fat commas
8910         # Better: multiple lines with fat commas
8911         if ( !$rOpts_ignore_old_breakpoints ) {
8912             my $rtype_count = $rtype_count_by_seqno->{$seqno};
8913             next unless ($rtype_count);
8914             my $fat_comma_count = $rtype_count->{'=>'};
8915             DEBUG_BBX
8916               && print STDOUT "BBX: fat comma count=$fat_comma_count\n";
8917             if ( $fat_comma_count && $fat_comma_count >= 2 ) { goto OK }
8918         }
8919
8920         # The last check we can make is to see if this container could fit on a
8921         # single line.  Use the least possble indentation in the estmate (ci=0),
8922         # so we are not subtracting $ci * $rOpts_continuation_indentation from
8923         # tablulated $maximum_text_length  value.
8924         my $level               = $rLL->[$KK]->[_LEVEL_];
8925         my $maximum_text_length = $maximum_text_length_at_level[$level];
8926         my $K_closing           = $K_closing_container->{$seqno};
8927         my $length = $self->cumulative_length_before_K($K_closing) -
8928           $self->cumulative_length_before_K($KK);
8929         my $excess_length = $length - $maximum_text_length;
8930         DEBUG_BBX
8931           && print STDOUT
8932 "BBX: excess=$excess_length: maximum_text_length=$maximum_text_length, length=$length, ci=$ci\n";
8933
8934         # OK if the net container definitely breaks on length
8935         if ( $excess_length > $length_tol ) {
8936             DEBUG_BBX
8937               && print STDOUT "BBX: excess_length=$excess_length\n";
8938             goto OK;
8939         }
8940
8941         # Otherwise skip it
8942         next;
8943
8944         #################################################################
8945         # Part 3: Looks OK: apply -bbx=n and any related -bbxi=n flag
8946         #################################################################
8947
8948       OK:
8949
8950         DEBUG_BBX && print STDOUT "BBX: OK to break\n";
8951
8952         # -bbhbi=n
8953         # -bbsbi=n
8954         # -bbpi=n
8955
8956         # where:
8957
8958         # n=0  default indentation (usually one ci)
8959         # n=1  outdent one ci
8960         # n=2  indent one level (minus one ci)
8961         # n=3  indent one extra ci [This may be dropped]
8962
8963         # NOTE: We are adjusting indentation of the opening container. The
8964         # closing container will normally follow the indentation of the opening
8965         # container automatically, so this is not currently done.
8966         next unless ($ci);
8967
8968         # option 1: outdent
8969         if ( $ci_flag == 1 ) {
8970             $ci -= 1;
8971         }
8972
8973         # option 2: indent one level
8974         elsif ( $ci_flag == 2 ) {
8975             $ci -= 1;
8976             $radjusted_levels->[$KK] += 1;
8977         }
8978
8979         # unknown option
8980         else {
8981             # Shouldn't happen - leave ci unchanged
8982         }
8983
8984         $rLL->[$KK]->[_CI_LEVEL_] = $ci if ( $ci >= 0 );
8985     }
8986
8987     $self->[_rbreak_before_container_by_seqno_] =
8988       $rbreak_before_container_by_seqno;
8989     $self->[_rwant_reduced_ci_] = $rwant_reduced_ci;
8990     return;
8991 }
8992
8993 use constant DEBUG_XCI => 0;
8994
8995 sub extended_ci {
8996
8997     # This routine implements the -xci (--extended-continuation-indentation)
8998     # flag.  We add CI to interior tokens of a container which itself has CI but
8999     # only if a token does not already have CI.
9000
9001     # To do this, we will locate opening tokens which themselves have
9002     # continuation indentation (CI).  We track them with their sequence
9003     # numbers.  These sequence numbers are called 'controlling sequence
9004     # numbers'.  They apply continuation indentation to the tokens that they
9005     # contain.  These inner tokens remember their controlling sequence numbers.
9006     # Later, when these inner tokens are output, they have to see if the output
9007     # lines with their controlling tokens were output with CI or not.  If not,
9008     # then they must remove their CI too.
9009
9010     # The controlling CI concept works hierarchically.  But CI itself is not
9011     # hierarchical; it is either on or off. There are some rare instances where
9012     # it would be best to have hierarchical CI too, but not enough to be worth
9013     # the programming effort.
9014
9015     # The operations to remove unwanted CI are done in sub 'undo_ci'.
9016
9017     my ($self) = @_;
9018
9019     my $rLL = $self->[_rLL_];
9020     return unless ( defined($rLL) && @{$rLL} );
9021
9022     my $ris_list_by_seqno        = $self->[_ris_list_by_seqno_];
9023     my $ris_seqno_controlling_ci = $self->[_ris_seqno_controlling_ci_];
9024     my $rseqno_controlling_my_ci = $self->[_rseqno_controlling_my_ci_];
9025     my $rlines                   = $self->[_rlines_];
9026     my $rno_xci_by_seqno         = $self->[_rno_xci_by_seqno_];
9027     my $ris_bli_container        = $self->[_ris_bli_container_];
9028
9029     my %available_space;
9030
9031     # Loop over all opening container tokens
9032     my $K_opening_container  = $self->[_K_opening_container_];
9033     my $K_closing_container  = $self->[_K_closing_container_];
9034     my $ris_broken_container = $self->[_ris_broken_container_];
9035     my @seqno_stack;
9036     my $seqno_top;
9037     my $KLAST;
9038     my $KNEXT = $self->[_K_first_seq_item_];
9039
9040     # The following variable can be used to allow a little extra space to
9041     # avoid blinkers.  A value $len_tol = 20 fixed the following
9042     # fixes cases: b1025 b1026 b1027 b1028 b1029 b1030 but NOT b1031.
9043     # It turned out that the real problem was misparsing a list brace as
9044     # a code block in a 'use' statement when the line length was extremely
9045     # small.  A value of 0 works now, but a slightly larger value can
9046     # be used to minimize the chance of a blinker.
9047     my $len_tol = 0;
9048
9049     while ( defined($KNEXT) ) {
9050
9051         # Fix all tokens up to the next sequence item if we are changing CI
9052         if ($seqno_top) {
9053
9054             my $is_list = $ris_list_by_seqno->{$seqno_top};
9055             my $space   = $available_space{$seqno_top};
9056             my $length  = $rLL->[$KLAST]->[_CUMULATIVE_LENGTH_];
9057             my $count   = 0;
9058             for ( my $Kt = $KLAST + 1 ; $Kt < $KNEXT ; $Kt++ ) {
9059
9060                 # But do not include tokens which might exceed the line length
9061                 # and are not in a list.
9062                 # ... This fixes case b1031
9063                 my $length_before = $length;
9064                 $length = $rLL->[$Kt]->[_CUMULATIVE_LENGTH_];
9065                 if (
9066                     !$rLL->[$Kt]->[_CI_LEVEL_]
9067                     && (   $is_list
9068                         || $length - $length_before < $space
9069                         || $rLL->[$Kt]->[_TYPE_] eq '#' )
9070                   )
9071                 {
9072                     $rLL->[$Kt]->[_CI_LEVEL_] = 1;
9073                     $rseqno_controlling_my_ci->{$Kt} = $seqno_top;
9074                     $count++;
9075                 }
9076             }
9077             $ris_seqno_controlling_ci->{$seqno_top} += $count;
9078         }
9079
9080         $KLAST = $KNEXT;
9081         my $KK = $KNEXT;
9082         $KNEXT = $rLL->[$KNEXT]->[_KNEXT_SEQ_ITEM_];
9083
9084         my $seqno     = $rLL->[$KK]->[_TYPE_SEQUENCE_];
9085         my $K_opening = $K_opening_container->{$seqno};
9086
9087         # see if we have reached the end of the current controlling container
9088         if ( $seqno_top && $seqno == $seqno_top ) {
9089             $seqno_top = pop @seqno_stack;
9090         }
9091
9092         # Patch to fix some block types...
9093         # Certain block types arrive from the tokenizer without CI but should
9094         # have it for this option.  These include anonymous subs and
9095         #     do sort map grep eval
9096         my $block_type = $rLL->[$KK]->[_BLOCK_TYPE_];
9097         if ( $block_type && $is_block_with_ci{$block_type} ) {
9098             $rLL->[$KK]->[_CI_LEVEL_] = 1;
9099             if ($seqno_top) {
9100                 $rseqno_controlling_my_ci->{$KK} = $seqno_top;
9101                 $ris_seqno_controlling_ci->{$seqno_top}++;
9102             }
9103         }
9104
9105         # If this does not have ci, update ci if necessary and continue looking
9106         if ( !$rLL->[$KK]->[_CI_LEVEL_] ) {
9107             if ($seqno_top) {
9108                 $rLL->[$KK]->[_CI_LEVEL_] = 1;
9109                 $rseqno_controlling_my_ci->{$KK} = $seqno_top;
9110                 $ris_seqno_controlling_ci->{$seqno_top}++;
9111             }
9112             next;
9113         }
9114
9115         # Skip if requested by -bbx to avoid blinkers
9116         if ( $rno_xci_by_seqno->{$seqno} ) {
9117             next;
9118         }
9119
9120         # Skip if this is a -bli container (this fixes case b1065) Note: case
9121         # b1065 is also fixed by the update for b1055, so this update is not
9122         # essential now.  But there does not seem to be a good reason to add
9123         # xci and bli together, so the update is retained.
9124         if ( $ris_bli_container->{$seqno} ) {
9125             next;
9126         }
9127
9128         # We are looking for opening container tokens with ci
9129         next unless ( defined($K_opening) && $KK == $K_opening );
9130
9131         # Make sure there is a corresponding closing container
9132         # (could be missing if the script has a brace error)
9133         my $K_closing = $K_closing_container->{$seqno};
9134         next unless defined($K_closing);
9135
9136         # Require different input lines. This will filter out a large number
9137         # of small hash braces and array brackets.  If we accidentally filter
9138         # out an important container, it will get fixed on the next pass.
9139         if (
9140             $rLL->[$K_opening]->[_LINE_INDEX_] ==
9141             $rLL->[$K_closing]->[_LINE_INDEX_]
9142             && ( $rLL->[$K_closing]->[_CUMULATIVE_LENGTH_] -
9143                 $rLL->[$K_opening]->[_CUMULATIVE_LENGTH_] >
9144                 $rOpts_maximum_line_length )
9145           )
9146         {
9147             DEBUG_XCI
9148               && print "XCI: Skipping seqno=$seqno, require different lines\n";
9149             next;
9150         }
9151
9152         # Do not apply -xci if adding extra ci will put the container contents
9153         # beyond the line length limit (fixes cases b899 b935)
9154         my $level    = $rLL->[$K_opening]->[_LEVEL_];
9155         my $ci_level = $rLL->[$K_opening]->[_CI_LEVEL_];
9156         my $maximum_text_length =
9157           $maximum_text_length_at_level[$level] -
9158           $ci_level * $rOpts_continuation_indentation;
9159
9160         # remember how much space is available for patch b1031 above
9161         my $space =
9162           $maximum_text_length - $len_tol - $rOpts_continuation_indentation;
9163
9164         if ( $space < 0 ) {
9165             DEBUG_XCI && print "XCI: Skipping seqno=$seqno, space=$space\n";
9166             next;
9167         }
9168         DEBUG_XCI && print "XCI: OK seqno=$seqno, space=$space\n";
9169
9170         $available_space{$seqno} = $space;
9171
9172         # This becomes the next controlling container
9173         push @seqno_stack, $seqno_top if ($seqno_top);
9174         $seqno_top = $seqno;
9175     }
9176     return;
9177 }
9178
9179 sub bli_adjustment {
9180
9181     # Called once per file to implement the --brace-left-and-indent option.
9182     # If -bli is set, adds one continuation indentation for certain braces
9183     my $self = shift;
9184     return unless ( $rOpts->{'brace-left-and-indent'} );
9185     my $rLL = $self->[_rLL_];
9186     return unless ( defined($rLL) && @{$rLL} );
9187     my $ris_bli_container   = $self->[_ris_bli_container_];
9188     my $K_opening_container = $self->[_K_opening_container_];
9189     my $KNEXT               = $self->[_K_first_seq_item_];
9190
9191     while ( defined($KNEXT) ) {
9192         my $KK = $KNEXT;
9193         $KNEXT = $rLL->[$KNEXT]->[_KNEXT_SEQ_ITEM_];
9194         my $block_type = $rLL->[$KK]->[_BLOCK_TYPE_];
9195         if ( $block_type && $block_type =~ /$bli_pattern/ ) {
9196             my $seqno     = $rLL->[$KK]->[_TYPE_SEQUENCE_];
9197             my $K_opening = $K_opening_container->{$seqno};
9198             if ( defined($K_opening) ) {
9199                 if ( $KK eq $K_opening ) {
9200                     $rLL->[$KK]->[_CI_LEVEL_]++;
9201                     $ris_bli_container->{$seqno} = 1;
9202                 }
9203                 else {
9204                     $rLL->[$KK]->[_CI_LEVEL_] =
9205                       $rLL->[$K_opening]->[_CI_LEVEL_];
9206                 }
9207             }
9208         }
9209     }
9210     return;
9211 }
9212
9213 sub find_multiline_qw {
9214
9215     my $self = shift;
9216
9217     # Multiline qw quotes are not sequenced items like containers { [ (
9218     # but behave in some respects in a similar way. So this routine finds them
9219     # and creates a separate sequence number system for later use.
9220
9221     # This is straightforward because they always begin at the end of one line
9222     # and and at the beginning of a later line. This is true no matter how we
9223     # finally make our line breaks, so we can find them before deciding on new
9224     # line breaks.
9225
9226     my $rstarting_multiline_qw_seqno_by_K = {};
9227     my $rending_multiline_qw_seqno_by_K   = {};
9228     my $rKrange_multiline_qw_by_seqno     = {};
9229     my $rmultiline_qw_has_extra_level     = {};
9230
9231     my $ris_excluded_lp_container = $self->[_ris_excluded_lp_container_];
9232
9233     my $rlines = $self->[_rlines_];
9234     my $rLL    = $self->[_rLL_];
9235     my $qw_seqno;
9236     my $num_qw_seqno = 0;
9237     my $K_start_multiline_qw;
9238
9239     foreach my $line_of_tokens ( @{$rlines} ) {
9240
9241         my $line_type = $line_of_tokens->{_line_type};
9242         next unless ( $line_type eq 'CODE' );
9243         my $rK_range = $line_of_tokens->{_rK_range};
9244         my ( $Kfirst, $Klast ) = @{$rK_range};
9245         next unless ( defined($Kfirst) && defined($Klast) );   # skip blank line
9246         if ( defined($K_start_multiline_qw) ) {
9247             my $type = $rLL->[$Kfirst]->[_TYPE_];
9248
9249             # shouldn't happen
9250             if ( $type ne 'q' ) {
9251                 DEVEL_MODE && print STDERR <<EOM;
9252 STRANGE: started multiline qw at K=$K_start_multiline_qw but didn't see q qw at K=$Kfirst\n";
9253 EOM
9254                 $K_start_multiline_qw = undef;
9255                 next;
9256             }
9257             my $Kprev  = $self->K_previous_nonblank($Kfirst);
9258             my $Knext  = $self->K_next_nonblank($Kfirst);
9259             my $type_m = defined($Kprev) ? $rLL->[$Kprev]->[_TYPE_] : 'b';
9260             my $type_p = defined($Knext) ? $rLL->[$Knext]->[_TYPE_] : 'b';
9261             if ( $type_m eq 'q' && $type_p ne 'q' ) {
9262                 $rending_multiline_qw_seqno_by_K->{$Kfirst} = $qw_seqno;
9263                 $rKrange_multiline_qw_by_seqno->{$qw_seqno} =
9264                   [ $K_start_multiline_qw, $Kfirst ];
9265                 $K_start_multiline_qw = undef;
9266                 $qw_seqno             = undef;
9267             }
9268         }
9269         if ( !defined($K_start_multiline_qw)
9270             && $rLL->[$Klast]->[_TYPE_] eq 'q' )
9271         {
9272             my $Kprev  = $self->K_previous_nonblank($Klast);
9273             my $Knext  = $self->K_next_nonblank($Klast);
9274             my $type_m = defined($Kprev) ? $rLL->[$Kprev]->[_TYPE_] : 'b';
9275             my $type_p = defined($Knext) ? $rLL->[$Knext]->[_TYPE_] : 'b';
9276             if ( $type_m ne 'q' && $type_p eq 'q' ) {
9277                 $num_qw_seqno++;
9278                 $qw_seqno             = 'q' . $num_qw_seqno;
9279                 $K_start_multiline_qw = $Klast;
9280                 $rstarting_multiline_qw_seqno_by_K->{$Klast} = $qw_seqno;
9281             }
9282         }
9283     }
9284
9285     # Give multiline qw lists extra indentation instead of CI.  This option
9286     # works well but is currently only activated when the -xci flag is set.
9287     # The reason is to avoid unexpected changes in formatting.
9288     if ( $rOpts->{'extended-continuation-indentation'} ) {
9289         while ( my ( $qw_seqno, $rKrange ) =
9290             each %{$rKrange_multiline_qw_by_seqno} )
9291         {
9292             my ( $Kbeg, $Kend ) = @{$rKrange};
9293
9294             # require isolated closing token
9295             my $token_end = $rLL->[$Kend]->[_TOKEN_];
9296             next
9297               unless ( length($token_end) == 1
9298                 && ( $is_closing_token{$token_end} || $token_end eq '>' ) );
9299
9300             # require isolated opening token
9301             my $token_beg = $rLL->[$Kbeg]->[_TOKEN_];
9302
9303             # allow space(s) after the qw
9304             if ( length($token_beg) > 3 && substr( $token_beg, 2, 1 ) eq ' ' ) {
9305                 $token_beg =~ s/\s+//;
9306             }
9307
9308             next unless ( length($token_beg) == 3 );
9309
9310             foreach my $KK ( $Kbeg + 1 .. $Kend - 1 ) {
9311                 $rLL->[$KK]->[_LEVEL_]++;
9312                 $rLL->[$KK]->[_CI_LEVEL_] = 0;
9313             }
9314
9315             # set flag for -wn option, which will remove the level
9316             $rmultiline_qw_has_extra_level->{$qw_seqno} = 1;
9317         }
9318     }
9319
9320     # For the -lp option we need to mark all parent containers of
9321     # multiline quotes
9322     if ($rOpts_line_up_parentheses) {
9323
9324         while ( my ( $qw_seqno, $rKrange ) =
9325             each %{$rKrange_multiline_qw_by_seqno} )
9326         {
9327             my ( $Kbeg, $Kend ) = @{$rKrange};
9328             my $parent_seqno = $self->parent_seqno_by_K($Kend);
9329             next unless ($parent_seqno);
9330
9331             # If the parent container exactly surrounds this qw, then -lp
9332             # formatting seems to work so we will not mark it.
9333             my $is_tightly_contained;
9334             my $Kn      = $self->K_next_nonblank($Kend);
9335             my $seqno_n = defined($Kn) ? $rLL->[$Kn]->[_TYPE_SEQUENCE_] : undef;
9336             if ( defined($seqno_n) && $seqno_n eq $parent_seqno ) {
9337
9338                 my $Kp = $self->K_previous_nonblank($Kbeg);
9339                 my $seqno_p =
9340                   defined($Kp) ? $rLL->[$Kp]->[_TYPE_SEQUENCE_] : undef;
9341                 if ( defined($seqno_p) && $seqno_p eq $parent_seqno ) {
9342                     $is_tightly_contained = 1;
9343                 }
9344             }
9345
9346             $ris_excluded_lp_container->{$parent_seqno} = 1
9347               unless ($is_tightly_contained);
9348
9349             # continue up the tree marking parent containers
9350             while (1) {
9351                 $parent_seqno = $self->[_rparent_of_seqno_]->{$parent_seqno};
9352                 last
9353                   unless ( defined($parent_seqno)
9354                     && $parent_seqno ne SEQ_ROOT );
9355                 $ris_excluded_lp_container->{$parent_seqno} = 1;
9356             }
9357         }
9358     }
9359
9360     $self->[_rstarting_multiline_qw_seqno_by_K_] =
9361       $rstarting_multiline_qw_seqno_by_K;
9362     $self->[_rending_multiline_qw_seqno_by_K_] =
9363       $rending_multiline_qw_seqno_by_K;
9364     $self->[_rKrange_multiline_qw_by_seqno_] = $rKrange_multiline_qw_by_seqno;
9365     $self->[_rmultiline_qw_has_extra_level_] = $rmultiline_qw_has_extra_level;
9366
9367     return;
9368 }
9369
9370 sub is_excluded_lp {
9371
9372     # decide if this container is excluded by user request
9373     # returns true if this token is excluded (i.e., may not use -lp)
9374     # returns false otherwise
9375
9376     # note similarity with sub 'is_excluded_weld'
9377     my ( $self, $KK ) = @_;
9378     my $rLL         = $self->[_rLL_];
9379     my $rtoken_vars = $rLL->[$KK];
9380     my $token       = $rtoken_vars->[_TOKEN_];
9381     my $rflags      = $line_up_parentheses_exclusion_rules{$token};
9382     return 0 unless ( defined($rflags) );
9383     my ( $flag1, $flag2 ) = @{$rflags};
9384
9385     # There are two flags:
9386     # flag1 excludes based on the preceding nonblank word
9387     # flag2 excludes based on the contents of the container
9388     return 0 unless ( defined($flag1) );
9389     return 1 if $flag1 eq '*';
9390
9391     # Find the previous token
9392     my ( $is_f, $is_k, $is_w );
9393     my $Kp = $self->K_previous_nonblank($KK);
9394     if ( defined($Kp) ) {
9395         my $type_p = $rLL->[$Kp]->[_TYPE_];
9396         my $seqno  = $rtoken_vars->[_TYPE_SEQUENCE_];
9397
9398         # keyword?
9399         $is_k = $type_p eq 'k';
9400
9401         # function call?
9402         $is_f = $self->[_ris_function_call_paren_]->{$seqno};
9403
9404         # either keyword or function call?
9405         $is_w = $is_k || $is_f;
9406     }
9407
9408     # Check for exclusion based on flag1 and the previous token:
9409     my $match;
9410     if    ( $flag1 eq 'k' ) { $match = $is_k }
9411     elsif ( $flag1 eq 'K' ) { $match = !$is_k }
9412     elsif ( $flag1 eq 'f' ) { $match = $is_f }
9413     elsif ( $flag1 eq 'F' ) { $match = !$is_f }
9414     elsif ( $flag1 eq 'w' ) { $match = $is_w }
9415     elsif ( $flag1 eq 'W' ) { $match = !$is_w }
9416     return $match if ($match);
9417
9418     # Check for exclusion based on flag2 and the container contents
9419     # Current options to filter on contents:
9420     # 0 or blank: ignore container contents
9421     # 1 exclude non-lists or lists with sublists
9422     # 2 same as 1 but also exclude lists with code blocks
9423
9424     # Note:
9425     # Containers with multiline-qw containers are automatically
9426     # excluded so do not need to be checked.
9427     if ($flag2) {
9428
9429         my $seqno = $rtoken_vars->[_TYPE_SEQUENCE_];
9430
9431         my $is_list        = $self->[_ris_list_by_seqno_]->{$seqno};
9432         my $has_list       = $self->[_rhas_list_]->{$seqno};
9433         my $has_code_block = $self->[_rhas_code_block_]->{$seqno};
9434         my $has_ternary    = $self->[_rhas_ternary_]->{$seqno};
9435         if (  !$is_list
9436             || $has_list
9437             || $flag2 eq '2' && ( $has_code_block || $has_ternary ) )
9438         {
9439             $match = 1;
9440         }
9441     }
9442     return $match;
9443 }
9444
9445 sub set_excluded_lp_containers {
9446
9447     my ($self) = @_;
9448     return unless ($rOpts_line_up_parentheses);
9449     my $rLL = $self->[_rLL_];
9450     return unless ( defined($rLL) && @{$rLL} );
9451
9452     my $K_opening_container       = $self->[_K_opening_container_];
9453     my $ris_excluded_lp_container = $self->[_ris_excluded_lp_container_];
9454
9455     foreach my $seqno ( keys %{$K_opening_container} ) {
9456         my $KK = $K_opening_container->{$seqno};
9457         next unless defined($KK);
9458
9459         # code blocks are always excluded by the -lp coding so we can skip them
9460         next if ( $rLL->[$KK]->[_BLOCK_TYPE_] );
9461
9462         # see if a user exclusion rule turns off -lp for this container
9463         if ( $self->is_excluded_lp($KK) ) {
9464             $ris_excluded_lp_container->{$seqno} = 1;
9465         }
9466     }
9467     return;
9468 }
9469
9470 ######################################
9471 # CODE SECTION 6: Process line-by-line
9472 ######################################
9473
9474 sub process_all_lines {
9475
9476     # Main loop over all lines of a file.
9477     # Lines are processed according to type.
9478
9479     my $self                       = shift;
9480     my $rlines                     = $self->[_rlines_];
9481     my $sink_object                = $self->[_sink_object_];
9482     my $fh_tee                     = $self->[_fh_tee_];
9483     my $rOpts_keep_old_blank_lines = $rOpts->{'keep-old-blank-lines'};
9484     my $file_writer_object         = $self->[_file_writer_object_];
9485     my $logger_object              = $self->[_logger_object_];
9486     my $vertical_aligner_object    = $self->[_vertical_aligner_object_];
9487     my $save_logfile               = $self->[_save_logfile_];
9488
9489     # Note for RT#118553, leave only one newline at the end of a file.
9490     # Example code to do this is in comments below:
9491     # my $Opt_trim_ending_blank_lines = 0;
9492     # if ($Opt_trim_ending_blank_lines) {
9493     #     while ( my $line_of_tokens = pop @{$rlines} ) {
9494     #         my $line_type = $line_of_tokens->{_line_type};
9495     #         if ( $line_type eq 'CODE' ) {
9496     #             my $CODE_type = $line_of_tokens->{_code_type};
9497     #             next if ( $CODE_type eq 'BL' );
9498     #         }
9499     #         push @{$rlines}, $line_of_tokens;
9500     #         last;
9501     #     }
9502     # }
9503
9504    # But while this would be a trivial update, it would have very undesirable
9505    # side effects when perltidy is run from within an editor on a small snippet.
9506    # So this is best done with a separate filter, such
9507    # as 'delete_ending_blank_lines.pl' in the examples folder.
9508
9509     # Flag to prevent blank lines when POD occurs in a format skipping sect.
9510     my $in_format_skipping_section;
9511
9512     # set locations for blanks around long runs of keywords
9513     my $rwant_blank_line_after = $self->keyword_group_scan();
9514
9515     my $line_type      = "";
9516     my $i_last_POD_END = -10;
9517     my $i              = -1;
9518     foreach my $line_of_tokens ( @{$rlines} ) {
9519         $i++;
9520
9521         # insert blank lines requested for keyword sequences
9522         if (   $i > 0
9523             && defined( $rwant_blank_line_after->{ $i - 1 } )
9524             && $rwant_blank_line_after->{ $i - 1 } == 1 )
9525         {
9526             $self->want_blank_line();
9527         }
9528
9529         my $last_line_type = $line_type;
9530         $line_type = $line_of_tokens->{_line_type};
9531         my $input_line = $line_of_tokens->{_line_text};
9532
9533         # _line_type codes are:
9534         #   SYSTEM         - system-specific code before hash-bang line
9535         #   CODE           - line of perl code (including comments)
9536         #   POD_START      - line starting pod, such as '=head'
9537         #   POD            - pod documentation text
9538         #   POD_END        - last line of pod section, '=cut'
9539         #   HERE           - text of here-document
9540         #   HERE_END       - last line of here-doc (target word)
9541         #   FORMAT         - format section
9542         #   FORMAT_END     - last line of format section, '.'
9543         #   DATA_START     - __DATA__ line
9544         #   DATA           - unidentified text following __DATA__
9545         #   END_START      - __END__ line
9546         #   END            - unidentified text following __END__
9547         #   ERROR          - we are in big trouble, probably not a perl script
9548
9549         # put a blank line after an =cut which comes before __END__ and __DATA__
9550         # (required by podchecker)
9551         if ( $last_line_type eq 'POD_END' && !$self->[_saw_END_or_DATA_] ) {
9552             $i_last_POD_END = $i;
9553             $file_writer_object->reset_consecutive_blank_lines();
9554             if ( !$in_format_skipping_section && $input_line !~ /^\s*$/ ) {
9555                 $self->want_blank_line();
9556             }
9557         }
9558
9559         # handle line of code..
9560         if ( $line_type eq 'CODE' ) {
9561
9562             my $CODE_type = $line_of_tokens->{_code_type};
9563             $in_format_skipping_section = $CODE_type eq 'FS';
9564
9565             # Handle blank lines
9566             if ( $CODE_type eq 'BL' ) {
9567
9568                 # Keep this blank? Start with the flag -kbl=n, where
9569                 #   n=0 ignore all old blank lines
9570                 #   n=1 stable: keep old blanks, but limited by -mbl=n
9571                 #   n=2 keep all old blank lines, regardless of -mbl=n
9572                 # If n=0 we delete all old blank lines and let blank line
9573                 # rules generate any needed blank lines.
9574                 my $kgb_keep = $rOpts_keep_old_blank_lines;
9575
9576                 # Then delete lines requested by the keyword-group logic if
9577                 # allowed
9578                 if (   $kgb_keep == 1
9579                     && defined( $rwant_blank_line_after->{$i} )
9580                     && $rwant_blank_line_after->{$i} == 2 )
9581                 {
9582                     $kgb_keep = 0;
9583                 }
9584
9585                 # But always keep a blank line following an =cut
9586                 if ( $i - $i_last_POD_END < 3 && !$kgb_keep ) {
9587                     $kgb_keep = 1;
9588                 }
9589
9590                 if ($kgb_keep) {
9591                     $self->flush($CODE_type);
9592                     $file_writer_object->write_blank_code_line(
9593                         $rOpts_keep_old_blank_lines == 2 );
9594                     $self->[_last_line_leading_type_] = 'b';
9595                 }
9596                 next;
9597             }
9598             else {
9599
9600           # Let logger see all non-blank lines of code. This is a slow operation
9601           # so we avoid it if it is not going to be saved.
9602                 if ( $save_logfile && $logger_object ) {
9603                     $logger_object->black_box( $line_of_tokens,
9604                         $vertical_aligner_object->get_output_line_number );
9605                 }
9606             }
9607
9608             # Handle Format Skipping (FS) and Verbatim (VB) Lines
9609             if ( $CODE_type eq 'VB' || $CODE_type eq 'FS' ) {
9610                 $self->write_unindented_line("$input_line");
9611                 $file_writer_object->reset_consecutive_blank_lines();
9612                 next;
9613             }
9614
9615             # Handle all other lines of code
9616             $self->process_line_of_CODE($line_of_tokens);
9617         }
9618
9619         # handle line of non-code..
9620         else {
9621
9622             # set special flags
9623             my $skip_line = 0;
9624             if ( substr( $line_type, 0, 3 ) eq 'POD' ) {
9625
9626                 # Pod docs should have a preceding blank line.  But stay
9627                 # out of __END__ and __DATA__ sections, because
9628                 # the user may be using this section for any purpose whatsoever
9629                 if ( $rOpts->{'delete-pod'} ) { $skip_line = 1; }
9630                 if ( $rOpts->{'trim-pod'} )   { $input_line =~ s/\s+$// }
9631                 if (   !$skip_line
9632                     && !$in_format_skipping_section
9633                     && $line_type eq 'POD_START'
9634                     && !$self->[_saw_END_or_DATA_] )
9635                 {
9636                     $self->want_blank_line();
9637                 }
9638             }
9639
9640             # leave the blank counters in a predictable state
9641             # after __END__ or __DATA__
9642             elsif ( $line_type eq 'END_START' || $line_type eq 'DATA_START' ) {
9643                 $file_writer_object->reset_consecutive_blank_lines();
9644                 $self->[_saw_END_or_DATA_] = 1;
9645             }
9646
9647             # write unindented non-code line
9648             if ( !$skip_line ) {
9649                 $self->write_unindented_line($input_line);
9650             }
9651         }
9652     }
9653     return;
9654
9655 } ## end sub process_all_lines
9656
9657 sub keyword_group_scan {
9658     my $self = shift;
9659
9660     # Called once per file to process the --keyword-group-blanks-* parameters.
9661
9662     # Manipulate blank lines around keyword groups (kgb* flags)
9663     # Scan all lines looking for runs of consecutive lines beginning with
9664     # selected keywords.  Example keywords are 'my', 'our', 'local', ... but
9665     # they may be anything.  We will set flags requesting that blanks be
9666     # inserted around and within them according to input parameters.  Note
9667     # that we are scanning the lines as they came in in the input stream, so
9668     # they are not necessarily well formatted.
9669
9670     # The output of this sub is a return hash ref whose keys are the indexes of
9671     # lines after which we desire a blank line.  For line index i:
9672     #     $rhash_of_desires->{$i} = 1 means we want a blank line AFTER line $i
9673     #     $rhash_of_desires->{$i} = 2 means we want blank line $i removed
9674     my $rhash_of_desires = {};
9675
9676     # Nothing to do if no blanks can be output. This test added to fix
9677     # case b760.
9678     if ( !$rOpts_maximum_consecutive_blank_lines ) {
9679         return $rhash_of_desires;
9680     }
9681
9682     my $Opt_blanks_before = $rOpts->{'keyword-group-blanks-before'};   # '-kgbb'
9683     my $Opt_blanks_after  = $rOpts->{'keyword-group-blanks-after'};    # '-kgba'
9684     my $Opt_blanks_inside = $rOpts->{'keyword-group-blanks-inside'};   # '-kgbi'
9685     my $Opt_blanks_delete = $rOpts->{'keyword-group-blanks-delete'};   # '-kgbd'
9686     my $Opt_size          = $rOpts->{'keyword-group-blanks-size'};     # '-kgbs'
9687
9688     # A range of sizes can be input with decimal notation like 'min.max' with
9689     # any number of dots between the two numbers. Examples:
9690     #    string    =>    min    max  matches
9691     #    1.1             1      1    exactly 1
9692     #    1.3             1      3    1,2, or 3
9693     #    1..3            1      3    1,2, or 3
9694     #    5               5      -    5 or more
9695     #    6.              6      -    6 or more
9696     #    .2              -      2    up to 2
9697     #    1.0             1      0    nothing
9698     my ( $Opt_size_min, $Opt_size_max ) = split /\.+/, $Opt_size;
9699     if (   $Opt_size_min && $Opt_size_min !~ /^\d+$/
9700         || $Opt_size_max && $Opt_size_max !~ /^\d+$/ )
9701     {
9702         Warn(<<EOM);
9703 Unexpected value for -kgbs: '$Opt_size'; expecting 'min' or 'min.max';
9704 ignoring all -kgb flags
9705 EOM
9706
9707         # Turn this option off so that this message does not keep repeating
9708         # during iterations and other files.
9709         $rOpts->{'keyword-group-blanks-size'} = "";
9710         return $rhash_of_desires;
9711     }
9712     $Opt_size_min = 1 unless ($Opt_size_min);
9713
9714     if ( $Opt_size_max && $Opt_size_max < $Opt_size_min ) {
9715         return $rhash_of_desires;
9716     }
9717
9718     # codes for $Opt_blanks_before and $Opt_blanks_after:
9719     # 0 = never (delete if exist)
9720     # 1 = stable (keep unchanged)
9721     # 2 = always (insert if missing)
9722
9723     return $rhash_of_desires
9724       unless $Opt_size_min > 0
9725       && ( $Opt_blanks_before != 1
9726         || $Opt_blanks_after != 1
9727         || $Opt_blanks_inside
9728         || $Opt_blanks_delete );
9729
9730     my $Opt_pattern         = $keyword_group_list_pattern;
9731     my $Opt_comment_pattern = $keyword_group_list_comment_pattern;
9732     my $Opt_repeat_count =
9733       $rOpts->{'keyword-group-blanks-repeat-count'};    # '-kgbr'
9734
9735     my $rlines              = $self->[_rlines_];
9736     my $rLL                 = $self->[_rLL_];
9737     my $K_closing_container = $self->[_K_closing_container_];
9738
9739     # variables for the current group and subgroups:
9740     my ( $ibeg, $iend, $count, $level_beg, $K_closing, @iblanks, @group,
9741         @subgroup );
9742
9743     # Definitions:
9744     # ($ibeg, $iend) = starting and ending line indexes of this entire group
9745     #         $count = total number of keywords seen in this entire group
9746     #     $level_beg = indententation level of this group
9747     #         @group = [ $i, $token, $count ] =list of all keywords & blanks
9748     #      @subgroup =  $j, index of group where token changes
9749     #       @iblanks = line indexes of blank lines in input stream in this group
9750     #  where i=starting line index
9751     #        token (the keyword)
9752     #        count = number of this token in this subgroup
9753     #            j = index in group where token changes
9754     #
9755     # These vars will contain values for the most recently seen line:
9756     my ( $line_type, $CODE_type, $K_first, $K_last );
9757
9758     my $number_of_groups_seen = 0;
9759
9760     ####################
9761     # helper subroutines
9762     ####################
9763
9764     my $insert_blank_after = sub {
9765         my ($i) = @_;
9766         $rhash_of_desires->{$i} = 1;
9767         my $ip = $i + 1;
9768         if ( defined( $rhash_of_desires->{$ip} )
9769             && $rhash_of_desires->{$ip} == 2 )
9770         {
9771             $rhash_of_desires->{$ip} = 0;
9772         }
9773         return;
9774     };
9775
9776     my $split_into_sub_groups = sub {
9777
9778         # place blanks around long sub-groups of keywords
9779         # ...if requested
9780         return unless ($Opt_blanks_inside);
9781
9782         # loop over sub-groups, index k
9783         push @subgroup, scalar @group;
9784         my $kbeg = 1;
9785         my $kend = @subgroup - 1;
9786         for ( my $k = $kbeg ; $k <= $kend ; $k++ ) {
9787
9788             # index j runs through all keywords found
9789             my $j_b = $subgroup[ $k - 1 ];
9790             my $j_e = $subgroup[$k] - 1;
9791
9792             # index i is the actual line number of a keyword
9793             my ( $i_b, $tok_b, $count_b ) = @{ $group[$j_b] };
9794             my ( $i_e, $tok_e, $count_e ) = @{ $group[$j_e] };
9795             my $num = $count_e - $count_b + 1;
9796
9797             # This subgroup runs from line $ib to line $ie-1, but may contain
9798             # blank lines
9799             if ( $num >= $Opt_size_min ) {
9800
9801                 # if there are blank lines, we require that at least $num lines
9802                 # be non-blank up to the boundary with the next subgroup.
9803                 my $nog_b = my $nog_e = 1;
9804                 if ( @iblanks && !$Opt_blanks_delete ) {
9805                     my $j_bb = $j_b + $num - 1;
9806                     my ( $i_bb, $tok_bb, $count_bb ) = @{ $group[$j_bb] };
9807                     $nog_b = $count_bb - $count_b + 1 == $num;
9808
9809                     my $j_ee = $j_e - ( $num - 1 );
9810                     my ( $i_ee, $tok_ee, $count_ee ) = @{ $group[$j_ee] };
9811                     $nog_e = $count_e - $count_ee + 1 == $num;
9812                 }
9813                 if ( $nog_b && $k > $kbeg ) {
9814                     $insert_blank_after->( $i_b - 1 );
9815                 }
9816                 if ( $nog_e && $k < $kend ) {
9817                     my ( $i_ep, $tok_ep, $count_ep ) = @{ $group[ $j_e + 1 ] };
9818                     $insert_blank_after->( $i_ep - 1 );
9819                 }
9820             }
9821         }
9822     };
9823
9824     my $delete_if_blank = sub {
9825         my ($i) = @_;
9826
9827         # delete line $i if it is blank
9828         return unless ( $i >= 0 && $i < @{$rlines} );
9829         my $line_type = $rlines->[$i]->{_line_type};
9830         return if ( $line_type ne 'CODE' );
9831         my $code_type = $rlines->[$i]->{_code_type};
9832         if ( $code_type eq 'BL' ) { $rhash_of_desires->{$i} = 2; }
9833         return;
9834     };
9835
9836     my $delete_inner_blank_lines = sub {
9837
9838         # always remove unwanted trailing blank lines from our list
9839         return unless (@iblanks);
9840         while ( my $ibl = pop(@iblanks) ) {
9841             if ( $ibl < $iend ) { push @iblanks, $ibl; last }
9842             $iend = $ibl;
9843         }
9844
9845         # now mark mark interior blank lines for deletion if requested
9846         return unless ($Opt_blanks_delete);
9847
9848         while ( my $ibl = pop(@iblanks) ) { $rhash_of_desires->{$ibl} = 2 }
9849
9850     };
9851
9852     my $end_group = sub {
9853
9854         # end a group of keywords
9855         my ($bad_ending) = @_;
9856         if ( defined($ibeg) && $ibeg >= 0 ) {
9857
9858             # then handle sufficiently large groups
9859             if ( $count >= $Opt_size_min ) {
9860
9861                 $number_of_groups_seen++;
9862
9863                 # do any blank deletions regardless of the count
9864                 $delete_inner_blank_lines->();
9865
9866                 if ( $ibeg > 0 ) {
9867                     my $code_type = $rlines->[ $ibeg - 1 ]->{_code_type};
9868
9869                     # patch for hash bang line which is not currently marked as
9870                     # a comment; mark it as a comment
9871                     if ( $ibeg == 1 && !$code_type ) {
9872                         my $line_text = $rlines->[ $ibeg - 1 ]->{_line_text};
9873                         $code_type = 'BC'
9874                           if ( $line_text && $line_text =~ /^#/ );
9875                     }
9876
9877                     # Do not insert a blank after a comment
9878                     # (this could be subject to a flag in the future)
9879                     if ( $code_type !~ /(BC|SBC|SBCX)/ ) {
9880                         if ( $Opt_blanks_before == INSERT ) {
9881                             $insert_blank_after->( $ibeg - 1 );
9882
9883                         }
9884                         elsif ( $Opt_blanks_before == DELETE ) {
9885                             $delete_if_blank->( $ibeg - 1 );
9886                         }
9887                     }
9888                 }
9889
9890                 # We will only put blanks before code lines. We could loosen
9891                 # this rule a little, but we have to be very careful because
9892                 # for example we certainly don't want to drop a blank line
9893                 # after a line like this:
9894                 #   my $var = <<EOM;
9895                 if ( $line_type eq 'CODE' && defined($K_first) ) {
9896
9897                     # - Do not put a blank before a line of different level
9898                     # - Do not put a blank line if we ended the search badly
9899                     # - Do not put a blank at the end of the file
9900                     # - Do not put a blank line before a hanging side comment
9901                     my $level    = $rLL->[$K_first]->[_LEVEL_];
9902                     my $ci_level = $rLL->[$K_first]->[_CI_LEVEL_];
9903
9904                     if (   $level == $level_beg
9905                         && $ci_level == 0
9906                         && !$bad_ending
9907                         && $iend < @{$rlines}
9908                         && $CODE_type ne 'HSC' )
9909                     {
9910                         if ( $Opt_blanks_after == INSERT ) {
9911                             $insert_blank_after->($iend);
9912                         }
9913                         elsif ( $Opt_blanks_after == DELETE ) {
9914                             $delete_if_blank->( $iend + 1 );
9915                         }
9916                     }
9917                 }
9918             }
9919             $split_into_sub_groups->();
9920         }
9921
9922         # reset for another group
9923         $ibeg      = -1;
9924         $iend      = undef;
9925         $level_beg = -1;
9926         $K_closing = undef;
9927         @group     = ();
9928         @subgroup  = ();
9929         @iblanks   = ();
9930     };
9931
9932     my $find_container_end = sub {
9933
9934         # If the keyword lines ends with an open token, find the closing token
9935         # '$K_closing' so that we can easily skip past the contents of the
9936         # container.
9937         return if ( $K_last <= $K_first );
9938         my $KK        = $K_last;
9939         my $type_last = $rLL->[$KK]->[_TYPE_];
9940         my $tok_last  = $rLL->[$KK]->[_TOKEN_];
9941         if ( $type_last eq '#' ) {
9942             $KK       = $self->K_previous_nonblank($KK);
9943             $tok_last = $rLL->[$KK]->[_TOKEN_];
9944         }
9945         if ( $KK > $K_first && $tok_last =~ /^[\(\{\[]$/ ) {
9946
9947             my $type_sequence = $rLL->[$KK]->[_TYPE_SEQUENCE_];
9948             my $lev           = $rLL->[$KK]->[_LEVEL_];
9949             if ( $lev == $level_beg ) {
9950                 $K_closing = $K_closing_container->{$type_sequence};
9951             }
9952         }
9953     };
9954
9955     my $add_to_group = sub {
9956         my ( $i, $token, $level ) = @_;
9957
9958         # End the previous group if we have reached the maximum
9959         # group size
9960         if ( $Opt_size_max && @group >= $Opt_size_max ) {
9961             $end_group->();
9962         }
9963
9964         if ( @group == 0 ) {
9965             $ibeg      = $i;
9966             $level_beg = $level;
9967             $count     = 0;
9968         }
9969
9970         $count++;
9971         $iend = $i;
9972
9973         # New sub-group?
9974         if ( !@group || $token ne $group[-1]->[1] ) {
9975             push @subgroup, scalar(@group);
9976         }
9977         push @group, [ $i, $token, $count ];
9978
9979         # remember if this line ends in an open container
9980         $find_container_end->();
9981
9982         return;
9983     };
9984
9985     ###################################
9986     # loop over all lines of the source
9987     ###################################
9988     $end_group->();
9989     my $i = -1;
9990     foreach my $line_of_tokens ( @{$rlines} ) {
9991
9992         $i++;
9993         last
9994           if ( $Opt_repeat_count > 0
9995             && $number_of_groups_seen >= $Opt_repeat_count );
9996
9997         $CODE_type = "";
9998         $K_first   = undef;
9999         $K_last    = undef;
10000         $line_type = $line_of_tokens->{_line_type};
10001
10002         # always end a group at non-CODE
10003         if ( $line_type ne 'CODE' ) { $end_group->(); next }
10004
10005         $CODE_type = $line_of_tokens->{_code_type};
10006
10007         # end any group at a format skipping line
10008         if ( $CODE_type && $CODE_type eq 'FS' ) {
10009             $end_group->();
10010             next;
10011         }
10012
10013         # continue in a verbatim (VB) type; it may be quoted text
10014         if ( $CODE_type eq 'VB' ) {
10015             if ( $ibeg >= 0 ) { $iend = $i; }
10016             next;
10017         }
10018
10019         # and continue in blank (BL) types
10020         if ( $CODE_type eq 'BL' ) {
10021             if ( $ibeg >= 0 ) {
10022                 $iend = $i;
10023                 push @{iblanks}, $i;
10024
10025                 # propagate current subgroup token
10026                 my $tok = $group[-1]->[1];
10027                 push @group, [ $i, $tok, $count ];
10028             }
10029             next;
10030         }
10031
10032         # examine the first token of this line
10033         my $rK_range = $line_of_tokens->{_rK_range};
10034         ( $K_first, $K_last ) = @{$rK_range};
10035         if ( !defined($K_first) ) {
10036
10037             # Somewhat unexpected blank line..
10038             # $rK_range is normally defined for line type CODE, but this can
10039             # happen for example if the input line was a single semicolon which
10040             # is being deleted.  In that case there was code in the input
10041             # file but it is not being retained. So we can silently return.
10042             return $rhash_of_desires;
10043         }
10044
10045         # This is not for keywords in lists ( keyword 'my' can occur in lists,
10046         # see case b760)
10047         next if ( $self->is_list_by_K($K_first) );
10048
10049         my $level    = $rLL->[$K_first]->[_LEVEL_];
10050         my $type     = $rLL->[$K_first]->[_TYPE_];
10051         my $token    = $rLL->[$K_first]->[_TOKEN_];
10052         my $ci_level = $rLL->[$K_first]->[_CI_LEVEL_];
10053
10054         # see if this is a code type we seek (i.e. comment)
10055         if (   $CODE_type
10056             && $Opt_comment_pattern
10057             && $CODE_type =~ /$Opt_comment_pattern/ )
10058         {
10059
10060             my $tok = $CODE_type;
10061
10062             # Continuing a group
10063             if ( $ibeg >= 0 && $level == $level_beg ) {
10064                 $add_to_group->( $i, $tok, $level );
10065             }
10066
10067             # Start new group
10068             else {
10069
10070                 # first end old group if any; we might be starting new
10071                 # keywords at different level
10072                 if ( $ibeg > 0 ) { $end_group->(); }
10073                 $add_to_group->( $i, $tok, $level );
10074             }
10075             next;
10076         }
10077
10078         # See if it is a keyword we seek, but never start a group in a
10079         # continuation line; the code may be badly formatted.
10080         if (   $ci_level == 0
10081             && $type eq 'k'
10082             && $token =~ /$Opt_pattern/ )
10083         {
10084
10085             # Continuing a keyword group
10086             if ( $ibeg >= 0 && $level == $level_beg ) {
10087                 $add_to_group->( $i, $token, $level );
10088             }
10089
10090             # Start new keyword group
10091             else {
10092
10093                 # first end old group if any; we might be starting new
10094                 # keywords at different level
10095                 if ( $ibeg > 0 ) { $end_group->(); }
10096                 $add_to_group->( $i, $token, $level );
10097             }
10098             next;
10099         }
10100
10101         # This is not one of our keywords, but we are in a keyword group
10102         # so see if we should continue or quit
10103         elsif ( $ibeg >= 0 ) {
10104
10105             # - bail out on a large level change; we may have walked into a
10106             #   data structure or anoymous sub code.
10107             if ( $level > $level_beg + 1 || $level < $level_beg ) {
10108                 $end_group->();
10109                 next;
10110             }
10111
10112             # - keep going on a continuation line of the same level, since
10113             #   it is probably a continuation of our previous keyword,
10114             # - and keep going past hanging side comments because we never
10115             #   want to interrupt them.
10116             if ( ( ( $level == $level_beg ) && $ci_level > 0 )
10117                 || $CODE_type eq 'HSC' )
10118             {
10119                 $iend = $i;
10120                 next;
10121             }
10122
10123             # - continue if if we are within in a container which started with
10124             # the line of the previous keyword.
10125             if ( defined($K_closing) && $K_first <= $K_closing ) {
10126
10127                 # continue if entire line is within container
10128                 if ( $K_last <= $K_closing ) { $iend = $i; next }
10129
10130                 # continue at ); or }; or ];
10131                 my $KK = $K_closing + 1;
10132                 if ( $rLL->[$KK]->[_TYPE_] eq ';' ) {
10133                     if ( $KK < $K_last ) {
10134                         if ( $rLL->[ ++$KK ]->[_TYPE_] eq 'b' ) { ++$KK }
10135                         if ( $KK > $K_last || $rLL->[$KK]->[_TYPE_] ne '#' ) {
10136                             $end_group->(1);
10137                             next;
10138                         }
10139                     }
10140                     $iend = $i;
10141                     next;
10142                 }
10143
10144                 $end_group->(1);
10145                 next;
10146             }
10147
10148             # - end the group if none of the above
10149             $end_group->();
10150             next;
10151         }
10152
10153         # not in a keyword group; continue
10154         else { next }
10155     }
10156
10157     # end of loop over all lines
10158     $end_group->();
10159     return $rhash_of_desires;
10160
10161 } ## end sub keyword_group_scan
10162
10163 #######################################
10164 # CODE SECTION 7: Process lines of code
10165 #######################################
10166
10167 {    ## begin closure process_line_of_CODE
10168
10169     # The routines in this closure receive lines of code and combine them into
10170     # 'batches' and send them along. A 'batch' is the unit of code which can be
10171     # processed further as a unit. It has the property that it is the largest
10172     # amount of code into which which perltidy is free to place one or more
10173     # line breaks within it without violating any constraints.
10174
10175     # When a new batch is formed it is sent to sub 'grind_batch_of_code'.
10176
10177     # flags needed by the store routine
10178     my $line_of_tokens;
10179     my $no_internal_newlines;
10180     my $side_comment_follows;
10181     my $CODE_type;
10182
10183     # range of K of tokens for the current line
10184     my ( $K_first, $K_last );
10185
10186     my ( $rLL, $radjusted_levels );
10187
10188     # past stored nonblank tokens
10189     my (
10190         $last_last_nonblank_token,  $last_last_nonblank_type,
10191         $last_nonblank_token,       $last_nonblank_type,
10192         $last_nonblank_block_type,  $K_last_nonblank_code,
10193         $K_last_last_nonblank_code, $looking_for_else,
10194         $is_static_block_comment,   $batch_CODE_type,
10195         $last_line_had_side_comment,
10196     );
10197
10198     # Called once at the start of a new file
10199     sub initialize_process_line_of_CODE {
10200         $last_nonblank_token        = ';';
10201         $last_nonblank_type         = ';';
10202         $last_last_nonblank_token   = ';';
10203         $last_last_nonblank_type    = ';';
10204         $last_nonblank_block_type   = "";
10205         $K_last_nonblank_code       = undef;
10206         $K_last_last_nonblank_code  = undef;
10207         $looking_for_else           = 0;
10208         $is_static_block_comment    = 0;
10209         $batch_CODE_type            = "";
10210         $last_line_had_side_comment = 0;
10211         return;
10212     }
10213
10214     # Batch variables: these describe the current batch of code being formed
10215     # and sent down the pipeline.  They are initialized in the next
10216     # sub.
10217     my ( $rbrace_follower, $index_start_one_line_block,
10218         $semicolons_before_block_self_destruct,
10219         $starting_in_quote, $ending_in_quote, );
10220
10221     # Called before the start of each new batch
10222     sub initialize_batch_variables {
10223
10224         $max_index_to_go      = UNDEFINED_INDEX;
10225         @summed_lengths_to_go = @nesting_depth_to_go = (0);
10226
10227         # The initialization code for the remaining batch arrays is as follows
10228         # and can be activated for testing.  But profiling shows that it is
10229         # time-consuming to re-initialize the batch arrays and is not necessary
10230         # because the maximum valid token, $max_index_to_go, is carefully
10231         # controlled.  This means however that it is not possible to do any
10232         # type of filter or map operation directly on these arrays.  And it is
10233         # not possible to use negative indexes. As a precaution against program
10234         # changes which might do this, sub pad_array_to_go adds some undefs at
10235         # the end of the current batch of data.
10236
10237         # So 'long story short': this is a waste of time
10238         0 && do { #<<<
10239         @block_type_to_go        = ();
10240         @type_sequence_to_go     = ();
10241         @bond_strength_to_go     = ();
10242         @forced_breakpoint_to_go = ();
10243         @token_lengths_to_go     = ();
10244         @levels_to_go            = ();
10245         @mate_index_to_go        = ();
10246         @ci_levels_to_go         = ();
10247         @nobreak_to_go           = ();
10248         @old_breakpoint_to_go    = ();
10249         @tokens_to_go            = ();
10250         @K_to_go                 = ();
10251         @types_to_go             = ();
10252         @leading_spaces_to_go    = ();
10253         @reduced_spaces_to_go    = ();
10254         @inext_to_go             = ();
10255         @iprev_to_go             = ();
10256         @parent_seqno_to_go      = ();
10257         };
10258
10259         $rbrace_follower = undef;
10260         $ending_in_quote = 0;
10261         destroy_one_line_block();
10262         return;
10263     }
10264
10265     sub leading_spaces_to_go {
10266
10267         # return the number of indentation spaces for a token in the output
10268         # stream; these were previously stored by 'set_leading_whitespace'.
10269
10270         my ($ii) = @_;
10271         return 0 if ( $ii < 0 );
10272         my $indentation = $leading_spaces_to_go[$ii];
10273         return ref($indentation) ? $indentation->get_spaces() : $indentation;
10274     }
10275
10276     sub create_one_line_block {
10277         ( $index_start_one_line_block, $semicolons_before_block_self_destruct )
10278           = @_;
10279         return;
10280     }
10281
10282     sub destroy_one_line_block {
10283         $index_start_one_line_block            = UNDEFINED_INDEX;
10284         $semicolons_before_block_self_destruct = 0;
10285         return;
10286     }
10287
10288     # Routine to place the current token into the output stream.
10289     # Called once per output token.
10290
10291     use constant DEBUG_STORE => 0;
10292
10293     sub store_token_to_go {
10294
10295         my ( $self, $Ktoken_vars, $rtoken_vars ) = @_;
10296
10297         # Add one token to the next batch.
10298         # $Ktoken_vars = the index K in the global token array
10299         # $rtoken_vars = $rLL->[$Ktoken_vars] = the corresponding token values
10300         #                unless they are temporarily being overridden
10301
10302         # NOTE: This routine needs to be coded efficiently because it is called
10303         # once per token.  I have gotten it down from the second slowest to the
10304         # eighth slowest, but that still seems rather slow for what it does.
10305
10306         # This closure variable has already been defined, for efficiency:
10307         #     my $radjusted_levels = $self->[_radjusted_levels_];
10308
10309         my $type = $rtoken_vars->[_TYPE_];
10310
10311         # Check for emergency flush...
10312         # The K indexes in the batch must always be a continuous sequence of
10313         # the global token array.  The batch process programming assumes this.
10314         # If storing this token would cause this relation to fail we must dump
10315         # the current batch before storing the new token.  It is extremely rare
10316         # for this to happen. One known example is the following two-line
10317         # snippet when run with parameters
10318         # --noadd-newlines  --space-terminal-semicolon:
10319         #    if ( $_ =~ /PENCIL/ ) { $pencil_flag= 1 } ; ;
10320         #    $yy=1;
10321         if ( $max_index_to_go >= 0 ) {
10322             my $Klast = $K_to_go[$max_index_to_go];
10323             if ( $Ktoken_vars != $Klast + 1 ) {
10324                 $self->flush_batch_of_CODE();
10325             }
10326
10327             # Do not output consecutive blank tokens ... this should not
10328             # happen, but it is worth checking.  Later code can then make the
10329             # simplifying assumption that blank tokens are not consecutive.
10330             elsif ( $type eq 'b' && $types_to_go[$max_index_to_go] eq 'b' ) {
10331                 return;
10332             }
10333         }
10334
10335         # Do not start a batch with a blank token.
10336         # Fixes cases b149 b888 b984 b985 b986 b987
10337         else {
10338             if ( $type eq 'b' ) { return }
10339         }
10340
10341         ++$max_index_to_go;
10342         $batch_CODE_type               = $CODE_type;
10343         $K_to_go[$max_index_to_go]     = $Ktoken_vars;
10344         $types_to_go[$max_index_to_go] = $type;
10345
10346         $old_breakpoint_to_go[$max_index_to_go]    = 0;
10347         $forced_breakpoint_to_go[$max_index_to_go] = 0;
10348         $mate_index_to_go[$max_index_to_go]        = -1;
10349
10350         my $token = $tokens_to_go[$max_index_to_go] = $rtoken_vars->[_TOKEN_];
10351         my $ci_level = $ci_levels_to_go[$max_index_to_go] =
10352           $rtoken_vars->[_CI_LEVEL_];
10353
10354         # Clip levels to zero if there are level errors in the file.
10355         # We had to wait until now for reasons explained in sub 'write_line'.
10356         my $level = $rtoken_vars->[_LEVEL_];
10357         if ( $level < 0 ) { $level = 0 }
10358         $levels_to_go[$max_index_to_go] = $level;
10359
10360         $nesting_depth_to_go[$max_index_to_go] = $rtoken_vars->[_SLEVEL_];
10361         $block_type_to_go[$max_index_to_go]    = $rtoken_vars->[_BLOCK_TYPE_];
10362         $type_sequence_to_go[$max_index_to_go] =
10363           $rtoken_vars->[_TYPE_SEQUENCE_];
10364
10365         $nobreak_to_go[$max_index_to_go] =
10366           $side_comment_follows ? 2 : $no_internal_newlines;
10367
10368         my $length = $rtoken_vars->[_TOKEN_LENGTH_];
10369
10370         # Safety check that length is defined. Should not be needed now.
10371         # Former patch for indent-only, in which the entire set of tokens is
10372         # turned into type 'q'. Lengths may have not been defined because sub
10373         # 'respace_tokens' is bypassed. We do not need lengths in this case,
10374         # but we will use the character count to have a defined value.  In the
10375         # future, it would be nicer to have 'respace_tokens' convert the lines
10376         # to quotes and get correct lengths.
10377         if ( !defined($length) ) { $length = length($token) }
10378
10379         $token_lengths_to_go[$max_index_to_go] = $length;
10380
10381         # We keep a running sum of token lengths from the start of this batch:
10382         #   summed_lengths_to_go[$i]   = total length to just before token $i
10383         #   summed_lengths_to_go[$i+1] = total length to just after token $i
10384         $summed_lengths_to_go[ $max_index_to_go + 1 ] =
10385           $summed_lengths_to_go[$max_index_to_go] + $length;
10386
10387         my $in_continued_quote =
10388           ( $Ktoken_vars == $K_first ) && $line_of_tokens->{_starting_in_quote};
10389         if ( $max_index_to_go == 0 ) {
10390             $starting_in_quote = $in_continued_quote;
10391         }
10392
10393         # Define the indentation that this token will have in two cases:
10394         # Without CI = reduced_spaces_to_go
10395         # With CI    = leading_spaces_to_go
10396         if ($in_continued_quote) {
10397             $leading_spaces_to_go[$max_index_to_go] = 0;
10398             $reduced_spaces_to_go[$max_index_to_go] = 0;
10399         }
10400         else {
10401             $reduced_spaces_to_go[$max_index_to_go] = my $reduced_spaces =
10402               $rOpts_indent_columns * $radjusted_levels->[$Ktoken_vars];
10403             $leading_spaces_to_go[$max_index_to_go] =
10404               $reduced_spaces + $rOpts_continuation_indentation * $ci_level;
10405         }
10406
10407         # Correct these values if -lp is used
10408         if ($rOpts_line_up_parentheses) {
10409             $self->set_leading_whitespace( $Ktoken_vars, $K_last_nonblank_code,
10410                 $K_last_last_nonblank_code, $level, $ci_level,
10411                 $in_continued_quote );
10412         }
10413
10414         DEBUG_STORE && do {
10415             my ( $a, $b, $c ) = caller();
10416             print STDOUT
10417 "STORE: from $a $c: storing token $token type $type lev=$level at $max_index_to_go\n";
10418         };
10419         return;
10420     }
10421
10422     sub flush_batch_of_CODE {
10423
10424         # Finish any batch packaging and call the process routine.
10425         # This must be the only call to grind_batch_of_CODE()
10426         my ($self) = @_;
10427
10428         return unless ( $max_index_to_go >= 0 );
10429
10430         # Create an array to hold variables for this batch
10431         my $this_batch = [];
10432         $this_batch->[_starting_in_quote_] = $starting_in_quote;
10433         $this_batch->[_ending_in_quote_]   = $ending_in_quote;
10434         $this_batch->[_max_index_to_go_]   = $max_index_to_go;
10435         $this_batch->[_rK_to_go_]          = \@K_to_go;
10436         $this_batch->[_batch_CODE_type_]   = $batch_CODE_type;
10437
10438         # The flag $is_static_block_comment applies to the line which just
10439         # arrived. So it only applies if we are outputting that line.
10440         $this_batch->[_is_static_block_comment_] =
10441              defined($K_first)
10442           && $max_index_to_go == 0
10443           && $K_to_go[0] == $K_first ? $is_static_block_comment : 0;
10444
10445         $self->[_this_batch_] = $this_batch;
10446
10447         $last_line_had_side_comment =
10448           $max_index_to_go > 0 && $types_to_go[$max_index_to_go] eq '#';
10449
10450         $self->grind_batch_of_CODE();
10451
10452         # Done .. this batch is history
10453         $self->[_this_batch_] = [];
10454
10455         initialize_batch_variables();
10456         initialize_forced_breakpoint_vars();
10457         initialize_gnu_batch_vars()
10458           if $rOpts_line_up_parentheses;
10459
10460         return;
10461     }
10462
10463     sub end_batch {
10464
10465         # end the current batch, EXCEPT for a few special cases
10466         my ($self) = @_;
10467
10468         # Exception 1: Do not end line in a weld
10469         return
10470           if ( $total_weld_count
10471             && $self->is_welded_right_at_i($max_index_to_go) );
10472
10473         # Exception 2: just set a tentative breakpoint if we might be in a
10474         # one-line block
10475         if ( $index_start_one_line_block != UNDEFINED_INDEX ) {
10476             $self->set_forced_breakpoint($max_index_to_go);
10477             return;
10478         }
10479
10480         $self->flush_batch_of_CODE();
10481         return;
10482     }
10483
10484     sub flush_vertical_aligner {
10485         my ($self) = @_;
10486         my $vao = $self->[_vertical_aligner_object_];
10487         $vao->flush();
10488         return;
10489     }
10490
10491     # flush is called to output any tokens in the pipeline, so that
10492     # an alternate source of lines can be written in the correct order
10493     sub flush {
10494         my ( $self, $CODE_type ) = @_;
10495
10496         # end the current batch with 1 exception
10497
10498         destroy_one_line_block();
10499
10500         # Exception: if we are flushing within the code stream only to insert
10501         # blank line(s), then we can keep the batch intact at a weld. This
10502         # improves formatting of -ce.  See test 'ce1.ce'
10503         if ( $CODE_type && $CODE_type eq 'BL' ) { $self->end_batch() }
10504
10505         # otherwise, we have to shut things down completely.
10506         else { $self->flush_batch_of_CODE() }
10507
10508         $self->flush_vertical_aligner();
10509         return;
10510     }
10511
10512     sub process_line_of_CODE {
10513
10514         my ( $self, $my_line_of_tokens ) = @_;
10515
10516         # This routine is called once per INPUT line to process all of the
10517         # tokens on that line.
10518
10519         # It outputs full-line comments and blank lines immediately.
10520
10521         # The tokens are copied one-by-one from the global token array $rLL to
10522         # a set of '_to_go' arrays which collect batches of tokens for a
10523         # further processing via calls to 'sub store_token_to_go', until a well
10524         # defined 'structural' break point* or 'forced' breakpoint* is reached.
10525         # Then, the batch of collected '_to_go' tokens is passed along to 'sub
10526         # grind_batch_of_CODE' for further processing.
10527
10528         # * 'structural' break points are basically line breaks corresponding
10529         # to code blocks.  An example is a chain of if-elsif-else statements,
10530         # which should typically be broken at the opening and closing braces.
10531
10532         # * 'forced' break points are breaks required by side comments or by
10533         # special user controls.
10534
10535         # So this routine is just making an initial set of required line
10536         # breaks, basically regardless of the maximum requested line length.
10537         # The subsequent stage of formating make additional line breaks
10538         # appropriate for lists and logical structures, and to keep line
10539         # lengths below the requested maximum line length.
10540
10541         $line_of_tokens = $my_line_of_tokens;
10542         $CODE_type      = $line_of_tokens->{_code_type};
10543         my $input_line_number = $line_of_tokens->{_line_number};
10544         my $input_line        = $line_of_tokens->{_line_text};
10545
10546         # initialize closure variables
10547         my $rK_range = $line_of_tokens->{_rK_range};
10548         ( $K_first, $K_last ) = @{$rK_range};
10549
10550         # remember original starting index in case it changes
10551         my $K_first_true = $K_first;
10552
10553         $rLL              = $self->[_rLL_];
10554         $radjusted_levels = $self->[_radjusted_levels_];
10555
10556         my $file_writer_object = $self->[_file_writer_object_];
10557         my $rbreak_container   = $self->[_rbreak_container_];
10558         my $rshort_nested      = $self->[_rshort_nested_];
10559         my $sink_object        = $self->[_sink_object_];
10560         my $fh_tee             = $self->[_fh_tee_];
10561         my $ris_bli_container  = $self->[_ris_bli_container_];
10562         my $rK_weld_left       = $self->[_rK_weld_left_];
10563
10564         if ( !defined($K_first) ) {
10565
10566             # Empty line: This can happen if tokens are deleted, for example
10567             # with the -mangle parameter
10568             return;
10569         }
10570
10571         $no_internal_newlines = 0;
10572         if ( !$rOpts_add_newlines || $CODE_type eq 'NIN' ) {
10573             $no_internal_newlines = 2;
10574         }
10575
10576         $side_comment_follows = 0;
10577         my $is_comment =
10578           ( $K_first == $K_last && $rLL->[$K_first]->[_TYPE_] eq '#' );
10579         my $is_static_block_comment_without_leading_space =
10580           $CODE_type eq 'SBCX';
10581         $is_static_block_comment =
10582           $CODE_type eq 'SBC' || $is_static_block_comment_without_leading_space;
10583         my $is_hanging_side_comment = $CODE_type eq 'HSC';
10584         my $is_VERSION_statement    = $CODE_type eq 'VER';
10585
10586         if ($is_VERSION_statement) {
10587             $self->[_saw_VERSION_in_this_file_] = 1;
10588             $no_internal_newlines = 2;
10589         }
10590
10591         # Add interline blank if any
10592         my $last_old_nonblank_type   = "b";
10593         my $first_new_nonblank_token = "";
10594         if ( $max_index_to_go >= 0 ) {
10595             $last_old_nonblank_type   = $types_to_go[$max_index_to_go];
10596             $first_new_nonblank_token = $rLL->[$K_first]->[_TOKEN_];
10597             if (  !$is_comment
10598                 && $types_to_go[$max_index_to_go] ne 'b'
10599                 && $K_first > 0
10600                 && $rLL->[ $K_first - 1 ]->[_TYPE_] eq 'b' )
10601             {
10602                 $K_first -= 1;
10603             }
10604         }
10605
10606         my $rtok_first = $rLL->[$K_first];
10607
10608         my $in_quote = $line_of_tokens->{_ending_in_quote};
10609         $ending_in_quote = $in_quote;
10610         my $guessed_indentation_level =
10611           $line_of_tokens->{_guessed_indentation_level};
10612
10613         ######################################
10614         # Handle a block (full-line) comment..
10615         ######################################
10616         if ($is_comment) {
10617
10618             if ( $rOpts->{'delete-block-comments'} ) {
10619                 $self->flush();
10620                 return;
10621             }
10622
10623             destroy_one_line_block();
10624             $self->end_batch();
10625
10626             # output a blank line before block comments
10627             if (
10628                 # unless we follow a blank or comment line
10629                 $self->[_last_line_leading_type_] ne '#'
10630                 && $self->[_last_line_leading_type_] ne 'b'
10631
10632                 # only if allowed
10633                 && $rOpts->{'blanks-before-comments'}
10634
10635                 # if this is NOT an empty comment, unless it follows a side
10636                 # comment and could become a hanging side comment.
10637                 && (
10638                     $rtok_first->[_TOKEN_] ne '#'
10639                     || (   $last_line_had_side_comment
10640                         && $rLL->[$K_first]->[_LEVEL_] > 0 )
10641                 )
10642
10643                 # not after a short line ending in an opening token
10644                 # because we already have space above this comment.
10645                 # Note that the first comment in this if block, after
10646                 # the 'if (', does not get a blank line because of this.
10647                 && !$self->[_last_output_short_opening_token_]
10648
10649                 # never before static block comments
10650                 && !$is_static_block_comment
10651               )
10652             {
10653                 $self->flush();    # switching to new output stream
10654                 $file_writer_object->write_blank_code_line();
10655                 $self->[_last_line_leading_type_] = 'b';
10656             }
10657
10658             if (
10659                 $rOpts->{'indent-block-comments'}
10660                 && (  !$rOpts->{'indent-spaced-block-comments'}
10661                     || $input_line =~ /^\s+/ )
10662                 && !$is_static_block_comment_without_leading_space
10663               )
10664             {
10665                 my $Ktoken_vars = $K_first;
10666                 my $rtoken_vars = $rLL->[$Ktoken_vars];
10667                 $self->store_token_to_go( $Ktoken_vars, $rtoken_vars );
10668                 $self->end_batch();
10669             }
10670             else {
10671
10672                 # switching to new output stream
10673                 $self->flush();
10674
10675                 # Note that last arg in call here is 'undef' for comments
10676                 $file_writer_object->write_code_line(
10677                     $rtok_first->[_TOKEN_] . "\n", undef );
10678                 $self->[_last_line_leading_type_] = '#';
10679             }
10680             return;
10681         }
10682
10683         # compare input/output indentation except for continuation lines
10684         # (because they have an unknown amount of initial blank space)
10685         # and lines which are quotes (because they may have been outdented)
10686         $self->compare_indentation_levels( $K_first, $guessed_indentation_level,
10687             $input_line_number )
10688           unless ( $is_hanging_side_comment
10689             || $rtok_first->[_CI_LEVEL_] > 0
10690             || $guessed_indentation_level == 0
10691             && $rtok_first->[_TYPE_] eq 'Q' );
10692
10693         ##########################
10694         # Handle indentation-only
10695         ##########################
10696
10697         # NOTE: In previous versions we sent all qw lines out immediately here.
10698         # No longer doing this: also write a line which is entirely a 'qw' list
10699         # to allow stacking of opening and closing tokens.  Note that interior
10700         # qw lines will still go out at the end of this routine.
10701         if ( $CODE_type eq 'IO' ) {
10702             $self->flush();
10703             my $line = $input_line;
10704
10705             # Fix for rt #125506 Unexpected string formating
10706             # in which leading space of a terminal quote was removed
10707             $line =~ s/\s+$//;
10708             $line =~ s/^\s+// unless ( $line_of_tokens->{_starting_in_quote} );
10709
10710             my $Ktoken_vars = $K_first;
10711
10712             # We work with a copy of the token variables and change the
10713             # first token to be the entire line as a quote variable
10714             my $rtoken_vars = $rLL->[$Ktoken_vars];
10715             $rtoken_vars = copy_token_as_type( $rtoken_vars, 'q', $line );
10716
10717             # Patch: length is not really important here
10718             $rtoken_vars->[_TOKEN_LENGTH_] = length($line);
10719
10720             $self->store_token_to_go( $Ktoken_vars, $rtoken_vars );
10721             $self->end_batch();
10722             return;
10723         }
10724
10725         ############################
10726         # Handle all other lines ...
10727         ############################
10728
10729         # If we just saw the end of an elsif block, write nag message
10730         # if we do not see another elseif or an else.
10731         if ($looking_for_else) {
10732
10733             unless ( $rLL->[$K_first]->[_TOKEN_] =~ /^(elsif|else)$/ ) {
10734                 write_logfile_entry("(No else block)\n");
10735             }
10736             $looking_for_else = 0;
10737         }
10738
10739         # This is a good place to kill incomplete one-line blocks
10740         if (
10741             (
10742                    ( $semicolons_before_block_self_destruct == 0 )
10743                 && ( $max_index_to_go >= 0 )
10744                 && ( $last_old_nonblank_type eq ';' )
10745                 && ( $first_new_nonblank_token ne '}' )
10746             )
10747
10748             # Patch for RT #98902. Honor request to break at old commas.
10749             || (   $rOpts_break_at_old_comma_breakpoints
10750                 && $max_index_to_go >= 0
10751                 && $last_old_nonblank_type eq ',' )
10752           )
10753         {
10754             $forced_breakpoint_to_go[$max_index_to_go] = 1
10755               if ($rOpts_break_at_old_comma_breakpoints);
10756             destroy_one_line_block();
10757             $self->end_batch();
10758         }
10759
10760         # Keep any requested breaks before this line.  Note that we have to
10761         # use the original K_first because it may have been reduced above
10762         # to add a blank.  The value of the flag is as follows:
10763         #   1 => hard break, flush the batch
10764         #   2 => soft break, set breakpoint and continue building the batch
10765         if ( $self->[_rbreak_before_Kfirst_]->{$K_first_true} ) {
10766             destroy_one_line_block();
10767             if ( $self->[_rbreak_before_Kfirst_]->{$K_first_true} == 2 ) {
10768                 $self->set_forced_breakpoint($max_index_to_go);
10769             }
10770             else {
10771                 $self->end_batch();
10772             }
10773         }
10774
10775         # loop to process the tokens one-by-one
10776
10777         # We do not want a leading blank if the previous batch just got output
10778         if ( $max_index_to_go < 0 && $rLL->[$K_first]->[_TYPE_] eq 'b' ) {
10779             $K_first++;
10780         }
10781
10782         foreach my $Ktoken_vars ( $K_first .. $K_last ) {
10783
10784             my $rtoken_vars   = $rLL->[$Ktoken_vars];
10785             my $token         = $rtoken_vars->[_TOKEN_];
10786             my $type          = $rtoken_vars->[_TYPE_];
10787             my $block_type    = $rtoken_vars->[_BLOCK_TYPE_];
10788             my $type_sequence = $rtoken_vars->[_TYPE_SEQUENCE_];
10789
10790             # If we are continuing after seeing a right curly brace, flush
10791             # buffer unless we see what we are looking for, as in
10792             #   } else ...
10793             if ( $rbrace_follower && $type ne 'b' ) {
10794
10795                 unless ( $rbrace_follower->{$token} ) {
10796                     $self->end_batch();
10797                 }
10798                 $rbrace_follower = undef;
10799             }
10800
10801             # Get next nonblank on this line
10802             my $next_nonblank_token      = '';
10803             my $next_nonblank_token_type = 'b';
10804             if ( $Ktoken_vars < $K_last ) {
10805                 my $Knnb = $Ktoken_vars + 1;
10806                 if (   $rLL->[$Knnb]->[_TYPE_] eq 'b'
10807                     && $Knnb < $K_last )
10808                 {
10809                     $Knnb++;
10810                 }
10811                 $next_nonblank_token      = $rLL->[$Knnb]->[_TOKEN_];
10812                 $next_nonblank_token_type = $rLL->[$Knnb]->[_TYPE_];
10813             }
10814
10815             # Do not allow breaks which would promote a side comment to a
10816             # block comment.  In order to allow a break before an opening
10817             # or closing BLOCK, followed by a side comment, those sections
10818             # of code will handle this flag separately.
10819             $side_comment_follows = ( $next_nonblank_token_type eq '#' );
10820             my $is_opening_BLOCK =
10821               (      $type eq '{'
10822                   && $token eq '{'
10823                   && $block_type
10824                   && !$rshort_nested->{$type_sequence}
10825                   && $block_type ne 't' );
10826             my $is_closing_BLOCK =
10827               (      $type eq '}'
10828                   && $token eq '}'
10829                   && $block_type
10830                   && !$rshort_nested->{$type_sequence}
10831                   && $block_type ne 't' );
10832
10833             if (   $side_comment_follows
10834                 && !$is_opening_BLOCK
10835                 && !$is_closing_BLOCK )
10836             {
10837                 $no_internal_newlines = 1;
10838             }
10839
10840             # We're only going to handle breaking for code BLOCKS at this
10841             # (top) level.  Other indentation breaks will be handled by
10842             # sub scan_list, which is better suited to dealing with them.
10843             if ($is_opening_BLOCK) {
10844
10845                 # Tentatively output this token.  This is required before
10846                 # calling starting_one_line_block.  We may have to unstore
10847                 # it, though, if we have to break before it.
10848                 $self->store_token_to_go( $Ktoken_vars, $rtoken_vars );
10849
10850                 # Look ahead to see if we might form a one-line block..
10851                 my $too_long =
10852                   $self->starting_one_line_block( $Ktoken_vars,
10853                     $K_last_nonblank_code, $K_last );
10854                 $self->clear_breakpoint_undo_stack();
10855
10856                 # to simplify the logic below, set a flag to indicate if
10857                 # this opening brace is far from the keyword which introduces it
10858                 my $keyword_on_same_line = 1;
10859                 if (
10860                        $max_index_to_go >= 0
10861                     && $last_nonblank_type eq ')'
10862                     && ( ( $rtoken_vars->[_SLEVEL_] < $nesting_depth_to_go[0] )
10863                         || $too_long )
10864                   )
10865                 {
10866                     $keyword_on_same_line = 0;
10867                 }
10868
10869                 # decide if user requested break before '{'
10870                 my $want_break =
10871
10872                   # This test was added to minimize changes in -bl formatting
10873                   # caused by other changes to fix cases b562 .. b983
10874                   # Previously, the -bl flag was being applied almost randomly
10875                   # to sort/map/grep/eval blocks, depending on if they were
10876                   # flagged as possible one-line blocks.  usually time they
10877                   # were not given -bl formatting.  The following flag was
10878                   # added to minimize changes to existing formatting.
10879                   $is_braces_left_exclude_block{$block_type}
10880                   ? 0
10881
10882                   # use -bl flag if not a sub block of any type
10883                   : $block_type !~ /$ANYSUB_PATTERN/
10884                   ? $rOpts->{'opening-brace-on-new-line'}
10885
10886                   # use -sbl flag for a named sub block
10887                   : $block_type !~ /$ASUB_PATTERN/
10888                   ? $rOpts->{'opening-sub-brace-on-new-line'}
10889
10890                   # use -asbl flag for an anonymous sub block
10891                   : $rOpts->{'opening-anonymous-sub-brace-on-new-line'};
10892
10893                 # Break if requested with -bli flag
10894                 $want_break ||= $ris_bli_container->{$type_sequence};
10895
10896                 # Do not break if this token is welded to the left
10897                 if ( $total_weld_count
10898                     && defined( $rK_weld_left->{$Ktoken_vars} ) )
10899                 {
10900                     $want_break = 0;
10901                 }
10902
10903                 # Break before an opening '{' ...
10904                 if (
10905
10906                     # if requested
10907                     $want_break
10908
10909                     # and we were unable to start looking for a block,
10910                     && $index_start_one_line_block == UNDEFINED_INDEX
10911
10912                     # or if it will not be on same line as its keyword, so that
10913                     # it will be outdented (eval.t, overload.t), and the user
10914                     # has not insisted on keeping it on the right
10915                     || (   !$keyword_on_same_line
10916                         && !$rOpts->{'opening-brace-always-on-right'} )
10917                   )
10918                 {
10919
10920                     # but only if allowed
10921                     unless ($no_internal_newlines) {
10922
10923                         # since we already stored this token, we must unstore it
10924                         $self->unstore_token_to_go();
10925
10926                         # then output the line
10927                         $self->end_batch();
10928
10929                         # and now store this token at the start of a new line
10930                         $self->store_token_to_go( $Ktoken_vars, $rtoken_vars );
10931                     }
10932                 }
10933
10934                 # Now update for side comment
10935                 if ($side_comment_follows) { $no_internal_newlines = 1 }
10936
10937                 # now output this line
10938                 unless ($no_internal_newlines) {
10939                     $self->end_batch();
10940                 }
10941             }
10942
10943             elsif ($is_closing_BLOCK) {
10944
10945                 # If there is a pending one-line block ..
10946                 if ( $index_start_one_line_block != UNDEFINED_INDEX ) {
10947
10948                     # we have to terminate it if..
10949                     if (
10950
10951                         # it is too long (final length may be different from
10952                         # initial estimate). note: must allow 1 space for this
10953                         # token
10954                         $self->excess_line_length( $index_start_one_line_block,
10955                             $max_index_to_go ) >= 0
10956
10957                         # or if it has too many semicolons
10958                         || (   $semicolons_before_block_self_destruct == 0
10959                             && $last_nonblank_type ne ';' )
10960                       )
10961                     {
10962                         destroy_one_line_block();
10963                     }
10964                 }
10965
10966                 # put a break before this closing curly brace if appropriate
10967                 unless ( $no_internal_newlines
10968                     || $index_start_one_line_block != UNDEFINED_INDEX )
10969                 {
10970
10971                     # write out everything before this closing curly brace
10972                     $self->end_batch();
10973                 }
10974
10975                 # Now update for side comment
10976                 if ($side_comment_follows) { $no_internal_newlines = 1 }
10977
10978                 # store the closing curly brace
10979                 $self->store_token_to_go( $Ktoken_vars, $rtoken_vars );
10980
10981                 # ok, we just stored a closing curly brace.  Often, but
10982                 # not always, we want to end the line immediately.
10983                 # So now we have to check for special cases.
10984
10985                 # if this '}' successfully ends a one-line block..
10986                 my $is_one_line_block = 0;
10987                 my $keep_going        = 0;
10988                 if ( $index_start_one_line_block != UNDEFINED_INDEX ) {
10989
10990                     # Remember the type of token just before the
10991                     # opening brace.  It would be more general to use
10992                     # a stack, but this will work for one-line blocks.
10993                     $is_one_line_block =
10994                       $types_to_go[$index_start_one_line_block];
10995
10996                     # we have to actually make it by removing tentative
10997                     # breaks that were set within it
10998                     $self->undo_forced_breakpoint_stack(0);
10999                     $self->set_nobreaks( $index_start_one_line_block,
11000                         $max_index_to_go - 1 );
11001
11002                     # then re-initialize for the next one-line block
11003                     destroy_one_line_block();
11004
11005                     # then decide if we want to break after the '}' ..
11006                     # We will keep going to allow certain brace followers as in:
11007                     #   do { $ifclosed = 1; last } unless $losing;
11008                     #
11009                     # But make a line break if the curly ends a
11010                     # significant block:
11011                     if (
11012                         (
11013                             $is_block_without_semicolon{$block_type}
11014
11015                             # Follow users break point for
11016                             # one line block types U & G, such as a 'try' block
11017                             || $is_one_line_block =~ /^[UG]$/
11018                             && $Ktoken_vars == $K_last
11019                         )
11020
11021                         # if needless semicolon follows we handle it later
11022                         && $next_nonblank_token ne ';'
11023                       )
11024                     {
11025                         $self->end_batch()
11026                           unless ($no_internal_newlines);
11027                     }
11028                 }
11029
11030                 # set string indicating what we need to look for brace follower
11031                 # tokens
11032                 if ( $block_type eq 'do' ) {
11033                     $rbrace_follower = \%is_do_follower;
11034                     if ( $self->tight_paren_follows( $K_to_go[0], $Ktoken_vars )
11035                       )
11036                     {
11037                         $rbrace_follower = { ')' => 1 };
11038                     }
11039                 }
11040                 elsif ( $block_type =~ /^(if|elsif|unless)$/ ) {
11041                     $rbrace_follower = \%is_if_brace_follower;
11042                 }
11043                 elsif ( $block_type eq 'else' ) {
11044                     $rbrace_follower = \%is_else_brace_follower;
11045                 }
11046
11047                 # added eval for borris.t
11048                 elsif ($is_sort_map_grep_eval{$block_type}
11049                     || $is_one_line_block eq 'G' )
11050                 {
11051                     $rbrace_follower = undef;
11052                     $keep_going      = 1;
11053                 }
11054
11055                 # anonymous sub
11056                 elsif ( $block_type =~ /$ASUB_PATTERN/ ) {
11057
11058                     if ($is_one_line_block) {
11059                         $rbrace_follower = \%is_anon_sub_1_brace_follower;
11060                     }
11061                     else {
11062                         $rbrace_follower = \%is_anon_sub_brace_follower;
11063                     }
11064                 }
11065
11066                 # None of the above: specify what can follow a closing
11067                 # brace of a block which is not an
11068                 # if/elsif/else/do/sort/map/grep/eval
11069                 # Testfiles:
11070                 # 'Toolbar.pm', 'Menubar.pm', bless.t, '3rules.pl', 'break1.t
11071                 else {
11072                     $rbrace_follower = \%is_other_brace_follower;
11073                 }
11074
11075                 # See if an elsif block is followed by another elsif or else;
11076                 # complain if not.
11077                 if ( $block_type eq 'elsif' ) {
11078
11079                     if ( $next_nonblank_token_type eq 'b' ) {    # end of line?
11080                         $looking_for_else = 1;    # ok, check on next line
11081                     }
11082                     else {
11083
11084                         unless ( $next_nonblank_token =~ /^(elsif|else)$/ ) {
11085                             write_logfile_entry("No else block :(\n");
11086                         }
11087                     }
11088                 }
11089
11090                 # keep going after certain block types (map,sort,grep,eval)
11091                 # added eval for borris.t
11092                 if ($keep_going) {
11093
11094                     # keep going
11095                 }
11096
11097                 # if no more tokens, postpone decision until re-entring
11098                 elsif ( ( $next_nonblank_token_type eq 'b' )
11099                     && $rOpts_add_newlines )
11100                 {
11101                     unless ($rbrace_follower) {
11102                         $self->end_batch()
11103                           unless ($no_internal_newlines);
11104                     }
11105                 }
11106
11107                 elsif ($rbrace_follower) {
11108
11109                     unless ( $rbrace_follower->{$next_nonblank_token} ) {
11110                         $self->end_batch()
11111                           unless ($no_internal_newlines);
11112                     }
11113                     $rbrace_follower = undef;
11114                 }
11115
11116                 else {
11117                     $self->end_batch()
11118                       unless ($no_internal_newlines);
11119                 }
11120
11121             }    # end treatment of closing block token
11122
11123             # handle semicolon
11124             elsif ( $type eq ';' ) {
11125
11126                 my $break_before_semicolon = ( $Ktoken_vars == $K_first )
11127                   && $rOpts_break_at_old_semicolon_breakpoints;
11128
11129                 # kill one-line blocks with too many semicolons
11130                 $semicolons_before_block_self_destruct--;
11131                 if (
11132                        $break_before_semicolon
11133                     || ( $semicolons_before_block_self_destruct < 0 )
11134                     || (   $semicolons_before_block_self_destruct == 0
11135                         && $next_nonblank_token_type !~ /^[b\}]$/ )
11136                   )
11137                 {
11138                     destroy_one_line_block();
11139                     $self->end_batch() if ($break_before_semicolon);
11140                 }
11141
11142                 $self->store_token_to_go( $Ktoken_vars, $rtoken_vars );
11143
11144                 $self->end_batch()
11145                   unless (
11146                     $no_internal_newlines
11147                     || (   $rOpts_keep_interior_semicolons
11148                         && $Ktoken_vars < $K_last )
11149                     || ( $next_nonblank_token eq '}' )
11150                   );
11151
11152             }
11153
11154             # handle here_doc target string
11155             elsif ( $type eq 'h' ) {
11156
11157                 # no newlines after seeing here-target
11158                 $no_internal_newlines = 2;
11159                 ## destroy_one_line_block();  # deleted to fix case b529
11160                 $self->store_token_to_go( $Ktoken_vars, $rtoken_vars );
11161             }
11162
11163             # handle all other token types
11164             else {
11165
11166                 $self->store_token_to_go( $Ktoken_vars, $rtoken_vars );
11167             }
11168
11169             # remember two previous nonblank OUTPUT tokens
11170             if ( $type ne '#' && $type ne 'b' ) {
11171                 $last_last_nonblank_token  = $last_nonblank_token;
11172                 $last_last_nonblank_type   = $last_nonblank_type;
11173                 $last_nonblank_token       = $token;
11174                 $last_nonblank_type        = $type;
11175                 $last_nonblank_block_type  = $block_type;
11176                 $K_last_last_nonblank_code = $K_last_nonblank_code;
11177                 $K_last_nonblank_code      = $Ktoken_vars;
11178             }
11179
11180         }    # end of loop over all tokens in this 'line_of_tokens'
11181
11182         my $type       = $rLL->[$K_last]->[_TYPE_];
11183         my $break_flag = $self->[_rbreak_after_Klast_]->{$K_last};
11184
11185         # we have to flush ..
11186         if (
11187
11188             # if there is a side comment...
11189             $type eq '#'
11190
11191             # if this line ends in a quote
11192             # NOTE: This is critically important for insuring that quoted lines
11193             # do not get processed by things like -sot and -sct
11194             || $in_quote
11195
11196             # if this is a VERSION statement
11197             || $is_VERSION_statement
11198
11199             # to keep a label at the end of a line
11200             || $type eq 'J'
11201
11202             # if we have a hard break request
11203             || $break_flag && $break_flag != 2
11204
11205             # if we are instructed to keep all old line breaks
11206             || !$rOpts->{'delete-old-newlines'}
11207
11208             # if this is a line of the form 'use overload'. A break here
11209             # in the input file is a good break because it will allow
11210             # the operators which follow to be formatted well. Without
11211             # this break the formatting with -ci=4 -xci is poor, for example.
11212
11213             #   use overload
11214             #     '+' => sub {
11215             #       print length $_[2], "\n";
11216             #       my ( $x, $y ) = _order(@_);
11217             #       Number::Roman->new( int $x + $y );
11218             #     },
11219             #     '-' => sub {
11220             #       my ( $x, $y ) = _order(@_);
11221             #       Number::Roman->new( int $x - $y );
11222             #     };
11223             || (   $max_index_to_go == 2
11224                 && $types_to_go[0] eq 'k'
11225                 && $tokens_to_go[0] eq 'use'
11226                 && $tokens_to_go[$max_index_to_go] eq 'overload' )
11227           )
11228         {
11229             destroy_one_line_block();
11230             $self->end_batch();
11231         }
11232
11233         # Check for a soft break request
11234         if ( $max_index_to_go >= 0 && $break_flag && $break_flag == 2 ) {
11235             $self->set_forced_breakpoint($max_index_to_go);
11236         }
11237
11238         # mark old line breakpoints in current output stream
11239         if (
11240             $max_index_to_go >= 0
11241             && (  !$rOpts_ignore_old_breakpoints
11242                 || $self->[_ris_essential_old_breakpoint_]->{$K_last} )
11243           )
11244         {
11245             my $jobp = $max_index_to_go;
11246             if ( $types_to_go[$max_index_to_go] eq 'b' && $max_index_to_go > 0 )
11247             {
11248                 $jobp--;
11249             }
11250             $old_breakpoint_to_go[$jobp] = 1;
11251         }
11252         return;
11253     } ## end sub process_line_of_CODE
11254 } ## end closure process_line_of_CODE
11255
11256 sub tight_paren_follows {
11257
11258     my ( $self, $K_to_go_0, $K_ic ) = @_;
11259
11260     # Input parameters:
11261     #   $K_to_go_0 = first token index K of this output batch (=K_to_go[0])
11262     #   $K_ic = index of the closing do brace (=K_to_go[$max_index_to_go])
11263     # Return parameter:
11264     #   false if we want a break after the closing do brace
11265     #   true if we do not want a break after the closing do brace
11266
11267     # We are at the closing brace of a 'do' block.  See if this brace is
11268     # followed by a closing paren, and if so, set a flag which indicates
11269     # that we do not want a line break between the '}' and ')'.
11270
11271     # xxxxx ( ...... do {  ... } ) {
11272     #                          ^-------looking at this brace, K_ic
11273
11274     # Subscript notation:
11275     # _i = inner container (braces in this case)
11276     # _o = outer container (parens in this case)
11277     # _io = inner opening = '{'
11278     # _ic = inner closing = '}'
11279     # _oo = outer opening = '('
11280     # _oc = outer closing = ')'
11281
11282     #       |--K_oo                 |--K_oc  = outer container
11283     # xxxxx ( ...... do {  ...... } ) {
11284     #                   |--K_io   |--K_ic    = inner container
11285
11286     # In general, the safe thing to do is return a 'false' value
11287     # if the statement appears to be complex.  This will have
11288     # the downstream side-effect of opening up outer containers
11289     # to help make complex code readable.  But for simpler
11290     # do blocks it can be preferable to keep the code compact
11291     # by returning a 'true' value.
11292
11293     return unless defined($K_ic);
11294     my $rLL = $self->[_rLL_];
11295
11296     # we should only be called at a closing block
11297     my $seqno_i = $rLL->[$K_ic]->[_TYPE_SEQUENCE_];
11298     return unless ($seqno_i);    # shouldn't happen;
11299
11300     # This only applies if the next nonblank is a ')'
11301     my $K_oc = $self->K_next_nonblank($K_ic);
11302     return unless defined($K_oc);
11303     my $token_next = $rLL->[$K_oc]->[_TOKEN_];
11304     return unless ( $token_next eq ')' );
11305
11306     my $seqno_o = $rLL->[$K_oc]->[_TYPE_SEQUENCE_];
11307     my $K_io    = $self->[_K_opening_container_]->{$seqno_i};
11308     my $K_oo    = $self->[_K_opening_container_]->{$seqno_o};
11309     return unless ( defined($K_io) && defined($K_oo) );
11310
11311     # RULE 1: Do not break before a closing signature paren
11312     # (regardless of complexity).  This is a fix for issue git#22.
11313     # Looking for something like:
11314     #   sub xxx ( ... do {  ... } ) {
11315     #                               ^----- next block_type
11316     my $K_test = $self->K_next_nonblank($K_oc);
11317     if ( defined($K_test) ) {
11318         my $block_type = $rLL->[$K_test]->[_BLOCK_TYPE_];
11319         if (   $block_type
11320             && $rLL->[$K_test]->[_TYPE_] eq '{'
11321             && $block_type =~ /$ANYSUB_PATTERN/ )
11322         {
11323             return 1;
11324         }
11325     }
11326
11327     # RULE 2: Break if the contents within braces appears to be 'complex'.  We
11328     # base this decision on the number of tokens between braces.
11329
11330     # xxxxx ( ... do {  ... } ) {
11331     #                 ^^^^^^
11332
11333     # Although very simple, it has the advantages of (1) being insensitive to
11334     # changes in lengths of identifier names, (2) easy to understand, implement
11335     # and test.  A test case for this is 't/snippets/long_line.in'.
11336
11337     # Example: $K_ic - $K_oo = 9       [Pass Rule 2]
11338     # if ( do { $2 !~ /&/ } ) { ... }
11339
11340     # Example: $K_ic - $K_oo = 10      [Pass Rule 2]
11341     # for ( split /\s*={70,}\s*/, do { local $/; <DATA> }) { ... }
11342
11343     # Example: $K_ic - $K_oo = 20      [Fail Rule 2]
11344     # test_zero_args( "do-returned list slice", do { ( 10, 11 )[ 2, 3 ]; });
11345
11346     return if ( $K_ic - $K_io > 16 );
11347
11348     # RULE 3: break if the code between the opening '(' and the '{' is 'complex'
11349     # As with the previous rule, we decide based on the token count
11350
11351     # xxxxx ( ... do {  ... } ) {
11352     #        ^^^^^^^^
11353
11354     # Example: $K_ic - $K_oo = 9       [Pass Rule 2]
11355     #          $K_io - $K_oo = 4       [Pass Rule 3]
11356     # if ( do { $2 !~ /&/ } ) { ... }
11357
11358     # Example: $K_ic - $K_oo = 10    [Pass rule 2]
11359     #          $K_io - $K_oo = 9     [Pass rule 3]
11360     # for ( split /\s*={70,}\s*/, do { local $/; <DATA> }) { ... }
11361
11362     return if ( $K_io - $K_oo > 9 );
11363
11364     # RULE 4: Break if we have already broken this batch of output tokens
11365     return if ( $K_oo < $K_to_go_0 );
11366
11367     # RULE 5: Break if input is not on one line
11368     # For example, we will set the flag for the following expression
11369     # written in one line:
11370
11371     # This has: $K_ic - $K_oo = 10    [Pass rule 2]
11372     #           $K_io - $K_oo = 8     [Pass rule 3]
11373     #   $self->debug( 'Error: ' . do { local $/; <$err> } );
11374
11375     # but we break after the brace if it is on multiple lines on input, since
11376     # the user may prefer it on multiple lines:
11377
11378     # [Fail rule 5]
11379     #   $self->debug(
11380     #       'Error: ' . do { local $/; <$err> }
11381     #   );
11382
11383     if ( !$rOpts_ignore_old_breakpoints ) {
11384         my $iline_oo = $rLL->[$K_oo]->[_LINE_INDEX_];
11385         my $iline_oc = $rLL->[$K_oc]->[_LINE_INDEX_];
11386         return if ( $iline_oo != $iline_oc );
11387     }
11388
11389     # OK to keep the paren tight
11390     return 1;
11391 }
11392
11393 sub starting_one_line_block {
11394
11395     # after seeing an opening curly brace, look for the closing brace and see
11396     # if the entire block will fit on a line.  This routine is not always right
11397     # so a check is made later (at the closing brace) to make sure we really
11398     # have a one-line block.  We have to do this preliminary check, though,
11399     # because otherwise we would always break at a semicolon within a one-line
11400     # block if the block contains multiple statements.
11401
11402     my ( $self, $Kj, $K_last_nonblank, $K_last ) = @_;
11403
11404     my $rbreak_container    = $self->[_rbreak_container_];
11405     my $rshort_nested       = $self->[_rshort_nested_];
11406     my $rLL                 = $self->[_rLL_];
11407     my $K_opening_container = $self->[_K_opening_container_];
11408
11409     # kill any current block - we can only go 1 deep
11410     destroy_one_line_block();
11411
11412     # return value:
11413     #  1=distance from start of block to opening brace exceeds line length
11414     #  0=otherwise
11415
11416     my $i_start = 0;
11417
11418     # This routine should not have been called if there are no tokens in the
11419     # 'to_go' arrays of previously stored tokens.  A previous call to
11420     # 'store_token_to_go' should have stored an opening brace. An error here
11421     # indicates that a programming change may have caused a flush operation to
11422     # clean out the previously stored tokens.
11423     if ( !defined($max_index_to_go) || $max_index_to_go < 0 ) {
11424         Fault("program bug: store_token_to_go called incorrectly\n");
11425     }
11426
11427     # Return if block should be broken
11428     my $type_sequence = $rLL->[$Kj]->[_TYPE_SEQUENCE_];
11429     if ( $rbreak_container->{$type_sequence} ) {
11430         return 0;
11431     }
11432
11433     my $ris_bli_container = $self->[_ris_bli_container_];
11434     my $is_bli            = $ris_bli_container->{$type_sequence};
11435
11436     my $block_type             = $rLL->[$Kj]->[_BLOCK_TYPE_];
11437     my $index_max_forced_break = get_index_max_forced_break();
11438
11439     my $previous_nonblank_token = '';
11440     my $i_last_nonblank         = -1;
11441     if ( defined($K_last_nonblank) ) {
11442         $i_last_nonblank = $K_last_nonblank - $K_to_go[0];
11443         if ( $i_last_nonblank >= 0 ) {
11444             $previous_nonblank_token = $rLL->[$K_last_nonblank]->[_TOKEN_];
11445         }
11446     }
11447
11448     # find the starting keyword for this block (such as 'if', 'else', ...)
11449     if (   $max_index_to_go == 0
11450         || $block_type =~ /^[\{\}\;\:]$/
11451         || $block_type =~ /^package/ )
11452     {
11453         $i_start = $max_index_to_go;
11454     }
11455
11456     # the previous nonblank token should start these block types
11457     elsif (
11458         $i_last_nonblank >= 0
11459         && (   $previous_nonblank_token eq $block_type
11460             || $block_type =~ /$ANYSUB_PATTERN/
11461             || $block_type =~ /\(\)/ )
11462       )
11463     {
11464         $i_start = $i_last_nonblank;
11465
11466         # For signatures and extended syntax ...
11467         # If this brace follows a parenthesized list, we should look back to
11468         # find the keyword before the opening paren because otherwise we might
11469         # form a one line block which stays intack, and cause the parenthesized
11470         # expression to break open. That looks bad.
11471         if ( $tokens_to_go[$i_start] eq ')' ) {
11472
11473             # Find the opening paren
11474             my $K_start = $K_to_go[$i_start];
11475             return 0 unless defined($K_start);
11476             my $seqno = $type_sequence_to_go[$i_start];
11477             return 0 unless ($seqno);
11478             my $K_opening = $K_opening_container->{$seqno};
11479             return 0 unless defined($K_opening);
11480             my $i_opening = $i_start + ( $K_opening - $K_start );
11481
11482             # give up if not on this line
11483             return 0 unless ( $i_opening >= 0 );
11484             $i_start = $i_opening;    ##$index_max_forced_break + 1;
11485
11486             # go back one token before the opening paren
11487             if ( $i_start > 0 )                                  { $i_start-- }
11488             if ( $types_to_go[$i_start] eq 'b' && $i_start > 0 ) { $i_start--; }
11489             my $lev = $levels_to_go[$i_start];
11490             if ( $lev > $rLL->[$Kj]->[_LEVEL_] ) { return 0 }
11491         }
11492     }
11493
11494     elsif ( $previous_nonblank_token eq ')' ) {
11495
11496         # For something like "if (xxx) {", the keyword "if" will be
11497         # just after the most recent break. This will be 0 unless
11498         # we have just killed a one-line block and are starting another.
11499         # (doif.t)
11500         # Note: cannot use inext_index_to_go[] here because that array
11501         # is still being constructed.
11502         $i_start = $index_max_forced_break + 1;
11503         if ( $types_to_go[$i_start] eq 'b' ) {
11504             $i_start++;
11505         }
11506
11507         # Patch to avoid breaking short blocks defined with extended_syntax:
11508         # Strip off any trailing () which was added in the parser to mark
11509         # the opening keyword.  For example, in the following
11510         #    create( TypeFoo $e) {$bubba}
11511         # the blocktype would be marked as create()
11512         my $stripped_block_type = $block_type;
11513         $stripped_block_type =~ s/\(\)$//;
11514
11515         unless ( $tokens_to_go[$i_start] eq $stripped_block_type ) {
11516             return 0;
11517         }
11518     }
11519
11520     # patch for SWITCH/CASE to retain one-line case/when blocks
11521     elsif ( $block_type eq 'case' || $block_type eq 'when' ) {
11522
11523         # Note: cannot use inext_index_to_go[] here because that array
11524         # is still being constructed.
11525         $i_start = $index_max_forced_break + 1;
11526         if ( $types_to_go[$i_start] eq 'b' ) {
11527             $i_start++;
11528         }
11529         unless ( $tokens_to_go[$i_start] eq $block_type ) {
11530             return 0;
11531         }
11532     }
11533
11534     else {
11535         return 1;
11536     }
11537
11538     my $pos = total_line_length( $i_start, $max_index_to_go ) - 1;
11539
11540     my $maximum_line_length =
11541       $maximum_line_length_at_level[ $levels_to_go[$i_start] ];
11542
11543     # see if block starting location is too great to even start
11544     if ( $pos > $maximum_line_length ) {
11545         return 1;
11546     }
11547
11548     # See if everything to the closing token will fit on one line
11549     # This is part of an update to fix cases b562 .. b983
11550     my $K_closing = $self->[_K_closing_container_]->{$type_sequence};
11551     return 0 unless ( defined($K_closing) );
11552     my $container_length = $rLL->[$K_closing]->[_CUMULATIVE_LENGTH_] -
11553       $rLL->[$Kj]->[_CUMULATIVE_LENGTH_];
11554
11555     my $excess = $pos + 1 + $container_length - $maximum_line_length;
11556
11557     # Add a small tolerance for welded tokens (case b901)
11558     if ( $total_weld_count && $self->is_welded_at_seqno($type_sequence) ) {
11559         $excess += 2;
11560     }
11561
11562     if ( $excess > 0 ) {
11563
11564         # line is too long...  there is no chance of forming a one line block
11565         # if the excess is more than 1 char
11566         return 0 if ( $excess > 1 );
11567
11568         # ... and give up if it is not a one-line block on input.
11569         # note: for a one-line block on input, it may be possible to keep
11570         # it as a one-line block (by removing a needless semicolon ).
11571         my $K_start = $K_to_go[$i_start];
11572         my $ldiff =
11573           $rLL->[$K_closing]->[_LINE_INDEX_] - $rLL->[$K_start]->[_LINE_INDEX_];
11574         return 0 if ($ldiff);
11575     }
11576
11577     foreach my $Ki ( $Kj + 1 .. $K_last ) {
11578
11579         # old whitespace could be arbitrarily large, so don't use it
11580         if ( $rLL->[$Ki]->[_TYPE_] eq 'b' ) { $pos += 1 }
11581         else { $pos += $rLL->[$Ki]->[_TOKEN_LENGTH_] }
11582
11583         # ignore some small blocks
11584         my $type_sequence = $rLL->[$Ki]->[_TYPE_SEQUENCE_];
11585         my $nobreak       = $rshort_nested->{$type_sequence};
11586
11587         # Return false result if we exceed the maximum line length,
11588         if ( $pos > $maximum_line_length ) {
11589             return 0;
11590         }
11591
11592         # keep going for non-containers
11593         elsif ( !$type_sequence ) {
11594
11595         }
11596
11597         # return if we encounter another opening brace before finding the
11598         # closing brace.
11599         elsif ($rLL->[$Ki]->[_TOKEN_] eq '{'
11600             && $rLL->[$Ki]->[_TYPE_] eq '{'
11601             && $rLL->[$Ki]->[_BLOCK_TYPE_]
11602             && !$nobreak )
11603         {
11604             return 0;
11605         }
11606
11607         # if we find our closing brace..
11608         elsif ($rLL->[$Ki]->[_TOKEN_] eq '}'
11609             && $rLL->[$Ki]->[_TYPE_] eq '}'
11610             && $rLL->[$Ki]->[_BLOCK_TYPE_]
11611             && !$nobreak )
11612         {
11613
11614             # be sure any trailing comment also fits on the line
11615             my $Ki_nonblank = $Ki;
11616             if ( $Ki_nonblank < $K_last ) {
11617                 $Ki_nonblank++;
11618                 if (   $rLL->[$Ki_nonblank]->[_TYPE_] eq 'b'
11619                     && $Ki_nonblank < $K_last )
11620                 {
11621                     $Ki_nonblank++;
11622                 }
11623             }
11624
11625             # Patch for one-line sort/map/grep/eval blocks with side comments:
11626             # We will ignore the side comment length for sort/map/grep/eval
11627             # because this can lead to statements which change every time
11628             # perltidy is run.  Here is an example from Denis Moskowitz which
11629             # oscillates between these two states without this patch:
11630
11631 ## --------
11632 ## grep { $_->foo ne 'bar' } # asdfa asdf asdf asdf asdf asdf asdf asdf asdf asdf asdf
11633 ##  @baz;
11634 ##
11635 ## grep {
11636 ##     $_->foo ne 'bar'
11637 ##   }    # asdfa asdf asdf asdf asdf asdf asdf asdf asdf asdf asdf
11638 ##   @baz;
11639 ## --------
11640
11641             # When the first line is input it gets broken apart by the main
11642             # line break logic in sub process_line_of_CODE.
11643             # When the second line is input it gets recombined by
11644             # process_line_of_CODE and passed to the output routines.  The
11645             # output routines (set_continuation_breaks) do not break it apart
11646             # because the bond strengths are set to the highest possible value
11647             # for grep/map/eval/sort blocks, so the first version gets output.
11648             # It would be possible to fix this by changing bond strengths,
11649             # but they are high to prevent errors in older versions of perl.
11650
11651             if (   $Ki < $K_last
11652                 && $rLL->[$Ki_nonblank]->[_TYPE_] eq '#'
11653                 && !$is_sort_map_grep{$block_type} )
11654             {
11655
11656                 $pos += $rLL->[$Ki_nonblank]->[_TOKEN_LENGTH_];
11657
11658                 if ( $Ki_nonblank > $Ki + 1 ) {
11659
11660                     # source whitespace could be anything, assume
11661                     # at least one space before the hash on output
11662                     if ( $rLL->[ $Ki + 1 ]->[_TYPE_] eq 'b' ) {
11663                         $pos += 1;
11664                     }
11665                     else { $pos += $rLL->[ $Ki + 1 ]->[_TOKEN_LENGTH_] }
11666                 }
11667
11668                 if ( $pos >= $maximum_line_length ) {
11669                     return 0;
11670                 }
11671             }
11672
11673             # ok, it's a one-line block
11674             create_one_line_block( $i_start, 20 );
11675             return 0;
11676         }
11677
11678         # just keep going for other characters
11679         else {
11680         }
11681     }
11682
11683     # We haven't hit the closing brace, but there is still space. So the
11684     # question here is, should we keep going to look at more lines in hopes of
11685     # forming a new one-line block, or should we stop right now. The problem
11686     # with continuing is that we will not be able to honor breaks before the
11687     # opening brace if we continue.
11688
11689     # Typically we will want to keep trying to make one-line blocks for things
11690     # like sort/map/grep/eval.  But it is not always a good idea to make as
11691     # many one-line blocks as possible, so other types are not done.  The user
11692     # can always use -mangle.
11693
11694     # If we want to keep going, we will create a new one-line block.
11695     # The blocks which we can keep going are in a hash, but we never want
11696     # to continue if we are at a '-bli' block.
11697     if ( $want_one_line_block{$block_type} && !$is_bli ) {
11698         create_one_line_block( $i_start, 1 );
11699     }
11700     return 0;
11701 }
11702
11703 sub unstore_token_to_go {
11704
11705     # remove most recent token from output stream
11706     my $self = shift;
11707     if ( $max_index_to_go > 0 ) {
11708         $max_index_to_go--;
11709     }
11710     else {
11711         $max_index_to_go = UNDEFINED_INDEX;
11712     }
11713     return;
11714 }
11715
11716 sub compare_indentation_levels {
11717
11718     # Check to see if output line tabbing agrees with input line
11719     # this can be very useful for debugging a script which has an extra
11720     # or missing brace.
11721
11722     my ( $self, $K_first, $guessed_indentation_level, $line_number ) = @_;
11723     return unless ( defined($K_first) );
11724
11725     my $rLL = $self->[_rLL_];
11726
11727     my $structural_indentation_level = $rLL->[$K_first]->[_LEVEL_];
11728     my $radjusted_levels             = $self->[_radjusted_levels_];
11729     if ( defined($radjusted_levels) && @{$radjusted_levels} == @{$rLL} ) {
11730         $structural_indentation_level = $radjusted_levels->[$K_first];
11731     }
11732
11733     my $is_closing_block = $rLL->[$K_first]->[_TYPE_] eq '}'
11734       && $rLL->[$K_first]->[_BLOCK_TYPE_];
11735
11736     if ( $guessed_indentation_level ne $structural_indentation_level ) {
11737         $self->[_last_tabbing_disagreement_] = $line_number;
11738
11739         if ($is_closing_block) {
11740
11741             if ( !$self->[_in_brace_tabbing_disagreement_] ) {
11742                 $self->[_in_brace_tabbing_disagreement_] = $line_number;
11743             }
11744             if ( !$self->[_first_brace_tabbing_disagreement_] ) {
11745                 $self->[_first_brace_tabbing_disagreement_] = $line_number;
11746             }
11747
11748         }
11749
11750         if ( !$self->[_in_tabbing_disagreement_] ) {
11751             $self->[_tabbing_disagreement_count_]++;
11752
11753             if ( $self->[_tabbing_disagreement_count_] <= MAX_NAG_MESSAGES ) {
11754                 write_logfile_entry(
11755 "Start indentation disagreement: input=$guessed_indentation_level; output=$structural_indentation_level\n"
11756                 );
11757             }
11758             $self->[_in_tabbing_disagreement_]    = $line_number;
11759             $self->[_first_tabbing_disagreement_] = $line_number
11760               unless ( $self->[_first_tabbing_disagreement_] );
11761         }
11762     }
11763     else {
11764
11765         $self->[_in_brace_tabbing_disagreement_] = 0 if ($is_closing_block);
11766
11767         my $in_tabbing_disagreement = $self->[_in_tabbing_disagreement_];
11768         if ($in_tabbing_disagreement) {
11769
11770             if ( $self->[_tabbing_disagreement_count_] <= MAX_NAG_MESSAGES ) {
11771                 write_logfile_entry(
11772 "End indentation disagreement from input line $in_tabbing_disagreement\n"
11773                 );
11774
11775                 if ( $self->[_tabbing_disagreement_count_] == MAX_NAG_MESSAGES )
11776                 {
11777                     write_logfile_entry(
11778                         "No further tabbing disagreements will be noted\n");
11779                 }
11780             }
11781             $self->[_in_tabbing_disagreement_] = 0;
11782
11783         }
11784     }
11785     return;
11786 }
11787
11788 ###################################################
11789 # CODE SECTION 8: Utilities for setting breakpoints
11790 ###################################################
11791
11792 {    ## begin closure set_forced_breakpoint
11793
11794     my $forced_breakpoint_count;
11795     my $forced_breakpoint_undo_count;
11796     my @forced_breakpoint_undo_stack;
11797     my $index_max_forced_break;
11798
11799     # Break before or after certain tokens based on user settings
11800     my %break_before_or_after_token;
11801
11802     BEGIN {
11803
11804         # Updated to use all operators. This fixes case b1054
11805         # Here is the previous simplified version:
11806         ## my @q = qw( . : ? and or xor && || );
11807         my @q = @all_operators;
11808
11809         push @q, ',';
11810         @break_before_or_after_token{@q} = (1) x scalar(@q);
11811     }
11812
11813     sub initialize_forced_breakpoint_vars {
11814         $forced_breakpoint_count      = 0;
11815         $index_max_forced_break       = UNDEFINED_INDEX;
11816         $forced_breakpoint_undo_count = 0;
11817         @forced_breakpoint_undo_stack = ();
11818         return;
11819     }
11820
11821     sub get_forced_breakpoint_count {
11822         return $forced_breakpoint_count;
11823     }
11824
11825     sub get_forced_breakpoint_undo_count {
11826         return $forced_breakpoint_undo_count;
11827     }
11828
11829     sub get_index_max_forced_break {
11830         return $index_max_forced_break;
11831     }
11832
11833     sub set_fake_breakpoint {
11834
11835         # Just bump up the breakpoint count as a signal that there are breaks.
11836         # This is useful if we have breaks but may want to postpone deciding
11837         # where to make them.
11838         $forced_breakpoint_count++;
11839         return;
11840     }
11841
11842     use constant DEBUG_FORCE => 0;
11843
11844     sub set_forced_breakpoint {
11845         my ( $self, $i ) = @_;
11846
11847         return unless defined $i && $i >= 0;
11848
11849         # Back up at a blank in case we need an = break.
11850         # This is a backup fix for cases like b932.
11851         if ( $i > 0 && $types_to_go[$i] eq 'b' ) { $i-- }
11852
11853         # no breaks between welded tokens
11854         return if ( $total_weld_count && $self->is_welded_right_at_i($i) );
11855
11856         my $token = $tokens_to_go[$i];
11857         my $type  = $types_to_go[$i];
11858
11859         # For certain tokens, use user settings to decide if we break before or
11860         # after it
11861         if ( $break_before_or_after_token{$token}
11862             && ( $type eq $token || $type eq 'k' ) )
11863         {
11864             if ( $want_break_before{$token} && $i >= 0 ) { $i-- }
11865         }
11866
11867         # breaks are forced before 'if' and 'unless'
11868         elsif ( $is_if_unless{$token} && $type eq 'k' ) { $i-- }
11869
11870         if ( $i >= 0 && $i <= $max_index_to_go ) {
11871             my $i_nonblank = ( $types_to_go[$i] ne 'b' ) ? $i : $i - 1;
11872
11873             DEBUG_FORCE && do {
11874                 my ( $a, $b, $c ) = caller();
11875                 print STDOUT
11876 "FORCE $forced_breakpoint_count from $a $c with i=$i_nonblank max=$max_index_to_go tok=$tokens_to_go[$i_nonblank] type=$types_to_go[$i_nonblank] nobr=$nobreak_to_go[$i_nonblank]\n";
11877             };
11878
11879             ######################################################################
11880             # NOTE: if we call set_closing_breakpoint below it will then call
11881             # this routing back. So there is the possibility of an infinite
11882             # loop if a programming error is made. As a precaution, I have
11883             # added a check on the forced_breakpoint flag, so that we won't
11884             # keep trying to set it.  That will give additional protection
11885             # against a loop.
11886             ######################################################################
11887
11888             if (   $i_nonblank >= 0
11889                 && $nobreak_to_go[$i_nonblank] == 0
11890                 && !$forced_breakpoint_to_go[$i_nonblank] )
11891             {
11892                 $forced_breakpoint_to_go[$i_nonblank] = 1;
11893
11894                 if ( $i_nonblank > $index_max_forced_break ) {
11895                     $index_max_forced_break = $i_nonblank;
11896                 }
11897                 $forced_breakpoint_count++;
11898                 $forced_breakpoint_undo_stack[ $forced_breakpoint_undo_count++ ]
11899                   = $i_nonblank;
11900
11901                 # if we break at an opening container..break at the closing
11902                 if ( $is_opening_sequence_token{ $tokens_to_go[$i_nonblank] } )
11903                 {
11904                     $self->set_closing_breakpoint($i_nonblank);
11905                 }
11906             }
11907         }
11908         return;
11909     }
11910
11911     sub clear_breakpoint_undo_stack {
11912         my ($self) = @_;
11913         $forced_breakpoint_undo_count = 0;
11914         return;
11915     }
11916
11917     use constant DEBUG_UNDOBP => 0;
11918
11919     sub undo_forced_breakpoint_stack {
11920
11921         my ( $self, $i_start ) = @_;
11922
11923         # Given $i_start, a non-negative index the 'undo stack' of breakpoints,
11924         # remove all breakpoints from the top of the 'undo stack' down to and
11925         # including index $i_start.
11926
11927         # The 'undo stack' is a stack of all breakpoints made for a batch of
11928         # code.
11929
11930         if ( $i_start < 0 ) {
11931             $i_start = 0;
11932             my ( $a, $b, $c ) = caller();
11933
11934             # Bad call, can only be due to a recent programming change.
11935             # Better stop here.
11936             Fault(
11937 "Program Bug: undo_forced_breakpoint_stack from $a $c has bad i=$i_start "
11938             );
11939         }
11940
11941         while ( $forced_breakpoint_undo_count > $i_start ) {
11942             my $i =
11943               $forced_breakpoint_undo_stack[ --$forced_breakpoint_undo_count ];
11944             if ( $i >= 0 && $i <= $max_index_to_go ) {
11945                 $forced_breakpoint_to_go[$i] = 0;
11946                 $forced_breakpoint_count--;
11947
11948                 DEBUG_UNDOBP && do {
11949                     my ( $a, $b, $c ) = caller();
11950                     print STDOUT
11951 "UNDOBP: undo forced_breakpoint i=$i $forced_breakpoint_undo_count from $a $c max=$max_index_to_go\n";
11952                 };
11953             }
11954
11955             # shouldn't happen, but not a critical error
11956             else {
11957                 DEBUG_UNDOBP && do {
11958                     my ( $a, $b, $c ) = caller();
11959                     print STDOUT
11960 "Program Bug: undo_forced_breakpoint from $a $c has i=$i but max=$max_index_to_go";
11961                 };
11962             }
11963         }
11964         return;
11965     }
11966 } ## end closure set_forced_breakpoint
11967
11968 {    ## begin closure set_closing_breakpoint
11969
11970     my %postponed_breakpoint;
11971
11972     sub initialize_postponed_breakpoint {
11973         %postponed_breakpoint = ();
11974         return;
11975     }
11976
11977     sub has_postponed_breakpoint {
11978         my ($seqno) = @_;
11979         return $postponed_breakpoint{$seqno};
11980     }
11981
11982     sub set_closing_breakpoint {
11983
11984         # set a breakpoint at a matching closing token
11985         my ( $self, $i_break ) = @_;
11986
11987         if ( $mate_index_to_go[$i_break] >= 0 ) {
11988
11989             # CAUTION: infinite recursion possible here:
11990             #   set_closing_breakpoint calls set_forced_breakpoint, and
11991             #   set_forced_breakpoint call set_closing_breakpoint
11992             #   ( test files attrib.t, BasicLyx.pm.html).
11993             # Don't reduce the '2' in the statement below
11994             if ( $mate_index_to_go[$i_break] > $i_break + 2 ) {
11995
11996              # break before } ] and ), but sub set_forced_breakpoint will decide
11997              # to break before or after a ? and :
11998                 my $inc = ( $tokens_to_go[$i_break] eq '?' ) ? 0 : 1;
11999                 $self->set_forced_breakpoint(
12000                     $mate_index_to_go[$i_break] - $inc );
12001             }
12002         }
12003         else {
12004             my $type_sequence = $type_sequence_to_go[$i_break];
12005             if ($type_sequence) {
12006                 my $closing_token = $matching_token{ $tokens_to_go[$i_break] };
12007                 $postponed_breakpoint{$type_sequence} = 1;
12008             }
12009         }
12010         return;
12011     }
12012 } ## end closure set_closing_breakpoint
12013
12014 #########################################
12015 # CODE SECTION 9: Process batches of code
12016 #########################################
12017
12018 {    ## begin closure grind_batch_of_CODE
12019
12020     # The routines in this closure begin the processing of a 'batch' of code.
12021
12022     # A variable to keep track of consecutive nonblank lines so that we can
12023     # insert occasional blanks
12024     my @nonblank_lines_at_depth;
12025
12026     # A variable to remember maximum size of previous batches; this is needed
12027     # by the logical padding routine
12028     my $peak_batch_size;
12029     my $batch_count;
12030
12031     sub initialize_grind_batch_of_CODE {
12032         @nonblank_lines_at_depth = ();
12033         $peak_batch_size         = 0;
12034         $batch_count             = 0;
12035         return;
12036     }
12037
12038     # sub grind_batch_of_CODE receives sections of code which are the longest
12039     # possible lines without a break.  In other words, it receives what is left
12040     # after applying all breaks forced by blank lines, block comments, side
12041     # comments, pod text, and structural braces.  Its job is to break this code
12042     # down into smaller pieces, if necessary, which fit within the maximum
12043     # allowed line length.  Then it sends the resulting lines of code on down
12044     # the pipeline to the VerticalAligner package, breaking the code into
12045     # continuation lines as necessary.  The batch of tokens are in the "to_go"
12046     # arrays.  The name 'grind' is slightly suggestive of a machine continually
12047     # breaking down long lines of code, but mainly it is unique and easy to
12048     # remember and find with an editor search.
12049
12050     # The two routines 'process_line_of_CODE' and 'grind_batch_of_CODE' work
12051     # together in the following way:
12052
12053     # - 'process_line_of_CODE' receives the original INPUT lines one-by-one and
12054     # combines them into the largest sequences of tokens which might form a new
12055     # line.
12056     # - 'grind_batch_of_CODE' determines which tokens will form the OUTPUT
12057     # lines.
12058
12059     # So sub 'process_line_of_CODE' builds up the longest possible continouus
12060     # sequences of tokens, regardless of line length, and then
12061     # grind_batch_of_CODE breaks these sequences back down into the new output
12062     # lines.
12063
12064     # Sub 'grind_batch_of_CODE' ships its output lines to the vertical aligner.
12065
12066     use constant DEBUG_GRIND => 0;
12067
12068     sub grind_batch_of_CODE {
12069
12070         my ($self) = @_;
12071         my $file_writer_object = $self->[_file_writer_object_];
12072
12073         my $this_batch = $self->[_this_batch_];
12074         $batch_count++;
12075
12076         my $starting_in_quote        = $this_batch->[_starting_in_quote_];
12077         my $ending_in_quote          = $this_batch->[_ending_in_quote_];
12078         my $is_static_block_comment  = $this_batch->[_is_static_block_comment_];
12079         my $rK_to_go                 = $this_batch->[_rK_to_go_];
12080         my $ris_seqno_controlling_ci = $self->[_ris_seqno_controlling_ci_];
12081
12082         my $rLL = $self->[_rLL_];
12083
12084         # This routine is only called from sub flush_batch_of_code, so that
12085         # routine is a better spot for debugging.
12086         DEBUG_GRIND && do {
12087             my $token = my $type = "";
12088             if ( $max_index_to_go >= 0 ) {
12089                 $token = $tokens_to_go[$max_index_to_go];
12090                 $type  = $types_to_go[$max_index_to_go];
12091             }
12092             my $output_str = join "", @tokens_to_go[ 0 .. $max_index_to_go ];
12093             print STDERR <<EOM;
12094 grind got batch number $batch_count with $max_index_to_go tokens, last type '$type' tok='$token', text:
12095 $output_str
12096 EOM
12097         };
12098
12099         # Safety check - shouldn't happen. The calling routine must not call
12100         # here unless there are tokens in the batch to be processed.  This
12101         # fault can only be triggered by a recent programming change.
12102         if ( $max_index_to_go < 0 ) {
12103             Fault(
12104 "sub grind incorrectly called with max_index_to_go=$max_index_to_go"
12105             );
12106         }
12107
12108         # Initialize some batch variables
12109         my $comma_count_in_batch = 0;
12110         my $ilast_nonblank       = -1;
12111         my @colon_list;
12112         my @ix_seqno_controlling_ci;
12113         for ( my $i = 0 ; $i <= $max_index_to_go ; $i++ ) {
12114             $bond_strength_to_go[$i] = 0;
12115             $iprev_to_go[$i]         = $ilast_nonblank;
12116             $inext_to_go[$i]         = $i + 1;
12117
12118             my $type = $types_to_go[$i];
12119             if ( $type ne 'b' ) {
12120                 if ( $ilast_nonblank >= 0 ) {
12121                     $inext_to_go[$ilast_nonblank] = $i;
12122
12123                     # just in case there are two blanks in a row (shouldn't
12124                     # happen)
12125                     if ( ++$ilast_nonblank < $i ) {
12126                         $inext_to_go[$ilast_nonblank] = $i;
12127                     }
12128                 }
12129                 $ilast_nonblank = $i;
12130
12131                 # This is a good spot to efficiently collect information needed
12132                 # for breaking lines...
12133
12134                 if ( $type eq ',' ) { $comma_count_in_batch++; }
12135
12136                 # gather info needed by sub set_continuation_breaks
12137                 my $seqno = $type_sequence_to_go[$i];
12138                 if ($seqno) {
12139
12140                     # remember indexes of any tokens controlling xci
12141                     # in this batch. This list is needed by sub undo_ci.
12142                     if ( $ris_seqno_controlling_ci->{$seqno} ) {
12143                         push @ix_seqno_controlling_ci, $i;
12144                     }
12145
12146                     if ( $type eq '?' ) {
12147                         push @colon_list, $type;
12148                     }
12149                     elsif ( $type eq ':' ) {
12150                         push @colon_list, $type;
12151                     }
12152                 }
12153             }
12154         }
12155
12156         my $comma_arrow_count_contained =
12157           $self->match_opening_and_closing_tokens();
12158
12159         # tell the -lp option we are outputting a batch so it can close
12160         # any unfinished items in its stack
12161         finish_lp_batch();
12162
12163         # If this line ends in a code block brace, set breaks at any
12164         # previous closing code block braces to breakup a chain of code
12165         # blocks on one line.  This is very rare but can happen for
12166         # user-defined subs.  For example we might be looking at this:
12167         #  BOOL { $server_data{uptime} > 0; } NUM { $server_data{load}; } STR {
12168         my $saw_good_break = 0;    # flag to force breaks even if short line
12169         if (
12170
12171             # looking for opening or closing block brace
12172             $block_type_to_go[$max_index_to_go]
12173
12174             # never any good breaks if just one token
12175             && $max_index_to_go > 0
12176
12177             # but not one of these which are never duplicated on a line:
12178             # until|while|for|if|elsif|else
12179             && !$is_block_without_semicolon{ $block_type_to_go[$max_index_to_go]
12180             }
12181           )
12182         {
12183             my $lev = $nesting_depth_to_go[$max_index_to_go];
12184
12185             # Walk backwards from the end and
12186             # set break at any closing block braces at the same level.
12187             # But quit if we are not in a chain of blocks.
12188             for ( my $i = $max_index_to_go - 1 ; $i >= 0 ; $i-- ) {
12189                 last if ( $levels_to_go[$i] < $lev );   # stop at a lower level
12190                 next if ( $levels_to_go[$i] > $lev );   # skip past higher level
12191
12192                 if ( $block_type_to_go[$i] ) {
12193                     if ( $tokens_to_go[$i] eq '}' ) {
12194                         $self->set_forced_breakpoint($i);
12195                         $saw_good_break = 1;
12196                     }
12197                 }
12198
12199                 # quit if we see anything besides words, function, blanks
12200                 # at this level
12201                 elsif ( $types_to_go[$i] !~ /^[\(\)Gwib]$/ ) { last }
12202             }
12203         }
12204
12205         my $imin = 0;
12206         my $imax = $max_index_to_go;
12207
12208         # trim any blank tokens
12209         if ( $types_to_go[$imin] eq 'b' ) { $imin++ }
12210         if ( $types_to_go[$imax] eq 'b' ) { $imax-- }
12211
12212         # anything left to write?
12213         if ( $imin <= $imax ) {
12214
12215             my $last_line_leading_type  = $self->[_last_line_leading_type_];
12216             my $last_line_leading_level = $self->[_last_line_leading_level_];
12217             my $last_last_line_leading_level =
12218               $self->[_last_last_line_leading_level_];
12219
12220             # add a blank line before certain key types but not after a comment
12221             if ( $last_line_leading_type ne '#' ) {
12222                 my $want_blank    = 0;
12223                 my $leading_token = $tokens_to_go[$imin];
12224                 my $leading_type  = $types_to_go[$imin];
12225
12226                 # blank lines before subs except declarations and one-liners
12227                 if ( $leading_type eq 'i' ) {
12228                     if ( $leading_token =~ /$SUB_PATTERN/ ) {
12229                         $want_blank = $rOpts->{'blank-lines-before-subs'}
12230                           if ( terminal_type_i( $imin, $imax ) !~ /^[\;\}]$/ );
12231                     }
12232
12233                     # break before all package declarations
12234                     elsif ( substr( $leading_token, 0, 8 ) eq 'package ' ) {
12235                         $want_blank = $rOpts->{'blank-lines-before-packages'};
12236                     }
12237                 }
12238
12239                 # break before certain key blocks except one-liners
12240                 if ( $leading_type eq 'k' ) {
12241                     if ( $leading_token eq 'BEGIN' || $leading_token eq 'END' )
12242                     {
12243                         $want_blank = $rOpts->{'blank-lines-before-subs'}
12244                           if ( terminal_type_i( $imin, $imax ) ne '}' );
12245                     }
12246
12247                     # Break before certain block types if we haven't had a
12248                     # break at this level for a while.  This is the
12249                     # difficult decision..
12250                     elsif ($last_line_leading_type ne 'b'
12251                         && $leading_token =~
12252                         /^(unless|if|while|until|for|foreach)$/ )
12253                     {
12254                         my $lc =
12255                           $nonblank_lines_at_depth[$last_line_leading_level];
12256                         if ( !defined($lc) ) { $lc = 0 }
12257
12258                        # patch for RT #128216: no blank line inserted at a level
12259                        # change
12260                         if ( $levels_to_go[$imin] != $last_line_leading_level )
12261                         {
12262                             $lc = 0;
12263                         }
12264
12265                         $want_blank =
12266                              $rOpts->{'blanks-before-blocks'}
12267                           && $lc >= $rOpts->{'long-block-line-count'}
12268                           && $self->consecutive_nonblank_lines() >=
12269                           $rOpts->{'long-block-line-count'}
12270                           && terminal_type_i( $imin, $imax ) ne '}';
12271                     }
12272                 }
12273
12274                 # Check for blank lines wanted before a closing brace
12275                 if ( $leading_token eq '}' ) {
12276                     if (   $rOpts->{'blank-lines-before-closing-block'}
12277                         && $block_type_to_go[$imin]
12278                         && $block_type_to_go[$imin] =~
12279                         /$blank_lines_before_closing_block_pattern/ )
12280                     {
12281                         my $nblanks =
12282                           $rOpts->{'blank-lines-before-closing-block'};
12283                         if ( $nblanks > $want_blank ) {
12284                             $want_blank = $nblanks;
12285                         }
12286                     }
12287                 }
12288
12289                 if ($want_blank) {
12290
12291                    # future: send blank line down normal path to VerticalAligner
12292                     $self->flush_vertical_aligner();
12293                     $file_writer_object->require_blank_code_lines($want_blank);
12294                 }
12295             }
12296
12297             # update blank line variables and count number of consecutive
12298             # non-blank, non-comment lines at this level
12299             $last_last_line_leading_level = $last_line_leading_level;
12300             $last_line_leading_level      = $levels_to_go[$imin];
12301             if ( $last_line_leading_level < 0 ) { $last_line_leading_level = 0 }
12302             $last_line_leading_type = $types_to_go[$imin];
12303             if (   $last_line_leading_level == $last_last_line_leading_level
12304                 && $last_line_leading_type ne 'b'
12305                 && $last_line_leading_type ne '#'
12306                 && defined( $nonblank_lines_at_depth[$last_line_leading_level] )
12307               )
12308             {
12309                 $nonblank_lines_at_depth[$last_line_leading_level]++;
12310             }
12311             else {
12312                 $nonblank_lines_at_depth[$last_line_leading_level] = 1;
12313             }
12314
12315             $self->[_last_line_leading_type_]  = $last_line_leading_type;
12316             $self->[_last_line_leading_level_] = $last_line_leading_level;
12317             $self->[_last_last_line_leading_level_] =
12318               $last_last_line_leading_level;
12319
12320             # Flag to remember if we called sub 'pad_array_to_go'.
12321             # Some routines (scan_list(), set_continuation_breaks() ) need some
12322             # extra tokens added at the end of the batch.  Most batches do not
12323             # use these routines, so we will avoid calling 'pad_array_to_go'
12324             # unless it is needed.
12325             my $called_pad_array_to_go;
12326
12327             # set all forced breakpoints for good list formatting
12328             my $is_long_line = $max_index_to_go > 0
12329               && $self->excess_line_length( $imin, $max_index_to_go ) > 0;
12330
12331             my $old_line_count_in_batch =
12332               $max_index_to_go == 0
12333               ? 1
12334               : $self->get_old_line_count( $K_to_go[0],
12335                 $K_to_go[$max_index_to_go] );
12336
12337             if (
12338                    $is_long_line
12339                 || $old_line_count_in_batch > 1
12340
12341                 # must always call scan_list() with unbalanced batches because
12342                 # it is maintaining some stacks
12343                 || is_unbalanced_batch()
12344
12345                 # call scan_list if we might want to break at commas
12346                 || (
12347                     $comma_count_in_batch
12348                     && (   $rOpts_maximum_fields_per_table > 0
12349                         && $rOpts_maximum_fields_per_table <=
12350                         $comma_count_in_batch
12351                         || $rOpts_comma_arrow_breakpoints == 0 )
12352                 )
12353
12354                 # call scan_list if user may want to break open some one-line
12355                 # hash references
12356                 || (   $comma_arrow_count_contained
12357                     && $rOpts_comma_arrow_breakpoints != 3 )
12358               )
12359             {
12360                 # add a couple of extra terminal blank tokens
12361                 $self->pad_array_to_go();
12362                 $called_pad_array_to_go = 1;
12363
12364                 ## This caused problems in one version of perl for unknown reasons:
12365                 ## $saw_good_break ||= scan_list();
12366                 my $sgb = $self->scan_list($is_long_line);
12367                 $saw_good_break ||= $sgb;
12368             }
12369
12370             # let $ri_first and $ri_last be references to lists of
12371             # first and last tokens of line fragments to output..
12372             my ( $ri_first, $ri_last );
12373
12374             # write a single line if..
12375             if (
12376
12377                 # we aren't allowed to add any newlines
12378                 !$rOpts_add_newlines
12379
12380                 # or,
12381                 || (
12382
12383                     # this line is 'short'
12384                     !$is_long_line
12385
12386                     # and we didn't see a good breakpoint
12387                     && !$saw_good_break
12388
12389                     # and we don't already have an interior breakpoint
12390                     && !get_forced_breakpoint_count()
12391                 )
12392               )
12393             {
12394                 @{$ri_first} = ($imin);
12395                 @{$ri_last}  = ($imax);
12396             }
12397
12398             # otherwise use multiple lines
12399             else {
12400
12401                 # add a couple of extra terminal blank tokens if we haven't
12402                 # already done so
12403                 $self->pad_array_to_go() unless ($called_pad_array_to_go);
12404
12405                 ( $ri_first, $ri_last ) =
12406                   $self->set_continuation_breaks( $saw_good_break,
12407                     \@colon_list );
12408
12409                 $self->break_all_chain_tokens( $ri_first, $ri_last );
12410
12411                 $self->break_equals( $ri_first, $ri_last );
12412
12413                 # now we do a correction step to clean this up a bit
12414                 # (The only time we would not do this is for debugging)
12415                 if ($rOpts_recombine) {
12416                     ( $ri_first, $ri_last ) =
12417                       $self->recombine_breakpoints( $ri_first, $ri_last );
12418                 }
12419
12420                 $self->insert_final_ternary_breaks( $ri_first, $ri_last )
12421                   if (@colon_list);
12422             }
12423
12424             $self->insert_breaks_before_list_opening_containers( $ri_first,
12425                 $ri_last )
12426               if ( %break_before_container_types && $max_index_to_go > 0 );
12427
12428             # do corrector step if -lp option is used
12429             my $do_not_pad = 0;
12430             if ($rOpts_line_up_parentheses) {
12431                 $do_not_pad =
12432                   $self->correct_lp_indentation( $ri_first, $ri_last );
12433             }
12434
12435             # unmask any invisible line-ending semicolon.  They were placed by
12436             # sub respace_tokens but we only now know if we actually need them.
12437             if ( !$tokens_to_go[$imax] && $types_to_go[$imax] eq ';' ) {
12438                 my $i       = $imax;
12439                 my $tok     = ';';
12440                 my $tok_len = 1;
12441                 if ( $want_left_space{';'} != WS_NO ) {
12442                     $tok     = ' ;';
12443                     $tok_len = 2;
12444                 }
12445                 $tokens_to_go[$i]        = $tok;
12446                 $token_lengths_to_go[$i] = $tok_len;
12447                 my $KK = $K_to_go[$i];
12448                 $rLL->[$KK]->[_TOKEN_]        = $tok;
12449                 $rLL->[$KK]->[_TOKEN_LENGTH_] = $tok_len;
12450                 my $line_number = 1 + $self->get_old_line_index($KK);
12451                 $self->note_added_semicolon($line_number);
12452             }
12453
12454             if ( $rOpts_one_line_block_semicolons == 0 ) {
12455                 $self->delete_one_line_semicolons( $ri_first, $ri_last );
12456             }
12457
12458             # The line breaks for this batch of code have been finalized. Now we
12459             # can to package the results for further processing.  We will switch
12460             # from the local '_to_go' buffer arrays (i-index) back to the global
12461             # token arrays (K-index) at this point.
12462             my $rlines_K;
12463             my $index_error;
12464             for ( my $n = 0 ; $n < @{$ri_first} ; $n++ ) {
12465                 my $ibeg = $ri_first->[$n];
12466                 my $Kbeg = $K_to_go[$ibeg];
12467                 my $iend = $ri_last->[$n];
12468                 my $Kend = $K_to_go[$iend];
12469                 if ( $iend - $ibeg != $Kend - $Kbeg ) {
12470                     $index_error = $n unless defined($index_error);
12471                 }
12472                 push @{$rlines_K},
12473                   [ $Kbeg, $Kend, $forced_breakpoint_to_go[$iend] ];
12474             }
12475
12476             # Check correctness of the mapping between the i and K token
12477             # indexes.  (The K index is the global index, the i index is the
12478             # batch index).  It is important to do this check because an error
12479             # would be disastrous.  The reason that we should never see an
12480             # index error here is that sub 'store_token_to_go' has a check to
12481             # make sure that the indexes in batches remain continuous.  Since
12482             # sub 'store_token_to_go' controls feeding tokens into batches,
12483             # no index discrepancies should occur unless a recent programming
12484             # change has introduced a bug.
12485             if ( defined($index_error) ) {
12486
12487                 # Temporary debug code - should never get here
12488                 for ( my $n = 0 ; $n < @{$ri_first} ; $n++ ) {
12489                     my $ibeg  = $ri_first->[$n];
12490                     my $Kbeg  = $K_to_go[$ibeg];
12491                     my $iend  = $ri_last->[$n];
12492                     my $Kend  = $K_to_go[$iend];
12493                     my $idiff = $iend - $ibeg;
12494                     my $Kdiff = $Kend - $Kbeg;
12495                     print STDERR <<EOM;
12496 line $n, irange $ibeg-$iend = $idiff, Krange $Kbeg-$Kend = $Kdiff;
12497 EOM
12498                 }
12499                 Fault(
12500                     "Index error at line $index_error; i and K ranges differ");
12501             }
12502
12503             $this_batch->[_rlines_K_]        = $rlines_K;
12504             $this_batch->[_ibeg0_]           = $ri_first->[0];
12505             $this_batch->[_peak_batch_size_] = $peak_batch_size;
12506             $this_batch->[_do_not_pad_]      = $do_not_pad;
12507             $this_batch->[_batch_count_]     = $batch_count;
12508             $this_batch->[_rix_seqno_controlling_ci_] =
12509               \@ix_seqno_controlling_ci;
12510
12511             $self->send_lines_to_vertical_aligner();
12512
12513             # Insert any requested blank lines after an opening brace.  We have
12514             # to skip back before any side comment to find the terminal token
12515             my $iterm;
12516             for ( $iterm = $imax ; $iterm >= $imin ; $iterm-- ) {
12517                 next if $types_to_go[$iterm] eq '#';
12518                 next if $types_to_go[$iterm] eq 'b';
12519                 last;
12520             }
12521
12522             # write requested number of blank lines after an opening block brace
12523             if ( $iterm >= $imin && $types_to_go[$iterm] eq '{' ) {
12524                 if (   $rOpts->{'blank-lines-after-opening-block'}
12525                     && $block_type_to_go[$iterm]
12526                     && $block_type_to_go[$iterm] =~
12527                     /$blank_lines_after_opening_block_pattern/ )
12528                 {
12529                     my $nblanks = $rOpts->{'blank-lines-after-opening-block'};
12530                     $self->flush_vertical_aligner();
12531                     $file_writer_object->require_blank_code_lines($nblanks);
12532                 }
12533             }
12534         }
12535
12536         # Remember the largest batch size processed. This is needed by the
12537         # logical padding routine to avoid padding the first nonblank token
12538         if ( $max_index_to_go && $max_index_to_go > $peak_batch_size ) {
12539             $peak_batch_size = $max_index_to_go;
12540         }
12541
12542         return;
12543     }
12544 } ## end closure grind_batch_of_CODE
12545
12546 {    ## begin closure match_opening_and_closing_tokens
12547
12548     # closure to keep track of unbalanced containers.
12549     # arrays shared by the routines in this block:
12550     my %saved_opening_indentation;
12551     my @unmatched_opening_indexes_in_this_batch;
12552     my @unmatched_closing_indexes_in_this_batch;
12553     my %comma_arrow_count;
12554
12555     sub initialize_saved_opening_indentation {
12556         %saved_opening_indentation = ();
12557         return;
12558     }
12559
12560     sub is_unbalanced_batch {
12561         return @unmatched_opening_indexes_in_this_batch +
12562           @unmatched_closing_indexes_in_this_batch;
12563     }
12564
12565     sub match_opening_and_closing_tokens {
12566
12567         # Match up indexes of opening and closing braces, etc, in this batch.
12568         # This has to be done after all tokens are stored because unstoring
12569         # of tokens would otherwise cause trouble.
12570
12571         my ($self)               = @_;
12572         my $rwant_container_open = $self->[_rwant_container_open_];
12573         my $rparent_of_seqno     = $self->[_rparent_of_seqno_];
12574
12575         @unmatched_opening_indexes_in_this_batch = ();
12576         @unmatched_closing_indexes_in_this_batch = ();
12577         %comma_arrow_count                       = ();
12578         my $comma_arrow_count_contained = 0;
12579         my $parent_seqno = $self->parent_seqno_by_K( $K_to_go[0] );
12580
12581         foreach my $i ( 0 .. $max_index_to_go ) {
12582             $parent_seqno_to_go[$i] = $parent_seqno;
12583
12584             my $seqno = $type_sequence_to_go[$i];
12585             if ($seqno) {
12586                 my $token = $tokens_to_go[$i];
12587                 if ( $is_opening_sequence_token{$token} ) {
12588                     if ( $is_opening_token{$token} ) {
12589                         $parent_seqno = $seqno;
12590                     }
12591
12592                     if ( $rwant_container_open->{$seqno} ) {
12593                         $self->set_forced_breakpoint($i);
12594                     }
12595
12596                     push @unmatched_opening_indexes_in_this_batch, $i;
12597                 }
12598                 elsif ( $is_closing_sequence_token{$token} ) {
12599
12600                     if ( $is_closing_token{$token} ) {
12601                         $parent_seqno = $rparent_of_seqno->{$seqno};
12602                         $parent_seqno = SEQ_ROOT unless defined($parent_seqno);
12603                         $parent_seqno_to_go[$i] = $parent_seqno;
12604                     }
12605
12606                     if ( $rwant_container_open->{$seqno} ) {
12607                         $self->set_forced_breakpoint( $i - 1 );
12608                     }
12609
12610                     my $i_mate = pop @unmatched_opening_indexes_in_this_batch;
12611                     if ( defined($i_mate) && $i_mate >= 0 ) {
12612                         if ( $type_sequence_to_go[$i_mate] ==
12613                             $type_sequence_to_go[$i] )
12614                         {
12615                             $mate_index_to_go[$i]      = $i_mate;
12616                             $mate_index_to_go[$i_mate] = $i;
12617                             my $seqno = $type_sequence_to_go[$i];
12618                             if ( $comma_arrow_count{$seqno} ) {
12619                                 $comma_arrow_count_contained +=
12620                                   $comma_arrow_count{$seqno};
12621                             }
12622                         }
12623                         else {
12624                             push @unmatched_opening_indexes_in_this_batch,
12625                               $i_mate;
12626                             push @unmatched_closing_indexes_in_this_batch, $i;
12627                         }
12628                     }
12629                     else {
12630                         push @unmatched_closing_indexes_in_this_batch, $i;
12631                     }
12632                 }
12633             }
12634             elsif ( $tokens_to_go[$i] eq '=>' ) {
12635                 if (@unmatched_opening_indexes_in_this_batch) {
12636                     my $j     = $unmatched_opening_indexes_in_this_batch[-1];
12637                     my $seqno = $type_sequence_to_go[$j];
12638                     $comma_arrow_count{$seqno}++;
12639                 }
12640             }
12641         }
12642
12643         return $comma_arrow_count_contained;
12644     }
12645
12646     sub save_opening_indentation {
12647
12648         # This should be called after each batch of tokens is output. It
12649         # saves indentations of lines of all unmatched opening tokens.
12650         # These will be used by sub get_opening_indentation.
12651
12652         my ( $self, $ri_first, $ri_last, $rindentation_list ) = @_;
12653
12654         # QW INDENTATION PATCH 1:
12655         # Also save indentation for multiline qw quotes
12656         my @i_qw;
12657         my $seqno_qw_opening;
12658         if ( $types_to_go[$max_index_to_go] eq 'q' ) {
12659             my $KK = $K_to_go[$max_index_to_go];
12660             $seqno_qw_opening =
12661               $self->[_rstarting_multiline_qw_seqno_by_K_]->{$KK};
12662             if ($seqno_qw_opening) {
12663                 push @i_qw, $max_index_to_go;
12664             }
12665         }
12666
12667         # we need to save indentations of any unmatched opening tokens
12668         # in this batch because we may need them in a subsequent batch.
12669         foreach ( @unmatched_opening_indexes_in_this_batch, @i_qw ) {
12670
12671             my $seqno = $type_sequence_to_go[$_];
12672
12673             if ( !$seqno ) {
12674                 if ( $seqno_qw_opening && $_ == $max_index_to_go ) {
12675                     $seqno = $seqno_qw_opening;
12676                 }
12677                 else {
12678
12679                     # shouldn't happen
12680                     $seqno = 'UNKNOWN';
12681                 }
12682             }
12683
12684             $saved_opening_indentation{$seqno} = [
12685                 lookup_opening_indentation(
12686                     $_, $ri_first, $ri_last, $rindentation_list
12687                 )
12688             ];
12689         }
12690         return;
12691     }
12692
12693     sub get_saved_opening_indentation {
12694         my ($seqno) = @_;
12695         my ( $indent, $offset, $is_leading, $exists ) = ( 0, 0, 0, 0 );
12696
12697         if ($seqno) {
12698             if ( $saved_opening_indentation{$seqno} ) {
12699                 ( $indent, $offset, $is_leading ) =
12700                   @{ $saved_opening_indentation{$seqno} };
12701                 $exists = 1;
12702             }
12703         }
12704
12705         # some kind of serious error it doesn't exist
12706         # (example is badfile.t)
12707
12708         return ( $indent, $offset, $is_leading, $exists );
12709     }
12710 } ## end closure match_opening_and_closing_tokens
12711
12712 sub lookup_opening_indentation {
12713
12714     # get the indentation of the line in the current output batch
12715     # which output a selected opening token
12716     #
12717     # given:
12718     #   $i_opening - index of an opening token in the current output batch
12719     #                whose line indentation we need
12720     #   $ri_first - reference to list of the first index $i for each output
12721     #               line in this batch
12722     #   $ri_last - reference to list of the last index $i for each output line
12723     #              in this batch
12724     #   $rindentation_list - reference to a list containing the indentation
12725     #            used for each line.  (NOTE: the first slot in
12726     #            this list is the last returned line number, and this is
12727     #            followed by the list of indentations).
12728     #
12729     # return
12730     #   -the indentation of the line which contained token $i_opening
12731     #   -and its offset (number of columns) from the start of the line
12732
12733     my ( $i_opening, $ri_start, $ri_last, $rindentation_list ) = @_;
12734
12735     if ( !@{$ri_last} ) {
12736
12737         # An error here implies a bug introduced by a recent program change.
12738         # Every batch of code has lines.
12739         Fault("Error in opening_indentation: no lines");
12740         return;
12741     }
12742
12743     my $nline = $rindentation_list->[0];    # line number of previous lookup
12744
12745     # reset line location if necessary
12746     $nline = 0 if ( $i_opening < $ri_start->[$nline] );
12747
12748     # find the correct line
12749     unless ( $i_opening > $ri_last->[-1] ) {
12750         while ( $i_opening > $ri_last->[$nline] ) { $nline++; }
12751     }
12752
12753     # Error - token index is out of bounds - shouldn't happen
12754     # A program bug has been introduced in one of the calling routines.
12755     # We better stop here.
12756     else {
12757         my $i_last_line = $ri_last->[-1];
12758         Fault(<<EOM);
12759 Program bug in call to lookup_opening_indentation - index out of range
12760  called with index i_opening=$i_opening  > $i_last_line = max index of last line
12761 This batch has max index = $max_index_to_go,
12762 EOM
12763         report_definite_bug();    # old coding, will not get here
12764         $nline = $#{$ri_last};
12765     }
12766
12767     $rindentation_list->[0] =
12768       $nline;                     # save line number to start looking next call
12769     my $ibeg       = $ri_start->[$nline];
12770     my $offset     = token_sequence_length( $ibeg, $i_opening ) - 1;
12771     my $is_leading = ( $ibeg == $i_opening );
12772     return ( $rindentation_list->[ $nline + 1 ], $offset, $is_leading );
12773 }
12774
12775 {    ## begin closure terminal_type_i
12776
12777     my %is_sort_map_grep_eval_do;
12778
12779     BEGIN {
12780         my @q = qw(sort map grep eval do);
12781         @is_sort_map_grep_eval_do{@q} = (1) x scalar(@q);
12782     }
12783
12784     sub terminal_type_i {
12785
12786       #    returns type of last token on this line (terminal token), as follows:
12787       #    returns # for a full-line comment
12788       #    returns ' ' for a blank line
12789       #    otherwise returns final token type
12790
12791         my ( $ibeg, $iend ) = @_;
12792
12793         # Start at the end and work backwards
12794         my $i      = $iend;
12795         my $type_i = $types_to_go[$i];
12796
12797         # Check for side comment
12798         if ( $type_i eq '#' ) {
12799             $i--;
12800             if ( $i < $ibeg ) {
12801                 return wantarray ? ( $type_i, $ibeg ) : $type_i;
12802             }
12803             $type_i = $types_to_go[$i];
12804         }
12805
12806         # Skip past a blank
12807         if ( $type_i eq 'b' ) {
12808             $i--;
12809             if ( $i < $ibeg ) {
12810                 return wantarray ? ( $type_i, $ibeg ) : $type_i;
12811             }
12812             $type_i = $types_to_go[$i];
12813         }
12814
12815         # Found it..make sure it is a BLOCK termination,
12816         # but hide a terminal } after sort/grep/map because it is not
12817         # necessarily the end of the line.  (terminal.t)
12818         my $block_type = $block_type_to_go[$i];
12819         if (
12820             $type_i eq '}'
12821             && ( !$block_type
12822                 || ( $is_sort_map_grep_eval_do{$block_type} ) )
12823           )
12824         {
12825             $type_i = 'b';
12826         }
12827         return wantarray ? ( $type_i, $i ) : $type_i;
12828     }
12829
12830 } ## end closure terminal_type_i
12831
12832 sub pad_array_to_go {
12833
12834     # To simplify coding in scan_list and set_bond_strengths, it helps to
12835     # create some extra blank tokens at the end of the arrays.  We also add
12836     # some undef's to help guard against using invalid data.
12837     my ($self) = @_;
12838     $K_to_go[ $max_index_to_go + 1 ]             = undef;
12839     $tokens_to_go[ $max_index_to_go + 1 ]        = '';
12840     $tokens_to_go[ $max_index_to_go + 2 ]        = '';
12841     $tokens_to_go[ $max_index_to_go + 3 ]        = undef;
12842     $types_to_go[ $max_index_to_go + 1 ]         = 'b';
12843     $types_to_go[ $max_index_to_go + 2 ]         = 'b';
12844     $types_to_go[ $max_index_to_go + 3 ]         = undef;
12845     $nesting_depth_to_go[ $max_index_to_go + 2 ] = undef;
12846     $nesting_depth_to_go[ $max_index_to_go + 1 ] =
12847       $nesting_depth_to_go[$max_index_to_go];
12848
12849     #    /^[R\}\)\]]$/
12850     if ( $is_closing_type{ $types_to_go[$max_index_to_go] } ) {
12851         if ( $nesting_depth_to_go[$max_index_to_go] <= 0 ) {
12852
12853             # Nesting depths are equivalent to the _SLEVEL_ variable which is
12854             # clipped to be >=0 in sub write_line, so it should not be possible
12855             # to get here unless the code has a bracing error which leaves a
12856             # closing brace with zero nesting depth.
12857             unless ( get_saw_brace_error() ) {
12858                 warning(
12859 "Program bug in pad_array_to_go: hit nesting error which should have been caught\n"
12860                 );
12861                 report_definite_bug();
12862             }
12863         }
12864         else {
12865             $nesting_depth_to_go[ $max_index_to_go + 1 ] -= 1;
12866         }
12867     }
12868
12869     #       /^[L\{\(\[]$/
12870     elsif ( $is_opening_type{ $types_to_go[$max_index_to_go] } ) {
12871         $nesting_depth_to_go[ $max_index_to_go + 1 ] += 1;
12872     }
12873     return;
12874 }
12875
12876 sub break_all_chain_tokens {
12877
12878     # scan the current breakpoints looking for breaks at certain "chain
12879     # operators" (. : && || + etc) which often occur repeatedly in a long
12880     # statement.  If we see a break at any one, break at all similar tokens
12881     # within the same container.
12882     #
12883     my ( $self, $ri_left, $ri_right ) = @_;
12884
12885     my %saw_chain_type;
12886     my %left_chain_type;
12887     my %right_chain_type;
12888     my %interior_chain_type;
12889     my $nmax = @{$ri_right} - 1;
12890
12891     # scan the left and right end tokens of all lines
12892     my $count = 0;
12893     for my $n ( 0 .. $nmax ) {
12894         my $il    = $ri_left->[$n];
12895         my $ir    = $ri_right->[$n];
12896         my $typel = $types_to_go[$il];
12897         my $typer = $types_to_go[$ir];
12898         $typel = '+' if ( $typel eq '-' );    # treat + and - the same
12899         $typer = '+' if ( $typer eq '-' );
12900         $typel = '*' if ( $typel eq '/' );    # treat * and / the same
12901         $typer = '*' if ( $typer eq '/' );
12902         my $tokenl = $tokens_to_go[$il];
12903         my $tokenr = $tokens_to_go[$ir];
12904
12905         if ( $is_chain_operator{$tokenl} && $want_break_before{$typel} ) {
12906             next if ( $typel eq '?' );
12907             push @{ $left_chain_type{$typel} }, $il;
12908             $saw_chain_type{$typel} = 1;
12909             $count++;
12910         }
12911         if ( $is_chain_operator{$tokenr} && !$want_break_before{$typer} ) {
12912             next if ( $typer eq '?' );
12913             push @{ $right_chain_type{$typer} }, $ir;
12914             $saw_chain_type{$typer} = 1;
12915             $count++;
12916         }
12917     }
12918     return unless $count;
12919
12920     # now look for any interior tokens of the same types
12921     $count = 0;
12922     for my $n ( 0 .. $nmax ) {
12923         my $il = $ri_left->[$n];
12924         my $ir = $ri_right->[$n];
12925         foreach my $i ( $il + 1 .. $ir - 1 ) {
12926             my $type = $types_to_go[$i];
12927             $type = '+' if ( $type eq '-' );
12928             $type = '*' if ( $type eq '/' );
12929             if ( $saw_chain_type{$type} ) {
12930                 push @{ $interior_chain_type{$type} }, $i;
12931                 $count++;
12932             }
12933         }
12934     }
12935     return unless $count;
12936
12937     # now make a list of all new break points
12938     my @insert_list;
12939
12940     # loop over all chain types
12941     foreach my $type ( keys %saw_chain_type ) {
12942
12943         # quit if just ONE continuation line with leading .  For example--
12944         # print LATEXFILE '\framebox{\parbox[c][' . $h . '][t]{' . $w . '}{'
12945         #  . $contents;
12946         last if ( $nmax == 1 && $type =~ /^[\.\+]$/ );
12947
12948         # loop over all interior chain tokens
12949         foreach my $itest ( @{ $interior_chain_type{$type} } ) {
12950
12951             # loop over all left end tokens of same type
12952             if ( $left_chain_type{$type} ) {
12953                 next if $nobreak_to_go[ $itest - 1 ];
12954                 foreach my $i ( @{ $left_chain_type{$type} } ) {
12955                     next unless $self->in_same_container_i( $i, $itest );
12956                     push @insert_list, $itest - 1;
12957
12958                     # Break at matching ? if this : is at a different level.
12959                     # For example, the ? before $THRf_DEAD in the following
12960                     # should get a break if its : gets a break.
12961                     #
12962                     # my $flags =
12963                     #     ( $_ & 1 ) ? ( $_ & 4 ) ? $THRf_DEAD : $THRf_ZOMBIE
12964                     #   : ( $_ & 4 ) ? $THRf_R_DETACHED
12965                     #   :              $THRf_R_JOINABLE;
12966                     if (   $type eq ':'
12967                         && $levels_to_go[$i] != $levels_to_go[$itest] )
12968                     {
12969                         my $i_question = $mate_index_to_go[$itest];
12970                         if ( $i_question > 0 ) {
12971                             push @insert_list, $i_question - 1;
12972                         }
12973                     }
12974                     last;
12975                 }
12976             }
12977
12978             # loop over all right end tokens of same type
12979             if ( $right_chain_type{$type} ) {
12980                 next if $nobreak_to_go[$itest];
12981                 foreach my $i ( @{ $right_chain_type{$type} } ) {
12982                     next unless $self->in_same_container_i( $i, $itest );
12983                     push @insert_list, $itest;
12984
12985                     # break at matching ? if this : is at a different level
12986                     if (   $type eq ':'
12987                         && $levels_to_go[$i] != $levels_to_go[$itest] )
12988                     {
12989                         my $i_question = $mate_index_to_go[$itest];
12990                         if ( $i_question >= 0 ) {
12991                             push @insert_list, $i_question;
12992                         }
12993                     }
12994                     last;
12995                 }
12996             }
12997         }
12998     }
12999
13000     # insert any new break points
13001     if (@insert_list) {
13002         $self->insert_additional_breaks( \@insert_list, $ri_left, $ri_right );
13003     }
13004     return;
13005 }
13006
13007 sub insert_additional_breaks {
13008
13009     # this routine will add line breaks at requested locations after
13010     # sub set_continuation_breaks has made preliminary breaks.
13011
13012     my ( $self, $ri_break_list, $ri_first, $ri_last ) = @_;
13013     my $i_f;
13014     my $i_l;
13015     my $line_number = 0;
13016     foreach my $i_break_left ( sort { $a <=> $b } @{$ri_break_list} ) {
13017
13018         next if ( $nobreak_to_go[$i_break_left] );
13019
13020         $i_f = $ri_first->[$line_number];
13021         $i_l = $ri_last->[$line_number];
13022         while ( $i_break_left >= $i_l ) {
13023             $line_number++;
13024
13025             # shouldn't happen unless caller passes bad indexes
13026             if ( $line_number >= @{$ri_last} ) {
13027                 warning(
13028 "Non-fatal program bug: couldn't set break at $i_break_left\n"
13029                 );
13030                 report_definite_bug();
13031                 return;
13032             }
13033             $i_f = $ri_first->[$line_number];
13034             $i_l = $ri_last->[$line_number];
13035         }
13036
13037         # Do not leave a blank at the end of a line; back up if necessary
13038         if ( $types_to_go[$i_break_left] eq 'b' ) { $i_break_left-- }
13039
13040         my $i_break_right = $inext_to_go[$i_break_left];
13041         if (   $i_break_left >= $i_f
13042             && $i_break_left < $i_l
13043             && $i_break_right > $i_f
13044             && $i_break_right <= $i_l )
13045         {
13046             splice( @{$ri_first}, $line_number, 1, ( $i_f, $i_break_right ) );
13047             splice( @{$ri_last},  $line_number, 1, ( $i_break_left, $i_l ) );
13048         }
13049     }
13050     return;
13051 }
13052
13053 sub in_same_container_i {
13054
13055     # check to see if tokens at i1 and i2 are in the
13056     # same container, and not separated by a comma, ? or :
13057     # This is an interface between the _to_go arrays to the rLL array
13058     my ( $self, $i1, $i2 ) = @_;
13059
13060     # quick check
13061     return if ( $parent_seqno_to_go[$i1] ne $parent_seqno_to_go[$i2] );
13062
13063     # full check
13064     return $self->in_same_container_K( $K_to_go[$i1], $K_to_go[$i2] );
13065 }
13066
13067 {    ## begin closure in_same_container_K
13068     my $ris_break_token;
13069     my $ris_comma_token;
13070
13071     BEGIN {
13072
13073         # all cases break on seeing commas at same level
13074         my @q = qw( => );
13075         push @q, ',';
13076         @{$ris_comma_token}{@q} = (1) x scalar(@q);
13077
13078         # Non-ternary text also breaks on seeing any of qw(? : || or )
13079         # Example: we would not want to break at any of these .'s
13080         #  : "<A HREF=\"#item_" . htmlify( 0, $s2 ) . "\">$str</A>"
13081         push @q, qw( or || ? : );
13082         @{$ris_break_token}{@q} = (1) x scalar(@q);
13083     }
13084
13085     sub in_same_container_K {
13086
13087         # Check to see if tokens at K1 and K2 are in the same container,
13088         # and not separated by certain characters: => , ? : || or
13089         # This version uses the newer $rLL data structure.
13090
13091         my ( $self, $K1, $K2 ) = @_;
13092         if ( $K2 < $K1 ) { ( $K1, $K2 ) = ( $K2, $K1 ) }
13093         my $rLL     = $self->[_rLL_];
13094         my $depth_1 = $rLL->[$K1]->[_SLEVEL_];
13095         return if ( $depth_1 < 0 );
13096         return unless ( $rLL->[$K2]->[_SLEVEL_] == $depth_1 );
13097
13098         # Select character set to scan for
13099         my $type_1 = $rLL->[$K1]->[_TYPE_];
13100         my $rbreak = ( $type_1 ne ':' ) ? $ris_break_token : $ris_comma_token;
13101
13102         # Fast preliminary loop to verify that tokens are in the same container
13103         my $KK = $K1;
13104         while (1) {
13105             $KK = $rLL->[$KK]->[_KNEXT_SEQ_ITEM_];
13106             last if !defined($KK);
13107             last if ( $KK >= $K2 );
13108             my $depth_K = $rLL->[$KK]->[_SLEVEL_];
13109             return if ( $depth_K < $depth_1 );
13110             next   if ( $depth_K > $depth_1 );
13111             if ( $type_1 ne ':' ) {
13112                 my $tok_K = $rLL->[$KK]->[_TOKEN_];
13113                 return if ( $tok_K eq '?' || $tok_K eq ':' );
13114             }
13115         }
13116
13117         # Slow loop checking for certain characters
13118
13119         ###########################################################
13120         # This is potentially a slow routine and not critical.
13121         # For safety just give up for large differences.
13122         # See test file 'infinite_loop.txt'
13123         ###########################################################
13124         return if ( $K2 - $K1 > 200 );
13125
13126         foreach my $K ( $K1 + 1 .. $K2 - 1 ) {
13127
13128             my $depth_K = $rLL->[$K]->[_SLEVEL_];
13129             next   if ( $depth_K > $depth_1 );
13130             return if ( $depth_K < $depth_1 );    # redundant, checked above
13131             my $tok = $rLL->[$K]->[_TOKEN_];
13132             return if ( $rbreak->{$tok} );
13133         }
13134         return 1;
13135     }
13136 } ## end closure in_same_container_K
13137
13138 sub break_equals {
13139
13140     # Look for assignment operators that could use a breakpoint.
13141     # For example, in the following snippet
13142     #
13143     #    $HOME = $ENV{HOME}
13144     #      || $ENV{LOGDIR}
13145     #      || $pw[7]
13146     #      || die "no home directory for user $<";
13147     #
13148     # we could break at the = to get this, which is a little nicer:
13149     #    $HOME =
13150     #         $ENV{HOME}
13151     #      || $ENV{LOGDIR}
13152     #      || $pw[7]
13153     #      || die "no home directory for user $<";
13154     #
13155     # The logic here follows the logic in set_logical_padding, which
13156     # will add the padding in the second line to improve alignment.
13157     #
13158     my ( $self, $ri_left, $ri_right ) = @_;
13159     my $nmax = @{$ri_right} - 1;
13160     return unless ( $nmax >= 2 );
13161
13162     # scan the left ends of first two lines
13163     my $tokbeg = "";
13164     my $depth_beg;
13165     for my $n ( 1 .. 2 ) {
13166         my $il     = $ri_left->[$n];
13167         my $typel  = $types_to_go[$il];
13168         my $tokenl = $tokens_to_go[$il];
13169
13170         my $has_leading_op = ( $tokenl =~ /^\w/ )
13171           ? $is_chain_operator{$tokenl}    # + - * / : ? && ||
13172           : $is_chain_operator{$typel};    # and, or
13173         return unless ($has_leading_op);
13174         if ( $n > 1 ) {
13175             return
13176               unless ( $tokenl eq $tokbeg
13177                 && $nesting_depth_to_go[$il] eq $depth_beg );
13178         }
13179         $tokbeg    = $tokenl;
13180         $depth_beg = $nesting_depth_to_go[$il];
13181     }
13182
13183     # now look for any interior tokens of the same types
13184     my $il = $ri_left->[0];
13185     my $ir = $ri_right->[0];
13186
13187     # now make a list of all new break points
13188     my @insert_list;
13189     for ( my $i = $ir - 1 ; $i > $il ; $i-- ) {
13190         my $type = $types_to_go[$i];
13191         if (   $is_assignment{$type}
13192             && $nesting_depth_to_go[$i] eq $depth_beg )
13193         {
13194             if ( $want_break_before{$type} ) {
13195                 push @insert_list, $i - 1;
13196             }
13197             else {
13198                 push @insert_list, $i;
13199             }
13200         }
13201     }
13202
13203     # Break after a 'return' followed by a chain of operators
13204     #  return ( $^O !~ /win32|dos/i )
13205     #    && ( $^O ne 'VMS' )
13206     #    && ( $^O ne 'OS2' )
13207     #    && ( $^O ne 'MacOS' );
13208     # To give:
13209     #  return
13210     #       ( $^O !~ /win32|dos/i )
13211     #    && ( $^O ne 'VMS' )
13212     #    && ( $^O ne 'OS2' )
13213     #    && ( $^O ne 'MacOS' );
13214     my $i = 0;
13215     if (   $types_to_go[$i] eq 'k'
13216         && $tokens_to_go[$i] eq 'return'
13217         && $ir > $il
13218         && $nesting_depth_to_go[$i] eq $depth_beg )
13219     {
13220         push @insert_list, $i;
13221     }
13222
13223     return unless (@insert_list);
13224
13225     # One final check...
13226     # scan second and third lines and be sure there are no assignments
13227     # we want to avoid breaking at an = to make something like this:
13228     #    unless ( $icon =
13229     #           $html_icons{"$type-$state"}
13230     #        or $icon = $html_icons{$type}
13231     #        or $icon = $html_icons{$state} )
13232     for my $n ( 1 .. 2 ) {
13233         my $il = $ri_left->[$n];
13234         my $ir = $ri_right->[$n];
13235         foreach my $i ( $il + 1 .. $ir ) {
13236             my $type = $types_to_go[$i];
13237             return
13238               if ( $is_assignment{$type}
13239                 && $nesting_depth_to_go[$i] eq $depth_beg );
13240         }
13241     }
13242
13243     # ok, insert any new break point
13244     if (@insert_list) {
13245         $self->insert_additional_breaks( \@insert_list, $ri_left, $ri_right );
13246     }
13247     return;
13248 }
13249
13250 {    ## begin closure recombine_breakpoints
13251
13252     # This routine is called once per batch to see if it would be better
13253     # to combine some of the lines into which the batch has been broken.
13254
13255     my %is_amp_amp;
13256     my %is_ternary;
13257     my %is_math_op;
13258     my %is_plus_minus;
13259     my %is_mult_div;
13260
13261     BEGIN {
13262
13263         my @q;
13264         @q = qw( && || );
13265         @is_amp_amp{@q} = (1) x scalar(@q);
13266
13267         @q = qw( ? : );
13268         @is_ternary{@q} = (1) x scalar(@q);
13269
13270         @q = qw( + - * / );
13271         @is_math_op{@q} = (1) x scalar(@q);
13272
13273         @q = qw( + - );
13274         @is_plus_minus{@q} = (1) x scalar(@q);
13275
13276         @q = qw( * / );
13277         @is_mult_div{@q} = (1) x scalar(@q);
13278     }
13279
13280     sub Debug_dump_breakpoints {
13281
13282         # Debug routine to dump current breakpoints...not normally called
13283         # We are given indexes to the current lines:
13284         # $ri_beg = ref to array of BEGinning indexes of each line
13285         # $ri_end = ref to array of ENDing indexes of each line
13286         my ( $self, $ri_beg, $ri_end, $msg ) = @_;
13287         print STDERR "----Dumping breakpoints from: $msg----\n";
13288         for my $n ( 0 .. @{$ri_end} - 1 ) {
13289             my $ibeg = $ri_beg->[$n];
13290             my $iend = $ri_end->[$n];
13291             my $text = "";
13292             foreach my $i ( $ibeg .. $iend ) {
13293                 $text .= $tokens_to_go[$i];
13294             }
13295             print STDERR "$n ($ibeg:$iend) $text\n";
13296         }
13297         print STDERR "----\n";
13298         return;
13299     }
13300
13301     sub delete_one_line_semicolons {
13302
13303         my ( $self, $ri_beg, $ri_end ) = @_;
13304         my $rLL                 = $self->[_rLL_];
13305         my $K_opening_container = $self->[_K_opening_container_];
13306
13307         # Walk down the lines of this batch and delete any semicolons
13308         # terminating one-line blocks;
13309         my $nmax = @{$ri_end} - 1;
13310
13311         foreach my $n ( 0 .. $nmax ) {
13312             my $i_beg    = $ri_beg->[$n];
13313             my $i_e      = $ri_end->[$n];
13314             my $K_beg    = $K_to_go[$i_beg];
13315             my $K_e      = $K_to_go[$i_e];
13316             my $K_end    = $K_e;
13317             my $type_end = $rLL->[$K_end]->[_TYPE_];
13318             if ( $type_end eq '#' ) {
13319                 $K_end = $self->K_previous_nonblank($K_end);
13320                 if ( defined($K_end) ) { $type_end = $rLL->[$K_end]->[_TYPE_]; }
13321             }
13322
13323             # we are looking for a line ending in closing brace
13324             next
13325               unless ( $type_end eq '}' && $rLL->[$K_end]->[_TOKEN_] eq '}' );
13326
13327             # ...and preceded by a semicolon on the same line
13328             my $K_semicolon = $self->K_previous_nonblank($K_end);
13329             next unless defined($K_semicolon);
13330             my $i_semicolon = $i_beg + ( $K_semicolon - $K_beg );
13331             next if ( $i_semicolon <= $i_beg );
13332             next unless ( $rLL->[$K_semicolon]->[_TYPE_] eq ';' );
13333
13334             # Safety check - shouldn't happen - not critical
13335             # This is not worth throwing a Fault, except in DEVEL_MODE
13336             if ( $types_to_go[$i_semicolon] ne ';' ) {
13337                 DEVEL_MODE
13338                   && Fault("unexpected type looking for semicolon");
13339                 next;
13340             }
13341
13342             # ... with the corresponding opening brace on the same line
13343             my $type_sequence = $rLL->[$K_end]->[_TYPE_SEQUENCE_];
13344             my $K_opening     = $K_opening_container->{$type_sequence};
13345             next unless ( defined($K_opening) );
13346             my $i_opening = $i_beg + ( $K_opening - $K_beg );
13347             next if ( $i_opening < $i_beg );
13348
13349             # ... and only one semicolon between these braces
13350             my $semicolon_count = 0;
13351             foreach my $K ( $K_opening + 1 .. $K_semicolon - 1 ) {
13352                 if ( $rLL->[$K]->[_TYPE_] eq ';' ) {
13353                     $semicolon_count++;
13354                     last;
13355                 }
13356             }
13357             next if ($semicolon_count);
13358
13359             # ...ok, then make the semicolon invisible
13360             $tokens_to_go[$i_semicolon]            = "";
13361             $token_lengths_to_go[$i_semicolon]     = 0;
13362             $rLL->[$K_semicolon]->[_TOKEN_]        = "";
13363             $rLL->[$K_semicolon]->[_TOKEN_LENGTH_] = 0;
13364         }
13365         return;
13366     }
13367
13368     use constant DEBUG_RECOMBINE => 0;
13369
13370     sub recombine_breakpoints {
13371
13372         # sub set_continuation_breaks is very liberal in setting line breaks
13373         # for long lines, always setting breaks at good breakpoints, even
13374         # when that creates small lines.  Sometimes small line fragments
13375         # are produced which would look better if they were combined.
13376         # That's the task of this routine.
13377         #
13378         # We are given indexes to the current lines:
13379         # $ri_beg = ref to array of BEGinning indexes of each line
13380         # $ri_end = ref to array of ENDing indexes of each line
13381         my ( $self, $ri_beg, $ri_end ) = @_;
13382
13383         my $rK_weld_right = $self->[_rK_weld_right_];
13384         my $rK_weld_left  = $self->[_rK_weld_left_];
13385
13386         # Make a list of all good joining tokens between the lines
13387         # n-1 and n.
13388         my @joint;
13389         my $nmax = @{$ri_end} - 1;
13390         for my $n ( 1 .. $nmax ) {
13391             my $ibeg_1 = $ri_beg->[ $n - 1 ];
13392             my $iend_1 = $ri_end->[ $n - 1 ];
13393             my $iend_2 = $ri_end->[$n];
13394             my $ibeg_2 = $ri_beg->[$n];
13395
13396             my ( $itok, $itokp, $itokm );
13397
13398             foreach my $itest ( $iend_1, $ibeg_2 ) {
13399                 my $type = $types_to_go[$itest];
13400                 if (   $is_math_op{$type}
13401                     || $is_amp_amp{$type}
13402                     || $is_assignment{$type}
13403                     || $type eq ':' )
13404                 {
13405                     $itok = $itest;
13406                 }
13407             }
13408             $joint[$n] = [$itok];
13409         }
13410
13411         my $more_to_do = 1;
13412
13413         # We keep looping over all of the lines of this batch
13414         # until there are no more possible recombinations
13415         my $nmax_last = @{$ri_end};
13416         my $reverse   = 0;
13417         while ($more_to_do) {
13418             my $n_best = 0;
13419             my $bs_best;
13420             my $nmax = @{$ri_end} - 1;
13421
13422             # Safety check for infinite loop
13423             unless ( $nmax < $nmax_last ) {
13424
13425                 # Shouldn't happen because splice below decreases nmax on each
13426                 # iteration.  An error can only be due to a recent programming
13427                 # change.
13428                 Fault("Program bug-infinite loop in recombine breakpoints\n");
13429             }
13430             $nmax_last  = $nmax;
13431             $more_to_do = 0;
13432             my $skip_Section_3;
13433             my $leading_amp_count = 0;
13434             my $this_line_is_semicolon_terminated;
13435
13436             # loop over all remaining lines in this batch
13437             for my $iter ( 1 .. $nmax ) {
13438
13439                 # alternating sweep direction gives symmetric results
13440                 # for recombining lines which exceed the line length
13441                 # such as eval {{{{.... }}}}
13442                 my $n;
13443                 if   ($reverse) { $n = 1 + $nmax - $iter; }
13444                 else            { $n = $iter }
13445
13446                 #----------------------------------------------------------
13447                 # If we join the current pair of lines,
13448                 # line $n-1 will become the left part of the joined line
13449                 # line $n will become the right part of the joined line
13450                 #
13451                 # Here are Indexes of the endpoint tokens of the two lines:
13452                 #
13453                 #  -----line $n-1--- | -----line $n-----
13454                 #  $ibeg_1   $iend_1 | $ibeg_2   $iend_2
13455                 #                    ^
13456                 #                    |
13457                 # We want to decide if we should remove the line break
13458                 # between the tokens at $iend_1 and $ibeg_2
13459                 #
13460                 # We will apply a number of ad-hoc tests to see if joining
13461                 # here will look ok.  The code will just issue a 'next'
13462                 # command if the join doesn't look good.  If we get through
13463                 # the gauntlet of tests, the lines will be recombined.
13464                 #----------------------------------------------------------
13465                 #
13466                 # beginning and ending tokens of the lines we are working on
13467                 my $ibeg_1    = $ri_beg->[ $n - 1 ];
13468                 my $iend_1    = $ri_end->[ $n - 1 ];
13469                 my $iend_2    = $ri_end->[$n];
13470                 my $ibeg_2    = $ri_beg->[$n];
13471                 my $ibeg_nmax = $ri_beg->[$nmax];
13472
13473                 # combined line cannot be too long
13474                 my $excess = $self->excess_line_length( $ibeg_1, $iend_2, 1 );
13475                 next if ( $excess > 0 );
13476
13477                 my $type_iend_1 = $types_to_go[$iend_1];
13478                 my $type_iend_2 = $types_to_go[$iend_2];
13479                 my $type_ibeg_1 = $types_to_go[$ibeg_1];
13480                 my $type_ibeg_2 = $types_to_go[$ibeg_2];
13481
13482                 # terminal token of line 2 if any side comment is ignored:
13483                 my $iend_2t      = $iend_2;
13484                 my $type_iend_2t = $type_iend_2;
13485
13486                 # some beginning indexes of other lines, which may not exist
13487                 my $ibeg_0 = $n > 1          ? $ri_beg->[ $n - 2 ] : -1;
13488                 my $ibeg_3 = $n < $nmax      ? $ri_beg->[ $n + 1 ] : -1;
13489                 my $ibeg_4 = $n + 2 <= $nmax ? $ri_beg->[ $n + 2 ] : -1;
13490
13491                 my $bs_tweak = 0;
13492
13493                 #my $depth_increase=( $nesting_depth_to_go[$ibeg_2] -
13494                 #        $nesting_depth_to_go[$ibeg_1] );
13495
13496                 DEBUG_RECOMBINE && do {
13497                     print STDERR
13498 "RECOMBINE: n=$n imid=$iend_1 if=$ibeg_1 type=$type_ibeg_1 =$tokens_to_go[$ibeg_1] next_type=$type_ibeg_2 next_tok=$tokens_to_go[$ibeg_2]\n";
13499                 };
13500
13501                 # If line $n is the last line, we set some flags and
13502                 # do any special checks for it
13503                 if ( $n == $nmax ) {
13504
13505                     # a terminal '{' should stay where it is
13506                     # unless preceded by a fat comma
13507                     next if ( $type_ibeg_2 eq '{' && $type_iend_1 ne '=>' );
13508
13509                     if (   $type_iend_2 eq '#'
13510                         && $iend_2 - $ibeg_2 >= 2
13511                         && $types_to_go[ $iend_2 - 1 ] eq 'b' )
13512                     {
13513                         $iend_2t      = $iend_2 - 2;
13514                         $type_iend_2t = $types_to_go[$iend_2t];
13515                     }
13516
13517                     $this_line_is_semicolon_terminated = $type_iend_2t eq ';';
13518                 }
13519
13520                 #----------------------------------------------------------
13521                 # Recombine Section 0:
13522                 # Examine the special token joining this line pair, if any.
13523                 # Put as many tests in this section to avoid duplicate code and
13524                 # to make formatting independent of whether breaks are to the
13525                 # left or right of an operator.
13526                 #----------------------------------------------------------
13527
13528                 my ($itok) = @{ $joint[$n] };
13529                 if ($itok) {
13530
13531                     my $type = $types_to_go[$itok];
13532
13533                     if ( $type eq ':' ) {
13534
13535                         # do not join at a colon unless it disobeys the break
13536                         # request
13537                         if ( $itok eq $iend_1 ) {
13538                             next unless $want_break_before{$type};
13539                         }
13540                         else {
13541                             $leading_amp_count++;
13542                             next if $want_break_before{$type};
13543                         }
13544                     } ## end if ':'
13545
13546                     # handle math operators + - * /
13547                     elsif ( $is_math_op{$type} ) {
13548
13549                         # Combine these lines if this line is a single
13550                         # number, or if it is a short term with same
13551                         # operator as the previous line.  For example, in
13552                         # the following code we will combine all of the
13553                         # short terms $A, $B, $C, $D, $E, $F, together
13554                         # instead of leaving them one per line:
13555                         #  my $time =
13556                         #    $A * $B * $C * $D * $E * $F *
13557                         #    ( 2. * $eps * $sigma * $area ) *
13558                         #    ( 1. / $tcold**3 - 1. / $thot**3 );
13559
13560                         # This can be important in math-intensive code.
13561
13562                         my $good_combo;
13563
13564                         my $itokp  = min( $inext_to_go[$itok],  $iend_2 );
13565                         my $itokpp = min( $inext_to_go[$itokp], $iend_2 );
13566                         my $itokm  = max( $iprev_to_go[$itok],  $ibeg_1 );
13567                         my $itokmm = max( $iprev_to_go[$itokm], $ibeg_1 );
13568
13569                         # check for a number on the right
13570                         if ( $types_to_go[$itokp] eq 'n' ) {
13571
13572                             # ok if nothing else on right
13573                             if ( $itokp == $iend_2 ) {
13574                                 $good_combo = 1;
13575                             }
13576                             else {
13577
13578                                 # look one more token to right..
13579                                 # okay if math operator or some termination
13580                                 $good_combo =
13581                                   ( ( $itokpp == $iend_2 )
13582                                       && $is_math_op{ $types_to_go[$itokpp] } )
13583                                   || $types_to_go[$itokpp] =~ /^[#,;]$/;
13584                             }
13585                         }
13586
13587                         # check for a number on the left
13588                         if ( !$good_combo && $types_to_go[$itokm] eq 'n' ) {
13589
13590                             # okay if nothing else to left
13591                             if ( $itokm == $ibeg_1 ) {
13592                                 $good_combo = 1;
13593                             }
13594
13595                             # otherwise look one more token to left
13596                             else {
13597
13598                                 # okay if math operator, comma, or assignment
13599                                 $good_combo = ( $itokmm == $ibeg_1 )
13600                                   && ( $is_math_op{ $types_to_go[$itokmm] }
13601                                     || $types_to_go[$itokmm] =~ /^[,]$/
13602                                     || $is_assignment{ $types_to_go[$itokmm] }
13603                                   );
13604                             }
13605                         }
13606
13607                         # look for a single short token either side of the
13608                         # operator
13609                         if ( !$good_combo ) {
13610
13611                             # Slight adjustment factor to make results
13612                             # independent of break before or after operator in
13613                             # long summed lists.  (An operator and a space make
13614                             # two spaces).
13615                             my $two = ( $itok eq $iend_1 ) ? 2 : 0;
13616
13617                             $good_combo =
13618
13619                               # numbers or id's on both sides of this joint
13620                               $types_to_go[$itokp] =~ /^[in]$/
13621                               && $types_to_go[$itokm] =~ /^[in]$/
13622
13623                               # one of the two lines must be short:
13624                               && (
13625                                 (
13626                                     # no more than 2 nonblank tokens right of
13627                                     # joint
13628                                     $itokpp == $iend_2
13629
13630                                     # short
13631                                     && token_sequence_length( $itokp, $iend_2 )
13632                                     < $two +
13633                                     $rOpts_short_concatenation_item_length
13634                                 )
13635                                 || (
13636                                     # no more than 2 nonblank tokens left of
13637                                     # joint
13638                                     $itokmm == $ibeg_1
13639
13640                                     # short
13641                                     && token_sequence_length( $ibeg_1, $itokm )
13642                                     < 2 - $two +
13643                                     $rOpts_short_concatenation_item_length
13644                                 )
13645
13646                               )
13647
13648                               # keep pure terms; don't mix +- with */
13649                               && !(
13650                                 $is_plus_minus{$type}
13651                                 && (   $is_mult_div{ $types_to_go[$itokmm] }
13652                                     || $is_mult_div{ $types_to_go[$itokpp] } )
13653                               )
13654                               && !(
13655                                 $is_mult_div{$type}
13656                                 && (   $is_plus_minus{ $types_to_go[$itokmm] }
13657                                     || $is_plus_minus{ $types_to_go[$itokpp] } )
13658                               )
13659
13660                               ;
13661                         }
13662
13663                         # it is also good to combine if we can reduce to 2 lines
13664                         if ( !$good_combo ) {
13665
13666                             # index on other line where same token would be in a
13667                             # long chain.
13668                             my $iother =
13669                               ( $itok == $iend_1 ) ? $iend_2 : $ibeg_1;
13670
13671                             $good_combo =
13672                                  $n == 2
13673                               && $n == $nmax
13674                               && $types_to_go[$iother] ne $type;
13675                         }
13676
13677                         next unless ($good_combo);
13678
13679                     } ## end math
13680
13681                     elsif ( $is_amp_amp{$type} ) {
13682                         ##TBD
13683                     } ## end &&, ||
13684
13685                     elsif ( $is_assignment{$type} ) {
13686                         ##TBD
13687                     } ## end assignment
13688                 }
13689
13690                 #----------------------------------------------------------
13691                 # Recombine Section 1:
13692                 # Join welded nested containers immediately
13693                 #----------------------------------------------------------
13694
13695                 if (
13696                     $total_weld_count
13697                     && ( $type_sequence_to_go[$iend_1]
13698                         && defined( $rK_weld_right->{ $K_to_go[$iend_1] } )
13699                         || $type_sequence_to_go[$ibeg_2]
13700                         && defined( $rK_weld_left->{ $K_to_go[$ibeg_2] } ) )
13701                   )
13702                 {
13703                     $n_best = $n;
13704                     last;
13705                 }
13706
13707                 $reverse = 0;
13708
13709                 #----------------------------------------------------------
13710                 # Recombine Section 2:
13711                 # Examine token at $iend_1 (right end of first line of pair)
13712                 #----------------------------------------------------------
13713
13714                 # an isolated '}' may join with a ';' terminated segment
13715                 if ( $type_iend_1 eq '}' ) {
13716
13717                     # Check for cases where combining a semicolon terminated
13718                     # statement with a previous isolated closing paren will
13719                     # allow the combined line to be outdented.  This is
13720                     # generally a good move.  For example, we can join up
13721                     # the last two lines here:
13722                     #  (
13723                     #      $dev,  $ino,   $mode,  $nlink, $uid,     $gid, $rdev,
13724                     #      $size, $atime, $mtime, $ctime, $blksize, $blocks
13725                     #    )
13726                     #    = stat($file);
13727                     #
13728                     # to get:
13729                     #  (
13730                     #      $dev,  $ino,   $mode,  $nlink, $uid,     $gid, $rdev,
13731                     #      $size, $atime, $mtime, $ctime, $blksize, $blocks
13732                     #  ) = stat($file);
13733                     #
13734                     # which makes the parens line up.
13735                     #
13736                     # Another example, from Joe Matarazzo, probably looks best
13737                     # with the 'or' clause appended to the trailing paren:
13738                     #  $self->some_method(
13739                     #      PARAM1 => 'foo',
13740                     #      PARAM2 => 'bar'
13741                     #  ) or die "Some_method didn't work";
13742                     #
13743                     # But we do not want to do this for something like the -lp
13744                     # option where the paren is not outdentable because the
13745                     # trailing clause will be far to the right.
13746                     #
13747                     # The logic here is synchronized with the logic in sub
13748                     # sub set_adjusted_indentation, which actually does
13749                     # the outdenting.
13750                     #
13751                     $skip_Section_3 ||= $this_line_is_semicolon_terminated
13752
13753                       # only one token on last line
13754                       && $ibeg_1 == $iend_1
13755
13756                       # must be structural paren
13757                       && $tokens_to_go[$iend_1] eq ')'
13758
13759                       # style must allow outdenting,
13760                       && !$closing_token_indentation{')'}
13761
13762                       # only leading '&&', '||', and ':' if no others seen
13763                       # (but note: our count made below could be wrong
13764                       # due to intervening comments)
13765                       && ( $leading_amp_count == 0
13766                         || $type_ibeg_2 !~ /^(:|\&\&|\|\|)$/ )
13767
13768                       # but leading colons probably line up with a
13769                       # previous colon or question (count could be wrong).
13770                       && $type_ibeg_2 ne ':'
13771
13772                       # only one step in depth allowed.  this line must not
13773                       # begin with a ')' itself.
13774                       && ( $nesting_depth_to_go[$iend_1] ==
13775                         $nesting_depth_to_go[$iend_2] + 1 );
13776
13777                     # YVES patch 2 of 2:
13778                     # Allow cuddled eval chains, like this:
13779                     #   eval {
13780                     #       #STUFF;
13781                     #       1; # return true
13782                     #   } or do {
13783                     #       #handle error
13784                     #   };
13785                     # This patch works together with a patch in
13786                     # setting adjusted indentation (where the closing eval
13787                     # brace is outdented if possible).
13788                     # The problem is that an 'eval' block has continuation
13789                     # indentation and it looks better to undo it in some
13790                     # cases.  If we do not use this patch we would get:
13791                     #   eval {
13792                     #       #STUFF;
13793                     #       1; # return true
13794                     #       }
13795                     #       or do {
13796                     #       #handle error
13797                     #     };
13798                     # The alternative, for uncuddled style, is to create
13799                     # a patch in set_adjusted_indentation which undoes
13800                     # the indentation of a leading line like 'or do {'.
13801                     # This doesn't work well with -icb through
13802                     if (
13803                            $block_type_to_go[$iend_1] eq 'eval'
13804                         && !$rOpts->{'line-up-parentheses'}
13805                         && !$rOpts->{'indent-closing-brace'}
13806                         && $tokens_to_go[$iend_2] eq '{'
13807                         && (
13808                             ( $type_ibeg_2 =~ /^(\&\&|\|\|)$/ )
13809                             || (   $type_ibeg_2 eq 'k'
13810                                 && $is_and_or{ $tokens_to_go[$ibeg_2] } )
13811                             || $is_if_unless{ $tokens_to_go[$ibeg_2] }
13812                         )
13813                       )
13814                     {
13815                         $skip_Section_3 ||= 1;
13816                     }
13817
13818                     next
13819                       unless (
13820                         $skip_Section_3
13821
13822                         # handle '.' and '?' specially below
13823                         || ( $type_ibeg_2 =~ /^[\.\?]$/ )
13824                       );
13825                 }
13826
13827                 elsif ( $type_iend_1 eq '{' ) {
13828
13829                     # YVES
13830                     # honor breaks at opening brace
13831                     # Added to prevent recombining something like this:
13832                     #  } || eval { package main;
13833                     next if $forced_breakpoint_to_go[$iend_1];
13834                 }
13835
13836                 # do not recombine lines with ending &&, ||,
13837                 elsif ( $is_amp_amp{$type_iend_1} ) {
13838                     next unless $want_break_before{$type_iend_1};
13839                 }
13840
13841                 # Identify and recombine a broken ?/: chain
13842                 elsif ( $type_iend_1 eq '?' ) {
13843
13844                     # Do not recombine different levels
13845                     next
13846                       if ( $levels_to_go[$ibeg_1] ne $levels_to_go[$ibeg_2] );
13847
13848                     # do not recombine unless next line ends in :
13849                     next unless $type_iend_2 eq ':';
13850                 }
13851
13852                 # for lines ending in a comma...
13853                 elsif ( $type_iend_1 eq ',' ) {
13854
13855                     # Do not recombine at comma which is following the
13856                     # input bias.
13857                     # TODO: might be best to make a special flag
13858                     next if ( $old_breakpoint_to_go[$iend_1] );
13859
13860                     # An isolated '},' may join with an identifier + ';'
13861                     # This is useful for the class of a 'bless' statement
13862                     # (bless.t)
13863                     if (   $type_ibeg_1 eq '}'
13864                         && $type_ibeg_2 eq 'i' )
13865                     {
13866                         next
13867                           unless ( ( $ibeg_1 == ( $iend_1 - 1 ) )
13868                             && ( $iend_2 == ( $ibeg_2 + 1 ) )
13869                             && $this_line_is_semicolon_terminated );
13870
13871                         # override breakpoint
13872                         $forced_breakpoint_to_go[$iend_1] = 0;
13873                     }
13874
13875                     # but otherwise ..
13876                     else {
13877
13878                         # do not recombine after a comma unless this will leave
13879                         # just 1 more line
13880                         next unless ( $n + 1 >= $nmax );
13881
13882                     # do not recombine if there is a change in indentation depth
13883                         next
13884                           if (
13885                             $levels_to_go[$iend_1] != $levels_to_go[$iend_2] );
13886
13887                         # do not recombine a "complex expression" after a
13888                         # comma.  "complex" means no parens.
13889                         my $saw_paren;
13890                         foreach my $ii ( $ibeg_2 .. $iend_2 ) {
13891                             if ( $tokens_to_go[$ii] eq '(' ) {
13892                                 $saw_paren = 1;
13893                                 last;
13894                             }
13895                         }
13896                         next if $saw_paren;
13897                     }
13898                 }
13899
13900                 # opening paren..
13901                 elsif ( $type_iend_1 eq '(' ) {
13902
13903                     # No longer doing this
13904                 }
13905
13906                 elsif ( $type_iend_1 eq ')' ) {
13907
13908                     # No longer doing this
13909                 }
13910
13911                 # keep a terminal for-semicolon
13912                 elsif ( $type_iend_1 eq 'f' ) {
13913                     next;
13914                 }
13915
13916                 # if '=' at end of line ...
13917                 elsif ( $is_assignment{$type_iend_1} ) {
13918
13919                     # keep break after = if it was in input stream
13920                     # this helps prevent 'blinkers'
13921                     next if $old_breakpoint_to_go[$iend_1]
13922
13923                       # don't strand an isolated '='
13924                       && $iend_1 != $ibeg_1;
13925
13926                     my $is_short_quote =
13927                       (      $type_ibeg_2 eq 'Q'
13928                           && $ibeg_2 == $iend_2
13929                           && token_sequence_length( $ibeg_2, $ibeg_2 ) <
13930                           $rOpts_short_concatenation_item_length );
13931                     my $is_ternary =
13932                       ( $type_ibeg_1 eq '?'
13933                           && ( $ibeg_3 >= 0 && $types_to_go[$ibeg_3] eq ':' ) );
13934
13935                     # always join an isolated '=', a short quote, or if this
13936                     # will put ?/: at start of adjacent lines
13937                     if (   $ibeg_1 != $iend_1
13938                         && !$is_short_quote
13939                         && !$is_ternary )
13940                     {
13941                         next
13942                           unless (
13943                             (
13944
13945                                 # unless we can reduce this to two lines
13946                                 $nmax < $n + 2
13947
13948                              # or three lines, the last with a leading semicolon
13949                                 || (   $nmax == $n + 2
13950                                     && $types_to_go[$ibeg_nmax] eq ';' )
13951
13952                                 # or the next line ends with a here doc
13953                                 || $type_iend_2 eq 'h'
13954
13955                                # or the next line ends in an open paren or brace
13956                                # and the break hasn't been forced [dima.t]
13957                                 || (  !$forced_breakpoint_to_go[$iend_1]
13958                                     && $type_iend_2 eq '{' )
13959                             )
13960
13961                             # do not recombine if the two lines might align well
13962                             # this is a very approximate test for this
13963                             && (
13964
13965                               # RT#127633 - the leading tokens are not operators
13966                                 ( $type_ibeg_2 ne $tokens_to_go[$ibeg_2] )
13967
13968                                 # or they are different
13969                                 || (   $ibeg_3 >= 0
13970                                     && $type_ibeg_2 ne $types_to_go[$ibeg_3] )
13971                             )
13972                           );
13973
13974                         if (
13975
13976                             # Recombine if we can make two lines
13977                             $nmax >= $n + 2
13978
13979                             # -lp users often prefer this:
13980                             #  my $title = function($env, $env, $sysarea,
13981                             #                       "bubba Borrower Entry");
13982                             #  so we will recombine if -lp is used we have
13983                             #  ending comma
13984                             && (  !$rOpts_line_up_parentheses
13985                                 || $type_iend_2 ne ',' )
13986                           )
13987                         {
13988
13989                            # otherwise, scan the rhs line up to last token for
13990                            # complexity.  Note that we are not counting the last
13991                            # token in case it is an opening paren.
13992                             my $tv    = 0;
13993                             my $depth = $nesting_depth_to_go[$ibeg_2];
13994                             foreach my $i ( $ibeg_2 + 1 .. $iend_2 - 1 ) {
13995                                 if ( $nesting_depth_to_go[$i] != $depth ) {
13996                                     $tv++;
13997                                     last if ( $tv > 1 );
13998                                 }
13999                                 $depth = $nesting_depth_to_go[$i];
14000                             }
14001
14002                          # ok to recombine if no level changes before last token
14003                             if ( $tv > 0 ) {
14004
14005                                 # otherwise, do not recombine if more than two
14006                                 # level changes.
14007                                 next if ( $tv > 1 );
14008
14009                               # check total complexity of the two adjacent lines
14010                               # that will occur if we do this join
14011                                 my $istop =
14012                                   ( $n < $nmax )
14013                                   ? $ri_end->[ $n + 1 ]
14014                                   : $iend_2;
14015                                 foreach my $i ( $iend_2 .. $istop ) {
14016                                     if ( $nesting_depth_to_go[$i] != $depth ) {
14017                                         $tv++;
14018                                         last if ( $tv > 2 );
14019                                     }
14020                                     $depth = $nesting_depth_to_go[$i];
14021                                 }
14022
14023                         # do not recombine if total is more than 2 level changes
14024                                 next if ( $tv > 2 );
14025                             }
14026                         }
14027                     }
14028
14029                     unless ( $tokens_to_go[$ibeg_2] =~ /^[\{\(\[]$/ ) {
14030                         $forced_breakpoint_to_go[$iend_1] = 0;
14031                     }
14032                 }
14033
14034                 # for keywords..
14035                 elsif ( $type_iend_1 eq 'k' ) {
14036
14037                     # make major control keywords stand out
14038                     # (recombine.t)
14039                     next
14040                       if (
14041
14042                         #/^(last|next|redo|return)$/
14043                         $is_last_next_redo_return{ $tokens_to_go[$iend_1] }
14044
14045                         # but only if followed by multiple lines
14046                         && $n < $nmax
14047                       );
14048
14049                     if ( $is_and_or{ $tokens_to_go[$iend_1] } ) {
14050                         next
14051                           unless $want_break_before{ $tokens_to_go[$iend_1] };
14052                     }
14053                 }
14054
14055                 #----------------------------------------------------------
14056                 # Recombine Section 3:
14057                 # Examine token at $ibeg_2 (left end of second line of pair)
14058                 #----------------------------------------------------------
14059
14060                 # join lines identified above as capable of
14061                 # causing an outdented line with leading closing paren
14062                 # Note that we are skipping the rest of this section
14063                 # and the rest of the loop to do the join
14064                 if ($skip_Section_3) {
14065                     $forced_breakpoint_to_go[$iend_1] = 0;
14066                     $n_best = $n;
14067                     last;
14068                 }
14069
14070                 # handle lines with leading &&, ||
14071                 elsif ( $is_amp_amp{$type_ibeg_2} ) {
14072
14073                     $leading_amp_count++;
14074
14075                     # ok to recombine if it follows a ? or :
14076                     # and is followed by an open paren..
14077                     my $ok =
14078                       (      $is_ternary{$type_ibeg_1}
14079                           && $tokens_to_go[$iend_2] eq '(' )
14080
14081                     # or is followed by a ? or : at same depth
14082                     #
14083                     # We are looking for something like this. We can
14084                     # recombine the && line with the line above to make the
14085                     # structure more clear:
14086                     #  return
14087                     #    exists $G->{Attr}->{V}
14088                     #    && exists $G->{Attr}->{V}->{$u}
14089                     #    ? %{ $G->{Attr}->{V}->{$u} }
14090                     #    : ();
14091                     #
14092                     # We should probably leave something like this alone:
14093                     #  return
14094                     #       exists $G->{Attr}->{E}
14095                     #    && exists $G->{Attr}->{E}->{$u}
14096                     #    && exists $G->{Attr}->{E}->{$u}->{$v}
14097                     #    ? %{ $G->{Attr}->{E}->{$u}->{$v} }
14098                     #    : ();
14099                     # so that we either have all of the &&'s (or ||'s)
14100                     # on one line, as in the first example, or break at
14101                     # each one as in the second example.  However, it
14102                     # sometimes makes things worse to check for this because
14103                     # it prevents multiple recombinations.  So this is not done.
14104                       || ( $ibeg_3 >= 0
14105                         && $is_ternary{ $types_to_go[$ibeg_3] }
14106                         && $nesting_depth_to_go[$ibeg_3] ==
14107                         $nesting_depth_to_go[$ibeg_2] );
14108
14109                     next if !$ok && $want_break_before{$type_ibeg_2};
14110                     $forced_breakpoint_to_go[$iend_1] = 0;
14111
14112                     # tweak the bond strength to give this joint priority
14113                     # over ? and :
14114                     $bs_tweak = 0.25;
14115                 }
14116
14117                 # Identify and recombine a broken ?/: chain
14118                 elsif ( $type_ibeg_2 eq '?' ) {
14119
14120                     # Do not recombine different levels
14121                     my $lev = $levels_to_go[$ibeg_2];
14122                     next if ( $lev ne $levels_to_go[$ibeg_1] );
14123
14124                     # Do not recombine a '?' if either next line or
14125                     # previous line does not start with a ':'.  The reasons
14126                     # are that (1) no alignment of the ? will be possible
14127                     # and (2) the expression is somewhat complex, so the
14128                     # '?' is harder to see in the interior of the line.
14129                     my $follows_colon = $ibeg_1 >= 0 && $type_ibeg_1 eq ':';
14130                     my $precedes_colon =
14131                       $ibeg_3 >= 0 && $types_to_go[$ibeg_3] eq ':';
14132                     next unless ( $follows_colon || $precedes_colon );
14133
14134                     # we will always combining a ? line following a : line
14135                     if ( !$follows_colon ) {
14136
14137                         # ...otherwise recombine only if it looks like a chain.
14138                         # we will just look at a few nearby lines to see if
14139                         # this looks like a chain.
14140                         my $local_count = 0;
14141                         foreach my $ii ( $ibeg_0, $ibeg_1, $ibeg_3, $ibeg_4 ) {
14142                             $local_count++
14143                               if $ii >= 0
14144                               && $types_to_go[$ii] eq ':'
14145                               && $levels_to_go[$ii] == $lev;
14146                         }
14147                         next unless ( $local_count > 1 );
14148                     }
14149                     $forced_breakpoint_to_go[$iend_1] = 0;
14150                 }
14151
14152                 # do not recombine lines with leading '.'
14153                 elsif ( $type_ibeg_2 eq '.' ) {
14154                     my $i_next_nonblank = min( $inext_to_go[$ibeg_2], $iend_2 );
14155                     next
14156                       unless (
14157
14158                    # ... unless there is just one and we can reduce
14159                    # this to two lines if we do.  For example, this
14160                    #
14161                    #
14162                    #  $bodyA .=
14163                    #    '($dummy, $pat) = &get_next_tex_cmd;' . '$args .= $pat;'
14164                    #
14165                    #  looks better than this:
14166                    #  $bodyA .= '($dummy, $pat) = &get_next_tex_cmd;'
14167                    #    . '$args .= $pat;'
14168
14169                         (
14170                                $n == 2
14171                             && $n == $nmax
14172                             && $type_ibeg_1 ne $type_ibeg_2
14173                         )
14174
14175                         #  ... or this would strand a short quote , like this
14176                         #                . "some long quote"
14177                         #                . "\n";
14178
14179                         || (   $types_to_go[$i_next_nonblank] eq 'Q'
14180                             && $i_next_nonblank >= $iend_2 - 1
14181                             && $token_lengths_to_go[$i_next_nonblank] <
14182                             $rOpts_short_concatenation_item_length )
14183                       );
14184                 }
14185
14186                 # handle leading keyword..
14187                 elsif ( $type_ibeg_2 eq 'k' ) {
14188
14189                     # handle leading "or"
14190                     if ( $tokens_to_go[$ibeg_2] eq 'or' ) {
14191                         next
14192                           unless (
14193                             $this_line_is_semicolon_terminated
14194                             && (
14195                                 $type_ibeg_1 eq '}'
14196                                 || (
14197
14198                                     # following 'if' or 'unless' or 'or'
14199                                     $type_ibeg_1 eq 'k'
14200                                     && $is_if_unless{ $tokens_to_go[$ibeg_1] }
14201
14202                                     # important: only combine a very simple or
14203                                     # statement because the step below may have
14204                                     # combined a trailing 'and' with this or,
14205                                     # and we do not want to then combine
14206                                     # everything together
14207                                     && ( $iend_2 - $ibeg_2 <= 7 )
14208                                 )
14209                             )
14210                           );
14211
14212                         #X: RT #81854
14213                         $forced_breakpoint_to_go[$iend_1] = 0
14214                           unless $old_breakpoint_to_go[$iend_1];
14215                     }
14216
14217                     # handle leading 'and' and 'xor'
14218                     elsif ($tokens_to_go[$ibeg_2] eq 'and'
14219                         || $tokens_to_go[$ibeg_2] eq 'xor' )
14220                     {
14221
14222                         # Decide if we will combine a single terminal 'and'
14223                         # after an 'if' or 'unless'.
14224
14225                         #     This looks best with the 'and' on the same
14226                         #     line as the 'if':
14227                         #
14228                         #         $a = 1
14229                         #           if $seconds and $nu < 2;
14230                         #
14231                         #     But this looks better as shown:
14232                         #
14233                         #         $a = 1
14234                         #           if !$this->{Parents}{$_}
14235                         #           or $this->{Parents}{$_} eq $_;
14236                         #
14237                         next
14238                           unless (
14239                             $this_line_is_semicolon_terminated
14240                             && (
14241
14242                                 # following 'if' or 'unless' or 'or'
14243                                 $type_ibeg_1 eq 'k'
14244                                 && (   $is_if_unless{ $tokens_to_go[$ibeg_1] }
14245                                     || $tokens_to_go[$ibeg_1] eq 'or' )
14246                             )
14247                           );
14248                     }
14249
14250                     # handle leading "if" and "unless"
14251                     elsif ( $is_if_unless{ $tokens_to_go[$ibeg_2] } ) {
14252
14253                         # Combine something like:
14254                         #    next
14255                         #      if ( $lang !~ /${l}$/i );
14256                         # into:
14257                         #    next if ( $lang !~ /${l}$/i );
14258                         next
14259                           unless (
14260                             $this_line_is_semicolon_terminated
14261
14262                             #  previous line begins with 'and' or 'or'
14263                             && $type_ibeg_1 eq 'k'
14264                             && $is_and_or{ $tokens_to_go[$ibeg_1] }
14265
14266                           );
14267                     }
14268
14269                     # handle all other leading keywords
14270                     else {
14271
14272                         # keywords look best at start of lines,
14273                         # but combine things like "1 while"
14274                         unless ( $is_assignment{$type_iend_1} ) {
14275                             next
14276                               if ( ( $type_iend_1 ne 'k' )
14277                                 && ( $tokens_to_go[$ibeg_2] ne 'while' ) );
14278                         }
14279                     }
14280                 }
14281
14282                 # similar treatment of && and || as above for 'and' and 'or':
14283                 # NOTE: This block of code is currently bypassed because
14284                 # of a previous block but is retained for possible future use.
14285                 elsif ( $is_amp_amp{$type_ibeg_2} ) {
14286
14287                     # maybe looking at something like:
14288                     # unless $TEXTONLY || $item =~ m%</?(hr>|p>|a|img)%i;
14289
14290                     next
14291                       unless (
14292                         $this_line_is_semicolon_terminated
14293
14294                         # previous line begins with an 'if' or 'unless' keyword
14295                         && $type_ibeg_1 eq 'k'
14296                         && $is_if_unless{ $tokens_to_go[$ibeg_1] }
14297
14298                       );
14299                 }
14300
14301                 # handle line with leading = or similar
14302                 elsif ( $is_assignment{$type_ibeg_2} ) {
14303                     next unless ( $n == 1 || $n == $nmax );
14304                     next if $old_breakpoint_to_go[$iend_1];
14305                     next
14306                       unless (
14307
14308                         # unless we can reduce this to two lines
14309                         $nmax == 2
14310
14311                         # or three lines, the last with a leading semicolon
14312                         || ( $nmax == 3 && $types_to_go[$ibeg_nmax] eq ';' )
14313
14314                         # or the next line ends with a here doc
14315                         || $type_iend_2 eq 'h'
14316
14317                         # or this is a short line ending in ;
14318                         || ( $n == $nmax && $this_line_is_semicolon_terminated )
14319                       );
14320                     $forced_breakpoint_to_go[$iend_1] = 0;
14321                 }
14322
14323                 #----------------------------------------------------------
14324                 # Recombine Section 4:
14325                 # Combine the lines if we arrive here and it is possible
14326                 #----------------------------------------------------------
14327
14328                 # honor hard breakpoints
14329                 next if ( $forced_breakpoint_to_go[$iend_1] > 0 );
14330
14331                 my $bs = $bond_strength_to_go[$iend_1] + $bs_tweak;
14332
14333                 # Require a few extra spaces before recombining lines if we are
14334                 # at an old breakpoint unless this is a simple list or terminal
14335                 # line.  The goal is to avoid oscillating between two
14336                 # quasi-stable end states.  For example this snippet caused
14337                 # problems:
14338 ##    my $this =
14339 ##    bless {
14340 ##        TText => "[" . ( join ',', map { "\"$_\"" } split "\n", $_ ) . "]"
14341 ##      },
14342 ##      $type;
14343                 next
14344                   if ( $old_breakpoint_to_go[$iend_1]
14345                     && !$this_line_is_semicolon_terminated
14346                     && $n < $nmax
14347                     && $excess + 4 > 0
14348                     && $type_iend_2 ne ',' );
14349
14350                 # do not recombine if we would skip in indentation levels
14351                 if ( $n < $nmax ) {
14352                     my $if_next = $ri_beg->[ $n + 1 ];
14353                     next
14354                       if (
14355                            $levels_to_go[$ibeg_1] < $levels_to_go[$ibeg_2]
14356                         && $levels_to_go[$ibeg_2] < $levels_to_go[$if_next]
14357
14358                         # but an isolated 'if (' is undesirable
14359                         && !(
14360                                $n == 1
14361                             && $iend_1 - $ibeg_1 <= 2
14362                             && $type_ibeg_1 eq 'k'
14363                             && $tokens_to_go[$ibeg_1] eq 'if'
14364                             && $tokens_to_go[$iend_1] ne '('
14365                         )
14366                       );
14367                 }
14368
14369                 # honor no-break's
14370                 next if ( $bs >= NO_BREAK - 1 );
14371
14372                 # remember the pair with the greatest bond strength
14373                 if ( !$n_best ) {
14374                     $n_best  = $n;
14375                     $bs_best = $bs;
14376                 }
14377                 else {
14378
14379                     if ( $bs > $bs_best ) {
14380                         $n_best  = $n;
14381                         $bs_best = $bs;
14382                     }
14383                 }
14384             }
14385
14386             # recombine the pair with the greatest bond strength
14387             if ($n_best) {
14388                 splice @{$ri_beg}, $n_best, 1;
14389                 splice @{$ri_end}, $n_best - 1, 1;
14390                 splice @joint, $n_best, 1;
14391
14392                 # keep going if we are still making progress
14393                 $more_to_do++;
14394             }
14395         }
14396         return ( $ri_beg, $ri_end );
14397     }
14398 } ## end closure recombine_breakpoints
14399
14400 sub insert_final_ternary_breaks {
14401
14402     my ( $self, $ri_left, $ri_right ) = @_;
14403
14404     # Called once per batch to look for and do any final line breaks for
14405     # long ternary chains
14406
14407     my $nmax = @{$ri_right} - 1;
14408
14409     # scan the left and right end tokens of all lines
14410     my $count         = 0;
14411     my $i_first_colon = -1;
14412     for my $n ( 0 .. $nmax ) {
14413         my $il    = $ri_left->[$n];
14414         my $ir    = $ri_right->[$n];
14415         my $typel = $types_to_go[$il];
14416         my $typer = $types_to_go[$ir];
14417         return if ( $typel eq '?' );
14418         return if ( $typer eq '?' );
14419         if    ( $typel eq ':' ) { $i_first_colon = $il; last; }
14420         elsif ( $typer eq ':' ) { $i_first_colon = $ir; last; }
14421     }
14422
14423     # For long ternary chains,
14424     # if the first : we see has its ? is in the interior
14425     # of a preceding line, then see if there are any good
14426     # breakpoints before the ?.
14427     if ( $i_first_colon > 0 ) {
14428         my $i_question = $mate_index_to_go[$i_first_colon];
14429         if ( $i_question > 0 ) {
14430             my @insert_list;
14431             for ( my $ii = $i_question - 1 ; $ii >= 0 ; $ii -= 1 ) {
14432                 my $token = $tokens_to_go[$ii];
14433                 my $type  = $types_to_go[$ii];
14434
14435                 # For now, a good break is either a comma or,
14436                 # in a long chain, a 'return'.
14437                 # Patch for RT #126633: added the $nmax>1 check to avoid
14438                 # breaking after a return for a simple ternary.  For longer
14439                 # chains the break after return allows vertical alignment, so
14440                 # it is still done.  So perltidy -wba='?' will not break
14441                 # immediately after the return in the following statement:
14442                 # sub x {
14443                 #    return 0 ? 'aaaaaaaaaaaaaaaaaaaaa' :
14444                 #      'bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb';
14445                 # }
14446                 if (
14447                     (
14448                            $type eq ','
14449                         || $type eq 'k' && ( $nmax > 1 && $token eq 'return' )
14450                     )
14451                     && $self->in_same_container_i( $ii, $i_question )
14452                   )
14453                 {
14454                     push @insert_list, $ii;
14455                     last;
14456                 }
14457             }
14458
14459             # insert any new break points
14460             if (@insert_list) {
14461                 $self->insert_additional_breaks( \@insert_list, $ri_left,
14462                     $ri_right );
14463             }
14464         }
14465     }
14466     return;
14467 }
14468
14469 sub insert_breaks_before_list_opening_containers {
14470
14471     my ( $self, $ri_left, $ri_right ) = @_;
14472
14473     # This routine is called once per batch to implement the parameters
14474     # --break-before-hash-brace, etc.
14475
14476     # Nothing to do if none of these parameters has been set
14477     return unless %break_before_container_types;
14478
14479     my $nmax = @{$ri_right} - 1;
14480     return unless ( $nmax >= 0 );
14481
14482     my $rLL = $self->[_rLL_];
14483
14484     my $rbreak_before_container_by_seqno =
14485       $self->[_rbreak_before_container_by_seqno_];
14486     my $rK_weld_left = $self->[_rK_weld_left_];
14487
14488     # scan the ends of all lines
14489     my @insert_list;
14490     for my $n ( 0 .. $nmax ) {
14491         my $il = $ri_left->[$n];
14492         my $ir = $ri_right->[$n];
14493         next unless ( $ir > $il );
14494         my $Kl       = $K_to_go[$il];
14495         my $Kr       = $K_to_go[$ir];
14496         my $Kend     = $Kr;
14497         my $type_end = $rLL->[$Kr]->[_TYPE_];
14498
14499         # Backup before any side comment
14500         if ( $type_end eq '#' ) {
14501             $Kend = $self->K_previous_nonblank($Kr);
14502             next unless defined($Kend);
14503             $type_end = $rLL->[$Kend]->[_TYPE_];
14504         }
14505
14506         # Backup to the start of any weld; fix for b1173.
14507         if ($total_weld_count) {
14508             my $Kend_test = $rK_weld_left->{$Kend};
14509             if ( defined($Kend_test) && $Kend_test > $Kl ) {
14510                 $Kend      = $Kend_test;
14511                 $Kend_test = $rK_weld_left->{$Kend};
14512             }
14513
14514             # Do not break if we did not back up to the start of a weld
14515             # (shouldn't happen)
14516             next if ( defined($Kend_test) );
14517         }
14518
14519         my $token = $rLL->[$Kend]->[_TOKEN_];
14520         next unless ( $is_opening_token{$token} );
14521         next unless ( $Kl < $Kend - 1 );
14522
14523         my $seqno = $rLL->[$Kend]->[_TYPE_SEQUENCE_];
14524         next unless ( defined($seqno) );
14525
14526         # Use the flag which was previously set
14527         next unless ( $rbreak_before_container_by_seqno->{$seqno} );
14528
14529         # Install a break before this opening token.
14530         my $Kbreak = $self->K_previous_nonblank($Kend);
14531         my $ibreak = $Kbreak - $Kl + $il;
14532         next if ( $ibreak < $il );
14533         next if ( $nobreak_to_go[$ibreak] );
14534         push @insert_list, $ibreak;
14535     }
14536
14537     # insert any new break points
14538     if (@insert_list) {
14539         $self->insert_additional_breaks( \@insert_list, $ri_left, $ri_right );
14540     }
14541     return;
14542 }
14543
14544 sub note_added_semicolon {
14545     my ( $self, $line_number ) = @_;
14546     $self->[_last_added_semicolon_at_] = $line_number;
14547     if ( $self->[_added_semicolon_count_] == 0 ) {
14548         $self->[_first_added_semicolon_at_] = $line_number;
14549     }
14550     $self->[_added_semicolon_count_]++;
14551     write_logfile_entry("Added ';' here\n");
14552     return;
14553 }
14554
14555 sub note_deleted_semicolon {
14556     my ( $self, $line_number ) = @_;
14557     $self->[_last_deleted_semicolon_at_] = $line_number;
14558     if ( $self->[_deleted_semicolon_count_] == 0 ) {
14559         $self->[_first_deleted_semicolon_at_] = $line_number;
14560     }
14561     $self->[_deleted_semicolon_count_]++;
14562     write_logfile_entry("Deleted unnecessary ';' at line $line_number\n");
14563     return;
14564 }
14565
14566 sub note_embedded_tab {
14567     my ( $self, $line_number ) = @_;
14568     $self->[_embedded_tab_count_]++;
14569     $self->[_last_embedded_tab_at_] = $line_number;
14570     if ( !$self->[_first_embedded_tab_at_] ) {
14571         $self->[_first_embedded_tab_at_] = $line_number;
14572     }
14573
14574     if ( $self->[_embedded_tab_count_] <= MAX_NAG_MESSAGES ) {
14575         write_logfile_entry("Embedded tabs in quote or pattern\n");
14576     }
14577     return;
14578 }
14579
14580 sub correct_lp_indentation {
14581
14582     # When the -lp option is used, we need to make a last pass through
14583     # each line to correct the indentation positions in case they differ
14584     # from the predictions.  This is necessary because perltidy uses a
14585     # predictor/corrector method for aligning with opening parens.  The
14586     # predictor is usually good, but sometimes stumbles.  The corrector
14587     # tries to patch things up once the actual opening paren locations
14588     # are known.
14589     my ( $self, $ri_first, $ri_last ) = @_;
14590     my $do_not_pad = 0;
14591
14592     #  Note on flag '$do_not_pad':
14593     #  We want to avoid a situation like this, where the aligner inserts
14594     #  whitespace before the '=' to align it with a previous '=', because
14595     #  otherwise the parens might become mis-aligned in a situation like
14596     #  this, where the '=' has become aligned with the previous line,
14597     #  pushing the opening '(' forward beyond where we want it.
14598     #
14599     #  $mkFloor::currentRoom = '';
14600     #  $mkFloor::c_entry     = $c->Entry(
14601     #                                 -width        => '10',
14602     #                                 -relief       => 'sunken',
14603     #                                 ...
14604     #                                 );
14605     #
14606     #  We leave it to the aligner to decide how to do this.
14607
14608     # first remove continuation indentation if appropriate
14609     my $max_line = @{$ri_first} - 1;
14610
14611     # looking at each line of this batch..
14612     my ( $ibeg, $iend );
14613     foreach my $line ( 0 .. $max_line ) {
14614         $ibeg = $ri_first->[$line];
14615         $iend = $ri_last->[$line];
14616
14617         # looking at each token in this output line..
14618         foreach my $i ( $ibeg .. $iend ) {
14619
14620             # How many space characters to place before this token
14621             # for special alignment.  Actual padding is done in the
14622             # continue block.
14623
14624             # looking for next unvisited indentation item
14625             my $indentation = $leading_spaces_to_go[$i];
14626             if ( !$indentation->get_marked() ) {
14627                 $indentation->set_marked(1);
14628
14629                 # looking for indentation item for which we are aligning
14630                 # with parens, braces, and brackets
14631                 next unless ( $indentation->get_align_paren() );
14632
14633                 # skip closed container on this line
14634                 if ( $i > $ibeg ) {
14635                     my $im = max( $ibeg, $iprev_to_go[$i] );
14636                     if (   $type_sequence_to_go[$im]
14637                         && $mate_index_to_go[$im] <= $iend )
14638                     {
14639                         next;
14640                     }
14641                 }
14642
14643                 if ( $line == 1 && $i == $ibeg ) {
14644                     $do_not_pad = 1;
14645                 }
14646
14647                 # Ok, let's see what the error is and try to fix it
14648                 my $actual_pos;
14649                 my $predicted_pos = $indentation->get_spaces();
14650                 if ( $i > $ibeg ) {
14651
14652                     # token is mid-line - use length to previous token
14653                     $actual_pos = total_line_length( $ibeg, $i - 1 );
14654
14655                     # for mid-line token, we must check to see if all
14656                     # additional lines have continuation indentation,
14657                     # and remove it if so.  Otherwise, we do not get
14658                     # good alignment.
14659                     my $closing_index = $indentation->get_closed();
14660                     if ( $closing_index > $iend ) {
14661                         my $ibeg_next = $ri_first->[ $line + 1 ];
14662                         if ( $ci_levels_to_go[$ibeg_next] > 0 ) {
14663                             $self->undo_lp_ci( $line, $i, $closing_index,
14664                                 $ri_first, $ri_last );
14665                         }
14666                     }
14667                 }
14668                 elsif ( $line > 0 ) {
14669
14670                     # handle case where token starts a new line;
14671                     # use length of previous line
14672                     my $ibegm = $ri_first->[ $line - 1 ];
14673                     my $iendm = $ri_last->[ $line - 1 ];
14674                     $actual_pos = total_line_length( $ibegm, $iendm );
14675
14676                     # follow -pt style
14677                     ++$actual_pos
14678                       if ( $types_to_go[ $iendm + 1 ] eq 'b' );
14679                 }
14680                 else {
14681
14682                     # token is first character of first line of batch
14683                     $actual_pos = $predicted_pos;
14684                 }
14685
14686                 my $move_right = $actual_pos - $predicted_pos;
14687
14688                 # done if no error to correct (gnu2.t)
14689                 if ( $move_right == 0 ) {
14690                     $indentation->set_recoverable_spaces($move_right);
14691                     next;
14692                 }
14693
14694                 # if we have not seen closure for this indentation in
14695                 # this batch, we can only pass on a request to the
14696                 # vertical aligner
14697                 my $closing_index = $indentation->get_closed();
14698
14699                 if ( $closing_index < 0 ) {
14700                     $indentation->set_recoverable_spaces($move_right);
14701                     next;
14702                 }
14703
14704                 # If necessary, look ahead to see if there is really any
14705                 # leading whitespace dependent on this whitespace, and
14706                 # also find the longest line using this whitespace.
14707                 # Since it is always safe to move left if there are no
14708                 # dependents, we only need to do this if we may have
14709                 # dependent nodes or need to move right.
14710
14711                 my $right_margin = 0;
14712                 my $have_child   = $indentation->get_have_child();
14713
14714                 my %saw_indentation;
14715                 my $line_count = 1;
14716                 $saw_indentation{$indentation} = $indentation;
14717
14718                 if ( $have_child || $move_right > 0 ) {
14719                     $have_child = 0;
14720                     my $max_length = 0;
14721                     if ( $i == $ibeg ) {
14722                         $max_length = total_line_length( $ibeg, $iend );
14723                     }
14724
14725                     # look ahead at the rest of the lines of this batch..
14726                     foreach my $line_t ( $line + 1 .. $max_line ) {
14727                         my $ibeg_t = $ri_first->[$line_t];
14728                         my $iend_t = $ri_last->[$line_t];
14729                         last if ( $closing_index <= $ibeg_t );
14730
14731                         # remember all different indentation objects
14732                         my $indentation_t = $leading_spaces_to_go[$ibeg_t];
14733                         $saw_indentation{$indentation_t} = $indentation_t;
14734                         $line_count++;
14735
14736                         # remember longest line in the group
14737                         my $length_t = total_line_length( $ibeg_t, $iend_t );
14738                         if ( $length_t > $max_length ) {
14739                             $max_length = $length_t;
14740                         }
14741                     }
14742                     $right_margin =
14743                       $maximum_line_length_at_level[ $levels_to_go[$ibeg] ] -
14744                       $max_length;
14745                     if ( $right_margin < 0 ) { $right_margin = 0 }
14746                 }
14747
14748                 my $first_line_comma_count =
14749                   grep { $_ eq ',' } @types_to_go[ $ibeg .. $iend ];
14750                 my $comma_count = $indentation->get_comma_count();
14751                 my $arrow_count = $indentation->get_arrow_count();
14752
14753                 # This is a simple approximate test for vertical alignment:
14754                 # if we broke just after an opening paren, brace, bracket,
14755                 # and there are 2 or more commas in the first line,
14756                 # and there are no '=>'s,
14757                 # then we are probably vertically aligned.  We could set
14758                 # an exact flag in sub scan_list, but this is good
14759                 # enough.
14760                 my $indentation_count = keys %saw_indentation;
14761                 my $is_vertically_aligned =
14762                   (      $i == $ibeg
14763                       && $first_line_comma_count > 1
14764                       && $indentation_count == 1
14765                       && ( $arrow_count == 0 || $arrow_count == $line_count ) );
14766
14767                 # Make the move if possible ..
14768                 if (
14769
14770                     # we can always move left
14771                     $move_right < 0
14772
14773                     # but we should only move right if we are sure it will
14774                     # not spoil vertical alignment
14775                     || ( $comma_count == 0 )
14776                     || ( $comma_count > 0 && !$is_vertically_aligned )
14777                   )
14778                 {
14779                     my $move =
14780                       ( $move_right <= $right_margin )
14781                       ? $move_right
14782                       : $right_margin;
14783
14784                     foreach ( keys %saw_indentation ) {
14785                         $saw_indentation{$_}
14786                           ->permanently_decrease_available_spaces( -$move );
14787                     }
14788                 }
14789
14790                 # Otherwise, record what we want and the vertical aligner
14791                 # will try to recover it.
14792                 else {
14793                     $indentation->set_recoverable_spaces($move_right);
14794                 }
14795             }
14796         }
14797     }
14798     return $do_not_pad;
14799 }
14800
14801 sub undo_lp_ci {
14802
14803     # If there is a single, long parameter within parens, like this:
14804     #
14805     #  $self->command( "/msg "
14806     #        . $infoline->chan
14807     #        . " You said $1, but did you know that it's square was "
14808     #        . $1 * $1 . " ?" );
14809     #
14810     # we can remove the continuation indentation of the 2nd and higher lines
14811     # to achieve this effect, which is more pleasing:
14812     #
14813     #  $self->command("/msg "
14814     #                 . $infoline->chan
14815     #                 . " You said $1, but did you know that it's square was "
14816     #                 . $1 * $1 . " ?");
14817
14818     my ( $self, $line_open, $i_start, $closing_index, $ri_first, $ri_last ) =
14819       @_;
14820     my $max_line = @{$ri_first} - 1;
14821
14822     # must be multiple lines
14823     return unless $max_line > $line_open;
14824
14825     my $lev_start     = $levels_to_go[$i_start];
14826     my $ci_start_plus = 1 + $ci_levels_to_go[$i_start];
14827
14828     # see if all additional lines in this container have continuation
14829     # indentation
14830     my $n;
14831     my $line_1 = 1 + $line_open;
14832     for ( $n = $line_1 ; $n <= $max_line ; ++$n ) {
14833         my $ibeg = $ri_first->[$n];
14834         my $iend = $ri_last->[$n];
14835         if ( $ibeg eq $closing_index ) { $n--; last }
14836         return if ( $lev_start != $levels_to_go[$ibeg] );
14837         return if ( $ci_start_plus != $ci_levels_to_go[$ibeg] );
14838         last   if ( $closing_index <= $iend );
14839     }
14840
14841     # we can reduce the indentation of all continuation lines
14842     my $continuation_line_count = $n - $line_open;
14843     @ci_levels_to_go[ @{$ri_first}[ $line_1 .. $n ] ] =
14844       (0) x ($continuation_line_count);
14845     @leading_spaces_to_go[ @{$ri_first}[ $line_1 .. $n ] ] =
14846       @reduced_spaces_to_go[ @{$ri_first}[ $line_1 .. $n ] ];
14847     return;
14848 }
14849
14850 ###############################################
14851 # CODE SECTION 10: Code to break long statments
14852 ###############################################
14853
14854 sub set_continuation_breaks {
14855
14856     # Called once per batch to set breaks in long lines.
14857
14858     # Define an array of indexes for inserting newline characters to
14859     # keep the line lengths below the maximum desired length.  There is
14860     # an implied break after the last token, so it need not be included.
14861
14862     # Method:
14863     # This routine is part of series of routines which adjust line
14864     # lengths.  It is only called if a statement is longer than the
14865     # maximum line length, or if a preliminary scanning located
14866     # desirable break points.   Sub scan_list has already looked at
14867     # these tokens and set breakpoints (in array
14868     # $forced_breakpoint_to_go[$i]) where it wants breaks (for example
14869     # after commas, after opening parens, and before closing parens).
14870     # This routine will honor these breakpoints and also add additional
14871     # breakpoints as necessary to keep the line length below the maximum
14872     # requested.  It bases its decision on where the 'bond strength' is
14873     # lowest.
14874
14875     # Output: returns references to the arrays:
14876     #  @i_first
14877     #  @i_last
14878     # which contain the indexes $i of the first and last tokens on each
14879     # line.
14880
14881     # In addition, the array:
14882     #   $forced_breakpoint_to_go[$i]
14883     # may be updated to be =1 for any index $i after which there must be
14884     # a break.  This signals later routines not to undo the breakpoint.
14885
14886     my ( $self, $saw_good_break, $rcolon_list ) = @_;
14887
14888     # @{$rcolon_list} is a list of all the ? and : tokens in the batch, in
14889     # order.
14890
14891     use constant DEBUG_BREAKPOINTS => 0;
14892
14893     my @i_first        = ();    # the first index to output
14894     my @i_last         = ();    # the last index to output
14895     my @i_colon_breaks = ();    # needed to decide if we have to break at ?'s
14896     if ( $types_to_go[0] eq ':' ) { push @i_colon_breaks, 0 }
14897
14898     $self->set_bond_strengths();
14899
14900     my $imin = 0;
14901     my $imax = $max_index_to_go;
14902     if ( $types_to_go[$imin] eq 'b' ) { $imin++ }
14903     if ( $types_to_go[$imax] eq 'b' ) { $imax-- }
14904     my $i_begin = $imin;        # index for starting next iteration
14905
14906     my $leading_spaces          = leading_spaces_to_go($imin);
14907     my $line_count              = 0;
14908     my $last_break_strength     = NO_BREAK;
14909     my $i_last_break            = -1;
14910     my $max_bias                = 0.001;
14911     my $tiny_bias               = 0.0001;
14912     my $leading_alignment_token = "";
14913     my $leading_alignment_type  = "";
14914
14915     # see if any ?/:'s are in order
14916     my $colons_in_order = 1;
14917     my $last_tok        = "";
14918     foreach ( @{$rcolon_list} ) {
14919         if ( $_ eq $last_tok ) { $colons_in_order = 0; last }
14920         $last_tok = $_;
14921     }
14922
14923     # This is a sufficient but not necessary condition for colon chain
14924     my $is_colon_chain = ( $colons_in_order && @{$rcolon_list} > 2 );
14925
14926     my $Msg = "";
14927
14928     #-------------------------------------------------------
14929     # BEGINNING of main loop to set continuation breakpoints
14930     # Keep iterating until we reach the end
14931     #-------------------------------------------------------
14932     while ( $i_begin <= $imax ) {
14933         my $lowest_strength        = NO_BREAK;
14934         my $starting_sum           = $summed_lengths_to_go[$i_begin];
14935         my $i_lowest               = -1;
14936         my $i_test                 = -1;
14937         my $lowest_next_token      = '';
14938         my $lowest_next_type       = 'b';
14939         my $i_lowest_next_nonblank = -1;
14940         my $maximum_line_length =
14941           $maximum_line_length_at_level[ $levels_to_go[$i_begin] ];
14942
14943         #-------------------------------------------------------
14944         # BEGINNING of inner loop to find the best next breakpoint
14945         #-------------------------------------------------------
14946         my $strength = NO_BREAK;
14947         for ( $i_test = $i_begin ; $i_test <= $imax ; $i_test++ ) {
14948             my $type                     = $types_to_go[$i_test];
14949             my $token                    = $tokens_to_go[$i_test];
14950             my $next_type                = $types_to_go[ $i_test + 1 ];
14951             my $next_token               = $tokens_to_go[ $i_test + 1 ];
14952             my $i_next_nonblank          = $inext_to_go[$i_test];
14953             my $next_nonblank_type       = $types_to_go[$i_next_nonblank];
14954             my $next_nonblank_token      = $tokens_to_go[$i_next_nonblank];
14955             my $next_nonblank_block_type = $block_type_to_go[$i_next_nonblank];
14956
14957             # adjustments to the previous bond strength may have been made, and
14958             # we must keep the bond strength of a token and its following blank
14959             # the same;
14960             my $last_strength = $strength;
14961             $strength = $bond_strength_to_go[$i_test];
14962             if ( $type eq 'b' ) { $strength = $last_strength }
14963
14964             # reduce strength a bit to break ties at an old comma breakpoint ...
14965             if (
14966
14967                 $old_breakpoint_to_go[$i_test]
14968
14969                 # Patch: limited to just commas to avoid blinking states
14970                 && $type eq ','
14971
14972                 # which is a 'good' breakpoint, meaning ...
14973                 # we don't want to break before it
14974                 && !$want_break_before{$type}
14975
14976                 # and either we want to break before the next token
14977                 # or the next token is not short (i.e. not a '*', '/' etc.)
14978                 && $i_next_nonblank <= $imax
14979                 && (   $want_break_before{$next_nonblank_type}
14980                     || $token_lengths_to_go[$i_next_nonblank] > 2
14981                     || $next_nonblank_type eq ','
14982                     || $is_opening_type{$next_nonblank_type} )
14983               )
14984             {
14985                 $strength -= $tiny_bias;
14986                 DEBUG_BREAKPOINTS && do { $Msg .= " :-bias at i=$i_test" };
14987             }
14988
14989             # otherwise increase strength a bit if this token would be at the
14990             # maximum line length.  This is necessary to avoid blinking
14991             # in the above example when the -iob flag is added.
14992             else {
14993                 my $len =
14994                   $leading_spaces +
14995                   $summed_lengths_to_go[ $i_test + 1 ] -
14996                   $starting_sum;
14997                 if ( $len >= $maximum_line_length ) {
14998                     $strength += $tiny_bias;
14999                     DEBUG_BREAKPOINTS && do { $Msg .= " :+bias at i=$i_test" };
15000                 }
15001             }
15002
15003             my $must_break = 0;
15004
15005             # Force an immediate break at certain operators
15006             # with lower level than the start of the line,
15007             # unless we've already seen a better break.
15008             #
15009             ##############################################
15010             # Note on an issue with a preceding ?
15011             ##############################################
15012             # We don't include a ? in the above list, but there may
15013             # be a break at a previous ? if the line is long.
15014             # Because of this we do not want to force a break if
15015             # there is a previous ? on this line.  For now the best way
15016             # to do this is to not break if we have seen a lower strength
15017             # point, which is probably a ?.
15018             #
15019             # Example of unwanted breaks we are avoiding at a '.' following a ?
15020             # from pod2html using perltidy -gnu:
15021             # )
15022             # ? "\n&lt;A NAME=\""
15023             # . $value
15024             # . "\"&gt;\n$text&lt;/A&gt;\n"
15025             # : "\n$type$pod2.html\#" . $value . "\"&gt;$text&lt;\/A&gt;\n";
15026             if (
15027                 ( $strength <= $lowest_strength )
15028                 && ( $nesting_depth_to_go[$i_begin] >
15029                     $nesting_depth_to_go[$i_next_nonblank] )
15030                 && (
15031                     $next_nonblank_type =~ /^(\.|\&\&|\|\|)$/
15032                     || (   $next_nonblank_type eq 'k'
15033                         && $next_nonblank_token =~ /^(and|or)$/ )
15034                 )
15035               )
15036             {
15037                 $self->set_forced_breakpoint($i_next_nonblank);
15038                 DEBUG_BREAKPOINTS
15039                   && do { $Msg .= " :Forced break at i=$i_next_nonblank" };
15040             }
15041
15042             if (
15043
15044                 # Try to put a break where requested by scan_list
15045                 $forced_breakpoint_to_go[$i_test]
15046
15047                 # break between ) { in a continued line so that the '{' can
15048                 # be outdented
15049                 # See similar logic in scan_list which catches instances
15050                 # where a line is just something like ') {'.  We have to
15051                 # be careful because the corresponding block keyword might
15052                 # not be on the first line, such as 'for' here:
15053                 #
15054                 # eval {
15055                 #     for ("a") {
15056                 #         for $x ( 1, 2 ) { local $_ = "b"; s/(.*)/+$1/ }
15057                 #     }
15058                 # };
15059                 #
15060                 || (
15061                        $line_count
15062                     && ( $token eq ')' )
15063                     && ( $next_nonblank_type eq '{' )
15064                     && ($next_nonblank_block_type)
15065                     && ( $next_nonblank_block_type ne $tokens_to_go[$i_begin] )
15066
15067                     # RT #104427: Dont break before opening sub brace because
15068                     # sub block breaks handled at higher level, unless
15069                     # it looks like the preceding list is long and broken
15070                     && !(
15071                         $next_nonblank_block_type =~ /$ANYSUB_PATTERN/
15072                         && ( $nesting_depth_to_go[$i_begin] ==
15073                             $nesting_depth_to_go[$i_next_nonblank] )
15074                     )
15075
15076                     && !$rOpts->{'opening-brace-always-on-right'}
15077                 )
15078
15079                 # There is an implied forced break at a terminal opening brace
15080                 || ( ( $type eq '{' ) && ( $i_test == $imax ) )
15081               )
15082             {
15083
15084                 # Forced breakpoints must sometimes be overridden, for example
15085                 # because of a side comment causing a NO_BREAK.  It is easier
15086                 # to catch this here than when they are set.
15087                 if ( $strength < NO_BREAK - 1 ) {
15088                     $strength   = $lowest_strength - $tiny_bias;
15089                     $must_break = 1;
15090                     DEBUG_BREAKPOINTS
15091                       && do { $Msg .= " :set must_break at i=$i_next_nonblank" };
15092                 }
15093             }
15094
15095             # quit if a break here would put a good terminal token on
15096             # the next line and we already have a possible break
15097             if (
15098                    !$must_break
15099                 && ( $next_nonblank_type eq ';' || $next_nonblank_type eq ',' )
15100                 && (
15101                     (
15102                         $leading_spaces +
15103                         $summed_lengths_to_go[ $i_next_nonblank + 1 ] -
15104                         $starting_sum
15105                     ) > $maximum_line_length
15106                 )
15107               )
15108             {
15109                 if ( $i_lowest >= 0 ) {
15110                     DEBUG_BREAKPOINTS && do {
15111                         $Msg .= " :quit at good terminal='$next_nonblank_type'";
15112                     };
15113                     last;
15114                 }
15115             }
15116
15117             # Avoid a break which would strand a single punctuation
15118             # token.  For example, we do not want to strand a leading
15119             # '.' which is followed by a long quoted string.
15120             # But note that we do want to do this with -extrude (l=1)
15121             # so please test any changes to this code on -extrude.
15122             if (
15123                    !$must_break
15124                 && ( $i_test == $i_begin )
15125                 && ( $i_test < $imax )
15126                 && ( $token eq $type )
15127                 && (
15128                     (
15129                         $leading_spaces +
15130                         $summed_lengths_to_go[ $i_test + 1 ] -
15131                         $starting_sum
15132                     ) < $maximum_line_length
15133                 )
15134               )
15135             {
15136                 $i_test = min( $imax, $inext_to_go[$i_test] );
15137                 DEBUG_BREAKPOINTS && do {
15138                     $Msg .= " :redo at i=$i_test";
15139                 };
15140                 redo;
15141             }
15142
15143             if ( ( $strength <= $lowest_strength ) && ( $strength < NO_BREAK ) )
15144             {
15145
15146                 # break at previous best break if it would have produced
15147                 # a leading alignment of certain common tokens, and it
15148                 # is different from the latest candidate break
15149                 if ($leading_alignment_type) {
15150                     DEBUG_BREAKPOINTS && do {
15151                         $Msg .=
15152 " :last at leading_alignment='$leading_alignment_type'";
15153                     };
15154                     last;
15155                 }
15156
15157                 # Force at least one breakpoint if old code had good
15158                 # break It is only called if a breakpoint is required or
15159                 # desired.  This will probably need some adjustments
15160                 # over time.  A goal is to try to be sure that, if a new
15161                 # side comment is introduced into formatted text, then
15162                 # the same breakpoints will occur.  scbreak.t
15163                 if (
15164                     $i_test == $imax            # we are at the end
15165                     && !get_forced_breakpoint_count()
15166                     && $saw_good_break          # old line had good break
15167                     && $type =~ /^[#;\{]$/      # and this line ends in
15168                                                 # ';' or side comment
15169                     && $i_last_break < 0        # and we haven't made a break
15170                     && $i_lowest >= 0           # and we saw a possible break
15171                     && $i_lowest < $imax - 1    # (but not just before this ;)
15172                     && $strength - $lowest_strength < 0.5 * WEAK # and it's good
15173                   )
15174                 {
15175
15176                     DEBUG_BREAKPOINTS && do {
15177                         $Msg .= " :last at good old break\n";
15178                     };
15179                     last;
15180                 }
15181
15182                 # Do not skip past an important break point in a short final
15183                 # segment.  For example, without this check we would miss the
15184                 # break at the final / in the following code:
15185                 #
15186                 #  $depth_stop =
15187                 #    ( $tau * $mass_pellet * $q_0 *
15188                 #        ( 1. - exp( -$t_stop / $tau ) ) -
15189                 #        4. * $pi * $factor * $k_ice *
15190                 #        ( $t_melt - $t_ice ) *
15191                 #        $r_pellet *
15192                 #        $t_stop ) /
15193                 #    ( $rho_ice * $Qs * $pi * $r_pellet**2 );
15194                 #
15195                 if (
15196                        $line_count > 2
15197                     && $i_lowest >= 0    # and we saw a possible break
15198                     && $i_lowest < $i_test
15199                     && $i_test > $imax - 2
15200                     && $nesting_depth_to_go[$i_begin] >
15201                     $nesting_depth_to_go[$i_lowest]
15202                     && $lowest_strength < $last_break_strength - .5 * WEAK
15203                   )
15204                 {
15205                     # Make this break for math operators for now
15206                     my $ir = $inext_to_go[$i_lowest];
15207                     my $il = $iprev_to_go[$ir];
15208                     if (   $types_to_go[$il] =~ /^[\/\*\+\-\%]$/
15209                         || $types_to_go[$ir] =~ /^[\/\*\+\-\%]$/ )
15210                     {
15211                         DEBUG_BREAKPOINTS && do {
15212                             $Msg .= " :last-noskip_short";
15213                         };
15214                         last;
15215                     }
15216                 }
15217
15218                 # Update the minimum bond strength location
15219                 $lowest_strength        = $strength;
15220                 $i_lowest               = $i_test;
15221                 $lowest_next_token      = $next_nonblank_token;
15222                 $lowest_next_type       = $next_nonblank_type;
15223                 $i_lowest_next_nonblank = $i_next_nonblank;
15224                 if ($must_break) {
15225                     DEBUG_BREAKPOINTS && do {
15226                         $Msg .= " :last-must_break";
15227                     };
15228                     last;
15229                 }
15230
15231                 # set flags to remember if a break here will produce a
15232                 # leading alignment of certain common tokens
15233                 if (   $line_count > 0
15234                     && $i_test < $imax
15235                     && ( $lowest_strength - $last_break_strength <= $max_bias )
15236                   )
15237                 {
15238                     my $i_last_end = $iprev_to_go[$i_begin];
15239                     my $tok_beg    = $tokens_to_go[$i_begin];
15240                     my $type_beg   = $types_to_go[$i_begin];
15241                     if (
15242
15243                         # check for leading alignment of certain tokens
15244                         (
15245                                $tok_beg eq $next_nonblank_token
15246                             && $is_chain_operator{$tok_beg}
15247                             && (   $type_beg eq 'k'
15248                                 || $type_beg eq $tok_beg )
15249                             && $nesting_depth_to_go[$i_begin] >=
15250                             $nesting_depth_to_go[$i_next_nonblank]
15251                         )
15252
15253                         || (   $tokens_to_go[$i_last_end] eq $token
15254                             && $is_chain_operator{$token}
15255                             && ( $type eq 'k' || $type eq $token )
15256                             && $nesting_depth_to_go[$i_last_end] >=
15257                             $nesting_depth_to_go[$i_test] )
15258                       )
15259                     {
15260                         $leading_alignment_token = $next_nonblank_token;
15261                         $leading_alignment_type  = $next_nonblank_type;
15262                     }
15263                 }
15264             }
15265
15266             my $too_long = ( $i_test >= $imax );
15267             if ( !$too_long ) {
15268                 my $next_length =
15269                   $leading_spaces +
15270                   $summed_lengths_to_go[ $i_test + 2 ] -
15271                   $starting_sum;
15272                 $too_long = $next_length > $maximum_line_length;
15273
15274                 # To prevent blinkers we will avoid leaving a token exactly at
15275                 # the line length limit unless it is the last token or one of
15276                 # several "good" types.
15277                 #
15278                 # The following code was a blinker with -pbp before this
15279                 # modification:
15280 ##                    $last_nonblank_token eq '('
15281 ##                        && $is_indirect_object_taker{ $paren_type
15282 ##                            [$paren_depth] }
15283                 # The issue causing the problem is that if the
15284                 # term [$paren_depth] gets broken across a line then
15285                 # the whitespace routine doesn't see both opening and closing
15286                 # brackets and will format like '[ $paren_depth ]'.  This
15287                 # leads to an oscillation in length depending if we break
15288                 # before the closing bracket or not.
15289                 if (  !$too_long
15290                     && $i_test + 1 < $imax
15291                     && $next_nonblank_type ne ','
15292                     && !$is_closing_type{$next_nonblank_type} )
15293                 {
15294                     $too_long = $next_length >= $maximum_line_length;
15295                     DEBUG_BREAKPOINTS && do {
15296                         $Msg .= " :too_long=$too_long" if ($too_long);
15297                     }
15298                 }
15299             }
15300
15301             DEBUG_BREAKPOINTS && do {
15302                 my $ltok     = $token;
15303                 my $rtok     = $next_nonblank_token ? $next_nonblank_token : "";
15304                 my $i_testp2 = $i_test + 2;
15305                 if ( $i_testp2 > $max_index_to_go + 1 ) {
15306                     $i_testp2 = $max_index_to_go + 1;
15307                 }
15308                 if ( length($ltok) > 6 ) { $ltok = substr( $ltok, 0, 8 ) }
15309                 if ( length($rtok) > 6 ) { $rtok = substr( $rtok, 0, 8 ) }
15310                 print STDOUT
15311 "BREAK: i=$i_test imax=$imax $types_to_go[$i_test] $next_nonblank_type sp=($leading_spaces) lnext= $summed_lengths_to_go[$i_testp2] 2long=$too_long str=$strength    $ltok $rtok\n";
15312             };
15313
15314             # allow one extra terminal token after exceeding line length
15315             # if it would strand this token.
15316             if (   $rOpts_fuzzy_line_length
15317                 && $too_long
15318                 && $i_lowest == $i_test
15319                 && $token_lengths_to_go[$i_test] > 1
15320                 && ( $next_nonblank_type eq ';' || $next_nonblank_type eq ',' )
15321               )
15322             {
15323                 $too_long = 0;
15324                 DEBUG_BREAKPOINTS && do {
15325                     $Msg .= " :do_not_strand next='$next_nonblank_type'";
15326                 };
15327             }
15328
15329             # we are done if...
15330             if (
15331
15332                 # ... no more space and we have a break
15333                 $too_long && $i_lowest >= 0
15334
15335                 # ... or no more tokens
15336                 || $i_test == $imax
15337               )
15338             {
15339                 DEBUG_BREAKPOINTS && do {
15340                     $Msg .=
15341 " :Done-too_long=$too_long or i_lowest=$i_lowest or $i_test==imax";
15342                 };
15343                 last;
15344             }
15345         }
15346
15347         #-------------------------------------------------------
15348         # END of inner loop to find the best next breakpoint
15349         # Now decide exactly where to put the breakpoint
15350         #-------------------------------------------------------
15351
15352         # it's always ok to break at imax if no other break was found
15353         if ( $i_lowest < 0 ) { $i_lowest = $imax }
15354
15355         # semi-final index calculation
15356         my $i_next_nonblank     = $inext_to_go[$i_lowest];
15357         my $next_nonblank_type  = $types_to_go[$i_next_nonblank];
15358         my $next_nonblank_token = $tokens_to_go[$i_next_nonblank];
15359
15360         #-------------------------------------------------------
15361         # ?/: rule 1 : if a break here will separate a '?' on this
15362         # line from its closing ':', then break at the '?' instead.
15363         #-------------------------------------------------------
15364         foreach my $i ( $i_begin + 1 .. $i_lowest - 1 ) {
15365             next unless ( $tokens_to_go[$i] eq '?' );
15366
15367             # do not break if probable sequence of ?/: statements
15368             next if ($is_colon_chain);
15369
15370             # do not break if statement is broken by side comment
15371             next
15372               if ( $tokens_to_go[$max_index_to_go] eq '#'
15373                 && terminal_type_i( 0, $max_index_to_go ) !~ /^[\;\}]$/ );
15374
15375             # no break needed if matching : is also on the line
15376             next
15377               if ( $mate_index_to_go[$i] >= 0
15378                 && $mate_index_to_go[$i] <= $i_next_nonblank );
15379
15380             $i_lowest = $i;
15381             if ( $want_break_before{'?'} ) { $i_lowest-- }
15382             last;
15383         }
15384
15385         #-------------------------------------------------------
15386         # END of inner loop to find the best next breakpoint:
15387         # Break the line after the token with index i=$i_lowest
15388         #-------------------------------------------------------
15389
15390         # final index calculation
15391         $i_next_nonblank     = $inext_to_go[$i_lowest];
15392         $next_nonblank_type  = $types_to_go[$i_next_nonblank];
15393         $next_nonblank_token = $tokens_to_go[$i_next_nonblank];
15394
15395         DEBUG_BREAKPOINTS
15396           && print STDOUT
15397 "BREAK: best is i = $i_lowest strength = $lowest_strength;\nReason>> $Msg\n";
15398         $Msg = "";
15399
15400         #-------------------------------------------------------
15401         # ?/: rule 2 : if we break at a '?', then break at its ':'
15402         #
15403         # Note: this rule is also in sub scan_list to handle a break
15404         # at the start and end of a line (in case breaks are dictated
15405         # by side comments).
15406         #-------------------------------------------------------
15407         if ( $next_nonblank_type eq '?' ) {
15408             $self->set_closing_breakpoint($i_next_nonblank);
15409         }
15410         elsif ( $types_to_go[$i_lowest] eq '?' ) {
15411             $self->set_closing_breakpoint($i_lowest);
15412         }
15413
15414         #-------------------------------------------------------
15415         # ?/: rule 3 : if we break at a ':' then we save
15416         # its location for further work below.  We may need to go
15417         # back and break at its '?'.
15418         #-------------------------------------------------------
15419         if ( $next_nonblank_type eq ':' ) {
15420             push @i_colon_breaks, $i_next_nonblank;
15421         }
15422         elsif ( $types_to_go[$i_lowest] eq ':' ) {
15423             push @i_colon_breaks, $i_lowest;
15424         }
15425
15426         # here we should set breaks for all '?'/':' pairs which are
15427         # separated by this line
15428
15429         $line_count++;
15430
15431         # save this line segment, after trimming blanks at the ends
15432         push( @i_first,
15433             ( $types_to_go[$i_begin] eq 'b' ) ? $i_begin + 1 : $i_begin );
15434         push( @i_last,
15435             ( $types_to_go[$i_lowest] eq 'b' ) ? $i_lowest - 1 : $i_lowest );
15436
15437         # set a forced breakpoint at a container opening, if necessary, to
15438         # signal a break at a closing container.  Excepting '(' for now.
15439         if (
15440             (
15441                    $tokens_to_go[$i_lowest] eq '{'
15442                 || $tokens_to_go[$i_lowest] eq '['
15443             )
15444             && !$forced_breakpoint_to_go[$i_lowest]
15445           )
15446         {
15447             $self->set_closing_breakpoint($i_lowest);
15448         }
15449
15450         # get ready to go again
15451         $i_begin                 = $i_lowest + 1;
15452         $last_break_strength     = $lowest_strength;
15453         $i_last_break            = $i_lowest;
15454         $leading_alignment_token = "";
15455         $leading_alignment_type  = "";
15456         $lowest_next_token       = '';
15457         $lowest_next_type        = 'b';
15458
15459         if ( ( $i_begin <= $imax ) && ( $types_to_go[$i_begin] eq 'b' ) ) {
15460             $i_begin++;
15461         }
15462
15463         # update indentation size
15464         if ( $i_begin <= $imax ) {
15465             $leading_spaces = leading_spaces_to_go($i_begin);
15466             DEBUG_BREAKPOINTS
15467               && print STDOUT
15468               "updating leading spaces to be $leading_spaces at i=$i_begin\n";
15469         }
15470     }
15471
15472     #-------------------------------------------------------
15473     # END of main loop to set continuation breakpoints
15474     # Now go back and make any necessary corrections
15475     #-------------------------------------------------------
15476
15477     #-------------------------------------------------------
15478     # ?/: rule 4 -- if we broke at a ':', then break at
15479     # corresponding '?' unless this is a chain of ?: expressions
15480     #-------------------------------------------------------
15481     if (@i_colon_breaks) {
15482
15483         # using a simple method for deciding if we are in a ?/: chain --
15484         # this is a chain if it has multiple ?/: pairs all in order;
15485         # otherwise not.
15486         # Note that if line starts in a ':' we count that above as a break
15487         my $is_chain = ( $colons_in_order && @i_colon_breaks > 1 );
15488
15489         unless ($is_chain) {
15490             my @insert_list = ();
15491             foreach (@i_colon_breaks) {
15492                 my $i_question = $mate_index_to_go[$_];
15493                 if ( $i_question >= 0 ) {
15494                     if ( $want_break_before{'?'} ) {
15495                         $i_question = $iprev_to_go[$i_question];
15496                     }
15497
15498                     if ( $i_question >= 0 ) {
15499                         push @insert_list, $i_question;
15500                     }
15501                 }
15502                 $self->insert_additional_breaks( \@insert_list, \@i_first,
15503                     \@i_last );
15504             }
15505         }
15506     }
15507     return ( \@i_first, \@i_last );
15508 }
15509
15510 ###########################################
15511 # CODE SECTION 11: Code to break long lists
15512 ###########################################
15513
15514 {    ## begin closure scan_list
15515
15516     # These routines and variables are involved in finding good
15517     # places to break long lists.
15518
15519     my (
15520         $block_type,               $current_depth,
15521         $depth,                    $i,
15522         $i_last_nonblank_token,    $last_colon_sequence_number,
15523         $last_nonblank_token,      $last_nonblank_type,
15524         $last_nonblank_block_type, $last_old_breakpoint_count,
15525         $minimum_depth,            $next_nonblank_block_type,
15526         $next_nonblank_token,      $next_nonblank_type,
15527         $old_breakpoint_count,     $starting_breakpoint_count,
15528         $starting_depth,           $token,
15529         $type,                     $type_sequence,
15530     );
15531
15532     my (
15533         @breakpoint_stack,              @breakpoint_undo_stack,
15534         @comma_index,                   @container_type,
15535         @identifier_count_stack,        @index_before_arrow,
15536         @interrupted_list,              @item_count_stack,
15537         @last_comma_index,              @last_dot_index,
15538         @last_nonblank_type,            @old_breakpoint_count_stack,
15539         @opening_structure_index_stack, @rfor_semicolon_list,
15540         @has_old_logical_breakpoints,   @rand_or_list,
15541         @i_equals,                      @override_cab3,
15542         @type_sequence_stack,
15543     );
15544
15545     # these arrays must retain values between calls
15546     my ( @has_broken_sublist, @dont_align, @want_comma_break );
15547
15548     my $length_tol;
15549     my $length_tol_boost;
15550
15551     sub initialize_scan_list {
15552         @dont_align         = ();
15553         @has_broken_sublist = ();
15554         @want_comma_break   = ();
15555
15556         ####################################################
15557         # Set tolerances to prevent formatting instabilities
15558         ####################################################
15559
15560         # Define tolerances to use when checking if closed
15561         # containers will fit on one line.  This is necessary to avoid
15562         # formatting instability. The basic tolerance is based on the
15563         # following:
15564
15565         # - Always allow for at least one extra space after a closing token so
15566         # that we do not strand a comma or semicolon. (oneline.t).
15567
15568         # - Use an increased line length tolerance when -ci > -i to avoid
15569         # blinking states (case b923 and others).
15570         $length_tol =
15571           1 + max( 0, $rOpts_continuation_indentation - $rOpts_indent_columns );
15572
15573         # In addition, it may be necessary to use a few extra tolerance spaces
15574         # when -lp is used and/or when -xci is used.  The history of this
15575         # so far is as follows:
15576
15577         # FIX1: At least 3 characters were been found to be required for -lp
15578         # to fixes cases b1059 b1063 b1117.
15579
15580         # FIX2: Further testing showed that we need a total of 3 extra spaces
15581         # when -lp is set for non-lists, and at least 2 spaces when -lp and
15582         # -xci are set.
15583         # Fixes cases b1063 b1103 b1134 b1135 b1136 b1138 b1140 b1143 b1144
15584         # b1145 b1146 b1147 b1148 b1151 b1152 b1153 b1154 b1156 b1157 b1164
15585         # b1165
15586
15587         # FIX3: To fix cases b1169 b1170 b1171, an update was made in sub
15588         # 'find_token_starting_list' to go back before an initial blank space.
15589         # This fixed these three cases, and allowed the tolerances to be
15590         # reduced to continue to fix all other known cases of instability.
15591         # This gives the current tolerance formulation (note that
15592         # variable $length_tol_boost is always 0 now):
15593
15594         $length_tol_boost = 0;
15595         if ($rOpts_line_up_parentheses) {
15596
15597             if ( $rOpts->{'extended-continuation-indentation'} ) {
15598                 $length_tol += 2;
15599                 $length_tol_boost = 0;    # was 1 for FIX2, 0 for FIX3
15600             }
15601             else {
15602                 $length_tol_boost = 0;    # was 3 for FIX2, 0 for FIX3
15603             }
15604         }
15605
15606         # The -xci option alone also needs a slightly larger tol for non-lists
15607         elsif ( $rOpts->{'extended-continuation-indentation'} ) {
15608             $length_tol_boost = 0;        # was 1 for FIX2, 0 for FIX3
15609         }
15610         return;
15611     }
15612
15613     # routine to define essential variables when we go 'up' to
15614     # a new depth
15615     sub check_for_new_minimum_depth {
15616         my $depth = shift;
15617         if ( $depth < $minimum_depth ) {
15618
15619             $minimum_depth = $depth;
15620
15621             # these arrays need not retain values between calls
15622             $breakpoint_stack[$depth]              = $starting_breakpoint_count;
15623             $container_type[$depth]                = "";
15624             $identifier_count_stack[$depth]        = 0;
15625             $index_before_arrow[$depth]            = -1;
15626             $interrupted_list[$depth]              = 1;
15627             $item_count_stack[$depth]              = 0;
15628             $last_nonblank_type[$depth]            = "";
15629             $opening_structure_index_stack[$depth] = -1;
15630
15631             $breakpoint_undo_stack[$depth]       = undef;
15632             $comma_index[$depth]                 = undef;
15633             $last_comma_index[$depth]            = undef;
15634             $last_dot_index[$depth]              = undef;
15635             $old_breakpoint_count_stack[$depth]  = undef;
15636             $has_old_logical_breakpoints[$depth] = 0;
15637             $rand_or_list[$depth]                = [];
15638             $rfor_semicolon_list[$depth]         = [];
15639             $i_equals[$depth]                    = -1;
15640
15641             # these arrays must retain values between calls
15642             if ( !defined( $has_broken_sublist[$depth] ) ) {
15643                 $dont_align[$depth]         = 0;
15644                 $has_broken_sublist[$depth] = 0;
15645                 $want_comma_break[$depth]   = 0;
15646             }
15647         }
15648         return;
15649     }
15650
15651     # routine to decide which commas to break at within a container;
15652     # returns:
15653     #   $bp_count = number of comma breakpoints set
15654     #   $do_not_break_apart = a flag indicating if container need not
15655     #     be broken open
15656     sub set_comma_breakpoints {
15657
15658         my ( $self, $dd ) = @_;
15659         my $bp_count           = 0;
15660         my $do_not_break_apart = 0;
15661
15662         # anything to do?
15663         if ( $item_count_stack[$dd] ) {
15664
15665             # handle commas not in containers...
15666             if ( $dont_align[$dd] ) {
15667                 $self->do_uncontained_comma_breaks($dd);
15668             }
15669
15670             # handle commas within containers...
15671             else {
15672                 my $fbc = get_forced_breakpoint_count();
15673
15674                 # always open comma lists not preceded by keywords,
15675                 # barewords, identifiers (that is, anything that doesn't
15676                 # look like a function call)
15677                 my $must_break_open = $last_nonblank_type[$dd] !~ /^[kwiU]$/;
15678
15679                 $self->set_comma_breakpoints_do(
15680                     {
15681                         depth            => $dd,
15682                         i_opening_paren  => $opening_structure_index_stack[$dd],
15683                         i_closing_paren  => $i,
15684                         item_count       => $item_count_stack[$dd],
15685                         identifier_count => $identifier_count_stack[$dd],
15686                         rcomma_index     => $comma_index[$dd],
15687                         next_nonblank_type  => $next_nonblank_type,
15688                         list_type           => $container_type[$dd],
15689                         interrupted         => $interrupted_list[$dd],
15690                         rdo_not_break_apart => \$do_not_break_apart,
15691                         must_break_open     => $must_break_open,
15692                         has_broken_sublist  => $has_broken_sublist[$dd],
15693                     }
15694                 );
15695                 $bp_count           = get_forced_breakpoint_count() - $fbc;
15696                 $do_not_break_apart = 0 if $must_break_open;
15697             }
15698         }
15699         return ( $bp_count, $do_not_break_apart );
15700     }
15701
15702     # These types are excluded at breakpoints to prevent blinking
15703     my %is_uncontained_comma_break_excluded_type;
15704
15705     BEGIN {
15706         my @q = qw< L { ( [ ? : + - >;
15707         @is_uncontained_comma_break_excluded_type{@q} = (1) x scalar(@q);
15708     }
15709
15710     sub do_uncontained_comma_breaks {
15711
15712         # Handle commas not in containers...
15713         # This is a catch-all routine for commas that we
15714         # don't know what to do with because the don't fall
15715         # within containers.  We will bias the bond strength
15716         # to break at commas which ended lines in the input
15717         # file.  This usually works better than just trying
15718         # to put as many items on a line as possible.  A
15719         # downside is that if the input file is garbage it
15720         # won't work very well. However, the user can always
15721         # prevent following the old breakpoints with the
15722         # -iob flag.
15723         my ( $self, $dd ) = @_;
15724         my $bias                  = -.01;
15725         my $old_comma_break_count = 0;
15726         foreach my $ii ( @{ $comma_index[$dd] } ) {
15727             if ( $old_breakpoint_to_go[$ii] ) {
15728                 $old_comma_break_count++;
15729                 $bond_strength_to_go[$ii] = $bias;
15730
15731                 # reduce bias magnitude to force breaks in order
15732                 $bias *= 0.99;
15733             }
15734         }
15735
15736         # Also put a break before the first comma if
15737         # (1) there was a break there in the input, and
15738         # (2) there was exactly one old break before the first comma break
15739         # (3) OLD: there are multiple old comma breaks
15740         # (3) NEW: there are one or more old comma breaks (see return example)
15741         # (4) the first comma is at the starting level ...
15742         #     ... fixes cases b064 b065 b068 b210 b747
15743         #
15744         # For example, we will follow the user and break after
15745         # 'print' in this snippet:
15746         #    print
15747         #      "conformability (Not the same dimension)\n",
15748         #      "\t", $have, " is ", text_unit($hu), "\n",
15749         #      "\t", $want, " is ", text_unit($wu), "\n",
15750         #      ;
15751         #
15752         # Another example, just one comma, where we will break after
15753         # the return:
15754         #  return
15755         #    $x * cos($a) - $y * sin($a),
15756         #    $x * sin($a) + $y * cos($a);
15757
15758         # Breaking a print statement:
15759         # print SAVEOUT
15760         #   ( $? & 127 ) ? " (SIG#" . ( $? & 127 ) . ")" : "",
15761         #   ( $? & 128 ) ? " -- core dumped" : "", "\n";
15762         #
15763         #  But we will not force a break after the opening paren here
15764         #  (causes a blinker):
15765         #        $heap->{stream}->set_output_filter(
15766         #            poe::filter::reference->new('myotherfreezer') ),
15767         #          ;
15768         #
15769         my $i_first_comma = $comma_index[$dd]->[0];
15770         my $level_comma   = $levels_to_go[$i_first_comma];
15771         if (   $old_breakpoint_to_go[$i_first_comma]
15772             && $level_comma == $levels_to_go[0] )
15773         {
15774             my $ibreak    = -1;
15775             my $obp_count = 0;
15776             for ( my $ii = $i_first_comma - 1 ; $ii >= 0 ; $ii -= 1 ) {
15777                 if ( $old_breakpoint_to_go[$ii] ) {
15778                     $obp_count++;
15779                     last if ( $obp_count > 1 );
15780                     $ibreak = $ii
15781                       if ( $levels_to_go[$ii] == $level_comma );
15782                 }
15783             }
15784
15785             # Changed rule from multiple old commas to just one here:
15786             if ( $ibreak >= 0 && $obp_count == 1 && $old_comma_break_count > 0 )
15787             {
15788                 my $ibreakm = $ibreak;
15789                 $ibreakm-- if ( $types_to_go[$ibreakm] eq 'b' );
15790                 if ( $ibreakm >= 0 ) {
15791
15792                     # In order to avoid blinkers we have to be fairly
15793                     # restrictive:
15794
15795                     # Rule 1: Do not to break before an opening token
15796                     # Rule 2: avoid breaking at ternary operators
15797                     # (see b931, which is similar to the above print example)
15798                     # Rule 3: Do not break at chain operators to fix case b1119
15799                     #  - The previous test was '$typem !~ /^[\(\{\[L\?\:]$/'
15800
15801                     # Be sure to test any changes to these rules against runs
15802                     # with -l=0 such as the 'bbvt' test (perltidyrc_colin)
15803                     # series.
15804
15805                     my $typem = $types_to_go[$ibreakm];
15806                     if ( !$is_uncontained_comma_break_excluded_type{$typem} ) {
15807                         $self->set_forced_breakpoint($ibreak);
15808                     }
15809                 }
15810             }
15811         }
15812         return;
15813     }
15814
15815     my %is_logical_container;
15816     my %quick_filter;
15817
15818     BEGIN {
15819         my @q = qw# if elsif unless while and or err not && | || ? : ! #;
15820         @is_logical_container{@q} = (1) x scalar(@q);
15821
15822         # This filter will allow most tokens to skip past a section of code
15823         %quick_filter = %is_assignment;
15824         @q            = qw# => . ; < > ~ #;
15825         push @q, ',';
15826         @quick_filter{@q} = (1) x scalar(@q);
15827     }
15828
15829     sub set_for_semicolon_breakpoints {
15830         my ( $self, $dd ) = @_;
15831         foreach ( @{ $rfor_semicolon_list[$dd] } ) {
15832             $self->set_forced_breakpoint($_);
15833         }
15834         return;
15835     }
15836
15837     sub set_logical_breakpoints {
15838         my ( $self, $dd ) = @_;
15839         if (
15840                $item_count_stack[$dd] == 0
15841             && $is_logical_container{ $container_type[$dd] }
15842
15843             || $has_old_logical_breakpoints[$dd]
15844           )
15845         {
15846
15847             # Look for breaks in this order:
15848             # 0   1    2   3
15849             # or  and  ||  &&
15850             foreach my $i ( 0 .. 3 ) {
15851                 if ( $rand_or_list[$dd][$i] ) {
15852                     foreach ( @{ $rand_or_list[$dd][$i] } ) {
15853                         $self->set_forced_breakpoint($_);
15854                     }
15855
15856                     # break at any 'if' and 'unless' too
15857                     foreach ( @{ $rand_or_list[$dd][4] } ) {
15858                         $self->set_forced_breakpoint($_);
15859                     }
15860                     $rand_or_list[$dd] = [];
15861                     last;
15862                 }
15863             }
15864         }
15865         return;
15866     }
15867
15868     sub is_unbreakable_container {
15869
15870         # never break a container of one of these types
15871         # because bad things can happen (map1.t)
15872         my $dd = shift;
15873         return $is_sort_map_grep{ $container_type[$dd] };
15874     }
15875
15876     sub scan_list {
15877
15878         my ( $self, $is_long_line ) = @_;
15879
15880         # This routine is responsible for setting line breaks for all lists,
15881         # so that hierarchical structure can be displayed and so that list
15882         # items can be vertically aligned.  The output of this routine is
15883         # stored in the array @forced_breakpoint_to_go, which is used to set
15884         # final breakpoints.
15885
15886         # It is called once per batch if the batch is a list.
15887         my $rLL                  = $self->[_rLL_];
15888         my $ris_list_by_seqno    = $self->[_ris_list_by_seqno_];
15889         my $ris_broken_container = $self->[_ris_broken_container_];
15890         my $rbreak_before_container_by_seqno =
15891           $self->[_rbreak_before_container_by_seqno_];
15892
15893         $starting_depth = $nesting_depth_to_go[0];
15894
15895         $block_type                 = ' ';
15896         $current_depth              = $starting_depth;
15897         $i                          = -1;
15898         $last_colon_sequence_number = -1;
15899         $last_nonblank_token        = ';';
15900         $last_nonblank_type         = ';';
15901         $last_nonblank_block_type   = ' ';
15902         $last_old_breakpoint_count  = 0;
15903         $minimum_depth = $current_depth + 1;    # forces update in check below
15904         $old_breakpoint_count      = 0;
15905         $starting_breakpoint_count = get_forced_breakpoint_count();
15906         $token                     = ';';
15907         $type                      = ';';
15908         $type_sequence             = '';
15909
15910         my $total_depth_variation = 0;
15911         my $i_old_assignment_break;
15912         my $depth_last = $starting_depth;
15913
15914         check_for_new_minimum_depth($current_depth);
15915
15916         my $want_previous_breakpoint = -1;
15917
15918         my $saw_good_breakpoint;
15919         my $i_line_end   = -1;
15920         my $i_line_start = -1;
15921
15922         # loop over all tokens in this batch
15923         while ( ++$i <= $max_index_to_go ) {
15924             if ( $type ne 'b' ) {
15925                 $i_last_nonblank_token    = $i - 1;
15926                 $last_nonblank_type       = $type;
15927                 $last_nonblank_token      = $token;
15928                 $last_nonblank_block_type = $block_type;
15929             } ## end if ( $type ne 'b' )
15930             $type          = $types_to_go[$i];
15931             $block_type    = $block_type_to_go[$i];
15932             $token         = $tokens_to_go[$i];
15933             $type_sequence = $type_sequence_to_go[$i];
15934             my $next_type       = $types_to_go[ $i + 1 ];
15935             my $next_token      = $tokens_to_go[ $i + 1 ];
15936             my $i_next_nonblank = ( ( $next_type eq 'b' ) ? $i + 2 : $i + 1 );
15937             $next_nonblank_type       = $types_to_go[$i_next_nonblank];
15938             $next_nonblank_token      = $tokens_to_go[$i_next_nonblank];
15939             $next_nonblank_block_type = $block_type_to_go[$i_next_nonblank];
15940
15941             # set break if flag was set
15942             if ( $want_previous_breakpoint >= 0 ) {
15943                 $self->set_forced_breakpoint($want_previous_breakpoint);
15944                 $want_previous_breakpoint = -1;
15945             }
15946
15947             $last_old_breakpoint_count = $old_breakpoint_count;
15948
15949             # Fixed for case b1097 to not consider old breaks at highly
15950             # stressed locations, such as types 'L' and 'R'.  It might be
15951             # useful to generalize this concept in the future by looking at
15952             # actual bond strengths.
15953             if (   $old_breakpoint_to_go[$i]
15954                 && $type ne 'L'
15955                 && $next_nonblank_type ne 'R' )
15956             {
15957                 $i_line_end   = $i;
15958                 $i_line_start = $i_next_nonblank;
15959
15960                 $old_breakpoint_count++;
15961
15962                 # Break before certain keywords if user broke there and
15963                 # this is a 'safe' break point. The idea is to retain
15964                 # any preferred breaks for sequential list operations,
15965                 # like a schwartzian transform.
15966                 if ($rOpts_break_at_old_keyword_breakpoints) {
15967                     if (
15968                            $next_nonblank_type eq 'k'
15969                         && $is_keyword_returning_list{$next_nonblank_token}
15970                         && (   $type =~ /^[=\)\]\}Riw]$/
15971                             || $type eq 'k'
15972                             && $is_keyword_returning_list{$token} )
15973                       )
15974                     {
15975
15976                         # we actually have to set this break next time through
15977                         # the loop because if we are at a closing token (such
15978                         # as '}') which forms a one-line block, this break might
15979                         # get undone.
15980
15981                         # And do not do this at an equals if the user wants
15982                         # breaks before an equals (blinker cases b434 b903)
15983                         unless ( $type eq '=' && $want_break_before{$type} ) {
15984                             $want_previous_breakpoint = $i;
15985                         }
15986                     } ## end if ( $next_nonblank_type...)
15987                 } ## end if ($rOpts_break_at_old_keyword_breakpoints)
15988
15989                 # Break before attributes if user broke there
15990                 if ($rOpts_break_at_old_attribute_breakpoints) {
15991                     if ( $next_nonblank_type eq 'A' ) {
15992                         $want_previous_breakpoint = $i;
15993                     }
15994                 }
15995
15996                 # remember an = break as possible good break point
15997                 if ( $is_assignment{$type} ) {
15998                     $i_old_assignment_break = $i;
15999                 }
16000                 elsif ( $is_assignment{$next_nonblank_type} ) {
16001                     $i_old_assignment_break = $i_next_nonblank;
16002                 }
16003             } ## end if ( $old_breakpoint_to_go...)
16004
16005             next if ( $type eq 'b' );
16006             $depth = $nesting_depth_to_go[ $i + 1 ];
16007
16008             $total_depth_variation += abs( $depth - $depth_last );
16009             $depth_last = $depth;
16010
16011             # safety check - be sure we always break after a comment
16012             # Shouldn't happen .. an error here probably means that the
16013             # nobreak flag did not get turned off correctly during
16014             # formatting.
16015             if ( $type eq '#' ) {
16016                 if ( $i != $max_index_to_go ) {
16017                     warning(
16018 "Non-fatal program bug: backup logic required to break after a comment\n"
16019                     );
16020                     report_definite_bug();
16021                     $nobreak_to_go[$i] = 0;
16022                     $self->set_forced_breakpoint($i);
16023                 } ## end if ( $i != $max_index_to_go)
16024             } ## end if ( $type eq '#' )
16025
16026             # Force breakpoints at certain tokens in long lines.
16027             # Note that such breakpoints will be undone later if these tokens
16028             # are fully contained within parens on a line.
16029             if (
16030
16031                 # break before a keyword within a line
16032                 $type eq 'k'
16033                 && $i > 0
16034
16035                 # if one of these keywords:
16036                 #   /^(if|unless|while|until|for)$/
16037                 && $is_if_unless_while_until_for{$token}
16038
16039                 # but do not break at something like '1 while'
16040                 && ( $last_nonblank_type ne 'n' || $i > 2 )
16041
16042                 # and let keywords follow a closing 'do' brace
16043                 && $last_nonblank_block_type ne 'do'
16044
16045                 && (
16046                     $is_long_line
16047
16048                     # or container is broken (by side-comment, etc)
16049                     || (   $next_nonblank_token eq '('
16050                         && $mate_index_to_go[$i_next_nonblank] < $i )
16051                 )
16052               )
16053             {
16054                 $self->set_forced_breakpoint( $i - 1 );
16055             } ## end if ( $type eq 'k' && $i...)
16056
16057             # remember locations of '||'  and '&&' for possible breaks if we
16058             # decide this is a long logical expression.
16059             if ( $type eq '||' ) {
16060                 push @{ $rand_or_list[$depth][2] }, $i;
16061                 ++$has_old_logical_breakpoints[$depth]
16062                   if ( ( $i == $i_line_start || $i == $i_line_end )
16063                     && $rOpts_break_at_old_logical_breakpoints );
16064             } ## end elsif ( $type eq '||' )
16065             elsif ( $type eq '&&' ) {
16066                 push @{ $rand_or_list[$depth][3] }, $i;
16067                 ++$has_old_logical_breakpoints[$depth]
16068                   if ( ( $i == $i_line_start || $i == $i_line_end )
16069                     && $rOpts_break_at_old_logical_breakpoints );
16070             } ## end elsif ( $type eq '&&' )
16071             elsif ( $type eq 'f' ) {
16072                 push @{ $rfor_semicolon_list[$depth] }, $i;
16073             }
16074             elsif ( $type eq 'k' ) {
16075                 if ( $token eq 'and' ) {
16076                     push @{ $rand_or_list[$depth][1] }, $i;
16077                     ++$has_old_logical_breakpoints[$depth]
16078                       if ( ( $i == $i_line_start || $i == $i_line_end )
16079                         && $rOpts_break_at_old_logical_breakpoints );
16080                 } ## end if ( $token eq 'and' )
16081
16082                 # break immediately at 'or's which are probably not in a logical
16083                 # block -- but we will break in logical breaks below so that
16084                 # they do not add to the forced_breakpoint_count
16085                 elsif ( $token eq 'or' ) {
16086                     push @{ $rand_or_list[$depth][0] }, $i;
16087                     ++$has_old_logical_breakpoints[$depth]
16088                       if ( ( $i == $i_line_start || $i == $i_line_end )
16089                         && $rOpts_break_at_old_logical_breakpoints );
16090                     if ( $is_logical_container{ $container_type[$depth] } ) {
16091                     }
16092                     else {
16093                         if ($is_long_line) { $self->set_forced_breakpoint($i) }
16094                         elsif ( ( $i == $i_line_start || $i == $i_line_end )
16095                             && $rOpts_break_at_old_logical_breakpoints )
16096                         {
16097                             $saw_good_breakpoint = 1;
16098                         }
16099                     } ## end else [ if ( $is_logical_container...)]
16100                 } ## end elsif ( $token eq 'or' )
16101                 elsif ( $token eq 'if' || $token eq 'unless' ) {
16102                     push @{ $rand_or_list[$depth][4] }, $i;
16103                     if ( ( $i == $i_line_start || $i == $i_line_end )
16104                         && $rOpts_break_at_old_logical_breakpoints )
16105                     {
16106                         $self->set_forced_breakpoint($i);
16107                     }
16108                 } ## end elsif ( $token eq 'if' ||...)
16109             } ## end elsif ( $type eq 'k' )
16110             elsif ( $is_assignment{$type} ) {
16111                 $i_equals[$depth] = $i;
16112             }
16113
16114             if ($type_sequence) {
16115
16116                 # handle any postponed closing breakpoints
16117                 if ( $is_closing_sequence_token{$token} ) {
16118                     if ( $type eq ':' ) {
16119                         $last_colon_sequence_number = $type_sequence;
16120
16121                         # retain break at a ':' line break
16122                         if ( ( $i == $i_line_start || $i == $i_line_end )
16123                             && $rOpts_break_at_old_ternary_breakpoints )
16124                         {
16125
16126                             $self->set_forced_breakpoint($i);
16127
16128                             # break at previous '='
16129                             if ( $i_equals[$depth] > 0 ) {
16130                                 $self->set_forced_breakpoint(
16131                                     $i_equals[$depth] );
16132                                 $i_equals[$depth] = -1;
16133                             }
16134                         } ## end if ( ( $i == $i_line_start...))
16135                     } ## end if ( $type eq ':' )
16136                     if ( has_postponed_breakpoint($type_sequence) ) {
16137                         my $inc = ( $type eq ':' ) ? 0 : 1;
16138                         $self->set_forced_breakpoint( $i - $inc );
16139                     }
16140                 } ## end if ( $is_closing_sequence_token{$token} )
16141
16142                 # set breaks at ?/: if they will get separated (and are
16143                 # not a ?/: chain), or if the '?' is at the end of the
16144                 # line
16145                 elsif ( $token eq '?' ) {
16146                     my $i_colon = $mate_index_to_go[$i];
16147                     if (
16148                         $i_colon <= 0  # the ':' is not in this batch
16149                         || $i == 0     # this '?' is the first token of the line
16150                         || $i ==
16151                         $max_index_to_go    # or this '?' is the last token
16152                       )
16153                     {
16154
16155                         # don't break at a '?' if preceded by ':' on
16156                         # this line of previous ?/: pair on this line.
16157                         # This is an attempt to preserve a chain of ?/:
16158                         # expressions (elsif2.t).  And don't break if
16159                         # this has a side comment.
16160                         $self->set_forced_breakpoint($i)
16161                           unless (
16162                             $type_sequence == (
16163                                 $last_colon_sequence_number +
16164                                   TYPE_SEQUENCE_INCREMENT
16165                             )
16166                             || $tokens_to_go[$max_index_to_go] eq '#'
16167                           );
16168                         $self->set_closing_breakpoint($i);
16169                     } ## end if ( $i_colon <= 0  ||...)
16170                 } ## end elsif ( $token eq '?' )
16171             } ## end if ($type_sequence)
16172
16173 #print "LISTX sees: i=$i type=$type  tok=$token  block=$block_type depth=$depth\n";
16174
16175             #------------------------------------------------------------
16176             # Handle Increasing Depth..
16177             #
16178             # prepare for a new list when depth increases
16179             # token $i is a '(','{', or '['
16180             #------------------------------------------------------------
16181             if ( $depth > $current_depth ) {
16182
16183                 $type_sequence_stack[$depth] = $type_sequence;
16184                 $override_cab3[$depth] =
16185                      $rOpts_comma_arrow_breakpoints == 3
16186                   && $type_sequence
16187                   && $self->[_roverride_cab3_]->{$type_sequence};
16188                 $breakpoint_stack[$depth] = get_forced_breakpoint_count();
16189                 $breakpoint_undo_stack[$depth] =
16190                   get_forced_breakpoint_undo_count();
16191                 $has_broken_sublist[$depth]            = 0;
16192                 $identifier_count_stack[$depth]        = 0;
16193                 $index_before_arrow[$depth]            = -1;
16194                 $interrupted_list[$depth]              = 0;
16195                 $item_count_stack[$depth]              = 0;
16196                 $last_comma_index[$depth]              = undef;
16197                 $last_dot_index[$depth]                = undef;
16198                 $last_nonblank_type[$depth]            = $last_nonblank_type;
16199                 $old_breakpoint_count_stack[$depth]    = $old_breakpoint_count;
16200                 $opening_structure_index_stack[$depth] = $i;
16201                 $rand_or_list[$depth]                  = [];
16202                 $rfor_semicolon_list[$depth]           = [];
16203                 $i_equals[$depth]                      = -1;
16204                 $want_comma_break[$depth]              = 0;
16205                 $container_type[$depth] =
16206
16207                   #      k => && || ? : .
16208                   $is_container_label_type{$last_nonblank_type}
16209                   ? $last_nonblank_token
16210                   : "";
16211                 $has_old_logical_breakpoints[$depth] = 0;
16212
16213                 # if line ends here then signal closing token to break
16214                 if ( $next_nonblank_type eq 'b' || $next_nonblank_type eq '#' )
16215                 {
16216                     $self->set_closing_breakpoint($i);
16217                 }
16218
16219                 # Not all lists of values should be vertically aligned..
16220                 $dont_align[$depth] =
16221
16222                   # code BLOCKS are handled at a higher level
16223                   ( $block_type ne "" )
16224
16225                   # certain paren lists
16226                   || ( $type eq '(' ) && (
16227
16228                     # it does not usually look good to align a list of
16229                     # identifiers in a parameter list, as in:
16230                     #    my($var1, $var2, ...)
16231                     # (This test should probably be refined, for now I'm just
16232                     # testing for any keyword)
16233                     ( $last_nonblank_type eq 'k' )
16234
16235                     # a trailing '(' usually indicates a non-list
16236                     || ( $next_nonblank_type eq '(' )
16237                   );
16238
16239                 # patch to outdent opening brace of long if/for/..
16240                 # statements (like this one).  See similar coding in
16241                 # set_continuation breaks.  We have also catch it here for
16242                 # short line fragments which otherwise will not go through
16243                 # set_continuation_breaks.
16244                 if (
16245                     $block_type
16246
16247                     # if we have the ')' but not its '(' in this batch..
16248                     && ( $last_nonblank_token eq ')' )
16249                     && $mate_index_to_go[$i_last_nonblank_token] < 0
16250
16251                     # and user wants brace to left
16252                     && !$rOpts->{'opening-brace-always-on-right'}
16253
16254                     && ( $type eq '{' )     # should be true
16255                     && ( $token eq '{' )    # should be true
16256                   )
16257                 {
16258                     $self->set_forced_breakpoint( $i - 1 );
16259                 } ## end if ( $block_type && ( ...))
16260             } ## end if ( $depth > $current_depth)
16261
16262             #------------------------------------------------------------
16263             # Handle Decreasing Depth..
16264             #
16265             # finish off any old list when depth decreases
16266             # token $i is a ')','}', or ']'
16267             #------------------------------------------------------------
16268             elsif ( $depth < $current_depth ) {
16269
16270                 check_for_new_minimum_depth($depth);
16271
16272                 # force all outer logical containers to break after we see on
16273                 # old breakpoint
16274                 $has_old_logical_breakpoints[$depth] ||=
16275                   $has_old_logical_breakpoints[$current_depth];
16276
16277                 # Patch to break between ') {' if the paren list is broken.
16278                 # There is similar logic in set_continuation_breaks for
16279                 # non-broken lists.
16280                 if (   $token eq ')'
16281                     && $next_nonblank_block_type
16282                     && $interrupted_list[$current_depth]
16283                     && $next_nonblank_type eq '{'
16284                     && !$rOpts->{'opening-brace-always-on-right'} )
16285                 {
16286                     $self->set_forced_breakpoint($i);
16287                 } ## end if ( $token eq ')' && ...
16288
16289 #print "LISTY sees: i=$i type=$type  tok=$token  block=$block_type depth=$depth next=$next_nonblank_type next_block=$next_nonblank_block_type inter=$interrupted_list[$current_depth]\n";
16290
16291                 # set breaks at commas if necessary
16292                 my ( $bp_count, $do_not_break_apart ) =
16293                   $self->set_comma_breakpoints($current_depth);
16294
16295                 my $i_opening = $opening_structure_index_stack[$current_depth];
16296                 my $saw_opening_structure = ( $i_opening >= 0 );
16297
16298                 # this term is long if we had to break at interior commas..
16299                 my $is_long_term = $bp_count > 0;
16300
16301                 # If this is a short container with one or more comma arrows,
16302                 # then we will mark it as a long term to open it if requested.
16303                 # $rOpts_comma_arrow_breakpoints =
16304                 #    0 - open only if comma precedes closing brace
16305                 #    1 - stable: except for one line blocks
16306                 #    2 - try to form 1 line blocks
16307                 #    3 - ignore =>
16308                 #    4 - always open up if vt=0
16309                 #    5 - stable: even for one line blocks if vt=0
16310
16311                 # PATCH: Modify the -cab flag if we are not processing a list:
16312                 # We only want the -cab flag to apply to list containers, so
16313                 # for non-lists we use the default and stable -cab=5 value.
16314                 # Fixes case b939a.
16315                 my $cab_flag = $rOpts_comma_arrow_breakpoints;
16316                 if ( $type_sequence && !$ris_list_by_seqno->{$type_sequence} ) {
16317                     $cab_flag = 5;
16318                 }
16319
16320                 if (  !$is_long_term
16321                     && $saw_opening_structure
16322                     && $is_opening_token{ $tokens_to_go[$i_opening] }
16323                     && $index_before_arrow[ $depth + 1 ] > 0
16324                     && !$opening_vertical_tightness{ $tokens_to_go[$i_opening] }
16325                   )
16326                 {
16327                     $is_long_term =
16328                          $cab_flag == 4
16329                       || $cab_flag == 0 && $last_nonblank_token eq ','
16330                       || $cab_flag == 5 && $old_breakpoint_to_go[$i_opening];
16331                 } ## end if ( !$is_long_term &&...)
16332
16333                 # mark term as long if the length between opening and closing
16334                 # parens exceeds allowed line length
16335                 if ( !$is_long_term && $saw_opening_structure ) {
16336
16337                     my $i_opening_minus =
16338                       $self->find_token_starting_list($i_opening);
16339
16340                     my $excess =
16341                       $self->excess_line_length( $i_opening_minus, $i );
16342
16343                     my $tol =
16344                       $length_tol_boost
16345                       && !$ris_list_by_seqno->{$type_sequence}
16346                       ? $length_tol + $length_tol_boost
16347                       : $length_tol;
16348
16349                     # Patch to avoid blinking with -bbxi=2 and -cab=2
16350                     # in which variations in -ci cause unstable formatting
16351                     # in edge cases. We just always add one ci level so that
16352                     # the formatting is independent of the -BBX results.
16353                     # Fixes cases b1137 b1149 b1150 b1155 b1158 b1159 b1160
16354                     # b1161 b1166 b1167 b1168
16355                     if (  !$ci_levels_to_go[$i_opening]
16356                         && $rbreak_before_container_by_seqno->{$type_sequence} )
16357                     {
16358                         $tol += $rOpts->{'continuation-indentation'};
16359                     }
16360
16361                     $is_long_term = $excess + $tol > 0;
16362
16363                 } ## end if ( !$is_long_term &&...)
16364
16365                 # We've set breaks after all comma-arrows.  Now we have to
16366                 # undo them if this can be a one-line block
16367                 # (the only breakpoints set will be due to comma-arrows)
16368
16369                 if (
16370
16371                     # user doesn't require breaking after all comma-arrows
16372                     ( $cab_flag != 0 ) && ( $cab_flag != 4 )
16373
16374                     # and if the opening structure is in this batch
16375                     && $saw_opening_structure
16376
16377                     # and either on the same old line
16378                     && (
16379                         $old_breakpoint_count_stack[$current_depth] ==
16380                         $last_old_breakpoint_count
16381
16382                         # or user wants to form long blocks with arrows
16383                         || $cab_flag == 2
16384
16385                         # if -cab=3 is overridden then use -cab=2 behavior
16386                         || $cab_flag == 3 && $override_cab3[$current_depth]
16387                     )
16388
16389                     # and we made breakpoints between the opening and closing
16390                     && ( $breakpoint_undo_stack[$current_depth] <
16391                         get_forced_breakpoint_undo_count() )
16392
16393                     # and this block is short enough to fit on one line
16394                     # Note: use < because need 1 more space for possible comma
16395                     && !$is_long_term
16396
16397                   )
16398                 {
16399                     $self->undo_forced_breakpoint_stack(
16400                         $breakpoint_undo_stack[$current_depth] );
16401                 } ## end if ( ( $rOpts_comma_arrow_breakpoints...))
16402
16403                 # now see if we have any comma breakpoints left
16404                 my $has_comma_breakpoints =
16405                   ( $breakpoint_stack[$current_depth] !=
16406                       get_forced_breakpoint_count() );
16407
16408                 # update broken-sublist flag of the outer container
16409                 $has_broken_sublist[$depth] =
16410                      $has_broken_sublist[$depth]
16411                   || $has_broken_sublist[$current_depth]
16412                   || $is_long_term
16413                   || $has_comma_breakpoints;
16414
16415 # Having come to the closing ')', '}', or ']', now we have to decide if we
16416 # should 'open up' the structure by placing breaks at the opening and
16417 # closing containers.  This is a tricky decision.  Here are some of the
16418 # basic considerations:
16419 #
16420 # -If this is a BLOCK container, then any breakpoints will have already
16421 # been set (and according to user preferences), so we need do nothing here.
16422 #
16423 # -If we have a comma-separated list for which we can align the list items,
16424 # then we need to do so because otherwise the vertical aligner cannot
16425 # currently do the alignment.
16426 #
16427 # -If this container does itself contain a container which has been broken
16428 # open, then it should be broken open to properly show the structure.
16429 #
16430 # -If there is nothing to align, and no other reason to break apart,
16431 # then do not do it.
16432 #
16433 # We will not break open the parens of a long but 'simple' logical expression.
16434 # For example:
16435 #
16436 # This is an example of a simple logical expression and its formatting:
16437 #
16438 #     if ( $bigwasteofspace1 && $bigwasteofspace2
16439 #         || $bigwasteofspace3 && $bigwasteofspace4 )
16440 #
16441 # Most people would prefer this than the 'spacey' version:
16442 #
16443 #     if (
16444 #         $bigwasteofspace1 && $bigwasteofspace2
16445 #         || $bigwasteofspace3 && $bigwasteofspace4
16446 #     )
16447 #
16448 # To illustrate the rules for breaking logical expressions, consider:
16449 #
16450 #             FULLY DENSE:
16451 #             if ( $opt_excl
16452 #                 and ( exists $ids_excl_uc{$id_uc}
16453 #                     or grep $id_uc =~ /$_/, @ids_excl_uc ))
16454 #
16455 # This is on the verge of being difficult to read.  The current default is to
16456 # open it up like this:
16457 #
16458 #             DEFAULT:
16459 #             if (
16460 #                 $opt_excl
16461 #                 and ( exists $ids_excl_uc{$id_uc}
16462 #                     or grep $id_uc =~ /$_/, @ids_excl_uc )
16463 #               )
16464 #
16465 # This is a compromise which tries to avoid being too dense and to spacey.
16466 # A more spaced version would be:
16467 #
16468 #             SPACEY:
16469 #             if (
16470 #                 $opt_excl
16471 #                 and (
16472 #                     exists $ids_excl_uc{$id_uc}
16473 #                     or grep $id_uc =~ /$_/, @ids_excl_uc
16474 #                 )
16475 #               )
16476 #
16477 # Some people might prefer the spacey version -- an option could be added.  The
16478 # innermost expression contains a long block '( exists $ids_...  ')'.
16479 #
16480 # Here is how the logic goes: We will force a break at the 'or' that the
16481 # innermost expression contains, but we will not break apart its opening and
16482 # closing containers because (1) it contains no multi-line sub-containers itself,
16483 # and (2) there is no alignment to be gained by breaking it open like this
16484 #
16485 #             and (
16486 #                 exists $ids_excl_uc{$id_uc}
16487 #                 or grep $id_uc =~ /$_/, @ids_excl_uc
16488 #             )
16489 #
16490 # (although this looks perfectly ok and might be good for long expressions).  The
16491 # outer 'if' container, though, contains a broken sub-container, so it will be
16492 # broken open to avoid too much density.  Also, since it contains no 'or's, there
16493 # will be a forced break at its 'and'.
16494
16495                 # set some flags telling something about this container..
16496                 my $is_simple_logical_expression = 0;
16497                 if (   $item_count_stack[$current_depth] == 0
16498                     && $saw_opening_structure
16499                     && $tokens_to_go[$i_opening] eq '('
16500                     && $is_logical_container{ $container_type[$current_depth] }
16501                   )
16502                 {
16503
16504                     # This seems to be a simple logical expression with
16505                     # no existing breakpoints.  Set a flag to prevent
16506                     # opening it up.
16507                     if ( !$has_comma_breakpoints ) {
16508                         $is_simple_logical_expression = 1;
16509                     }
16510
16511                     # This seems to be a simple logical expression with
16512                     # breakpoints (broken sublists, for example).  Break
16513                     # at all 'or's and '||'s.
16514                     else {
16515                         $self->set_logical_breakpoints($current_depth);
16516                     }
16517                 } ## end if ( $item_count_stack...)
16518
16519                 if ( $is_long_term
16520                     && @{ $rfor_semicolon_list[$current_depth] } )
16521                 {
16522                     $self->set_for_semicolon_breakpoints($current_depth);
16523
16524                     # open up a long 'for' or 'foreach' container to allow
16525                     # leading term alignment unless -lp is used.
16526                     $has_comma_breakpoints = 1
16527                       unless $rOpts_line_up_parentheses;
16528                 } ## end if ( $is_long_term && ...)
16529
16530                 if (
16531
16532                     # breaks for code BLOCKS are handled at a higher level
16533                     !$block_type
16534
16535                     # we do not need to break at the top level of an 'if'
16536                     # type expression
16537                     && !$is_simple_logical_expression
16538
16539                     ## modification to keep ': (' containers vertically tight;
16540                     ## but probably better to let user set -vt=1 to avoid
16541                     ## inconsistency with other paren types
16542                     ## && ($container_type[$current_depth] ne ':')
16543
16544                     # otherwise, we require one of these reasons for breaking:
16545                     && (
16546
16547                         # - this term has forced line breaks
16548                         $has_comma_breakpoints
16549
16550                        # - the opening container is separated from this batch
16551                        #   for some reason (comment, blank line, code block)
16552                        # - this is a non-paren container spanning multiple lines
16553                         || !$saw_opening_structure
16554
16555                         # - this is a long block contained in another breakable
16556                         #   container
16557                         || $is_long_term && !$self->is_in_block_by_i($i_opening)
16558                     )
16559                   )
16560                 {
16561
16562                     # For -lp option, we must put a breakpoint before
16563                     # the token which has been identified as starting
16564                     # this indentation level.  This is necessary for
16565                     # proper alignment.
16566                     if ( $rOpts_line_up_parentheses && $saw_opening_structure )
16567                     {
16568                         my $item = $leading_spaces_to_go[ $i_opening + 1 ];
16569                         if (   $i_opening + 1 < $max_index_to_go
16570                             && $types_to_go[ $i_opening + 1 ] eq 'b' )
16571                         {
16572                             $item = $leading_spaces_to_go[ $i_opening + 2 ];
16573                         }
16574                         if ( defined($item) ) {
16575                             my $i_start_2;
16576                             my $K_start_2 = $item->get_starting_index_K();
16577                             if ( defined($K_start_2) ) {
16578                                 $i_start_2 = $K_start_2 - $K_to_go[0];
16579                             }
16580                             if (
16581                                 defined($i_start_2)
16582
16583                                 # we are breaking after an opening brace, paren,
16584                                 # so don't break before it too
16585                                 && $i_start_2 ne $i_opening
16586                                 && $i_start_2 >= 0
16587                                 && $i_start_2 <= $max_index_to_go
16588                               )
16589                             {
16590
16591                                 # Only break for breakpoints at the same
16592                                 # indentation level as the opening paren
16593                                 my $test1 = $nesting_depth_to_go[$i_opening];
16594                                 my $test2 = $nesting_depth_to_go[$i_start_2];
16595                                 if ( $test2 == $test1 ) {
16596
16597                                     # Back up at a blank (fixes case b932)
16598                                     my $ibr = $i_start_2 - 1;
16599                                     if (   $ibr > 0
16600                                         && $types_to_go[$ibr] eq 'b' )
16601                                     {
16602                                         $ibr--;
16603                                     }
16604
16605                                     $self->set_forced_breakpoint($ibr);
16606
16607                                 }
16608                             } ## end if ( defined($i_start_2...))
16609                         } ## end if ( defined($item) )
16610                     } ## end if ( $rOpts_line_up_parentheses...)
16611
16612                     # break after opening structure.
16613                     # note: break before closing structure will be automatic
16614                     if ( $minimum_depth <= $current_depth ) {
16615
16616                         $self->set_forced_breakpoint($i_opening)
16617                           unless ( $do_not_break_apart
16618                             || is_unbreakable_container($current_depth) );
16619
16620                         # break at ',' of lower depth level before opening token
16621                         if ( $last_comma_index[$depth] ) {
16622                             $self->set_forced_breakpoint(
16623                                 $last_comma_index[$depth] );
16624                         }
16625
16626                         # break at '.' of lower depth level before opening token
16627                         if ( $last_dot_index[$depth] ) {
16628                             $self->set_forced_breakpoint(
16629                                 $last_dot_index[$depth] );
16630                         }
16631
16632                         # break before opening structure if preceded by another
16633                         # closing structure and a comma.  This is normally
16634                         # done by the previous closing brace, but not
16635                         # if it was a one-line block.
16636                         if ( $i_opening > 2 ) {
16637                             my $i_prev =
16638                               ( $types_to_go[ $i_opening - 1 ] eq 'b' )
16639                               ? $i_opening - 2
16640                               : $i_opening - 1;
16641
16642                             if (
16643                                 $types_to_go[$i_prev] eq ','
16644                                 && (   $types_to_go[ $i_prev - 1 ] eq ')'
16645                                     || $types_to_go[ $i_prev - 1 ] eq '}' )
16646                               )
16647                             {
16648                                 $self->set_forced_breakpoint($i_prev);
16649                             }
16650
16651                             # also break before something like ':('  or '?('
16652                             # if appropriate.
16653                             elsif (
16654                                 $types_to_go[$i_prev] =~ /^([k\:\?]|&&|\|\|)$/ )
16655                             {
16656                                 my $token_prev = $tokens_to_go[$i_prev];
16657                                 if ( $want_break_before{$token_prev} ) {
16658                                     $self->set_forced_breakpoint($i_prev);
16659                                 }
16660                             } ## end elsif ( $types_to_go[$i_prev...])
16661                         } ## end if ( $i_opening > 2 )
16662                     } ## end if ( $minimum_depth <=...)
16663
16664                     # break after comma following closing structure
16665                     if ( $next_type eq ',' ) {
16666                         $self->set_forced_breakpoint( $i + 1 );
16667                     }
16668
16669                     # break before an '=' following closing structure
16670                     if (
16671                         $is_assignment{$next_nonblank_type}
16672                         && ( $breakpoint_stack[$current_depth] !=
16673                             get_forced_breakpoint_count() )
16674                       )
16675                     {
16676                         $self->set_forced_breakpoint($i);
16677                     } ## end if ( $is_assignment{$next_nonblank_type...})
16678
16679                     # break at any comma before the opening structure Added
16680                     # for -lp, but seems to be good in general.  It isn't
16681                     # obvious how far back to look; the '5' below seems to
16682                     # work well and will catch the comma in something like
16683                     #  push @list, myfunc( $param, $param, ..
16684
16685                     my $icomma = $last_comma_index[$depth];
16686                     if ( defined($icomma) && ( $i_opening - $icomma ) < 5 ) {
16687                         unless ( $forced_breakpoint_to_go[$icomma] ) {
16688                             $self->set_forced_breakpoint($icomma);
16689                         }
16690                     }
16691                 }    # end logic to open up a container
16692
16693                 # Break open a logical container open if it was already open
16694                 elsif ($is_simple_logical_expression
16695                     && $has_old_logical_breakpoints[$current_depth] )
16696                 {
16697                     $self->set_logical_breakpoints($current_depth);
16698                 }
16699
16700                 # Handle long container which does not get opened up
16701                 elsif ($is_long_term) {
16702
16703                     # must set fake breakpoint to alert outer containers that
16704                     # they are complex
16705                     set_fake_breakpoint();
16706                 } ## end elsif ($is_long_term)
16707
16708             } ## end elsif ( $depth < $current_depth)
16709
16710             #------------------------------------------------------------
16711             # Handle this token
16712             #------------------------------------------------------------
16713
16714             $current_depth = $depth;
16715
16716             # most token types can skip the rest of this loop
16717             next unless ( $quick_filter{$type} );
16718
16719             # handle comma-arrow
16720             if ( $type eq '=>' ) {
16721                 next if ( $last_nonblank_type eq '=>' );
16722                 next if $rOpts_break_at_old_comma_breakpoints;
16723                 next
16724                   if ( $rOpts_comma_arrow_breakpoints == 3
16725                     && !$override_cab3[$depth] );
16726                 $want_comma_break[$depth]   = 1;
16727                 $index_before_arrow[$depth] = $i_last_nonblank_token;
16728                 next;
16729             } ## end if ( $type eq '=>' )
16730
16731             elsif ( $type eq '.' ) {
16732                 $last_dot_index[$depth] = $i;
16733             }
16734
16735             # Turn off alignment if we are sure that this is not a list
16736             # environment.  To be safe, we will do this if we see certain
16737             # non-list tokens, such as ';', and also the environment is
16738             # not a list.  Note that '=' could be in any of the = operators
16739             # (lextest.t). We can't just use the reported environment
16740             # because it can be incorrect in some cases.
16741             elsif ( ( $type =~ /^[\;\<\>\~]$/ || $is_assignment{$type} )
16742                 && !$self->is_in_list_by_i($i) )
16743             {
16744                 $dont_align[$depth]         = 1;
16745                 $want_comma_break[$depth]   = 0;
16746                 $index_before_arrow[$depth] = -1;
16747             } ## end elsif ( ( $type =~ /^[\;\<\>\~]$/...))
16748
16749             # now just handle any commas
16750             next unless ( $type eq ',' );
16751
16752             $last_dot_index[$depth]   = undef;
16753             $last_comma_index[$depth] = $i;
16754
16755             # break here if this comma follows a '=>'
16756             # but not if there is a side comment after the comma
16757             if ( $want_comma_break[$depth] ) {
16758
16759                 if ( $next_nonblank_type =~ /^[\)\}\]R]$/ ) {
16760                     if ($rOpts_comma_arrow_breakpoints) {
16761                         $want_comma_break[$depth] = 0;
16762                         next;
16763                     }
16764                 }
16765
16766                 $self->set_forced_breakpoint($i)
16767                   unless ( $next_nonblank_type eq '#' );
16768
16769                 # break before the previous token if it looks safe
16770                 # Example of something that we will not try to break before:
16771                 #   DBI::SQL_SMALLINT() => $ado_consts->{adSmallInt},
16772                 # Also we don't want to break at a binary operator (like +):
16773                 # $c->createOval(
16774                 #    $x + $R, $y +
16775                 #    $R => $x - $R,
16776                 #    $y - $R, -fill   => 'black',
16777                 # );
16778                 my $ibreak = $index_before_arrow[$depth] - 1;
16779                 if (   $ibreak > 0
16780                     && $tokens_to_go[ $ibreak + 1 ] !~ /^[\)\}\]]$/ )
16781                 {
16782                     if ( $tokens_to_go[$ibreak] eq '-' ) { $ibreak-- }
16783                     if ( $types_to_go[$ibreak] eq 'b' )  { $ibreak-- }
16784                     if ( $types_to_go[$ibreak] =~ /^[,wiZCUG\(\{\[]$/ ) {
16785
16786                         # don't break pointer calls, such as the following:
16787                         #  File::Spec->curdir  => 1,
16788                         # (This is tokenized as adjacent 'w' tokens)
16789                         ##if ( $tokens_to_go[ $ibreak + 1 ] !~ /^->/ ) {
16790
16791                         # And don't break before a comma, as in the following:
16792                         # ( LONGER_THAN,=> 1,
16793                         #    EIGHTY_CHARACTERS,=> 2,
16794                         #    CAUSES_FORMATTING,=> 3,
16795                         #    LIKE_THIS,=> 4,
16796                         # );
16797                         # This example is for -tso but should be general rule
16798                         if (   $tokens_to_go[ $ibreak + 1 ] ne '->'
16799                             && $tokens_to_go[ $ibreak + 1 ] ne ',' )
16800                         {
16801                             $self->set_forced_breakpoint($ibreak);
16802                         }
16803                     } ## end if ( $types_to_go[$ibreak...])
16804                 } ## end if ( $ibreak > 0 && $tokens_to_go...)
16805
16806                 $want_comma_break[$depth]   = 0;
16807                 $index_before_arrow[$depth] = -1;
16808
16809                 # handle list which mixes '=>'s and ','s:
16810                 # treat any list items so far as an interrupted list
16811                 $interrupted_list[$depth] = 1;
16812                 next;
16813             } ## end if ( $want_comma_break...)
16814
16815             # break after all commas above starting depth
16816             if ( $depth < $starting_depth && !$dont_align[$depth] ) {
16817                 $self->set_forced_breakpoint($i)
16818                   unless ( $next_nonblank_type eq '#' );
16819                 next;
16820             }
16821
16822             # add this comma to the list..
16823             my $item_count = $item_count_stack[$depth];
16824             if ( $item_count == 0 ) {
16825
16826                 # but do not form a list with no opening structure
16827                 # for example:
16828
16829                 #            open INFILE_COPY, ">$input_file_copy"
16830                 #              or die ("very long message");
16831                 if ( ( $opening_structure_index_stack[$depth] < 0 )
16832                     && $self->is_in_block_by_i($i) )
16833                 {
16834                     $dont_align[$depth] = 1;
16835                 }
16836             } ## end if ( $item_count == 0 )
16837
16838             $comma_index[$depth][$item_count] = $i;
16839             ++$item_count_stack[$depth];
16840             if ( $last_nonblank_type =~ /^[iR\]]$/ ) {
16841                 $identifier_count_stack[$depth]++;
16842             }
16843         } ## end while ( ++$i <= $max_index_to_go)
16844
16845         #-------------------------------------------
16846         # end of loop over all tokens in this batch
16847         #-------------------------------------------
16848
16849         # set breaks for any unfinished lists ..
16850         for ( my $dd = $current_depth ; $dd >= $minimum_depth ; $dd-- ) {
16851
16852             $interrupted_list[$dd]   = 1;
16853             $has_broken_sublist[$dd] = 1 if ( $dd < $current_depth );
16854             $self->set_comma_breakpoints($dd);
16855             $self->set_logical_breakpoints($dd)
16856               if ( $has_old_logical_breakpoints[$dd] );
16857             $self->set_for_semicolon_breakpoints($dd);
16858
16859             # break open container...
16860             my $i_opening = $opening_structure_index_stack[$dd];
16861             $self->set_forced_breakpoint($i_opening)
16862               unless (
16863                 is_unbreakable_container($dd)
16864
16865                 # Avoid a break which would place an isolated ' or "
16866                 # on a line
16867                 || (   $type eq 'Q'
16868                     && $i_opening >= $max_index_to_go - 2
16869                     && ( $token eq "'" || $token eq '"' ) )
16870               );
16871         } ## end for ( my $dd = $current_depth...)
16872
16873         # Return a flag indicating if the input file had some good breakpoints.
16874         # This flag will be used to force a break in a line shorter than the
16875         # allowed line length.
16876         if ( $has_old_logical_breakpoints[$current_depth] ) {
16877             $saw_good_breakpoint = 1;
16878         }
16879
16880         # A complex line with one break at an = has a good breakpoint.
16881         # This is not complex ($total_depth_variation=0):
16882         # $res1
16883         #   = 10;
16884         #
16885         # This is complex ($total_depth_variation=6):
16886         # $res2 =
16887         #  (is_boundp("a", 'self-insert') && is_boundp("b", 'self-insert'));
16888         elsif ($i_old_assignment_break
16889             && $total_depth_variation > 4
16890             && $old_breakpoint_count == 1 )
16891         {
16892             $saw_good_breakpoint = 1;
16893         } ## end elsif ( $i_old_assignment_break...)
16894
16895         return $saw_good_breakpoint;
16896     } ## end sub scan_list
16897 } ## end closure scan_list
16898
16899 my %is_kwiZ;
16900
16901 BEGIN {
16902
16903     # Added 'w' to fix b1172
16904     my @q = qw(k w i Z);
16905     @is_kwiZ{@q} = (1) x scalar(@q);
16906 }
16907
16908 sub find_token_starting_list {
16909
16910     # When testing to see if a block will fit on one line, some
16911     # previous token(s) may also need to be on the line; particularly
16912     # if this is a sub call.  So we will look back at least one
16913     # token.
16914     my ( $self, $i_opening_paren ) = @_;
16915
16916     # This will be the return index
16917     my $i_opening_minus = $i_opening_paren;
16918
16919     goto RETURN if ( $i_opening_minus <= 0 );
16920
16921     my $im1 = $i_opening_paren - 1;
16922     my ( $iprev_nb, $type_prev_nb ) = ( $im1, $types_to_go[$im1] );
16923     if ( $type_prev_nb eq 'b' && $iprev_nb > 0 ) {
16924         $iprev_nb -= 1;
16925         $type_prev_nb = $types_to_go[$iprev_nb];
16926     }
16927
16928     if ( $type_prev_nb eq ',' ) {
16929
16930         # a previous comma is a good break point
16931         # $i_opening_minus = $i_opening_paren;
16932     }
16933     elsif ( $tokens_to_go[$i_opening_paren] eq '(' ) {
16934         $i_opening_minus = $im1;
16935
16936         # Walk back to improve length estimate...
16937         # FIX for cases b1169 b1170 b1171: start walking back
16938         # at the previous nonblank. This makes the result insensitive
16939         # to the flag --space-function-paren, and similar.
16940         # previous loop: for ( my $j = $im1 ; $j >= 0 ; $j-- ) {
16941         for ( my $j = $iprev_nb ; $j >= 0 ; $j-- ) {
16942             last if ( $types_to_go[$j] =~ /^[\(\[\{L\}\]\)Rb,]$/ );
16943             $i_opening_minus = $j;
16944         }
16945         if ( $types_to_go[$i_opening_minus] eq 'b' ) { $i_opening_minus++ }
16946     }
16947
16948     # Handle non-parens
16949     elsif ( $is_kwiZ{$type_prev_nb} ) { $i_opening_minus = $iprev_nb }
16950
16951   RETURN:
16952
16953     return $i_opening_minus;
16954 }
16955
16956 {    ## begin closure set_comma_breakpoints_do
16957
16958     my %is_keyword_with_special_leading_term;
16959
16960     BEGIN {
16961
16962         # These keywords have prototypes which allow a special leading item
16963         # followed by a list
16964         my @q =
16965           qw(formline grep kill map printf sprintf push chmod join pack unshift);
16966         @is_keyword_with_special_leading_term{@q} = (1) x scalar(@q);
16967     }
16968
16969     use constant DEBUG_SPARSE => 0;
16970
16971     sub set_comma_breakpoints_do {
16972
16973         # Given a list with some commas, set breakpoints at some of the
16974         # commas, if necessary, to make it easy to read.
16975
16976         my ( $self, $rinput_hash ) = @_;
16977
16978         my $depth               = $rinput_hash->{depth};
16979         my $i_opening_paren     = $rinput_hash->{i_opening_paren};
16980         my $i_closing_paren     = $rinput_hash->{i_closing_paren};
16981         my $item_count          = $rinput_hash->{item_count};
16982         my $identifier_count    = $rinput_hash->{identifier_count};
16983         my $rcomma_index        = $rinput_hash->{rcomma_index};
16984         my $next_nonblank_type  = $rinput_hash->{next_nonblank_type};
16985         my $list_type           = $rinput_hash->{list_type};
16986         my $interrupted         = $rinput_hash->{interrupted};
16987         my $rdo_not_break_apart = $rinput_hash->{rdo_not_break_apart};
16988         my $must_break_open     = $rinput_hash->{must_break_open};
16989         my $has_broken_sublist  = $rinput_hash->{has_broken_sublist};
16990
16991         # nothing to do if no commas seen
16992         return if ( $item_count < 1 );
16993
16994         my $i_first_comma     = $rcomma_index->[0];
16995         my $i_true_last_comma = $rcomma_index->[ $item_count - 1 ];
16996         my $i_last_comma      = $i_true_last_comma;
16997         if ( $i_last_comma >= $max_index_to_go ) {
16998             $i_last_comma = $rcomma_index->[ --$item_count - 1 ];
16999             return if ( $item_count < 1 );
17000         }
17001
17002         #---------------------------------------------------------------
17003         # find lengths of all items in the list to calculate page layout
17004         #---------------------------------------------------------------
17005         my $comma_count = $item_count;
17006         my @item_lengths;
17007         my @i_term_begin;
17008         my @i_term_end;
17009         my @i_term_comma;
17010         my $i_prev_plus;
17011         my @max_length = ( 0, 0 );
17012         my $first_term_length;
17013         my $i      = $i_opening_paren;
17014         my $is_odd = 1;
17015
17016         foreach my $j ( 0 .. $comma_count - 1 ) {
17017             $is_odd      = 1 - $is_odd;
17018             $i_prev_plus = $i + 1;
17019             $i           = $rcomma_index->[$j];
17020
17021             my $i_term_end =
17022               ( $i == 0 || $types_to_go[ $i - 1 ] eq 'b' ) ? $i - 2 : $i - 1;
17023             my $i_term_begin =
17024               ( $types_to_go[$i_prev_plus] eq 'b' )
17025               ? $i_prev_plus + 1
17026               : $i_prev_plus;
17027             push @i_term_begin, $i_term_begin;
17028             push @i_term_end,   $i_term_end;
17029             push @i_term_comma, $i;
17030
17031             # note: currently adding 2 to all lengths (for comma and space)
17032             my $length =
17033               2 + token_sequence_length( $i_term_begin, $i_term_end );
17034             push @item_lengths, $length;
17035
17036             if ( $j == 0 ) {
17037                 $first_term_length = $length;
17038             }
17039             else {
17040
17041                 if ( $length > $max_length[$is_odd] ) {
17042                     $max_length[$is_odd] = $length;
17043                 }
17044             }
17045         }
17046
17047         # now we have to make a distinction between the comma count and item
17048         # count, because the item count will be one greater than the comma
17049         # count if the last item is not terminated with a comma
17050         my $i_b =
17051           ( $types_to_go[ $i_last_comma + 1 ] eq 'b' )
17052           ? $i_last_comma + 1
17053           : $i_last_comma;
17054         my $i_e =
17055           ( $types_to_go[ $i_closing_paren - 1 ] eq 'b' )
17056           ? $i_closing_paren - 2
17057           : $i_closing_paren - 1;
17058         my $i_effective_last_comma = $i_last_comma;
17059
17060         my $last_item_length = token_sequence_length( $i_b + 1, $i_e );
17061
17062         if ( $last_item_length > 0 ) {
17063
17064             # add 2 to length because other lengths include a comma and a blank
17065             $last_item_length += 2;
17066             push @item_lengths, $last_item_length;
17067             push @i_term_begin, $i_b + 1;
17068             push @i_term_end,   $i_e;
17069             push @i_term_comma, undef;
17070
17071             my $i_odd = $item_count % 2;
17072
17073             if ( $last_item_length > $max_length[$i_odd] ) {
17074                 $max_length[$i_odd] = $last_item_length;
17075             }
17076
17077             $item_count++;
17078             $i_effective_last_comma = $i_e + 1;
17079
17080             if ( $types_to_go[ $i_b + 1 ] =~ /^[iR\]]$/ ) {
17081                 $identifier_count++;
17082             }
17083         }
17084
17085         #---------------------------------------------------------------
17086         # End of length calculations
17087         #---------------------------------------------------------------
17088
17089         #---------------------------------------------------------------
17090         # Compound List Rule 1:
17091         # Break at (almost) every comma for a list containing a broken
17092         # sublist.  This has higher priority than the Interrupted List
17093         # Rule.
17094         #---------------------------------------------------------------
17095         if ($has_broken_sublist) {
17096
17097             # Break at every comma except for a comma between two
17098             # simple, small terms.  This prevents long vertical
17099             # columns of, say, just 0's.
17100             my $small_length = 10;    # 2 + actual maximum length wanted
17101
17102             # We'll insert a break in long runs of small terms to
17103             # allow alignment in uniform tables.
17104             my $skipped_count = 0;
17105             my $columns       = table_columns_available($i_first_comma);
17106             my $fields        = int( $columns / $small_length );
17107             if (   $rOpts_maximum_fields_per_table
17108                 && $fields > $rOpts_maximum_fields_per_table )
17109             {
17110                 $fields = $rOpts_maximum_fields_per_table;
17111             }
17112             my $max_skipped_count = $fields - 1;
17113
17114             my $is_simple_last_term = 0;
17115             my $is_simple_next_term = 0;
17116             foreach my $j ( 0 .. $item_count ) {
17117                 $is_simple_last_term = $is_simple_next_term;
17118                 $is_simple_next_term = 0;
17119                 if (   $j < $item_count
17120                     && $i_term_end[$j] == $i_term_begin[$j]
17121                     && $item_lengths[$j] <= $small_length )
17122                 {
17123                     $is_simple_next_term = 1;
17124                 }
17125                 next if $j == 0;
17126                 if (   $is_simple_last_term
17127                     && $is_simple_next_term
17128                     && $skipped_count < $max_skipped_count )
17129                 {
17130                     $skipped_count++;
17131                 }
17132                 else {
17133                     $skipped_count = 0;
17134                     my $i = $i_term_comma[ $j - 1 ];
17135                     last unless defined $i;
17136                     $self->set_forced_breakpoint($i);
17137                 }
17138             }
17139
17140             # always break at the last comma if this list is
17141             # interrupted; we wouldn't want to leave a terminal '{', for
17142             # example.
17143             if ($interrupted) {
17144                 $self->set_forced_breakpoint($i_true_last_comma);
17145             }
17146             return;
17147         }
17148
17149 #my ( $a, $b, $c ) = caller();
17150 #print "LISTX: in set_list $a $c interrupt=$interrupted count=$item_count
17151 #i_first = $i_first_comma  i_last=$i_last_comma max=$max_index_to_go\n";
17152 #print "depth=$depth has_broken=$has_broken_sublist[$depth] is_multi=$is_multiline opening_paren=($i_opening_paren) \n";
17153
17154         #---------------------------------------------------------------
17155         # Interrupted List Rule:
17156         # A list is forced to use old breakpoints if it was interrupted
17157         # by side comments or blank lines, or requested by user.
17158         #---------------------------------------------------------------
17159         if (   $rOpts_break_at_old_comma_breakpoints
17160             || $interrupted
17161             || $i_opening_paren < 0 )
17162         {
17163             $self->copy_old_breakpoints( $i_first_comma, $i_true_last_comma );
17164             return;
17165         }
17166
17167         #---------------------------------------------------------------
17168         # Looks like a list of items.  We have to look at it and size it up.
17169         #---------------------------------------------------------------
17170
17171         my $opening_token       = $tokens_to_go[$i_opening_paren];
17172         my $opening_is_in_block = $self->is_in_block_by_i($i_opening_paren);
17173
17174         #-------------------------------------------------------------------
17175         # Return if this will fit on one line
17176         #-------------------------------------------------------------------
17177
17178         my $i_opening_minus = $self->find_token_starting_list($i_opening_paren);
17179         return
17180           unless $self->excess_line_length( $i_opening_minus, $i_closing_paren )
17181           > 0;
17182
17183         #-------------------------------------------------------------------
17184         # Now we know that this block spans multiple lines; we have to set
17185         # at least one breakpoint -- real or fake -- as a signal to break
17186         # open any outer containers.
17187         #-------------------------------------------------------------------
17188         set_fake_breakpoint();
17189
17190         # be sure we do not extend beyond the current list length
17191         if ( $i_effective_last_comma >= $max_index_to_go ) {
17192             $i_effective_last_comma = $max_index_to_go - 1;
17193         }
17194
17195         # Set a flag indicating if we need to break open to keep -lp
17196         # items aligned.  This is necessary if any of the list terms
17197         # exceeds the available space after the '('.
17198         my $need_lp_break_open = $must_break_open;
17199         if ( $rOpts_line_up_parentheses && !$must_break_open ) {
17200             my $columns_if_unbroken =
17201               $maximum_line_length_at_level[ $levels_to_go[$i_opening_minus] ]
17202               - total_line_length( $i_opening_minus, $i_opening_paren );
17203             $need_lp_break_open =
17204                  ( $max_length[0] > $columns_if_unbroken )
17205               || ( $max_length[1] > $columns_if_unbroken )
17206               || ( $first_term_length > $columns_if_unbroken );
17207         }
17208
17209         # Specify if the list must have an even number of fields or not.
17210         # It is generally safest to assume an even number, because the
17211         # list items might be a hash list.  But if we can be sure that
17212         # it is not a hash, then we can allow an odd number for more
17213         # flexibility.
17214         my $odd_or_even = 2;    # 1 = odd field count ok, 2 = want even count
17215
17216         if (   $identifier_count >= $item_count - 1
17217             || $is_assignment{$next_nonblank_type}
17218             || ( $list_type && $list_type ne '=>' && $list_type !~ /^[\:\?]$/ )
17219           )
17220         {
17221             $odd_or_even = 1;
17222         }
17223
17224         # do we have a long first term which should be
17225         # left on a line by itself?
17226         my $use_separate_first_term = (
17227             $odd_or_even == 1           # only if we can use 1 field/line
17228               && $item_count > 3        # need several items
17229               && $first_term_length >
17230               2 * $max_length[0] - 2    # need long first term
17231               && $first_term_length >
17232               2 * $max_length[1] - 2    # need long first term
17233         );
17234
17235         # or do we know from the type of list that the first term should
17236         # be placed alone?
17237         if ( !$use_separate_first_term ) {
17238             if ( $is_keyword_with_special_leading_term{$list_type} ) {
17239                 $use_separate_first_term = 1;
17240
17241                 # should the container be broken open?
17242                 if ( $item_count < 3 ) {
17243                     if ( $i_first_comma - $i_opening_paren < 4 ) {
17244                         ${$rdo_not_break_apart} = 1;
17245                     }
17246                 }
17247                 elsif ($first_term_length < 20
17248                     && $i_first_comma - $i_opening_paren < 4 )
17249                 {
17250                     my $columns = table_columns_available($i_first_comma);
17251                     if ( $first_term_length < $columns ) {
17252                         ${$rdo_not_break_apart} = 1;
17253                     }
17254                 }
17255             }
17256         }
17257
17258         # if so,
17259         if ($use_separate_first_term) {
17260
17261             # ..set a break and update starting values
17262             $use_separate_first_term = 1;
17263             $self->set_forced_breakpoint($i_first_comma);
17264             $i_opening_paren = $i_first_comma;
17265             $i_first_comma   = $rcomma_index->[1];
17266             $item_count--;
17267             return if $comma_count == 1;
17268             shift @item_lengths;
17269             shift @i_term_begin;
17270             shift @i_term_end;
17271             shift @i_term_comma;
17272         }
17273
17274         # if not, update the metrics to include the first term
17275         else {
17276             if ( $first_term_length > $max_length[0] ) {
17277                 $max_length[0] = $first_term_length;
17278             }
17279         }
17280
17281         # Field width parameters
17282         my $pair_width = ( $max_length[0] + $max_length[1] );
17283         my $max_width =
17284           ( $max_length[0] > $max_length[1] ) ? $max_length[0] : $max_length[1];
17285
17286         # Number of free columns across the page width for laying out tables
17287         my $columns = table_columns_available($i_first_comma);
17288
17289         # Estimated maximum number of fields which fit this space
17290         # This will be our first guess
17291         my $number_of_fields_max =
17292           maximum_number_of_fields( $columns, $odd_or_even, $max_width,
17293             $pair_width );
17294         my $number_of_fields = $number_of_fields_max;
17295
17296         # Find the best-looking number of fields
17297         # and make this our second guess if possible
17298         my ( $number_of_fields_best, $ri_ragged_break_list,
17299             $new_identifier_count )
17300           = $self->study_list_complexity( \@i_term_begin, \@i_term_end,
17301             \@item_lengths, $max_width );
17302
17303         if (   $number_of_fields_best != 0
17304             && $number_of_fields_best < $number_of_fields_max )
17305         {
17306             $number_of_fields = $number_of_fields_best;
17307         }
17308
17309         # ----------------------------------------------------------------------
17310         # If we are crowded and the -lp option is being used, try to
17311         # undo some indentation
17312         # ----------------------------------------------------------------------
17313         if (
17314             $rOpts_line_up_parentheses
17315             && (
17316                 $number_of_fields == 0
17317                 || (   $number_of_fields == 1
17318                     && $number_of_fields != $number_of_fields_best )
17319             )
17320           )
17321         {
17322             my $available_spaces =
17323               $self->get_available_spaces_to_go($i_first_comma);
17324             if ( $available_spaces > 0 ) {
17325
17326                 my $spaces_wanted = $max_width - $columns;    # for 1 field
17327
17328                 if ( $number_of_fields_best == 0 ) {
17329                     $number_of_fields_best =
17330                       get_maximum_fields_wanted( \@item_lengths );
17331                 }
17332
17333                 if ( $number_of_fields_best != 1 ) {
17334                     my $spaces_wanted_2 =
17335                       1 + $pair_width - $columns;    # for 2 fields
17336                     if ( $available_spaces > $spaces_wanted_2 ) {
17337                         $spaces_wanted = $spaces_wanted_2;
17338                     }
17339                 }
17340
17341                 if ( $spaces_wanted > 0 ) {
17342                     my $deleted_spaces =
17343                       $self->reduce_lp_indentation( $i_first_comma,
17344                         $spaces_wanted );
17345
17346                     # redo the math
17347                     if ( $deleted_spaces > 0 ) {
17348                         $columns = table_columns_available($i_first_comma);
17349                         $number_of_fields_max =
17350                           maximum_number_of_fields( $columns, $odd_or_even,
17351                             $max_width, $pair_width );
17352                         $number_of_fields = $number_of_fields_max;
17353
17354                         if (   $number_of_fields_best == 1
17355                             && $number_of_fields >= 1 )
17356                         {
17357                             $number_of_fields = $number_of_fields_best;
17358                         }
17359                     }
17360                 }
17361             }
17362         }
17363
17364         # try for one column if two won't work
17365         if ( $number_of_fields <= 0 ) {
17366             $number_of_fields = int( $columns / $max_width );
17367         }
17368
17369         # The user can place an upper bound on the number of fields,
17370         # which can be useful for doing maintenance on tables
17371         if (   $rOpts_maximum_fields_per_table
17372             && $number_of_fields > $rOpts_maximum_fields_per_table )
17373         {
17374             $number_of_fields = $rOpts_maximum_fields_per_table;
17375         }
17376
17377         # How many columns (characters) and lines would this container take
17378         # if no additional whitespace were added?
17379         my $packed_columns = token_sequence_length( $i_opening_paren + 1,
17380             $i_effective_last_comma + 1 );
17381         if ( $columns <= 0 ) { $columns = 1 }    # avoid divide by zero
17382         my $packed_lines = 1 + int( $packed_columns / $columns );
17383
17384         # are we an item contained in an outer list?
17385         my $in_hierarchical_list = $next_nonblank_type =~ /^[\}\,]$/;
17386
17387         if ( $number_of_fields <= 0 ) {
17388
17389 #         #---------------------------------------------------------------
17390 #         # We're in trouble.  We can't find a single field width that works.
17391 #         # There is no simple answer here; we may have a single long list
17392 #         # item, or many.
17393 #         #---------------------------------------------------------------
17394 #
17395 #         In many cases, it may be best to not force a break if there is just one
17396 #         comma, because the standard continuation break logic will do a better
17397 #         job without it.
17398 #
17399 #         In the common case that all but one of the terms can fit
17400 #         on a single line, it may look better not to break open the
17401 #         containing parens.  Consider, for example
17402 #
17403 #             $color =
17404 #               join ( '/',
17405 #                 sort { $color_value{$::a} <=> $color_value{$::b}; }
17406 #                 keys %colors );
17407 #
17408 #         which will look like this with the container broken:
17409 #
17410 #             $color = join (
17411 #                 '/',
17412 #                 sort { $color_value{$::a} <=> $color_value{$::b}; } keys %colors
17413 #             );
17414 #
17415 #         Here is an example of this rule for a long last term:
17416 #
17417 #             log_message( 0, 256, 128,
17418 #                 "Number of routes in adj-RIB-in to be considered: $peercount" );
17419 #
17420 #         And here is an example with a long first term:
17421 #
17422 #         $s = sprintf(
17423 # "%2d wallclock secs (%$f usr %$f sys + %$f cusr %$f csys = %$f CPU)",
17424 #             $r, $pu, $ps, $cu, $cs, $tt
17425 #           )
17426 #           if $style eq 'all';
17427
17428             my $i_last_comma = $rcomma_index->[ $comma_count - 1 ];
17429             my $long_last_term =
17430               $self->excess_line_length( 0, $i_last_comma ) <= 0;
17431             my $long_first_term =
17432               $self->excess_line_length( $i_first_comma + 1, $max_index_to_go )
17433               <= 0;
17434
17435             # break at every comma ...
17436             if (
17437
17438                 # if requested by user or is best looking
17439                 $number_of_fields_best == 1
17440
17441                 # or if this is a sublist of a larger list
17442                 || $in_hierarchical_list
17443
17444                 # or if multiple commas and we don't have a long first or last
17445                 # term
17446                 || ( $comma_count > 1
17447                     && !( $long_last_term || $long_first_term ) )
17448               )
17449             {
17450                 foreach ( 0 .. $comma_count - 1 ) {
17451                     $self->set_forced_breakpoint( $rcomma_index->[$_] );
17452                 }
17453             }
17454             elsif ($long_last_term) {
17455
17456                 $self->set_forced_breakpoint($i_last_comma);
17457                 ${$rdo_not_break_apart} = 1 unless $must_break_open;
17458             }
17459             elsif ($long_first_term) {
17460
17461                 $self->set_forced_breakpoint($i_first_comma);
17462             }
17463             else {
17464
17465                 # let breaks be defined by default bond strength logic
17466             }
17467             return;
17468         }
17469
17470         # --------------------------------------------------------
17471         # We have a tentative field count that seems to work.
17472         # How many lines will this require?
17473         # --------------------------------------------------------
17474         my $formatted_lines = $item_count / ($number_of_fields);
17475         if ( $formatted_lines != int $formatted_lines ) {
17476             $formatted_lines = 1 + int $formatted_lines;
17477         }
17478
17479         # So far we've been trying to fill out to the right margin.  But
17480         # compact tables are easier to read, so let's see if we can use fewer
17481         # fields without increasing the number of lines.
17482         $number_of_fields =
17483           compactify_table( $item_count, $number_of_fields, $formatted_lines,
17484             $odd_or_even );
17485
17486         # How many spaces across the page will we fill?
17487         my $columns_per_line =
17488           ( int $number_of_fields / 2 ) * $pair_width +
17489           ( $number_of_fields % 2 ) * $max_width;
17490
17491         my $formatted_columns;
17492
17493         if ( $number_of_fields > 1 ) {
17494             $formatted_columns =
17495               ( $pair_width * ( int( $item_count / 2 ) ) +
17496                   ( $item_count % 2 ) * $max_width );
17497         }
17498         else {
17499             $formatted_columns = $max_width * $item_count;
17500         }
17501         if ( $formatted_columns < $packed_columns ) {
17502             $formatted_columns = $packed_columns;
17503         }
17504
17505         my $unused_columns = $formatted_columns - $packed_columns;
17506
17507         # set some empirical parameters to help decide if we should try to
17508         # align; high sparsity does not look good, especially with few lines
17509         my $sparsity = ($unused_columns) / ($formatted_columns);
17510         my $max_allowed_sparsity =
17511             ( $item_count < 3 )    ? 0.1
17512           : ( $packed_lines == 1 ) ? 0.15
17513           : ( $packed_lines == 2 ) ? 0.4
17514           :                          0.7;
17515
17516         # Begin check for shortcut methods, which avoid treating a list
17517         # as a table for relatively small parenthesized lists.  These
17518         # are usually easier to read if not formatted as tables.
17519         if (
17520             $packed_lines <= 2          # probably can fit in 2 lines
17521             && $item_count < 9          # doesn't have too many items
17522             && $opening_is_in_block     # not a sub-container
17523             && $opening_token eq '('    # is paren list
17524           )
17525         {
17526
17527             # Shortcut method 1: for -lp and just one comma:
17528             # This is a no-brainer, just break at the comma.
17529             if (
17530                 $rOpts_line_up_parentheses    # -lp
17531                 && $item_count == 2           # two items, one comma
17532                 && !$must_break_open
17533               )
17534             {
17535                 my $i_break = $rcomma_index->[0];
17536                 $self->set_forced_breakpoint($i_break);
17537                 ${$rdo_not_break_apart} = 1;
17538                 return;
17539
17540             }
17541
17542             # method 2 is for most small ragged lists which might look
17543             # best if not displayed as a table.
17544             if (
17545                 ( $number_of_fields == 2 && $item_count == 3 )
17546                 || (
17547                     $new_identifier_count > 0    # isn't all quotes
17548                     && $sparsity > 0.15
17549                 )    # would be fairly spaced gaps if aligned
17550               )
17551             {
17552
17553                 my $break_count = $self->set_ragged_breakpoints( \@i_term_comma,
17554                     $ri_ragged_break_list );
17555                 ++$break_count if ($use_separate_first_term);
17556
17557                 # NOTE: we should really use the true break count here,
17558                 # which can be greater if there are large terms and
17559                 # little space, but usually this will work well enough.
17560                 unless ($must_break_open) {
17561
17562                     if ( $break_count <= 1 ) {
17563                         ${$rdo_not_break_apart} = 1;
17564                     }
17565                     elsif ( $rOpts_line_up_parentheses && !$need_lp_break_open )
17566                     {
17567                         ${$rdo_not_break_apart} = 1;
17568                     }
17569                 }
17570                 return;
17571             }
17572
17573         }    # end shortcut methods
17574
17575         # debug stuff
17576         DEBUG_SPARSE && do {
17577             print STDOUT
17578 "SPARSE:cols=$columns commas=$comma_count items:$item_count ids=$identifier_count pairwidth=$pair_width fields=$number_of_fields lines packed: $packed_lines packed_cols=$packed_columns fmtd:$formatted_lines cols /line:$columns_per_line  unused:$unused_columns fmtd:$formatted_columns sparsity=$sparsity allow=$max_allowed_sparsity\n";
17579
17580         };
17581
17582         #---------------------------------------------------------------
17583         # Compound List Rule 2:
17584         # If this list is too long for one line, and it is an item of a
17585         # larger list, then we must format it, regardless of sparsity
17586         # (ian.t).  One reason that we have to do this is to trigger
17587         # Compound List Rule 1, above, which causes breaks at all commas of
17588         # all outer lists.  In this way, the structure will be properly
17589         # displayed.
17590         #---------------------------------------------------------------
17591
17592         # Decide if this list is too long for one line unless broken
17593         my $total_columns = table_columns_available($i_opening_paren);
17594         my $too_long      = $packed_columns > $total_columns;
17595
17596         # For a paren list, include the length of the token just before the
17597         # '(' because this is likely a sub call, and we would have to
17598         # include the sub name on the same line as the list.  This is still
17599         # imprecise, but not too bad.  (steve.t)
17600         if ( !$too_long && $i_opening_paren > 0 && $opening_token eq '(' ) {
17601
17602             $too_long = $self->excess_line_length( $i_opening_minus,
17603                 $i_effective_last_comma + 1 ) > 0;
17604         }
17605
17606         # FIXME: For an item after a '=>', try to include the length of the
17607         # thing before the '=>'.  This is crude and should be improved by
17608         # actually looking back token by token.
17609         if ( !$too_long && $i_opening_paren > 0 && $list_type eq '=>' ) {
17610             my $i_opening_minus = $i_opening_paren - 4;
17611             if ( $i_opening_minus >= 0 ) {
17612                 $too_long = $self->excess_line_length( $i_opening_minus,
17613                     $i_effective_last_comma + 1 ) > 0;
17614             }
17615         }
17616
17617         # Always break lists contained in '[' and '{' if too long for 1 line,
17618         # and always break lists which are too long and part of a more complex
17619         # structure.
17620         my $must_break_open_container = $must_break_open
17621           || ( $too_long
17622             && ( $in_hierarchical_list || $opening_token ne '(' ) );
17623
17624 #print "LISTX: next=$next_nonblank_type  avail cols=$columns packed=$packed_columns must format = $must_break_open_container too-long=$too_long  opening=$opening_token list_type=$list_type formatted_lines=$formatted_lines  packed=$packed_lines max_sparsity= $max_allowed_sparsity sparsity=$sparsity \n";
17625
17626         #---------------------------------------------------------------
17627         # The main decision:
17628         # Now decide if we will align the data into aligned columns.  Do not
17629         # attempt to align columns if this is a tiny table or it would be
17630         # too spaced.  It seems that the more packed lines we have, the
17631         # sparser the list that can be allowed and still look ok.
17632         #---------------------------------------------------------------
17633
17634         if (   ( $formatted_lines < 3 && $packed_lines < $formatted_lines )
17635             || ( $formatted_lines < 2 )
17636             || ( $unused_columns > $max_allowed_sparsity * $formatted_columns )
17637           )
17638         {
17639
17640             #---------------------------------------------------------------
17641             # too sparse: would look ugly if aligned in a table;
17642             #---------------------------------------------------------------
17643
17644             # use old breakpoints if this is a 'big' list
17645             if ( $packed_lines > 2 && $item_count > 10 ) {
17646                 write_logfile_entry("List sparse: using old breakpoints\n");
17647                 $self->copy_old_breakpoints( $i_first_comma, $i_last_comma );
17648             }
17649
17650             # let the continuation logic handle it if 2 lines
17651             else {
17652
17653                 my $break_count = $self->set_ragged_breakpoints( \@i_term_comma,
17654                     $ri_ragged_break_list );
17655                 ++$break_count if ($use_separate_first_term);
17656
17657                 unless ($must_break_open_container) {
17658                     if ( $break_count <= 1 ) {
17659                         ${$rdo_not_break_apart} = 1;
17660                     }
17661                     elsif ( $rOpts_line_up_parentheses && !$need_lp_break_open )
17662                     {
17663                         ${$rdo_not_break_apart} = 1;
17664                     }
17665                 }
17666             }
17667             return;
17668         }
17669
17670         #---------------------------------------------------------------
17671         # go ahead and format as a table
17672         #---------------------------------------------------------------
17673         write_logfile_entry(
17674             "List: auto formatting with $number_of_fields fields/row\n");
17675
17676         my $j_first_break =
17677           $use_separate_first_term ? $number_of_fields : $number_of_fields - 1;
17678
17679         for (
17680             my $j = $j_first_break ;
17681             $j < $comma_count ;
17682             $j += $number_of_fields
17683           )
17684         {
17685             my $i = $rcomma_index->[$j];
17686             $self->set_forced_breakpoint($i);
17687         }
17688         return;
17689     }
17690 } ## end closure set_comma_breakpoints_do
17691
17692 sub study_list_complexity {
17693
17694     # Look for complex tables which should be formatted with one term per line.
17695     # Returns the following:
17696     #
17697     #  \@i_ragged_break_list = list of good breakpoints to avoid lines
17698     #    which are hard to read
17699     #  $number_of_fields_best = suggested number of fields based on
17700     #    complexity; = 0 if any number may be used.
17701     #
17702     my ( $self, $ri_term_begin, $ri_term_end, $ritem_lengths, $max_width ) = @_;
17703     my $item_count            = @{$ri_term_begin};
17704     my $complex_item_count    = 0;
17705     my $number_of_fields_best = $rOpts_maximum_fields_per_table;
17706     my $i_max                 = @{$ritem_lengths} - 1;
17707     ##my @item_complexity;
17708
17709     my $i_last_last_break = -3;
17710     my $i_last_break      = -2;
17711     my @i_ragged_break_list;
17712
17713     my $definitely_complex = 30;
17714     my $definitely_simple  = 12;
17715     my $quote_count        = 0;
17716
17717     for my $i ( 0 .. $i_max ) {
17718         my $ib = $ri_term_begin->[$i];
17719         my $ie = $ri_term_end->[$i];
17720
17721         # define complexity: start with the actual term length
17722         my $weighted_length = ( $ritem_lengths->[$i] - 2 );
17723
17724         ##TBD: join types here and check for variations
17725         ##my $str=join "", @tokens_to_go[$ib..$ie];
17726
17727         my $is_quote = 0;
17728         if ( $types_to_go[$ib] =~ /^[qQ]$/ ) {
17729             $is_quote = 1;
17730             $quote_count++;
17731         }
17732         elsif ( $types_to_go[$ib] =~ /^[w\-]$/ ) {
17733             $quote_count++;
17734         }
17735
17736         if ( $ib eq $ie ) {
17737             if ( $is_quote && $tokens_to_go[$ib] =~ /\s/ ) {
17738                 $complex_item_count++;
17739                 $weighted_length *= 2;
17740             }
17741             else {
17742             }
17743         }
17744         else {
17745             if ( grep { $_ eq 'b' } @types_to_go[ $ib .. $ie ] ) {
17746                 $complex_item_count++;
17747                 $weighted_length *= 2;
17748             }
17749             if ( grep { $_ eq '..' } @types_to_go[ $ib .. $ie ] ) {
17750                 $weighted_length += 4;
17751             }
17752         }
17753
17754         # add weight for extra tokens.
17755         $weighted_length += 2 * ( $ie - $ib );
17756
17757 ##        my $BUB = join '', @tokens_to_go[$ib..$ie];
17758 ##        print "# COMPLEXITY:$weighted_length   $BUB\n";
17759
17760 ##push @item_complexity, $weighted_length;
17761
17762         # now mark a ragged break after this item it if it is 'long and
17763         # complex':
17764         if ( $weighted_length >= $definitely_complex ) {
17765
17766             # if we broke after the previous term
17767             # then break before it too
17768             if (   $i_last_break == $i - 1
17769                 && $i > 1
17770                 && $i_last_last_break != $i - 2 )
17771             {
17772
17773                 ## FIXME: don't strand a small term
17774                 pop @i_ragged_break_list;
17775                 push @i_ragged_break_list, $i - 2;
17776                 push @i_ragged_break_list, $i - 1;
17777             }
17778
17779             push @i_ragged_break_list, $i;
17780             $i_last_last_break = $i_last_break;
17781             $i_last_break      = $i;
17782         }
17783
17784         # don't break before a small last term -- it will
17785         # not look good on a line by itself.
17786         elsif ($i == $i_max
17787             && $i_last_break == $i - 1
17788             && $weighted_length <= $definitely_simple )
17789         {
17790             pop @i_ragged_break_list;
17791         }
17792     }
17793
17794     my $identifier_count = $i_max + 1 - $quote_count;
17795
17796     # Need more tuning here..
17797     if (   $max_width > 12
17798         && $complex_item_count > $item_count / 2
17799         && $number_of_fields_best != 2 )
17800     {
17801         $number_of_fields_best = 1;
17802     }
17803
17804     return ( $number_of_fields_best, \@i_ragged_break_list, $identifier_count );
17805 }
17806
17807 sub get_maximum_fields_wanted {
17808
17809     # Not all tables look good with more than one field of items.
17810     # This routine looks at a table and decides if it should be
17811     # formatted with just one field or not.
17812     # This coding is still under development.
17813     my ($ritem_lengths) = @_;
17814
17815     my $number_of_fields_best = 0;
17816
17817     # For just a few items, we tentatively assume just 1 field.
17818     my $item_count = @{$ritem_lengths};
17819     if ( $item_count <= 5 ) {
17820         $number_of_fields_best = 1;
17821     }
17822
17823     # For larger tables, look at it both ways and see what looks best
17824     else {
17825
17826         my $is_odd            = 1;
17827         my @max_length        = ( 0,     0 );
17828         my @last_length_2     = ( undef, undef );
17829         my @first_length_2    = ( undef, undef );
17830         my $last_length       = undef;
17831         my $total_variation_1 = 0;
17832         my $total_variation_2 = 0;
17833         my @total_variation_2 = ( 0, 0 );
17834
17835         foreach my $j ( 0 .. $item_count - 1 ) {
17836
17837             $is_odd = 1 - $is_odd;
17838             my $length = $ritem_lengths->[$j];
17839             if ( $length > $max_length[$is_odd] ) {
17840                 $max_length[$is_odd] = $length;
17841             }
17842
17843             if ( defined($last_length) ) {
17844                 my $dl = abs( $length - $last_length );
17845                 $total_variation_1 += $dl;
17846             }
17847             $last_length = $length;
17848
17849             my $ll = $last_length_2[$is_odd];
17850             if ( defined($ll) ) {
17851                 my $dl = abs( $length - $ll );
17852                 $total_variation_2[$is_odd] += $dl;
17853             }
17854             else {
17855                 $first_length_2[$is_odd] = $length;
17856             }
17857             $last_length_2[$is_odd] = $length;
17858         }
17859         $total_variation_2 = $total_variation_2[0] + $total_variation_2[1];
17860
17861         my $factor = ( $item_count > 10 ) ? 1 : ( $item_count > 5 ) ? 0.75 : 0;
17862         unless ( $total_variation_2 < $factor * $total_variation_1 ) {
17863             $number_of_fields_best = 1;
17864         }
17865     }
17866     return ($number_of_fields_best);
17867 }
17868
17869 sub table_columns_available {
17870     my $i_first_comma = shift;
17871     my $columns =
17872       $maximum_line_length_at_level[ $levels_to_go[$i_first_comma] ] -
17873       leading_spaces_to_go($i_first_comma);
17874
17875     # Patch: the vertical formatter does not line up lines whose lengths
17876     # exactly equal the available line length because of allowances
17877     # that must be made for side comments.  Therefore, the number of
17878     # available columns is reduced by 1 character.
17879     $columns -= 1;
17880     return $columns;
17881 }
17882
17883 sub maximum_number_of_fields {
17884
17885     # how many fields will fit in the available space?
17886     my ( $columns, $odd_or_even, $max_width, $pair_width ) = @_;
17887     my $max_pairs        = int( $columns / $pair_width );
17888     my $number_of_fields = $max_pairs * 2;
17889     if (   $odd_or_even == 1
17890         && $max_pairs * $pair_width + $max_width <= $columns )
17891     {
17892         $number_of_fields++;
17893     }
17894     return $number_of_fields;
17895 }
17896
17897 sub compactify_table {
17898
17899     # given a table with a certain number of fields and a certain number
17900     # of lines, see if reducing the number of fields will make it look
17901     # better.
17902     my ( $item_count, $number_of_fields, $formatted_lines, $odd_or_even ) = @_;
17903     if ( $number_of_fields >= $odd_or_even * 2 && $formatted_lines > 0 ) {
17904         my $min_fields;
17905
17906         for (
17907             $min_fields = $number_of_fields ;
17908             $min_fields >= $odd_or_even
17909             && $min_fields * $formatted_lines >= $item_count ;
17910             $min_fields -= $odd_or_even
17911           )
17912         {
17913             $number_of_fields = $min_fields;
17914         }
17915     }
17916     return $number_of_fields;
17917 }
17918
17919 sub set_ragged_breakpoints {
17920
17921     # Set breakpoints in a list that cannot be formatted nicely as a
17922     # table.
17923     my ( $self, $ri_term_comma, $ri_ragged_break_list ) = @_;
17924
17925     my $break_count = 0;
17926     foreach ( @{$ri_ragged_break_list} ) {
17927         my $j = $ri_term_comma->[$_];
17928         if ($j) {
17929             $self->set_forced_breakpoint($j);
17930             $break_count++;
17931         }
17932     }
17933     return $break_count;
17934 }
17935
17936 sub copy_old_breakpoints {
17937     my ( $self, $i_first_comma, $i_last_comma ) = @_;
17938     for my $i ( $i_first_comma .. $i_last_comma ) {
17939         if ( $old_breakpoint_to_go[$i] ) {
17940             $self->set_forced_breakpoint($i);
17941         }
17942     }
17943     return;
17944 }
17945
17946 sub set_nobreaks {
17947     my ( $self, $i, $j ) = @_;
17948     if ( $i >= 0 && $i <= $j && $j <= $max_index_to_go ) {
17949
17950         0 && do {
17951             my ( $a, $b, $c ) = caller();
17952             my $forced_breakpoint_count = get_forced_breakpoint_count();
17953             print STDOUT
17954 "NOBREAK: forced_breakpoint $forced_breakpoint_count from $a $c with i=$i max=$max_index_to_go type=$types_to_go[$i]\n";
17955         };
17956
17957         @nobreak_to_go[ $i .. $j ] = (1) x ( $j - $i + 1 );
17958     }
17959
17960     # shouldn't happen; non-critical error
17961     else {
17962         0 && do {
17963             my ( $a, $b, $c ) = caller();
17964             print STDOUT
17965               "NOBREAK ERROR: from $a $c with i=$i j=$j max=$max_index_to_go\n";
17966         };
17967     }
17968     return;
17969 }
17970
17971 ###############################################
17972 # CODE SECTION 12: Code for setting indentation
17973 ###############################################
17974
17975 sub token_sequence_length {
17976
17977     # return length of tokens ($ibeg .. $iend) including $ibeg & $iend
17978     # returns 0 if $ibeg > $iend (shouldn't happen)
17979     my ( $ibeg, $iend ) = @_;
17980     return 0 if ( !defined($iend) || $iend < 0 || $ibeg > $iend );
17981     return $summed_lengths_to_go[ $iend + 1 ] if ( $ibeg < 0 );
17982     return $summed_lengths_to_go[ $iend + 1 ] - $summed_lengths_to_go[$ibeg];
17983 }
17984
17985 sub total_line_length {
17986
17987     # return length of a line of tokens ($ibeg .. $iend)
17988     my ( $ibeg, $iend ) = @_;
17989
17990     # original coding:
17991     #return leading_spaces_to_go($ibeg) + token_sequence_length( $ibeg, $iend );
17992
17993     # this is basically sub 'leading_spaces_to_go':
17994     my $indentation = $leading_spaces_to_go[$ibeg];
17995     if ( ref($indentation) ) { $indentation = $indentation->get_spaces() }
17996
17997     return $indentation + $summed_lengths_to_go[ $iend + 1 ] -
17998       $summed_lengths_to_go[$ibeg];
17999 }
18000
18001 sub excess_line_length {
18002
18003     # return number of characters by which a line of tokens ($ibeg..$iend)
18004     # exceeds the allowable line length.
18005
18006     # NOTE: Profiling shows that this is a critical routine for efficiency.
18007     # Therefore I have eliminated additional calls to subs from it.
18008     my ( $self, $ibeg, $iend, $ignore_right_weld ) = @_;
18009
18010     # Original expression for line length
18011     ##$length = leading_spaces_to_go($ibeg) + token_sequence_length( $ibeg, $iend );
18012
18013     # This is basically sub 'leading_spaces_to_go':
18014     my $indentation = $leading_spaces_to_go[$ibeg];
18015     if ( ref($indentation) ) { $indentation = $indentation->get_spaces() }
18016
18017     my $length =
18018       $indentation +
18019       $summed_lengths_to_go[ $iend + 1 ] -
18020       $summed_lengths_to_go[$ibeg];
18021
18022     # Include right weld lengths unless requested not to.
18023     if (   $total_weld_count
18024         && !$ignore_right_weld
18025         && $type_sequence_to_go[$iend] )
18026     {
18027         my $wr = $self->[_rweld_len_right_at_K_]->{ $K_to_go[$iend] };
18028         $length += $wr if defined($wr);
18029     }
18030
18031     # return the excess
18032     return $length - $maximum_line_length_at_level[ $levels_to_go[$ibeg] ];
18033 }
18034
18035 sub get_spaces {
18036
18037     # return the number of leading spaces associated with an indentation
18038     # variable $indentation is either a constant number of spaces or an object
18039     # with a get_spaces method.
18040     my $indentation = shift;
18041     return ref($indentation) ? $indentation->get_spaces() : $indentation;
18042 }
18043
18044 sub get_recoverable_spaces {
18045
18046     # return the number of spaces (+ means shift right, - means shift left)
18047     # that we would like to shift a group of lines with the same indentation
18048     # to get them to line up with their opening parens
18049     my $indentation = shift;
18050     return ref($indentation) ? $indentation->get_recoverable_spaces() : 0;
18051 }
18052
18053 sub get_available_spaces_to_go {
18054
18055     my ( $self, $ii ) = @_;
18056     my $item = $leading_spaces_to_go[$ii];
18057
18058     # return the number of available leading spaces associated with an
18059     # indentation variable.  $indentation is either a constant number of
18060     # spaces or an object with a get_available_spaces method.
18061     return ref($item) ? $item->get_available_spaces() : 0;
18062 }
18063
18064 {    ## begin closure set_leading_whitespace (for -lp indentation)
18065
18066     # These routines are called batch-by-batch to handle the -lp indentation
18067     # option.  The coding is rather complex, but is only for -lp.
18068
18069     my $gnu_position_predictor;
18070     my $gnu_sequence_number;
18071     my $line_start_index_to_go;
18072     my $max_gnu_item_index;
18073     my $max_gnu_stack_index;
18074     my %gnu_arrow_count;
18075     my %gnu_comma_count;
18076     my %last_gnu_equals;
18077     my @gnu_item_list;
18078     my @gnu_stack;
18079
18080     sub initialize_gnu_vars {
18081
18082         # initialize gnu variables for a new file;
18083         # must be called once at the start of a new file.
18084
18085         # initialize the leading whitespace stack to negative levels
18086         # so that we can never run off the end of the stack
18087         $gnu_position_predictor =
18088           0;    # where the current token is predicted to be
18089         $max_gnu_stack_index = 0;
18090         $max_gnu_item_index  = -1;
18091         $gnu_stack[0]        = new_lp_indentation_item( 0, -1, -1, 0, 0 );
18092         @gnu_item_list       = ();
18093         return;
18094     }
18095
18096     sub initialize_gnu_batch_vars {
18097
18098         # initialize gnu variables for a new batch;
18099         # must be called before each new batch
18100         $gnu_sequence_number++;    # increment output batch counter
18101         %last_gnu_equals        = ();
18102         %gnu_comma_count        = ();
18103         %gnu_arrow_count        = ();
18104         $line_start_index_to_go = 0;
18105         $max_gnu_item_index     = UNDEFINED_INDEX;
18106         return;
18107     }
18108
18109     sub new_lp_indentation_item {
18110
18111         # this is an interface to the IndentationItem class
18112         my ( $spaces, $level, $ci_level, $available_spaces, $align_paren ) = @_;
18113
18114         # A negative level implies not to store the item in the item_list
18115         my $index = 0;
18116         if ( $level >= 0 ) { $index = ++$max_gnu_item_index; }
18117
18118         my $starting_index_K = 0;
18119         if (   defined($line_start_index_to_go)
18120             && $line_start_index_to_go >= 0
18121             && $line_start_index_to_go <= $max_index_to_go )
18122         {
18123             $starting_index_K = $K_to_go[$line_start_index_to_go];
18124         }
18125
18126         my $item = Perl::Tidy::IndentationItem->new(
18127             spaces              => $spaces,
18128             level               => $level,
18129             ci_level            => $ci_level,
18130             available_spaces    => $available_spaces,
18131             index               => $index,
18132             gnu_sequence_number => $gnu_sequence_number,
18133             align_paren         => $align_paren,
18134             stack_depth         => $max_gnu_stack_index,
18135             starting_index_K    => $starting_index_K,
18136         );
18137
18138         if ( $level >= 0 ) {
18139             $gnu_item_list[$max_gnu_item_index] = $item;
18140         }
18141
18142         return $item;
18143     }
18144
18145     sub set_leading_whitespace {
18146
18147         # This routine defines leading whitespace for the case of -lp formatting
18148         # given: the level and continuation_level of a token,
18149         # define: space count of leading string which would apply if it
18150         # were the first token of a new line.
18151
18152         my ( $self, $Kj, $K_last_nonblank, $K_last_last_nonblank,
18153             $level_abs, $ci_level, $in_continued_quote )
18154           = @_;
18155
18156         return unless ($rOpts_line_up_parentheses);
18157         return unless ( defined($max_index_to_go) && $max_index_to_go >= 0 );
18158
18159         my $rbreak_container          = $self->[_rbreak_container_];
18160         my $rshort_nested             = $self->[_rshort_nested_];
18161         my $ris_excluded_lp_container = $self->[_ris_excluded_lp_container_];
18162         my $rLL                       = $self->[_rLL_];
18163         my $rbreak_before_container_by_seqno =
18164           $self->[_rbreak_before_container_by_seqno_];
18165
18166         # find needed previous nonblank tokens
18167         my $last_nonblank_token      = '';
18168         my $last_nonblank_type       = '';
18169         my $last_nonblank_block_type = '';
18170
18171         # and previous nonblank tokens, just in this batch:
18172         my $last_nonblank_token_in_batch     = '';
18173         my $last_nonblank_type_in_batch      = '';
18174         my $last_last_nonblank_type_in_batch = '';
18175
18176         if ( defined($K_last_nonblank) ) {
18177             $last_nonblank_token = $rLL->[$K_last_nonblank]->[_TOKEN_];
18178             $last_nonblank_type  = $rLL->[$K_last_nonblank]->[_TYPE_];
18179             $last_nonblank_block_type =
18180               $rLL->[$K_last_nonblank]->[_BLOCK_TYPE_];
18181
18182             if ( $K_last_nonblank >= $K_to_go[0] ) {
18183                 $last_nonblank_token_in_batch = $last_nonblank_token;
18184                 $last_nonblank_type_in_batch  = $last_nonblank_type;
18185                 if ( defined($K_last_last_nonblank)
18186                     && $K_last_last_nonblank > $K_to_go[0] )
18187                 {
18188                     $last_last_nonblank_type_in_batch =
18189                       $rLL->[$K_last_last_nonblank]->[_TYPE_];
18190                 }
18191             }
18192         }
18193
18194         ################################################################
18195
18196         # Adjust levels if necessary to recycle whitespace:
18197         my $level            = $level_abs;
18198         my $radjusted_levels = $self->[_radjusted_levels_];
18199         my $nK               = @{$rLL};
18200         my $nws              = @{$radjusted_levels};
18201         if ( defined($radjusted_levels) && @{$radjusted_levels} == @{$rLL} ) {
18202             $level = $radjusted_levels->[$Kj];
18203             if ( $level < 0 ) { $level = 0 }    # note: this should not happen
18204         }
18205
18206         # The continued_quote flag means that this is the first token of a
18207         # line, and it is the continuation of some kind of multi-line quote
18208         # or pattern.  It requires special treatment because it must have no
18209         # added leading whitespace. So we create a special indentation item
18210         # which is not in the stack.
18211         if ($in_continued_quote) {
18212             my $space_count     = 0;
18213             my $available_space = 0;
18214             $level = -1;    # flag to prevent storing in item_list
18215             $leading_spaces_to_go[$max_index_to_go] =
18216               $reduced_spaces_to_go[$max_index_to_go] =
18217               new_lp_indentation_item( $space_count, $level, $ci_level,
18218                 $available_space, 0 );
18219             return;
18220         }
18221
18222         # get the top state from the stack
18223         my $space_count      = $gnu_stack[$max_gnu_stack_index]->get_spaces();
18224         my $current_level    = $gnu_stack[$max_gnu_stack_index]->get_level();
18225         my $current_ci_level = $gnu_stack[$max_gnu_stack_index]->get_ci_level();
18226
18227         my $type        = $types_to_go[$max_index_to_go];
18228         my $token       = $tokens_to_go[$max_index_to_go];
18229         my $total_depth = $nesting_depth_to_go[$max_index_to_go];
18230
18231         if ( $type eq '{' || $type eq '(' ) {
18232
18233             $gnu_comma_count{ $total_depth + 1 } = 0;
18234             $gnu_arrow_count{ $total_depth + 1 } = 0;
18235
18236             # If we come to an opening token after an '=' token of some type,
18237             # see if it would be helpful to 'break' after the '=' to save space
18238             my $last_equals = $last_gnu_equals{$total_depth};
18239             if ( $last_equals && $last_equals > $line_start_index_to_go ) {
18240
18241                 my $seqno = $type_sequence_to_go[$max_index_to_go];
18242
18243                 # find the position if we break at the '='
18244                 my $i_test = $last_equals;
18245                 if ( $types_to_go[ $i_test + 1 ] eq 'b' ) { $i_test++ }
18246
18247                 # TESTING
18248                 ##my $too_close = ($i_test==$max_index_to_go-1);
18249
18250                 my $test_position =
18251                   total_line_length( $i_test, $max_index_to_go );
18252                 my $mll =
18253                   $maximum_line_length_at_level[ $levels_to_go[$i_test] ];
18254
18255                 my $bbc_flag = $break_before_container_types{$token};
18256
18257                 if (
18258
18259                     # the equals is not just before an open paren (testing)
18260                     ##!$too_close &&
18261
18262                     # if we are beyond the midpoint
18263                     $gnu_position_predictor >
18264                     $mll - $rOpts_maximum_line_length / 2
18265
18266                     # if a -bbx flag WANTS a break before this opening token
18267                     || ( $seqno && $rbreak_before_container_by_seqno->{$seqno} )
18268
18269                     # or if we MIGHT want a break (fixes case b826 b909 b989)
18270                     || ( $bbc_flag && $bbc_flag >= 2 )
18271
18272                     # or we are beyond the 1/4 point and there was an old
18273                     # break at an assignment (not '=>') [fix for b1035]
18274                     || (
18275                         $gnu_position_predictor >
18276                         $mll - $rOpts_maximum_line_length * 3 / 4
18277                         && $types_to_go[$last_equals] ne '=>'
18278                         && (
18279                             $old_breakpoint_to_go[$last_equals]
18280                             || (   $last_equals > 0
18281                                 && $old_breakpoint_to_go[ $last_equals - 1 ] )
18282                             || (   $last_equals > 1
18283                                 && $types_to_go[ $last_equals - 1 ] eq 'b'
18284                                 && $old_breakpoint_to_go[ $last_equals - 2 ] )
18285                         )
18286                     )
18287                   )
18288                 {
18289
18290                     # then make the switch -- note that we do not set a real
18291                     # breakpoint here because we may not really need one; sub
18292                     # scan_list will do that if necessary
18293                     $line_start_index_to_go = $i_test + 1;
18294                     $gnu_position_predictor = $test_position;
18295                 }
18296             }
18297         }
18298
18299         my $halfway =
18300           $maximum_line_length_at_level[$level] -
18301           $rOpts_maximum_line_length / 2;
18302
18303         # Check for decreasing depth ..
18304         # Note that one token may have both decreasing and then increasing
18305         # depth. For example, (level, ci) can go from (1,1) to (2,0).  So,
18306         # in this example we would first go back to (1,0) then up to (2,0)
18307         # in a single call.
18308         if ( $level < $current_level || $ci_level < $current_ci_level ) {
18309
18310             # loop to find the first entry at or completely below this level
18311             my ( $lev, $ci_lev );
18312             while (1) {
18313                 if ($max_gnu_stack_index) {
18314
18315                     # save index of token which closes this level
18316                     $gnu_stack[$max_gnu_stack_index]
18317                       ->set_closed($max_index_to_go);
18318
18319                     # Undo any extra indentation if we saw no commas
18320                     my $available_spaces =
18321                       $gnu_stack[$max_gnu_stack_index]->get_available_spaces();
18322
18323                     my $comma_count = 0;
18324                     my $arrow_count = 0;
18325                     if ( $type eq '}' || $type eq ')' ) {
18326                         $comma_count = $gnu_comma_count{$total_depth};
18327                         $arrow_count = $gnu_arrow_count{$total_depth};
18328                         $comma_count = 0 unless $comma_count;
18329                         $arrow_count = 0 unless $arrow_count;
18330                     }
18331                     $gnu_stack[$max_gnu_stack_index]
18332                       ->set_comma_count($comma_count);
18333                     $gnu_stack[$max_gnu_stack_index]
18334                       ->set_arrow_count($arrow_count);
18335
18336                     if ( $available_spaces > 0 ) {
18337
18338                         if ( $comma_count <= 0 || $arrow_count > 0 ) {
18339
18340                             my $i =
18341                               $gnu_stack[$max_gnu_stack_index]->get_index();
18342                             my $seqno =
18343                               $gnu_stack[$max_gnu_stack_index]
18344                               ->get_sequence_number();
18345
18346                             # Be sure this item was created in this batch.  This
18347                             # should be true because we delete any available
18348                             # space from open items at the end of each batch.
18349                             if (   $gnu_sequence_number != $seqno
18350                                 || $i > $max_gnu_item_index )
18351                             {
18352                                 warning(
18353 "Program bug with -lp.  seqno=$seqno should be $gnu_sequence_number and i=$i should be less than max=$max_gnu_item_index\n"
18354                                 );
18355                                 report_definite_bug();
18356                             }
18357
18358                             else {
18359                                 if ( $arrow_count == 0 ) {
18360                                     $gnu_item_list[$i]
18361                                       ->permanently_decrease_available_spaces(
18362                                         $available_spaces);
18363                                 }
18364                                 else {
18365                                     $gnu_item_list[$i]
18366                                       ->tentatively_decrease_available_spaces(
18367                                         $available_spaces);
18368                                 }
18369                                 foreach my $j ( $i + 1 .. $max_gnu_item_index )
18370                                 {
18371                                     $gnu_item_list[$j]
18372                                       ->decrease_SPACES($available_spaces);
18373                                 }
18374                             }
18375                         }
18376                     }
18377
18378                     # go down one level
18379                     --$max_gnu_stack_index;
18380                     $lev    = $gnu_stack[$max_gnu_stack_index]->get_level();
18381                     $ci_lev = $gnu_stack[$max_gnu_stack_index]->get_ci_level();
18382
18383                     # stop when we reach a level at or below the current level
18384                     if ( $lev <= $level && $ci_lev <= $ci_level ) {
18385                         $space_count =
18386                           $gnu_stack[$max_gnu_stack_index]->get_spaces();
18387                         $current_level    = $lev;
18388                         $current_ci_level = $ci_lev;
18389                         last;
18390                     }
18391                 }
18392
18393                 # reached bottom of stack .. should never happen because
18394                 # only negative levels can get here, and $level was forced
18395                 # to be positive above.
18396                 else {
18397                     warning(
18398 "program bug with -lp: stack_error. level=$level; lev=$lev; ci_level=$ci_level; ci_lev=$ci_lev; rerun with -nlp\n"
18399                     );
18400                     report_definite_bug();
18401                     last;
18402                 }
18403             }
18404         }
18405
18406         # handle increasing depth
18407         if ( $level > $current_level || $ci_level > $current_ci_level ) {
18408
18409             # Compute the standard incremental whitespace.  This will be
18410             # the minimum incremental whitespace that will be used.  This
18411             # choice results in a smooth transition between the gnu-style
18412             # and the standard style.
18413             my $standard_increment =
18414               ( $level - $current_level ) *
18415               $rOpts_indent_columns +
18416               ( $ci_level - $current_ci_level ) *
18417               $rOpts_continuation_indentation;
18418
18419             # Now we have to define how much extra incremental space
18420             # ("$available_space") we want.  This extra space will be
18421             # reduced as necessary when long lines are encountered or when
18422             # it becomes clear that we do not have a good list.
18423             my $available_space = 0;
18424             my $align_paren     = 0;
18425             my $excess          = 0;
18426
18427             my $last_nonblank_seqno;
18428             if ( defined($K_last_nonblank) ) {
18429                 $last_nonblank_seqno =
18430                   $rLL->[$K_last_nonblank]->[_TYPE_SEQUENCE_];
18431             }
18432
18433             # initialization on empty stack..
18434             if ( $max_gnu_stack_index == 0 ) {
18435                 $space_count = $level * $rOpts_indent_columns;
18436             }
18437
18438             # if this is a BLOCK, add the standard increment
18439             elsif ($last_nonblank_block_type) {
18440                 $space_count += $standard_increment;
18441             }
18442
18443             # add the standard increment for containers excluded by user rules
18444             # or which contain here-docs or multiline qw text
18445             elsif ( defined($last_nonblank_seqno)
18446                 && $ris_excluded_lp_container->{$last_nonblank_seqno} )
18447             {
18448                 $space_count += $standard_increment;
18449             }
18450
18451             # if last nonblank token was not structural indentation,
18452             # just use standard increment
18453             elsif ( $last_nonblank_type ne '{' ) {
18454                 $space_count += $standard_increment;
18455             }
18456
18457             # otherwise use the space to the first non-blank level change token
18458             else {
18459
18460                 $space_count = $gnu_position_predictor;
18461
18462                 my $min_gnu_indentation =
18463                   $gnu_stack[$max_gnu_stack_index]->get_spaces();
18464
18465                 $available_space = $space_count - $min_gnu_indentation;
18466                 if ( $available_space >= $standard_increment ) {
18467                     $min_gnu_indentation += $standard_increment;
18468                 }
18469                 elsif ( $available_space > 1 ) {
18470                     $min_gnu_indentation += $available_space + 1;
18471                 }
18472                 elsif ( $last_nonblank_token =~ /^[\{\[\(]$/ ) {
18473                     if ( ( $tightness{$last_nonblank_token} < 2 ) ) {
18474                         $min_gnu_indentation += 2;
18475                     }
18476                     else {
18477                         $min_gnu_indentation += 1;
18478                     }
18479                 }
18480                 else {
18481                     $min_gnu_indentation += $standard_increment;
18482                 }
18483                 $available_space = $space_count - $min_gnu_indentation;
18484
18485                 if ( $available_space < 0 ) {
18486                     $space_count     = $min_gnu_indentation;
18487                     $available_space = 0;
18488                 }
18489                 $align_paren = 1;
18490             }
18491
18492             # update state, but not on a blank token
18493             if ( $types_to_go[$max_index_to_go] ne 'b' ) {
18494
18495                 $gnu_stack[$max_gnu_stack_index]->set_have_child(1);
18496
18497                 ++$max_gnu_stack_index;
18498                 $gnu_stack[$max_gnu_stack_index] =
18499                   new_lp_indentation_item( $space_count, $level, $ci_level,
18500                     $available_space, $align_paren );
18501
18502                 # If the opening paren is beyond the half-line length, then
18503                 # we will use the minimum (standard) indentation.  This will
18504                 # help avoid problems associated with running out of space
18505                 # near the end of a line.  As a result, in deeply nested
18506                 # lists, there will be some indentations which are limited
18507                 # to this minimum standard indentation. But the most deeply
18508                 # nested container will still probably be able to shift its
18509                 # parameters to the right for proper alignment, so in most
18510                 # cases this will not be noticeable.
18511                 if ( $available_space > 0 && $space_count > $halfway ) {
18512                     $gnu_stack[$max_gnu_stack_index]
18513                       ->tentatively_decrease_available_spaces($available_space);
18514                 }
18515             }
18516         }
18517
18518         # Count commas and look for non-list characters.  Once we see a
18519         # non-list character, we give up and don't look for any more commas.
18520         if ( $type eq '=>' ) {
18521             $gnu_arrow_count{$total_depth}++;
18522
18523             # remember '=>' like '=' for estimating breaks (but see above note
18524             # for b1035)
18525             $last_gnu_equals{$total_depth} = $max_index_to_go;
18526         }
18527
18528         elsif ( $type eq ',' ) {
18529             $gnu_comma_count{$total_depth}++;
18530         }
18531
18532         elsif ( $is_assignment{$type} ) {
18533             $last_gnu_equals{$total_depth} = $max_index_to_go;
18534         }
18535
18536         # this token might start a new line
18537         # if this is a non-blank..
18538         if ( $type ne 'b' ) {
18539
18540             # and if ..
18541             if (
18542
18543                 # this is the first nonblank token of the line
18544                 $max_index_to_go == 1 && $types_to_go[0] eq 'b'
18545
18546                 # or previous character was one of these:
18547                 || $last_nonblank_type_in_batch =~ /^([\:\?\,f])$/
18548
18549                 # or previous character was opening and this does not close it
18550                 || ( $last_nonblank_type_in_batch eq '{' && $type ne '}' )
18551                 || ( $last_nonblank_type_in_batch eq '(' and $type ne ')' )
18552
18553                 # or this token is one of these:
18554                 || $type =~ /^([\.]|\|\||\&\&)$/
18555
18556                 # or this is a closing structure
18557                 || (   $last_nonblank_type_in_batch eq '}'
18558                     && $last_nonblank_token_in_batch eq
18559                     $last_nonblank_type_in_batch )
18560
18561                 # or previous token was keyword 'return'
18562                 || (
18563                     $last_nonblank_type_in_batch eq 'k'
18564                     && (   $last_nonblank_token_in_batch eq 'return'
18565                         && $type ne '{' )
18566                 )
18567
18568                 # or starting a new line at certain keywords is fine
18569                 || (   $type eq 'k'
18570                     && $is_if_unless_and_or_last_next_redo_return{$token} )
18571
18572                 # or this is after an assignment after a closing structure
18573                 || (
18574                     $is_assignment{$last_nonblank_type_in_batch}
18575                     && (
18576                         $last_last_nonblank_type_in_batch =~ /^[\}\)\]]$/
18577
18578                         # and it is significantly to the right
18579                         || $gnu_position_predictor > $halfway
18580                     )
18581                 )
18582               )
18583             {
18584                 check_for_long_gnu_style_lines($max_index_to_go);
18585                 $line_start_index_to_go = $max_index_to_go;
18586
18587                 # back up 1 token if we want to break before that type
18588                 # otherwise, we may strand tokens like '?' or ':' on a line
18589                 if ( $line_start_index_to_go > 0 ) {
18590                     if ( $last_nonblank_type_in_batch eq 'k' ) {
18591
18592                         if ( $want_break_before{$last_nonblank_token_in_batch} )
18593                         {
18594                             $line_start_index_to_go--;
18595                         }
18596                     }
18597                     elsif ( $want_break_before{$last_nonblank_type_in_batch} ) {
18598                         $line_start_index_to_go--;
18599                     }
18600                 }
18601             }
18602         }
18603
18604         # remember the predicted position of this token on the output line
18605         if ( $max_index_to_go > $line_start_index_to_go ) {
18606             $gnu_position_predictor =
18607               total_line_length( $line_start_index_to_go, $max_index_to_go );
18608         }
18609         else {
18610             $gnu_position_predictor =
18611               $space_count + $token_lengths_to_go[$max_index_to_go];
18612         }
18613
18614         # store the indentation object for this token
18615         # this allows us to manipulate the leading whitespace
18616         # (in case we have to reduce indentation to fit a line) without
18617         # having to change any token values
18618         $leading_spaces_to_go[$max_index_to_go] =
18619           $gnu_stack[$max_gnu_stack_index];
18620         $reduced_spaces_to_go[$max_index_to_go] =
18621           ( $max_gnu_stack_index > 0 && $ci_level )
18622           ? $gnu_stack[ $max_gnu_stack_index - 1 ]
18623           : $gnu_stack[$max_gnu_stack_index];
18624         return;
18625     }
18626
18627     sub check_for_long_gnu_style_lines {
18628
18629         # look at the current estimated maximum line length, and
18630         # remove some whitespace if it exceeds the desired maximum
18631         my ($mx_index_to_go) = @_;
18632
18633         # this is only for the '-lp' style
18634         return unless ($rOpts_line_up_parentheses);
18635
18636         # nothing can be done if no stack items defined for this line
18637         return if ( $max_gnu_item_index == UNDEFINED_INDEX );
18638
18639         # see if we have exceeded the maximum desired line length
18640         # keep 2 extra free because they are needed in some cases
18641         # (result of trial-and-error testing)
18642         my $spaces_needed =
18643           $gnu_position_predictor -
18644           $maximum_line_length_at_level[ $levels_to_go[$mx_index_to_go] ] + 2;
18645
18646         return if ( $spaces_needed <= 0 );
18647
18648         # We are over the limit, so try to remove a requested number of
18649         # spaces from leading whitespace.  We are only allowed to remove
18650         # from whitespace items created on this batch, since others have
18651         # already been used and cannot be undone.
18652         my @candidates = ();
18653         my $i;
18654
18655         # loop over all whitespace items created for the current batch
18656         for ( $i = 0 ; $i <= $max_gnu_item_index ; $i++ ) {
18657             my $item = $gnu_item_list[$i];
18658
18659             # item must still be open to be a candidate (otherwise it
18660             # cannot influence the current token)
18661             next if ( $item->get_closed() >= 0 );
18662
18663             my $available_spaces = $item->get_available_spaces();
18664
18665             if ( $available_spaces > 0 ) {
18666                 push( @candidates, [ $i, $available_spaces ] );
18667             }
18668         }
18669
18670         return unless (@candidates);
18671
18672         # sort by available whitespace so that we can remove whitespace
18673         # from the maximum available first
18674         @candidates = sort { $b->[1] <=> $a->[1] } @candidates;
18675
18676         # keep removing whitespace until we are done or have no more
18677         foreach my $candidate (@candidates) {
18678             my ( $i, $available_spaces ) = @{$candidate};
18679             my $deleted_spaces =
18680               ( $available_spaces > $spaces_needed )
18681               ? $spaces_needed
18682               : $available_spaces;
18683
18684             # remove the incremental space from this item
18685             $gnu_item_list[$i]->decrease_available_spaces($deleted_spaces);
18686
18687             my $i_debug = $i;
18688
18689             # update the leading whitespace of this item and all items
18690             # that came after it
18691             for ( ; $i <= $max_gnu_item_index ; $i++ ) {
18692
18693                 my $old_spaces = $gnu_item_list[$i]->get_spaces();
18694                 if ( $old_spaces >= $deleted_spaces ) {
18695                     $gnu_item_list[$i]->decrease_SPACES($deleted_spaces);
18696                 }
18697
18698                 # shouldn't happen except for code bug:
18699                 else {
18700                     my $level        = $gnu_item_list[$i_debug]->get_level();
18701                     my $ci_level     = $gnu_item_list[$i_debug]->get_ci_level();
18702                     my $old_level    = $gnu_item_list[$i]->get_level();
18703                     my $old_ci_level = $gnu_item_list[$i]->get_ci_level();
18704                     warning(
18705 "program bug with -lp: want to delete $deleted_spaces from item $i, but old=$old_spaces deleted: lev=$level ci=$ci_level  deleted: level=$old_level ci=$ci_level\n"
18706                     );
18707                     report_definite_bug();
18708                 }
18709             }
18710             $gnu_position_predictor -= $deleted_spaces;
18711             $spaces_needed          -= $deleted_spaces;
18712             last unless ( $spaces_needed > 0 );
18713         }
18714         return;
18715     }
18716
18717     sub finish_lp_batch {
18718
18719         # This routine is called once after each output stream batch is
18720         # finished to undo indentation for all incomplete -lp
18721         # indentation levels.  It is too risky to leave a level open,
18722         # because then we can't backtrack in case of a long line to follow.
18723         # This means that comments and blank lines will disrupt this
18724         # indentation style.  But the vertical aligner may be able to
18725         # get the space back if there are side comments.
18726
18727         # this is only for the 'lp' style
18728         return unless ($rOpts_line_up_parentheses);
18729
18730         # nothing can be done if no stack items defined for this line
18731         return if ( $max_gnu_item_index == UNDEFINED_INDEX );
18732
18733         # loop over all whitespace items created for the current batch
18734         foreach my $i ( 0 .. $max_gnu_item_index ) {
18735             my $item = $gnu_item_list[$i];
18736
18737             # only look for open items
18738             next if ( $item->get_closed() >= 0 );
18739
18740             # Tentatively remove all of the available space
18741             # (The vertical aligner will try to get it back later)
18742             my $available_spaces = $item->get_available_spaces();
18743             if ( $available_spaces > 0 ) {
18744
18745                 # delete incremental space for this item
18746                 $gnu_item_list[$i]
18747                   ->tentatively_decrease_available_spaces($available_spaces);
18748
18749                 # Reduce the total indentation space of any nodes that follow
18750                 # Note that any such nodes must necessarily be dependents
18751                 # of this node.
18752                 foreach ( $i + 1 .. $max_gnu_item_index ) {
18753                     $gnu_item_list[$_]->decrease_SPACES($available_spaces);
18754                 }
18755             }
18756         }
18757         return;
18758     }
18759 } ## end closure set_leading_whitespace
18760
18761 sub reduce_lp_indentation {
18762
18763     # reduce the leading whitespace at token $i if possible by $spaces_needed
18764     # (a large value of $spaces_needed will remove all excess space)
18765     # NOTE: to be called from scan_list only for a sequence of tokens
18766     # contained between opening and closing parens/braces/brackets
18767
18768     my ( $self, $i, $spaces_wanted ) = @_;
18769     my $deleted_spaces = 0;
18770
18771     my $item             = $leading_spaces_to_go[$i];
18772     my $available_spaces = $item->get_available_spaces();
18773
18774     if (
18775         $available_spaces > 0
18776         && ( ( $spaces_wanted <= $available_spaces )
18777             || !$item->get_have_child() )
18778       )
18779     {
18780
18781         # we'll remove these spaces, but mark them as recoverable
18782         $deleted_spaces =
18783           $item->tentatively_decrease_available_spaces($spaces_wanted);
18784     }
18785
18786     return $deleted_spaces;
18787 }
18788
18789 ###########################################################
18790 # CODE SECTION 13: Preparing batches for vertical alignment
18791 ###########################################################
18792
18793 sub send_lines_to_vertical_aligner {
18794
18795     my ($self) = @_;
18796
18797     # This routine receives a batch of code for which the final line breaks
18798     # have been defined. Here we prepare the lines for passing to the vertical
18799     # aligner.  We do the following tasks:
18800     # - mark certain vertical alignment tokens, such as '=', in each line
18801     # - make minor indentation adjustments
18802     # - do logical padding: insert extra blank spaces to help display certain
18803     #   logical constructions
18804
18805     my $this_batch = $self->[_this_batch_];
18806     my $rlines_K   = $this_batch->[_rlines_K_];
18807     if ( !@{$rlines_K} ) {
18808
18809         # This can't happen because sub grind_batch_of_CODE always receives
18810         # tokens which it turns into one or more lines. If we get here it means
18811         # that a programming error has caused those lines to be lost.
18812         Fault("Unexpected call with no lines");
18813         return;
18814     }
18815     my $n_last_line = @{$rlines_K} - 1;
18816
18817     my $do_not_pad               = $this_batch->[_do_not_pad_];
18818     my $peak_batch_size          = $this_batch->[_peak_batch_size_];
18819     my $starting_in_quote        = $this_batch->[_starting_in_quote_];
18820     my $ending_in_quote          = $this_batch->[_ending_in_quote_];
18821     my $is_static_block_comment  = $this_batch->[_is_static_block_comment_];
18822     my $ibeg0                    = $this_batch->[_ibeg0_];
18823     my $rK_to_go                 = $this_batch->[_rK_to_go_];
18824     my $batch_count              = $this_batch->[_batch_count_];
18825     my $rix_seqno_controlling_ci = $this_batch->[_rix_seqno_controlling_ci_];
18826
18827     my $rLL    = $self->[_rLL_];
18828     my $Klimit = $self->[_Klimit_];
18829
18830     my ( $Kbeg_next, $Kend_next ) = @{ $rlines_K->[0] };
18831     my $type_beg_next  = $rLL->[$Kbeg_next]->[_TYPE_];
18832     my $token_beg_next = $rLL->[$Kbeg_next]->[_TOKEN_];
18833     my $type_end_next  = $rLL->[$Kend_next]->[_TYPE_];
18834
18835     # Construct indexes to the global_to_go arrays so that called routines can
18836     # still access those arrays. This might eventually be removed
18837     # when all called routines have been converted to access token values
18838     # in the rLL array instead.
18839     my $Kbeg0 = $Kbeg_next;
18840     my ( $ri_first, $ri_last );
18841     foreach my $rline ( @{$rlines_K} ) {
18842         my ( $Kbeg, $Kend ) = @{$rline};
18843         my $ibeg = $ibeg0 + $Kbeg - $Kbeg0;
18844         my $iend = $ibeg0 + $Kend - $Kbeg0;
18845         push @{$ri_first}, $ibeg;
18846         push @{$ri_last},  $iend;
18847     }
18848
18849     my ( $cscw_block_comment, $closing_side_comment );
18850     if ( $rOpts->{'closing-side-comments'} ) {
18851         ( $closing_side_comment, $cscw_block_comment ) =
18852           $self->add_closing_side_comment();
18853     }
18854
18855     my $rindentation_list = [0];    # ref to indentations for each line
18856
18857     # define the array @{$ralignment_type_to_go} for the output tokens
18858     # which will be non-blank for each special token (such as =>)
18859     # for which alignment is required.
18860     my $ralignment_type_to_go =
18861       $self->set_vertical_alignment_markers( $ri_first, $ri_last );
18862
18863     # flush before a long if statement to avoid unwanted alignment
18864     if (   $n_last_line > 0
18865         && $type_beg_next eq 'k'
18866         && $token_beg_next =~ /^(if|unless)$/ )
18867     {
18868         $self->flush_vertical_aligner();
18869     }
18870
18871     $self->undo_ci( $ri_first, $ri_last, $rix_seqno_controlling_ci );
18872
18873     $self->set_logical_padding( $ri_first, $ri_last, $peak_batch_size,
18874         $starting_in_quote )
18875       if ( $rOpts->{'logical-padding'} );
18876
18877     # Resum lengths. We need accurate lengths for making alignment patterns,
18878     # and we may have unmasked a semicolon which was not included at the start.
18879     for ( 0 .. $max_index_to_go ) {
18880         $summed_lengths_to_go[ $_ + 1 ] =
18881           $summed_lengths_to_go[$_] + $token_lengths_to_go[$_];
18882     }
18883
18884     # loop to prepare each line for shipment
18885     my ( $Kbeg, $type_beg, $token_beg );
18886     my ( $Kend, $type_end );
18887     for my $n ( 0 .. $n_last_line ) {
18888
18889         my $ibeg              = $ri_first->[$n];
18890         my $iend              = $ri_last->[$n];
18891         my $rline             = $rlines_K->[$n];
18892         my $forced_breakpoint = $rline->[2];
18893
18894         # we may need to look at variables on three consecutive lines ...
18895
18896         # Some vars on line [n-1], if any:
18897         my $Kbeg_last      = $Kbeg;
18898         my $type_beg_last  = $type_beg;
18899         my $token_beg_last = $token_beg;
18900         my $Kend_last      = $Kend;
18901         my $type_end_last  = $type_end;
18902
18903         # Some vars on line [n]:
18904         $Kbeg      = $Kbeg_next;
18905         $type_beg  = $type_beg_next;
18906         $token_beg = $token_beg_next;
18907         $Kend      = $Kend_next;
18908         $type_end  = $type_end_next;
18909
18910         # Only forward ending K values of non-comments down the pipeline.
18911         # This is equivalent to checking that the last CODE_type is blank or
18912         # equal to 'VER'. See also sub resync_lines_and_tokens for related
18913         # coding.  Note that '$batch_CODE_type' is the code type of the line
18914         # to which the ending token belongs.
18915         my $batch_CODE_type = $this_batch->[_batch_CODE_type_];
18916         my $Kend_code =
18917           $batch_CODE_type && $batch_CODE_type ne 'VER' ? undef : $Kend;
18918
18919         # We use two slightly different definitions of level jump at the end
18920         # of line:
18921         #  $ljump is the level jump needed by 'sub set_adjusted_indentation'
18922         #  $level_jump is the level jump needed by the vertical aligner.
18923         my $ljump = 0;    # level jump at end of line
18924
18925         # Get some vars on line [n+1], if any:
18926         if ( $n < $n_last_line ) {
18927             ( $Kbeg_next, $Kend_next ) =
18928               @{ $rlines_K->[ $n + 1 ] };
18929             $type_beg_next  = $rLL->[$Kbeg_next]->[_TYPE_];
18930             $token_beg_next = $rLL->[$Kbeg_next]->[_TOKEN_];
18931             $type_end_next  = $rLL->[$Kend_next]->[_TYPE_];
18932             $ljump = $rLL->[$Kbeg_next]->[_LEVEL_] - $rLL->[$Kend]->[_LEVEL_];
18933         }
18934         else {
18935
18936             # Patch for git #51, a bare closing qw paren was not outdented
18937             # if the flag '-nodelete-old-newlines is set
18938             my $Kbeg_next = $self->K_next_code($Kend);
18939             if ( defined($Kbeg_next) ) {
18940                 $ljump =
18941                   $rLL->[$Kbeg_next]->[_LEVEL_] - $rLL->[$Kend]->[_LEVEL_];
18942             }
18943         }
18944
18945         # level jump at end of line for the vertical aligner:
18946         my $level_jump =
18947           $Kend >= $Klimit
18948           ? 0
18949           : $rLL->[ $Kend + 1 ]->[_SLEVEL_] - $rLL->[$Kbeg]->[_SLEVEL_];
18950
18951         $self->delete_needless_alignments( $ibeg, $iend,
18952             $ralignment_type_to_go );
18953
18954         my ( $rtokens, $rfields, $rpatterns, $rfield_lengths ) =
18955           $self->make_alignment_patterns( $ibeg, $iend,
18956             $ralignment_type_to_go );
18957
18958         my ( $indentation, $lev, $level_end, $terminal_type,
18959             $terminal_block_type, $is_semicolon_terminated, $is_outdented_line )
18960           = $self->set_adjusted_indentation( $ibeg, $iend, $rfields,
18961             $rpatterns,         $ri_first, $ri_last,
18962             $rindentation_list, $ljump,    $starting_in_quote,
18963             $is_static_block_comment, );
18964
18965         # we will allow outdenting of long lines..
18966         my $outdent_long_lines = (
18967
18968             # which are long quotes, if allowed
18969             ( $type_beg eq 'Q' && $rOpts->{'outdent-long-quotes'} )
18970
18971             # which are long block comments, if allowed
18972               || (
18973                    $type_beg eq '#'
18974                 && $rOpts->{'outdent-long-comments'}
18975
18976                 # but not if this is a static block comment
18977                 && !$is_static_block_comment
18978               )
18979         );
18980
18981         my $break_alignment_before = $is_outdented_line || $do_not_pad;
18982         my $break_alignment_after  = $is_outdented_line;
18983
18984         # flush at an 'if' which follows a line with (1) terminal semicolon
18985         # or (2) terminal block_type which is not an 'if'.  This prevents
18986         # unwanted alignment between the lines.
18987         if ( $type_beg eq 'k' && $token_beg eq 'if' ) {
18988             my $Km           = $self->K_previous_code($Kbeg);
18989             my $type_m       = 'b';
18990             my $block_type_m = 'b';
18991             if ( defined($Km) ) {
18992                 $type_m       = $rLL->[$Km]->[_TYPE_];
18993                 $block_type_m = $rLL->[$Km]->[_BLOCK_TYPE_];
18994             }
18995
18996             # break after anything that is not if-like
18997             $break_alignment_before ||= $type_m eq ';'
18998               || ( $type_m eq '}'
18999                 && $block_type_m ne 'if'
19000                 && $block_type_m ne 'unless'
19001                 && $block_type_m ne 'elsif'
19002                 && $block_type_m ne 'else' );
19003         }
19004
19005         my $rvertical_tightness_flags =
19006           $self->set_vertical_tightness_flags( $n, $n_last_line, $ibeg, $iend,
19007             $ri_first, $ri_last, $ending_in_quote, $closing_side_comment );
19008
19009         # Set a flag at the final ':' of a ternary chain to request
19010         # vertical alignment of the final term.  Here is a
19011         # slightly complex example:
19012         #
19013         # $self->{_text} = (
19014         #    !$section        ? ''
19015         #   : $type eq 'item' ? "the $section entry"
19016         #   :                   "the section on $section"
19017         # )
19018         # . (
19019         #   $page
19020         #   ? ( $section ? ' in ' : '' ) . "the $page$page_ext manpage"
19021         #   : ' elsewhere in this document'
19022         # );
19023         #
19024         my $is_terminal_ternary = 0;
19025
19026         if ( $type_beg eq ':' || $n > 0 && $type_end_last eq ':' ) {
19027             my $last_leading_type = $n > 0 ? $type_beg_last : ':';
19028             if (   $terminal_type ne ';'
19029                 && $n_last_line > $n
19030                 && $level_end == $lev )
19031             {
19032                 $level_end     = $rLL->[$Kbeg_next]->[_LEVEL_];
19033                 $terminal_type = $rLL->[$Kbeg_next]->[_TYPE_];
19034             }
19035             if (
19036                 $last_leading_type eq ':'
19037                 && (   ( $terminal_type eq ';' && $level_end <= $lev )
19038                     || ( $terminal_type ne ':' && $level_end < $lev ) )
19039               )
19040             {
19041
19042                 # the terminal term must not contain any ternary terms, as in
19043                 # my $ECHO = (
19044                 #       $Is_MSWin32 ? ".\\echo$$"
19045                 #     : $Is_MacOS   ? ":echo$$"
19046                 #     : ( $Is_NetWare ? "echo$$" : "./echo$$" )
19047                 # );
19048                 $is_terminal_ternary = 1;
19049
19050                 my $KP = $rLL->[$Kbeg]->[_KNEXT_SEQ_ITEM_];
19051                 while ( defined($KP) && $KP <= $Kend ) {
19052                     my $type_KP = $rLL->[$KP]->[_TYPE_];
19053                     if ( $type_KP eq '?' || $type_KP eq ':' ) {
19054                         $is_terminal_ternary = 0;
19055                         last;
19056                     }
19057                     $KP = $rLL->[$KP]->[_KNEXT_SEQ_ITEM_];
19058                 }
19059             }
19060         }
19061
19062         my $level_adj        = $lev;
19063         my $radjusted_levels = $self->[_radjusted_levels_];
19064         if ( defined($radjusted_levels) && @{$radjusted_levels} == @{$rLL} ) {
19065             $level_adj = $radjusted_levels->[$Kbeg];
19066             if ( $level_adj < 0 ) { $level_adj = 0 }
19067         }
19068
19069         # add any new closing side comment to the last line
19070         if ( $closing_side_comment && $n == $n_last_line && @{$rfields} ) {
19071             $rfields->[-1] .= " $closing_side_comment";
19072
19073             # NOTE: Patch for csc. We can just use 1 for the length of the csc
19074             # because its length should not be a limiting factor from here on.
19075             $rfield_lengths->[-1] += 2;
19076         }
19077
19078         # Programming check: (shouldn't happen)
19079         # The number of tokens which separate the fields must always be
19080         # one less than the number of fields. If this is not true then
19081         # an error has been introduced in sub make_alignment_patterns.
19082         if ( @{$rfields} && ( @{$rtokens} != ( @{$rfields} - 1 ) ) ) {
19083             my $nt  = @{$rtokens};
19084             my $nf  = @{$rfields};
19085             my $msg = <<EOM;
19086 Program bug in Perl::Tidy::Formatter, probably in sub 'make_alignment_patterns':
19087 The number of tokens = $nt should be one less than number of fields: $nf
19088 EOM
19089             Fault($msg);
19090         }
19091
19092         # Set flag which tells if this line is contained in a multi-line list
19093         my $list_seqno = $self->is_list_by_K($Kbeg);
19094
19095         # send this new line down the pipe
19096         my $rvalign_hash = {};
19097         $rvalign_hash->{level}                     = $lev;
19098         $rvalign_hash->{level_end}                 = $level_end;
19099         $rvalign_hash->{level_adj}                 = $level_adj;
19100         $rvalign_hash->{indentation}               = $indentation;
19101         $rvalign_hash->{list_seqno}                = $list_seqno;
19102         $rvalign_hash->{outdent_long_lines}        = $outdent_long_lines;
19103         $rvalign_hash->{is_terminal_ternary}       = $is_terminal_ternary;
19104         $rvalign_hash->{rvertical_tightness_flags} = $rvertical_tightness_flags;
19105         $rvalign_hash->{level_jump}                = $level_jump;
19106         $rvalign_hash->{rfields}                   = $rfields;
19107         $rvalign_hash->{rpatterns}                 = $rpatterns;
19108         $rvalign_hash->{rtokens}                   = $rtokens;
19109         $rvalign_hash->{rfield_lengths}            = $rfield_lengths;
19110         $rvalign_hash->{terminal_block_type}       = $terminal_block_type;
19111         $rvalign_hash->{batch_count}               = $batch_count;
19112         $rvalign_hash->{break_alignment_before}    = $break_alignment_before;
19113         $rvalign_hash->{break_alignment_after}     = $break_alignment_after;
19114         $rvalign_hash->{Kend}                      = $Kend_code;
19115         $rvalign_hash->{ci_level}                  = $ci_levels_to_go[$ibeg];
19116
19117         my $vao = $self->[_vertical_aligner_object_];
19118         $vao->valign_input($rvalign_hash);
19119
19120         $do_not_pad = 0;
19121
19122         # Set flag indicating if this line ends in an opening
19123         # token and is very short, so that a blank line is not
19124         # needed if the subsequent line is a comment.
19125         # Examples of what we are looking for:
19126         #   {
19127         #   && (
19128         #   BEGIN {
19129         #   default {
19130         #   sub {
19131         $self->[_last_output_short_opening_token_]
19132
19133           # line ends in opening token
19134           #              /^[\{\(\[L]$/
19135           = $is_opening_type{$type_end}
19136
19137           # and either
19138           && (
19139             # line has either single opening token
19140             $Kend == $Kbeg
19141
19142             # or is a single token followed by opening token.
19143             # Note that sub identifiers have blanks like 'sub doit'
19144             #                                 $token_beg !~ /\s+/
19145             || ( $Kend - $Kbeg <= 2 && index( $token_beg, ' ' ) < 0 )
19146           )
19147
19148           # and limit total to 10 character widths
19149           && token_sequence_length( $ibeg, $iend ) <= 10;
19150
19151     }    # end of loop to output each line
19152
19153     # remember indentation of lines containing opening containers for
19154     # later use by sub set_adjusted_indentation
19155     $self->save_opening_indentation( $ri_first, $ri_last, $rindentation_list );
19156
19157     # output any new -cscw block comment
19158     if ($cscw_block_comment) {
19159         $self->flush_vertical_aligner();
19160         my $file_writer_object = $self->[_file_writer_object_];
19161         $file_writer_object->write_code_line( $cscw_block_comment . "\n" );
19162     }
19163     return;
19164 }
19165
19166 {    ## begin closure set_vertical_alignment_markers
19167     my %is_vertical_alignment_type;
19168     my %is_not_vertical_alignment_token;
19169     my %is_vertical_alignment_keyword;
19170     my %is_terminal_alignment_type;
19171     my %is_low_level_alignment_token;
19172
19173     BEGIN {
19174
19175         my @q;
19176
19177         # Replaced =~ and // in the list.  // had been removed in RT 119588
19178         @q = qw#
19179           = **= += *= &= <<= &&= -= /= |= >>= ||= //= .= %= ^= x=
19180           { ? : => && || ~~ !~~ =~ !~ // <=> ->
19181           #;
19182         @is_vertical_alignment_type{@q} = (1) x scalar(@q);
19183
19184         # These 'tokens' are not aligned. We need this to remove [
19185         # from the above list because it has type ='{'
19186         @q = qw([);
19187         @is_not_vertical_alignment_token{@q} = (1) x scalar(@q);
19188
19189         # these are the only types aligned at a line end
19190         @q = qw(&& || =>);
19191         @is_terminal_alignment_type{@q} = (1) x scalar(@q);
19192
19193         # these tokens only align at line level
19194         @q = ( '{', '(' );
19195         @is_low_level_alignment_token{@q} = (1) x scalar(@q);
19196
19197         # eq and ne were removed from this list to improve alignment chances
19198         @q = qw(if unless and or err for foreach while until);
19199         @is_vertical_alignment_keyword{@q} = (1) x scalar(@q);
19200     }
19201
19202     sub set_vertical_alignment_markers {
19203
19204         # This routine takes the first step toward vertical alignment of the
19205         # lines of output text.  It looks for certain tokens which can serve as
19206         # vertical alignment markers (such as an '=').
19207         #
19208         # Method: We look at each token $i in this output batch and set
19209         # $ralignment_type_to_go->[$i] equal to those tokens at which we would
19210         # accept vertical alignment.
19211
19212         my ( $self, $ri_first, $ri_last ) = @_;
19213         my $rspecial_side_comment_type = $self->[_rspecial_side_comment_type_];
19214
19215         my $ralignment_type_to_go;
19216
19217         # Initialize the alignment array. Note that closing side comments can
19218         # insert up to 2 additional tokens beyond the original
19219         # $max_index_to_go, so we need to check ri_last for the last index.
19220         my $max_line = @{$ri_first} - 1;
19221         my $iend     = $ri_last->[$max_line];
19222         if ( $iend < $max_index_to_go ) { $iend = $max_index_to_go }
19223
19224         # nothing to do if we aren't allowed to change whitespace
19225         # or there is only 1 token
19226         if ( $iend == 0 || !$rOpts_add_whitespace ) {
19227             for my $i ( 0 .. $iend ) {
19228                 $ralignment_type_to_go->[$i] = '';
19229             }
19230             return $ralignment_type_to_go;
19231         }
19232
19233         # remember the index of last nonblank token before any sidecomment
19234         my $i_terminal = $max_index_to_go;
19235         if ( $types_to_go[$i_terminal] eq '#' ) {
19236             if ( $i_terminal > 0 && $types_to_go[ --$i_terminal ] eq 'b' ) {
19237                 if ( $i_terminal > 0 ) { --$i_terminal }
19238             }
19239         }
19240
19241         # look at each line of this batch..
19242         my $last_vertical_alignment_before_index;
19243         my $vert_last_nonblank_type;
19244         my $vert_last_nonblank_token;
19245         my $vert_last_nonblank_block_type;
19246
19247         foreach my $line ( 0 .. $max_line ) {
19248             my $ibeg = $ri_first->[$line];
19249             my $iend = $ri_last->[$line];
19250             $last_vertical_alignment_before_index = -1;
19251             $vert_last_nonblank_type              = '';
19252             $vert_last_nonblank_token             = '';
19253             $vert_last_nonblank_block_type        = '';
19254
19255             # look at each token in this output line..
19256             my $level_beg = $levels_to_go[$ibeg];
19257             foreach my $i ( $ibeg .. $iend ) {
19258                 my $alignment_type = '';
19259                 my $type           = $types_to_go[$i];
19260                 my $block_type     = $block_type_to_go[$i];
19261                 my $token          = $tokens_to_go[$i];
19262
19263                 # do not align tokens at lower level then start of line
19264                 # except for side comments
19265                 if (   $levels_to_go[$i] < $levels_to_go[$ibeg]
19266                     && $type ne '#' )
19267                 {
19268                     $ralignment_type_to_go->[$i] = '';
19269                     next;
19270                 }
19271
19272                 #--------------------------------------------------------
19273                 # First see if we want to align BEFORE this token
19274                 #--------------------------------------------------------
19275
19276                 # The first possible token that we can align before
19277                 # is index 2 because: 1) it doesn't normally make sense to
19278                 # align before the first token and 2) the second
19279                 # token must be a blank if we are to align before
19280                 # the third
19281                 if ( $i < $ibeg + 2 ) { }
19282
19283                 # must follow a blank token
19284                 elsif ( $types_to_go[ $i - 1 ] ne 'b' ) { }
19285
19286                 # align a side comment --
19287                 elsif ( $type eq '#' ) {
19288
19289                     my $KK      = $K_to_go[$i];
19290                     my $sc_type = $rspecial_side_comment_type->{$KK};
19291
19292                     unless (
19293
19294                         # it is any specially marked side comment
19295                         $sc_type
19296
19297                         # or it is a static side comment
19298                         || (   $rOpts->{'static-side-comments'}
19299                             && $token =~ /$static_side_comment_pattern/ )
19300
19301                         # or a closing side comment
19302                         || (   $vert_last_nonblank_block_type
19303                             && $token =~
19304                             /$closing_side_comment_prefix_pattern/ )
19305                       )
19306                     {
19307                         $alignment_type = $type;
19308                     }    ## Example of a static side comment
19309                 }
19310
19311                 # otherwise, do not align two in a row to create a
19312                 # blank field
19313                 elsif ( $last_vertical_alignment_before_index == $i - 2 ) { }
19314
19315                 # align before one of these keywords
19316                 # (within a line, since $i>1)
19317                 elsif ( $type eq 'k' ) {
19318
19319                     #  /^(if|unless|and|or|eq|ne)$/
19320                     if ( $is_vertical_alignment_keyword{$token} ) {
19321                         $alignment_type = $token;
19322                     }
19323                 }
19324
19325                 # align before one of these types..
19326                 # Note: add '.' after new vertical aligner is operational
19327                 elsif ( $is_vertical_alignment_type{$type}
19328                     && !$is_not_vertical_alignment_token{$token} )
19329                 {
19330                     $alignment_type = $token;
19331
19332                     # Do not align a terminal token.  Although it might
19333                     # occasionally look ok to do this, this has been found to be
19334                     # a good general rule.  The main problems are:
19335                     # (1) that the terminal token (such as an = or :) might get
19336                     # moved far to the right where it is hard to see because
19337                     # nothing follows it, and
19338                     # (2) doing so may prevent other good alignments.
19339                     # Current exceptions are && and || and =>
19340                     if ( $i == $iend || $i >= $i_terminal ) {
19341                         $alignment_type = ""
19342                           unless ( $is_terminal_alignment_type{$type} );
19343                     }
19344
19345                     # Do not align leading ': (' or '. ('.  This would prevent
19346                     # alignment in something like the following:
19347                     #   $extra_space .=
19348                     #       ( $input_line_number < 10 )  ? "  "
19349                     #     : ( $input_line_number < 100 ) ? " "
19350                     #     :                                "";
19351                     # or
19352                     #  $code =
19353                     #      ( $case_matters ? $accessor : " lc($accessor) " )
19354                     #    . ( $yesno        ? " eq "       : " ne " )
19355
19356                     # Also, do not align a ( following a leading ? so we can
19357                     # align something like this:
19358                     #   $converter{$_}->{ushortok} =
19359                     #     $PDL::IO::Pic::biggrays
19360                     #     ? ( m/GIF/          ? 0 : 1 )
19361                     #     : ( m/GIF|RAST|IFF/ ? 0 : 1 );
19362                     if (
19363                            $i == $ibeg + 2
19364                         && $types_to_go[ $i - 1 ] eq 'b'
19365                         && (   $types_to_go[$ibeg] eq '.'
19366                             || $types_to_go[$ibeg] eq ':'
19367                             || $types_to_go[$ibeg] eq '?' )
19368                       )
19369                     {
19370                         $alignment_type = "";
19371                     }
19372
19373                     # Certain tokens only align at the same level as the
19374                     # initial line level
19375                     if (   $is_low_level_alignment_token{$token}
19376                         && $levels_to_go[$i] != $level_beg )
19377                     {
19378                         $alignment_type = "";
19379                     }
19380
19381                     # For a paren after keyword, only align something like this:
19382                     #    if    ( $a ) { &a }
19383                     #    elsif ( $b ) { &b }
19384                     if ( $token eq '(' ) {
19385
19386                         if ( $vert_last_nonblank_type eq 'k' ) {
19387                             $alignment_type = ""
19388                               unless $vert_last_nonblank_token =~
19389                               /^(if|unless|elsif)$/;
19390                         }
19391
19392                         # Do not align a spaced-function-paren if requested.
19393                         # Issue git #53.  Note that $i-1 is a blank token if we
19394                         # get here.
19395                         if (  !$rOpts_function_paren_vertical_alignment
19396                             && $i > $ibeg + 1 )
19397                         {
19398                             my $type_m  = $types_to_go[ $i - 2 ];
19399                             my $token_m = $tokens_to_go[ $i - 2 ];
19400
19401                             # this is the same test as 'space-function-paren'
19402                             if (   $type_m =~ /^[wUG]$/
19403                                 || $type_m eq '->'
19404                                 || $type_m  =~ /^[wi]$/
19405                                 && $token_m =~ /^(\&|->)/ )
19406                             {
19407                                 $alignment_type = "";
19408                             }
19409                         }
19410                     }
19411
19412                     # be sure the alignment tokens are unique
19413                     # This didn't work well: reason not determined
19414                     # if ($token ne $type) {$alignment_type .= $type}
19415                 }
19416
19417                 # NOTE: This is deactivated because it causes the previous
19418                 # if/elsif alignment to fail
19419                 #elsif ( $type eq '}' && $token eq '}' && $block_type_to_go[$i])
19420                 #{ $alignment_type = $type; }
19421
19422                 if ($alignment_type) {
19423                     $last_vertical_alignment_before_index = $i;
19424                 }
19425
19426                 #--------------------------------------------------------
19427                 # Next see if we want to align AFTER the previous nonblank
19428                 #--------------------------------------------------------
19429
19430                 # We want to line up ',' and interior ';' tokens, with the added
19431                 # space AFTER these tokens.  (Note: interior ';' is included
19432                 # because it may occur in short blocks).
19433                 if (
19434
19435                     # we haven't already set it
19436                     !$alignment_type
19437
19438                     # and its not the first token of the line
19439                     && ( $i > $ibeg )
19440
19441                     # and it follows a blank
19442                     && $types_to_go[ $i - 1 ] eq 'b'
19443
19444                     # and previous token IS one of these:
19445                     && (   $vert_last_nonblank_type eq ','
19446                         || $vert_last_nonblank_type eq ';' )
19447
19448                     # and it's NOT one of these
19449                     && (   $type ne 'b'
19450                         && $type ne '#'
19451                         && !$is_closing_token{$type} )
19452
19453                     # then go ahead and align
19454                   )
19455
19456                 {
19457                     $alignment_type = $vert_last_nonblank_type;
19458                 }
19459
19460                 #--------------------------------------------------------
19461                 # Undo alignment in special cases
19462                 #--------------------------------------------------------
19463                 if ($alignment_type) {
19464
19465                     # do not align the opening brace of an anonymous sub
19466                     if ( $token eq '{' && $block_type =~ /$ASUB_PATTERN/ ) {
19467                         $alignment_type = "";
19468                     }
19469                 }
19470
19471                 #--------------------------------------------------------
19472                 # then store the value
19473                 #--------------------------------------------------------
19474                 $ralignment_type_to_go->[$i] = $alignment_type;
19475                 if ( $type ne 'b' ) {
19476                     $vert_last_nonblank_type       = $type;
19477                     $vert_last_nonblank_token      = $token;
19478                     $vert_last_nonblank_block_type = $block_type;
19479                 }
19480             }
19481         }
19482         return $ralignment_type_to_go;
19483     }
19484 } ## end closure set_vertical_alignment_markers
19485
19486 sub get_seqno {
19487
19488     # get opening and closing sequence numbers of a token for the vertical
19489     # aligner.  Assign qw quotes a value to allow qw opening and closing tokens
19490     # to be treated somewhat like opening and closing tokens for stacking
19491     # tokens by the vertical aligner.
19492     my ( $self, $ii, $ending_in_quote ) = @_;
19493
19494     my $rLL        = $self->[_rLL_];
19495     my $this_batch = $self->[_this_batch_];
19496     my $rK_to_go   = $this_batch->[_rK_to_go_];
19497
19498     my $KK    = $rK_to_go->[$ii];
19499     my $seqno = $rLL->[$KK]->[_TYPE_SEQUENCE_];
19500
19501     if ( $rLL->[$KK]->[_TYPE_] eq 'q' ) {
19502         my $SEQ_QW = -1;
19503         my $token  = $rLL->[$KK]->[_TOKEN_];
19504         if ( $ii > 0 ) {
19505             $seqno = $SEQ_QW if ( $token =~ /^qw\s*[\(\{\[]/ );
19506         }
19507         else {
19508             if ( !$ending_in_quote ) {
19509                 $seqno = $SEQ_QW if ( $token =~ /[\)\}\]]$/ );
19510             }
19511         }
19512     }
19513     return ($seqno);
19514 }
19515
19516 {
19517     my %undo_extended_ci;
19518
19519     sub initialize_undo_ci {
19520         %undo_extended_ci = ();
19521         return;
19522     }
19523
19524     sub undo_ci {
19525
19526         # Undo continuation indentation in certain sequences
19527         my ( $self, $ri_first, $ri_last, $rix_seqno_controlling_ci ) = @_;
19528         my ( $line_1, $line_2, $lev_last );
19529         my $this_line_is_semicolon_terminated;
19530         my $max_line = @{$ri_first} - 1;
19531
19532         my $rseqno_controlling_my_ci = $self->[_rseqno_controlling_my_ci_];
19533
19534         # Prepare a list of controlling indexes for each line if required.
19535         # This is used for efficient processing below.  Note: this is
19536         # critical for speed. In the initial implementation I just looped
19537         # through the @$rix_seqno_controlling_ci list below. Using NYT_prof, I
19538         # found that this routine was causing a huge run time in large lists.
19539         # On a very large list test case, this new coding dropped the run time
19540         # of this routine from 30 seconds to 169 milliseconds.
19541         my @i_controlling_ci;
19542         if ( @{$rix_seqno_controlling_ci} ) {
19543             my @tmp     = reverse @{$rix_seqno_controlling_ci};
19544             my $ix_next = pop @tmp;
19545             foreach my $line ( 0 .. $max_line ) {
19546                 my $iend = $ri_last->[$line];
19547                 while ( defined($ix_next) && $ix_next <= $iend ) {
19548                     push @{ $i_controlling_ci[$line] }, $ix_next;
19549                     $ix_next = pop @tmp;
19550                 }
19551             }
19552         }
19553
19554         # Loop over all lines of the batch ...
19555
19556         # Workaround for problem c007, in which the combination -lp -xci
19557         # can produce a "Program bug" message in unusual circumstances.
19558         my $skip_SECTION_1 = $rOpts_line_up_parentheses
19559           && $rOpts->{'extended-continuation-indentation'};
19560
19561         foreach my $line ( 0 .. $max_line ) {
19562
19563             my $ibeg = $ri_first->[$line];
19564             my $iend = $ri_last->[$line];
19565             my $lev  = $levels_to_go[$ibeg];
19566
19567             ####################################
19568             # SECTION 1: Undo needless common CI
19569             ####################################
19570
19571             # We are looking at leading tokens and looking for a sequence all
19572             # at the same level and all at a higher level than enclosing lines.
19573
19574             # For example, we can undo continuation indentation in sort/map/grep
19575             # chains
19576
19577             #    my $dat1 = pack( "n*",
19578             #        map { $_, $lookup->{$_} }
19579             #          sort { $a <=> $b }
19580             #          grep { $lookup->{$_} ne $default } keys %$lookup );
19581
19582             # to become
19583
19584             #    my $dat1 = pack( "n*",
19585             #        map { $_, $lookup->{$_} }
19586             #        sort { $a <=> $b }
19587             #        grep { $lookup->{$_} ne $default } keys %$lookup );
19588
19589             if ( $line > 0 && !$skip_SECTION_1 ) {
19590
19591                 # if we have started a chain..
19592                 if ($line_1) {
19593
19594                     # see if it continues..
19595                     if ( $lev == $lev_last ) {
19596                         if (   $types_to_go[$ibeg] eq 'k'
19597                             && $is_sort_map_grep{ $tokens_to_go[$ibeg] } )
19598                         {
19599
19600                             # chain continues...
19601                             # check for chain ending at end of a statement
19602                             if ( $line == $max_line ) {
19603
19604                                 # see of this line ends a statement
19605                                 $this_line_is_semicolon_terminated =
19606                                   $types_to_go[$iend] eq ';'
19607
19608                                   # with possible side comment
19609                                   || ( $types_to_go[$iend] eq '#'
19610                                     && $iend - $ibeg >= 2
19611                                     && $types_to_go[ $iend - 2 ] eq ';'
19612                                     && $types_to_go[ $iend - 1 ] eq 'b' );
19613                             }
19614                             $line_2 = $line
19615                               if ($this_line_is_semicolon_terminated);
19616                         }
19617                         else {
19618
19619                             # kill chain
19620                             $line_1 = undef;
19621                         }
19622                     }
19623                     elsif ( $lev < $lev_last ) {
19624
19625                         # chain ends with previous line
19626                         $line_2 = $line - 1;
19627                     }
19628                     elsif ( $lev > $lev_last ) {
19629
19630                         # kill chain
19631                         $line_1 = undef;
19632                     }
19633
19634                     # undo the continuation indentation if a chain ends
19635                     if ( defined($line_2) && defined($line_1) ) {
19636                         my $continuation_line_count = $line_2 - $line_1 + 1;
19637                         @ci_levels_to_go[ @{$ri_first}[ $line_1 .. $line_2 ] ]
19638                           = (0) x ($continuation_line_count)
19639                           if ( $continuation_line_count >= 0 );
19640                         @leading_spaces_to_go[ @{$ri_first}
19641                           [ $line_1 .. $line_2 ] ] =
19642                           @reduced_spaces_to_go[ @{$ri_first}
19643                           [ $line_1 .. $line_2 ] ];
19644                         $line_1 = undef;
19645                     }
19646                 }
19647
19648                 # not in a chain yet..
19649                 else {
19650
19651                     # look for start of a new sort/map/grep chain
19652                     if ( $lev > $lev_last ) {
19653                         if (   $types_to_go[$ibeg] eq 'k'
19654                             && $is_sort_map_grep{ $tokens_to_go[$ibeg] } )
19655                         {
19656                             $line_1 = $line;
19657                         }
19658                     }
19659                 }
19660             }
19661
19662             ######################################
19663             # SECTION 2: Undo ci at cuddled blocks
19664             ######################################
19665
19666             # Note that sub set_adjusted_indentation will be called later to
19667             # actually do this, but for now we will tentatively mark cuddled
19668             # lines with ci=0 so that the the -xci loop which follows will be
19669             # correct at cuddles.
19670             if (
19671                 $types_to_go[$ibeg] eq '}'
19672                 && ( $nesting_depth_to_go[$iend] + 1 ==
19673                     $nesting_depth_to_go[$ibeg] )
19674               )
19675             {
19676                 my $terminal_type = $types_to_go[$iend];
19677                 if ( $terminal_type eq '#' && $iend > $ibeg ) {
19678                     $terminal_type = $types_to_go[ $iend - 1 ];
19679                     if ( $terminal_type eq '#' && $iend - 1 > $ibeg ) {
19680                         $terminal_type = $types_to_go[ $iend - 2 ];
19681                     }
19682                 }
19683                 if ( $terminal_type eq '{' ) {
19684                     my $Kbeg = $K_to_go[$ibeg];
19685                     $ci_levels_to_go[$ibeg] = 0;
19686                 }
19687             }
19688
19689             #########################################################
19690             # SECTION 3: Undo ci set by sub extended_ci if not needed
19691             #########################################################
19692
19693             # Undo the ci of the leading token if its controlling token
19694             # went out on a previous line without ci
19695             if ( $ci_levels_to_go[$ibeg] ) {
19696                 my $Kbeg  = $K_to_go[$ibeg];
19697                 my $seqno = $rseqno_controlling_my_ci->{$Kbeg};
19698                 if ( $seqno && $undo_extended_ci{$seqno} ) {
19699
19700                     # but do not undo ci set by the -lp flag
19701                     if ( !ref( $reduced_spaces_to_go[$ibeg] ) ) {
19702                         $ci_levels_to_go[$ibeg] = 0;
19703                         $leading_spaces_to_go[$ibeg] =
19704                           $reduced_spaces_to_go[$ibeg];
19705                     }
19706                 }
19707             }
19708
19709             # Flag any controlling opening tokens in lines without ci.  This
19710             # will be used later in the above if statement to undo the ci which
19711             # they added.  The array i_controlling_ci[$line] was prepared at
19712             # the top of this routine.
19713             if ( !$ci_levels_to_go[$ibeg]
19714                 && defined( $i_controlling_ci[$line] ) )
19715             {
19716                 foreach my $i ( @{ $i_controlling_ci[$line] } ) {
19717                     my $seqno = $type_sequence_to_go[$i];
19718                     $undo_extended_ci{$seqno} = 1;
19719                 }
19720             }
19721
19722             $lev_last = $lev;
19723         }
19724
19725         return;
19726     }
19727 }
19728
19729 {    ## begin closure set_logical_padding
19730     my %is_math_op;
19731
19732     BEGIN {
19733
19734         my @q = qw( + - * / );
19735         @is_math_op{@q} = (1) x scalar(@q);
19736     }
19737
19738     sub set_logical_padding {
19739
19740         # Look at a batch of lines and see if extra padding can improve the
19741         # alignment when there are certain leading operators. Here is an
19742         # example, in which some extra space is introduced before
19743         # '( $year' to make it line up with the subsequent lines:
19744         #
19745         #       if (   ( $Year < 1601 )
19746         #           || ( $Year > 2899 )
19747         #           || ( $EndYear < 1601 )
19748         #           || ( $EndYear > 2899 ) )
19749         #       {
19750         #           &Error_OutOfRange;
19751         #       }
19752         #
19753         my ( $self, $ri_first, $ri_last, $peak_batch_size, $starting_in_quote )
19754           = @_;
19755         my $max_line = @{$ri_first} - 1;
19756
19757         my ( $ibeg, $ibeg_next, $ibegm, $iend, $iendm, $ipad, $pad_spaces,
19758             $tok_next, $type_next, $has_leading_op_next, $has_leading_op );
19759
19760         # Patch to produce padding in the first line of short code blocks.
19761         # This is part of an update to fix cases b562 .. b983.
19762         # This is needed to compensate for a change which was made in 'sub
19763         # starting_one_line_block' to prevent blinkers.  Previously, that sub
19764         # would not look at the total block size and rely on sub
19765         # set_continuation_breaks to break up long blocks. Consequently, the
19766         # first line of those batches would end in the opening block brace of a
19767         # sort/map/grep/eval block.  When this was changed to immediately check
19768         # for blocks which were too long, the opening block brace would go out
19769         # in a single batch, and the block contents would go out as the next
19770         # batch.  This caused the logic in this routine which decides if the
19771         # first line should be padded to be incorrect.  To fix this, we set a
19772         # flag if the previous batch ended in an opening sort/map/grep/eval
19773         # block brace, and use it to adjust the logic to compensate.
19774
19775         # For example, the following would have previously been a single batch
19776         # but now is two batches.  We want to pad the line starting in '$dir':
19777         #    my (@indices) =                      # batch n-1  (prev batch n)
19778         #      sort {                             # batch n-1  (prev batch n)
19779         #            $dir eq 'left'               # batch n
19780         #          ? $cells[$a] <=> $cells[$b]    # batch n
19781         #          : $cells[$b] <=> $cells[$a];   # batch n
19782         #      } ( 0 .. $#cells );                # batch n
19783
19784         my $rLL   = $self->[_rLL_];
19785         my $K0    = $K_to_go[0];
19786         my $Kprev = $self->K_previous_code($K0);
19787         my $is_short_block;
19788         if ( defined($Kprev)
19789             && $rLL->[$Kprev]->[_BLOCK_TYPE_] )
19790         {
19791             my $block_type = $rLL->[$Kprev]->[_BLOCK_TYPE_];
19792             $is_short_block = $is_sort_map_grep_eval{$block_type};
19793             $is_short_block ||= $want_one_line_block{$block_type};
19794         }
19795
19796         # looking at each line of this batch..
19797         foreach my $line ( 0 .. $max_line - 1 ) {
19798
19799             # see if the next line begins with a logical operator
19800             $ibeg      = $ri_first->[$line];
19801             $iend      = $ri_last->[$line];
19802             $ibeg_next = $ri_first->[ $line + 1 ];
19803             $tok_next  = $tokens_to_go[$ibeg_next];
19804             $type_next = $types_to_go[$ibeg_next];
19805
19806             $has_leading_op_next = ( $tok_next =~ /^\w/ )
19807               ? $is_chain_operator{$tok_next}      # + - * / : ? && ||
19808               : $is_chain_operator{$type_next};    # and, or
19809
19810             next unless ($has_leading_op_next);
19811
19812             # next line must not be at lesser depth
19813             next
19814               if ( $nesting_depth_to_go[$ibeg] >
19815                 $nesting_depth_to_go[$ibeg_next] );
19816
19817             # identify the token in this line to be padded on the left
19818             $ipad = undef;
19819
19820             # handle lines at same depth...
19821             if ( $nesting_depth_to_go[$ibeg] ==
19822                 $nesting_depth_to_go[$ibeg_next] )
19823             {
19824
19825                 # if this is not first line of the batch ...
19826                 if ( $line > 0 ) {
19827
19828                     # and we have leading operator..
19829                     next if $has_leading_op;
19830
19831                     # Introduce padding if..
19832                     # 1. the previous line is at lesser depth, or
19833                     # 2. the previous line ends in an assignment
19834                     # 3. the previous line ends in a 'return'
19835                     # 4. the previous line ends in a comma
19836                     # Example 1: previous line at lesser depth
19837                     #       if (   ( $Year < 1601 )      # <- we are here but
19838                     #           || ( $Year > 2899 )      #  list has not yet
19839                     #           || ( $EndYear < 1601 )   # collapsed vertically
19840                     #           || ( $EndYear > 2899 ) )
19841                     #       {
19842                     #
19843                     # Example 2: previous line ending in assignment:
19844                     #    $leapyear =
19845                     #        $year % 4   ? 0     # <- We are here
19846                     #      : $year % 100 ? 1
19847                     #      : $year % 400 ? 0
19848                     #      : 1;
19849                     #
19850                     # Example 3: previous line ending in comma:
19851                     #    push @expr,
19852                     #        /test/   ? undef
19853                     #      : eval($_) ? 1
19854                     #      : eval($_) ? 1
19855                     #      :            0;
19856
19857                    # be sure levels agree (do not indent after an indented 'if')
19858                     next
19859                       if ( $levels_to_go[$ibeg] ne $levels_to_go[$ibeg_next] );
19860
19861                     # allow padding on first line after a comma but only if:
19862                     # (1) this is line 2 and
19863                     # (2) there are at more than three lines and
19864                     # (3) lines 3 and 4 have the same leading operator
19865                     # These rules try to prevent padding within a long
19866                     # comma-separated list.
19867                     my $ok_comma;
19868                     if (   $types_to_go[$iendm] eq ','
19869                         && $line == 1
19870                         && $max_line > 2 )
19871                     {
19872                         my $ibeg_next_next = $ri_first->[ $line + 2 ];
19873                         my $tok_next_next  = $tokens_to_go[$ibeg_next_next];
19874                         $ok_comma = $tok_next_next eq $tok_next;
19875                     }
19876
19877                     next
19878                       unless (
19879                            $is_assignment{ $types_to_go[$iendm] }
19880                         || $ok_comma
19881                         || ( $nesting_depth_to_go[$ibegm] <
19882                             $nesting_depth_to_go[$ibeg] )
19883                         || (   $types_to_go[$iendm] eq 'k'
19884                             && $tokens_to_go[$iendm] eq 'return' )
19885                       );
19886
19887                     # we will add padding before the first token
19888                     $ipad = $ibeg;
19889                 }
19890
19891                 # for first line of the batch..
19892                 else {
19893
19894                     # WARNING: Never indent if first line is starting in a
19895                     # continued quote, which would change the quote.
19896                     next if $starting_in_quote;
19897
19898                     # if this is text after closing '}'
19899                     # then look for an interior token to pad
19900                     if ( $types_to_go[$ibeg] eq '}' ) {
19901
19902                     }
19903
19904                     # otherwise, we might pad if it looks really good
19905                     elsif ($is_short_block) {
19906                         $ipad = $ibeg;
19907                     }
19908                     else {
19909
19910                         # we might pad token $ibeg, so be sure that it
19911                         # is at the same depth as the next line.
19912                         next
19913                           if ( $nesting_depth_to_go[$ibeg] !=
19914                             $nesting_depth_to_go[$ibeg_next] );
19915
19916                         # We can pad on line 1 of a statement if at least 3
19917                         # lines will be aligned. Otherwise, it
19918                         # can look very confusing.
19919
19920                  # We have to be careful not to pad if there are too few
19921                  # lines.  The current rule is:
19922                  # (1) in general we require at least 3 consecutive lines
19923                  # with the same leading chain operator token,
19924                  # (2) but an exception is that we only require two lines
19925                  # with leading colons if there are no more lines.  For example,
19926                  # the first $i in the following snippet would get padding
19927                  # by the second rule:
19928                  #
19929                  #   $i == 1 ? ( "First", "Color" )
19930                  # : $i == 2 ? ( "Then",  "Rarity" )
19931                  # :           ( "Then",  "Name" );
19932
19933                         if ( $max_line > 1 ) {
19934                             my $leading_token = $tokens_to_go[$ibeg_next];
19935                             my $tokens_differ;
19936
19937                             # never indent line 1 of a '.' series because
19938                             # previous line is most likely at same level.
19939                             # TODO: we should also look at the leading_spaces
19940                             # of the last output line and skip if it is same
19941                             # as this line.
19942                             next if ( $leading_token eq '.' );
19943
19944                             my $count = 1;
19945                             foreach my $l ( 2 .. 3 ) {
19946                                 last if ( $line + $l > $max_line );
19947                                 my $ibeg_next_next = $ri_first->[ $line + $l ];
19948                                 if ( $tokens_to_go[$ibeg_next_next] ne
19949                                     $leading_token )
19950                                 {
19951                                     $tokens_differ = 1;
19952                                     last;
19953                                 }
19954                                 $count++;
19955                             }
19956                             next if ($tokens_differ);
19957                             next if ( $count < 3 && $leading_token ne ':' );
19958                             $ipad = $ibeg;
19959                         }
19960                         else {
19961                             next;
19962                         }
19963                     }
19964                 }
19965             }
19966
19967             # find interior token to pad if necessary
19968             if ( !defined($ipad) ) {
19969
19970                 for ( my $i = $ibeg ; ( $i < $iend ) && !$ipad ; $i++ ) {
19971
19972                     # find any unclosed container
19973                     next
19974                       unless ( $type_sequence_to_go[$i]
19975                         && $mate_index_to_go[$i] > $iend );
19976
19977                     # find next nonblank token to pad
19978                     $ipad = $inext_to_go[$i];
19979                     last if ( $ipad > $iend );
19980                 }
19981                 last unless $ipad;
19982             }
19983
19984             # We cannot pad the first leading token of a file because
19985             # it could cause a bug in which the starting indentation
19986             # level is guessed incorrectly each time the code is run
19987             # though perltidy, thus causing the code to march off to
19988             # the right.  For example, the following snippet would have
19989             # this problem:
19990
19991 ##     ov_method mycan( $package, '(""' ),       $package
19992 ##  or ov_method mycan( $package, '(0+' ),       $package
19993 ##  or ov_method mycan( $package, '(bool' ),     $package
19994 ##  or ov_method mycan( $package, '(nomethod' ), $package;
19995
19996             # If this snippet is within a block this won't happen
19997             # unless the user just processes the snippet alone within
19998             # an editor.  In that case either the user will see and
19999             # fix the problem or it will be corrected next time the
20000             # entire file is processed with perltidy.
20001             next if ( $ipad == 0 && $peak_batch_size <= 1 );
20002
20003 ## THIS PATCH REMOVES THE FOLLOWING POOR PADDING (math.t) with -pbp, BUT
20004 ## IT DID MORE HARM THAN GOOD
20005 ##            ceil(
20006 ##                      $font->{'loca'}->{'glyphs'}[$x]->read->{'xMin'} * 1000
20007 ##                    / $upem
20008 ##            ),
20009 ##?            # do not put leading padding for just 2 lines of math
20010 ##?            if (   $ipad == $ibeg
20011 ##?                && $line > 0
20012 ##?                && $levels_to_go[$ipad] > $levels_to_go[ $ipad - 1 ]
20013 ##?                && $is_math_op{$type_next}
20014 ##?                && $line + 2 <= $max_line )
20015 ##?            {
20016 ##?                my $ibeg_next_next = $ri_first->[ $line + 2 ];
20017 ##?                my $type_next_next = $types_to_go[$ibeg_next_next];
20018 ##?                next if !$is_math_op{$type_next_next};
20019 ##?            }
20020
20021             # next line must not be at greater depth
20022             my $iend_next = $ri_last->[ $line + 1 ];
20023             next
20024               if ( $nesting_depth_to_go[ $iend_next + 1 ] >
20025                 $nesting_depth_to_go[$ipad] );
20026
20027             # lines must be somewhat similar to be padded..
20028             my $inext_next = $inext_to_go[$ibeg_next];
20029             my $type       = $types_to_go[$ipad];
20030             my $type_next  = $types_to_go[ $ipad + 1 ];
20031
20032             # see if there are multiple continuation lines
20033             my $logical_continuation_lines = 1;
20034             if ( $line + 2 <= $max_line ) {
20035                 my $leading_token  = $tokens_to_go[$ibeg_next];
20036                 my $ibeg_next_next = $ri_first->[ $line + 2 ];
20037                 if (   $tokens_to_go[$ibeg_next_next] eq $leading_token
20038                     && $nesting_depth_to_go[$ibeg_next] eq
20039                     $nesting_depth_to_go[$ibeg_next_next] )
20040                 {
20041                     $logical_continuation_lines++;
20042                 }
20043             }
20044
20045             # see if leading types match
20046             my $types_match = $types_to_go[$inext_next] eq $type;
20047             my $matches_without_bang;
20048
20049             # if first line has leading ! then compare the following token
20050             if ( !$types_match && $type eq '!' ) {
20051                 $types_match = $matches_without_bang =
20052                   $types_to_go[$inext_next] eq $types_to_go[ $ipad + 1 ];
20053             }
20054             if (
20055
20056                 # either we have multiple continuation lines to follow
20057                 # and we are not padding the first token
20058                 (
20059                     $logical_continuation_lines > 1
20060                     && ( $ipad > 0 || $is_short_block )
20061                 )
20062
20063                 # or..
20064                 || (
20065
20066                     # types must match
20067                     $types_match
20068
20069                     # and keywords must match if keyword
20070                     && !(
20071                            $type eq 'k'
20072                         && $tokens_to_go[$ipad] ne $tokens_to_go[$inext_next]
20073                     )
20074                 )
20075               )
20076             {
20077
20078                 #----------------------begin special checks--------------
20079                 #
20080                 # SPECIAL CHECK 1:
20081                 # A check is needed before we can make the pad.
20082                 # If we are in a list with some long items, we want each
20083                 # item to stand out.  So in the following example, the
20084                 # first line beginning with '$casefold->' would look good
20085                 # padded to align with the next line, but then it
20086                 # would be indented more than the last line, so we
20087                 # won't do it.
20088                 #
20089                 #  ok(
20090                 #      $casefold->{code}         eq '0041'
20091                 #        && $casefold->{status}  eq 'C'
20092                 #        && $casefold->{mapping} eq '0061',
20093                 #      'casefold 0x41'
20094                 #  );
20095                 #
20096                 # Note:
20097                 # It would be faster, and almost as good, to use a comma
20098                 # count, and not pad if comma_count > 1 and the previous
20099                 # line did not end with a comma.
20100                 #
20101                 my $ok_to_pad = 1;
20102
20103                 my $ibg   = $ri_first->[ $line + 1 ];
20104                 my $depth = $nesting_depth_to_go[ $ibg + 1 ];
20105
20106                 # just use simplified formula for leading spaces to avoid
20107                 # needless sub calls
20108                 my $lsp = $levels_to_go[$ibg] + $ci_levels_to_go[$ibg];
20109
20110                 # look at each line beyond the next ..
20111                 my $l = $line + 1;
20112                 foreach my $ltest ( $line + 2 .. $max_line ) {
20113                     $l = $ltest;
20114                     my $ibg = $ri_first->[$l];
20115
20116                     # quit looking at the end of this container
20117                     last
20118                       if ( $nesting_depth_to_go[ $ibg + 1 ] < $depth )
20119                       || ( $nesting_depth_to_go[$ibg] < $depth );
20120
20121                     # cannot do the pad if a later line would be
20122                     # outdented more
20123                     if ( $levels_to_go[$ibg] + $ci_levels_to_go[$ibg] < $lsp ) {
20124                         $ok_to_pad = 0;
20125                         last;
20126                     }
20127                 }
20128
20129                 # don't pad if we end in a broken list
20130                 if ( $l == $max_line ) {
20131                     my $i2 = $ri_last->[$l];
20132                     if ( $types_to_go[$i2] eq '#' ) {
20133                         my $i1 = $ri_first->[$l];
20134                         next if terminal_type_i( $i1, $i2 ) eq ',';
20135                     }
20136                 }
20137
20138                 # SPECIAL CHECK 2:
20139                 # a minus may introduce a quoted variable, and we will
20140                 # add the pad only if this line begins with a bare word,
20141                 # such as for the word 'Button' here:
20142                 #    [
20143                 #         Button      => "Print letter \"~$_\"",
20144                 #        -command     => [ sub { print "$_[0]\n" }, $_ ],
20145                 #        -accelerator => "Meta+$_"
20146                 #    ];
20147                 #
20148                 #  On the other hand, if 'Button' is quoted, it looks best
20149                 #  not to pad:
20150                 #    [
20151                 #        'Button'     => "Print letter \"~$_\"",
20152                 #        -command     => [ sub { print "$_[0]\n" }, $_ ],
20153                 #        -accelerator => "Meta+$_"
20154                 #    ];
20155                 if ( $types_to_go[$ibeg_next] eq 'm' ) {
20156                     $ok_to_pad = 0 if $types_to_go[$ibeg] eq 'Q';
20157                 }
20158
20159                 next unless $ok_to_pad;
20160
20161                 #----------------------end special check---------------
20162
20163                 my $length_1 = total_line_length( $ibeg,      $ipad - 1 );
20164                 my $length_2 = total_line_length( $ibeg_next, $inext_next - 1 );
20165                 $pad_spaces = $length_2 - $length_1;
20166
20167                 # If the first line has a leading ! and the second does
20168                 # not, then remove one space to try to align the next
20169                 # leading characters, which are often the same.  For example:
20170                 #  if (  !$ts
20171                 #      || $ts == $self->Holder
20172                 #      || $self->Holder->Type eq "Arena" )
20173                 #
20174                 # This usually helps readability, but if there are subsequent
20175                 # ! operators things will still get messed up.  For example:
20176                 #
20177                 #  if (  !exists $Net::DNS::typesbyname{$qtype}
20178                 #      && exists $Net::DNS::classesbyname{$qtype}
20179                 #      && !exists $Net::DNS::classesbyname{$qclass}
20180                 #      && exists $Net::DNS::typesbyname{$qclass} )
20181                 # We can't fix that.
20182                 if ($matches_without_bang) { $pad_spaces-- }
20183
20184                 # make sure this won't change if -lp is used
20185                 my $indentation_1 = $leading_spaces_to_go[$ibeg];
20186                 if ( ref($indentation_1) ) {
20187                     if ( $indentation_1->get_recoverable_spaces() == 0 ) {
20188                         my $indentation_2 = $leading_spaces_to_go[$ibeg_next];
20189                         unless ( $indentation_2->get_recoverable_spaces() == 0 )
20190                         {
20191                             $pad_spaces = 0;
20192                         }
20193                     }
20194                 }
20195
20196                 # we might be able to handle a pad of -1 by removing a blank
20197                 # token
20198                 if ( $pad_spaces < 0 ) {
20199
20200                     # Deactivated for -kpit due to conflict. This block deletes
20201                     # a space in an attempt to improve alignment in some cases,
20202                     # but it may conflict with user spacing requests.  For now
20203                     # it is just deactivated if the -kpit option is used.
20204                     if ( $pad_spaces == -1 ) {
20205                         if (   $ipad > $ibeg
20206                             && $types_to_go[ $ipad - 1 ] eq 'b'
20207                             && !%keyword_paren_inner_tightness )
20208                         {
20209                             $self->pad_token( $ipad - 1, $pad_spaces );
20210                         }
20211                     }
20212                     $pad_spaces = 0;
20213                 }
20214
20215                 # now apply any padding for alignment
20216                 if ( $ipad >= 0 && $pad_spaces ) {
20217
20218                     my $length_t = total_line_length( $ibeg, $iend );
20219                     if ( $pad_spaces + $length_t <=
20220                         $maximum_line_length_at_level[ $levels_to_go[$ibeg] ] )
20221                     {
20222                         $self->pad_token( $ipad, $pad_spaces );
20223                     }
20224                 }
20225             }
20226         }
20227         continue {
20228             $iendm          = $iend;
20229             $ibegm          = $ibeg;
20230             $has_leading_op = $has_leading_op_next;
20231         }    # end of loop over lines
20232         return;
20233     }
20234 } ## end closure set_logical_padding
20235
20236 sub pad_token {
20237
20238     # insert $pad_spaces before token number $ipad
20239     my ( $self, $ipad, $pad_spaces ) = @_;
20240     my $rLL     = $self->[_rLL_];
20241     my $KK      = $K_to_go[$ipad];
20242     my $tok     = $rLL->[$KK]->[_TOKEN_];
20243     my $tok_len = $rLL->[$KK]->[_TOKEN_LENGTH_];
20244
20245     if ( $pad_spaces > 0 ) {
20246         $tok = ' ' x $pad_spaces . $tok;
20247         $tok_len += $pad_spaces;
20248     }
20249     elsif ( $pad_spaces == -1 && $tokens_to_go[$ipad] eq ' ' ) {
20250         $tok     = "";
20251         $tok_len = 0;
20252     }
20253     else {
20254
20255         # shouldn't happen
20256         return;
20257     }
20258
20259     $tok     = $rLL->[$KK]->[_TOKEN_]        = $tok;
20260     $tok_len = $rLL->[$KK]->[_TOKEN_LENGTH_] = $tok_len;
20261
20262     $token_lengths_to_go[$ipad] += $pad_spaces;
20263     $tokens_to_go[$ipad] = $tok;
20264
20265     foreach my $i ( $ipad .. $max_index_to_go ) {
20266         $summed_lengths_to_go[ $i + 1 ] += $pad_spaces;
20267     }
20268     return;
20269 }
20270
20271 {    ## begin closure make_alignment_patterns
20272
20273     my %block_type_map;
20274     my %keyword_map;
20275     my %operator_map;
20276     my %is_w_n_C;
20277
20278     BEGIN {
20279
20280         # map related block names into a common name to
20281         # allow alignment
20282         %block_type_map = (
20283             'unless'  => 'if',
20284             'else'    => 'if',
20285             'elsif'   => 'if',
20286             'when'    => 'if',
20287             'default' => 'if',
20288             'case'    => 'if',
20289             'sort'    => 'map',
20290             'grep'    => 'map',
20291         );
20292
20293         # map certain keywords to the same 'if' class to align
20294         # long if/elsif sequences. [elsif.pl]
20295         %keyword_map = (
20296             'unless'  => 'if',
20297             'else'    => 'if',
20298             'elsif'   => 'if',
20299             'when'    => 'given',
20300             'default' => 'given',
20301             'case'    => 'switch',
20302
20303             # treat an 'undef' similar to numbers and quotes
20304             'undef' => 'Q',
20305         );
20306
20307         # map certain operators to the same class for pattern matching
20308         %operator_map = (
20309             '!~' => '=~',
20310             '+=' => '+=',
20311             '-=' => '+=',
20312             '*=' => '+=',
20313             '/=' => '+=',
20314         );
20315
20316         %is_w_n_C = (
20317             'w' => 1,
20318             'n' => 1,
20319             'C' => 1,
20320         );
20321     }
20322
20323     sub delete_needless_alignments {
20324         my ( $self, $ibeg, $iend, $ralignment_type_to_go ) = @_;
20325
20326         # Remove unwanted alignments.  This routine is a place to remove
20327         # alignments which might cause problems at later stages.  There are
20328         # currently two types of fixes:
20329
20330         # 1. Remove excess parens
20331         # 2. Remove alignments within 'elsif' conditions
20332
20333         # Patch #1: Excess alignment of parens can prevent other good
20334         # alignments.  For example, note the parens in the first two rows of
20335         # the following snippet.  They would normally get marked for alignment
20336         # and aligned as follows:
20337
20338         #    my $w = $columns * $cell_w + ( $columns + 1 ) * $border;
20339         #    my $h = $rows * $cell_h +    ( $rows + 1 ) * $border;
20340         #    my $img = new Gimp::Image( $w, $h, RGB );
20341
20342         # This causes unnecessary paren alignment and prevents the third equals
20343         # from aligning. If we remove the unwanted alignments we get:
20344
20345         #    my $w   = $columns * $cell_w + ( $columns + 1 ) * $border;
20346         #    my $h   = $rows * $cell_h + ( $rows + 1 ) * $border;
20347         #    my $img = new Gimp::Image( $w, $h, RGB );
20348
20349         # A rule for doing this which works well is to remove alignment of
20350         # parens whose containers do not contain other aligning tokens, with
20351         # the exception that we always keep alignment of the first opening
20352         # paren on a line (for things like 'if' and 'elsif' statements).
20353
20354         # Setup needed constants
20355         my $i_good_paren  = -1;
20356         my $imin_match    = $iend + 1;
20357         my $i_elsif_close = $ibeg - 1;
20358         my $i_elsif_open  = $iend + 1;
20359         if ( $iend > $ibeg ) {
20360             if ( $types_to_go[$ibeg] eq 'k' ) {
20361
20362                 # Paren patch: mark a location of a paren we should keep, such
20363                 # as one following something like a leading 'if', 'elsif',..
20364                 $i_good_paren = $ibeg + 1;
20365                 if ( $types_to_go[$i_good_paren] eq 'b' ) {
20366                     $i_good_paren++;
20367                 }
20368
20369                 # 'elsif' patch: remember the range of the parens of an elsif,
20370                 # and do not make alignments within them because this can cause
20371                 # loss of padding and overall brace alignment in the vertical
20372                 # aligner.
20373                 if (   $tokens_to_go[$ibeg] eq 'elsif'
20374                     && $i_good_paren < $iend
20375                     && $tokens_to_go[$i_good_paren] eq '(' )
20376                 {
20377                     $i_elsif_open  = $i_good_paren;
20378                     $i_elsif_close = $mate_index_to_go[$i_good_paren];
20379                 }
20380             }
20381         }
20382
20383         # Loop to make the fixes on this line
20384         my @imatch_list;
20385         for my $i ( $ibeg .. $iend ) {
20386
20387             if ( $ralignment_type_to_go->[$i] ) {
20388
20389                 # Patch #2: undo alignment within elsif parens
20390                 if ( $i > $i_elsif_open && $i < $i_elsif_close ) {
20391                     $ralignment_type_to_go->[$i] = '';
20392                     next;
20393                 }
20394                 push @imatch_list, $i;
20395
20396             }
20397             if ( $tokens_to_go[$i] eq ')' ) {
20398
20399                 # Patch #1: undo the corresponding opening paren if:
20400                 # - it is at the top of the stack
20401                 # - and not the first overall opening paren
20402                 # - does not follow a leading keyword on this line
20403                 my $imate = $mate_index_to_go[$i];
20404                 if (   @imatch_list
20405                     && $imatch_list[-1] eq $imate
20406                     && ( $ibeg > 1 || @imatch_list > 1 )
20407                     && $imate > $i_good_paren )
20408                 {
20409                     $ralignment_type_to_go->[$imate] = '';
20410                     pop @imatch_list;
20411                 }
20412             }
20413         }
20414         return;
20415     }
20416
20417     sub make_alignment_patterns {
20418
20419         # Here we do some important preliminary work for the
20420         # vertical aligner.  We create three arrays for one
20421         # output line. These arrays contain strings that can
20422         # be tested by the vertical aligner to see if
20423         # consecutive lines can be aligned vertically.
20424         #
20425         # The three arrays are indexed on the vertical
20426         # alignment fields and are:
20427         # @tokens - a list of any vertical alignment tokens for this line.
20428         #   These are tokens, such as '=' '&&' '#' etc which
20429         #   we want to might align vertically.  These are
20430         #   decorated with various information such as
20431         #   nesting depth to prevent unwanted vertical
20432         #   alignment matches.
20433         # @fields - the actual text of the line between the vertical alignment
20434         #   tokens.
20435         # @patterns - a modified list of token types, one for each alignment
20436         #   field.  These should normally each match before alignment is
20437         #   allowed, even when the alignment tokens match.
20438         my ( $self, $ibeg, $iend, $ralignment_type_to_go ) = @_;
20439         my @tokens        = ();
20440         my @fields        = ();
20441         my @patterns      = ();
20442         my @field_lengths = ();
20443         my $i_start       = $ibeg;
20444
20445         # For a 'use' statement, use the module name as container name.
20446         # Fixes issue rt136416.
20447         my $cname = "";
20448         if ( $types_to_go[$ibeg] eq 'k' && $tokens_to_go[$ibeg] eq 'use' ) {
20449             my $inext = $inext_to_go[$ibeg];
20450             if ( $inext <= $iend ) { $cname = $tokens_to_go[$inext] }
20451         }
20452
20453         my $depth          = 0;
20454         my %container_name = ( 0 => "$cname" );
20455
20456         my $j = 0;    # field index
20457
20458         $patterns[0] = "";
20459         my %token_count;
20460         for my $i ( $ibeg .. $iend ) {
20461
20462             # Keep track of containers balanced on this line only.
20463             # These are used below to prevent unwanted cross-line alignments.
20464             # Unbalanced containers already avoid aligning across
20465             # container boundaries.
20466
20467             my $type       = $types_to_go[$i];
20468             my $token      = $tokens_to_go[$i];
20469             my $depth_last = $depth;
20470             if ( $type_sequence_to_go[$i] ) {
20471                 if ( $is_opening_type{$token} ) {
20472
20473                     # if container is balanced on this line...
20474                     my $i_mate = $mate_index_to_go[$i];
20475                     if ( $i_mate > $i && $i_mate <= $iend ) {
20476                         $depth++;
20477
20478                      # Append the previous token name to make the container name
20479                      # more unique.  This name will also be given to any commas
20480                      # within this container, and it helps avoid undesirable
20481                      # alignments of different types of containers.
20482
20483                      # Containers beginning with { and [ are given those names
20484                      # for uniqueness. That way commas in different containers
20485                      # will not match. Here is an example of what this prevents:
20486                      #  a => [ 1,       2, 3 ],
20487                      #   b => { b1 => 4, b2 => 5 },
20488                      # Here is another example of what we avoid by labeling the
20489                      # commas properly:
20490
20491                    # is_d( [ $a,        $a ], [ $b,               $c ] );
20492                    # is_d( { foo => $a, bar => $a }, { foo => $b, bar => $c } );
20493                    # is_d( [ \$a,       \$a ], [ \$b,             \$c ] );
20494
20495                         my $name = $token;
20496                         if ( $token eq '(' ) {
20497                             $name = $self->make_paren_name($i);
20498                         }
20499                         $container_name{$depth} = "+" . $name;
20500
20501                        # Make the container name even more unique if necessary.
20502                        # If we are not vertically aligning this opening paren,
20503                        # append a character count to avoid bad alignment because
20504                        # it usually looks bad to align commas within containers
20505                        # for which the opening parens do not align.  Here
20506                        # is an example very BAD alignment of commas (because
20507                        # the atan2 functions are not all aligned):
20508                        #    $XY =
20509                        #      $X * $RTYSQP1 * atan2( $X, $RTYSQP1 ) +
20510                        #      $Y * $RTXSQP1 * atan2( $Y, $RTXSQP1 ) -
20511                        #      $X * atan2( $X,            1 ) -
20512                        #      $Y * atan2( $Y,            1 );
20513                        #
20514                        # On the other hand, it is usually okay to align commas
20515                        # if opening parens align, such as:
20516                        #    glVertex3d( $cx + $s * $xs, $cy,            $z );
20517                        #    glVertex3d( $cx,            $cy + $s * $ys, $z );
20518                        #    glVertex3d( $cx - $s * $xs, $cy,            $z );
20519                        #    glVertex3d( $cx,            $cy - $s * $ys, $z );
20520                        #
20521                        # To distinguish between these situations, we will append
20522                        # the length of the line from the previous matching
20523                        # token, or beginning of line, to the function name.
20524                        # This will allow the vertical aligner to reject
20525                        # undesirable matches.
20526
20527                         # if we are not aligning on this paren...
20528                         if ( !$ralignment_type_to_go->[$i] ) {
20529
20530                             # Sum length from previous alignment
20531                             my $len = token_sequence_length( $i_start, $i - 1 );
20532
20533                             # Minor patch: do not include the length of any '!'.
20534                             # Otherwise, commas in the following line will not
20535                             # match
20536                             #  ok( 20, tapprox( ( pdl 2,  3 ), ( pdl 2, 3 ) ) );
20537                             #  ok( 21, !tapprox( ( pdl 2, 3 ), ( pdl 2, 4 ) ) );
20538                             if ( grep { $_ eq '!' }
20539                                 @types_to_go[ $i_start .. $i - 1 ] )
20540                             {
20541                                 $len -= 1;
20542                             }
20543
20544                             if ( $i_start == $ibeg ) {
20545
20546                               # For first token, use distance from start of line
20547                               # but subtract off the indentation due to level.
20548                               # Otherwise, results could vary with indentation.
20549                                 $len +=
20550                                   leading_spaces_to_go($ibeg) -
20551                                   $levels_to_go[$i_start] *
20552                                   $rOpts_indent_columns;
20553                                 if ( $len < 0 ) { $len = 0 }
20554                             }
20555
20556                             # tack this length onto the container name to try
20557                             # to make a unique token name
20558                             $container_name{$depth} .= "-" . $len;
20559                         }
20560                     }
20561                 }
20562                 elsif ( $is_closing_type{$token} ) {
20563                     $depth-- if $depth > 0;
20564                 }
20565             }
20566
20567             # if we find a new synchronization token, we are done with
20568             # a field
20569             if ( $i > $i_start && $ralignment_type_to_go->[$i] ) {
20570
20571                 my $tok = my $raw_tok = $ralignment_type_to_go->[$i];
20572
20573                 # map similar items
20574                 my $tok_map = $operator_map{$tok};
20575                 $tok = $tok_map if ($tok_map);
20576
20577                 # make separators in different nesting depths unique
20578                 # by appending the nesting depth digit.
20579                 if ( $raw_tok ne '#' ) {
20580                     $tok .= "$nesting_depth_to_go[$i]";
20581                 }
20582
20583                 # also decorate commas with any container name to avoid
20584                 # unwanted cross-line alignments.
20585                 if ( $raw_tok eq ',' || $raw_tok eq '=>' ) {
20586
20587                   # If we are at an opening token which increased depth, we have
20588                   # to use the name from the previous depth.
20589                     my $depth_p =
20590                       ( $depth_last < $depth ? $depth_last : $depth );
20591                     if ( $container_name{$depth_p} ) {
20592                         $tok .= $container_name{$depth_p};
20593                     }
20594                 }
20595
20596                 # Patch to avoid aligning leading and trailing if, unless.
20597                 # Mark trailing if, unless statements with container names.
20598                 # This makes them different from leading if, unless which
20599                 # are not so marked at present.  If we ever need to name
20600                 # them too, we could use ci to distinguish them.
20601                 # Example problem to avoid:
20602                 #    return ( 2, "DBERROR" )
20603                 #      if ( $retval == 2 );
20604                 #    if   ( scalar @_ ) {
20605                 #        my ( $a, $b, $c, $d, $e, $f ) = @_;
20606                 #    }
20607                 if ( $raw_tok eq '(' ) {
20608                     if (   $ci_levels_to_go[$ibeg]
20609                         && $container_name{$depth} =~ /^\+(if|unless)/ )
20610                     {
20611                         $tok .= $container_name{$depth};
20612                     }
20613                 }
20614
20615                 # Decorate block braces with block types to avoid
20616                 # unwanted alignments such as the following:
20617                 # foreach ( @{$routput_array} ) { $fh->print($_) }
20618                 # eval                          { $fh->close() };
20619                 if ( $raw_tok eq '{' && $block_type_to_go[$i] ) {
20620                     my $block_type = $block_type_to_go[$i];
20621
20622                     # map certain related block types to allow
20623                     # else blocks to align
20624                     $block_type = $block_type_map{$block_type}
20625                       if ( defined( $block_type_map{$block_type} ) );
20626
20627                     # remove sub names to allow one-line sub braces to align
20628                     # regardless of name
20629                     if ( $block_type =~ /$SUB_PATTERN/ ) { $block_type = 'sub' }
20630
20631                     # allow all control-type blocks to align
20632                     if ( $block_type =~ /^[A-Z]+$/ ) { $block_type = 'BEGIN' }
20633
20634                     $tok .= $block_type;
20635                 }
20636
20637                 # Mark multiple copies of certain tokens with the copy number
20638                 # This will allow the aligner to decide if they are matched.
20639                 # For now, only do this for equals. For example, the two
20640                 # equals on the next line will be labeled '=0' and '=0.2'.
20641                 # Later, the '=0.2' will be ignored in alignment because it
20642                 # has no match.
20643
20644                 # $|          = $debug = 1 if $opt_d;
20645                 # $full_index = 1          if $opt_i;
20646
20647                 if ( $raw_tok eq '=' || $raw_tok eq '=>' ) {
20648                     $token_count{$tok}++;
20649                     if ( $token_count{$tok} > 1 ) {
20650                         $tok .= '.' . $token_count{$tok};
20651                     }
20652                 }
20653
20654                 # concatenate the text of the consecutive tokens to form
20655                 # the field
20656                 push( @fields,
20657                     join( '', @tokens_to_go[ $i_start .. $i - 1 ] ) );
20658
20659                 push @field_lengths,
20660                   $summed_lengths_to_go[$i] - $summed_lengths_to_go[$i_start];
20661
20662                 # store the alignment token for this field
20663                 push( @tokens, $tok );
20664
20665                 # get ready for the next batch
20666                 $i_start = $i;
20667                 $j++;
20668                 $patterns[$j] = "";
20669             }
20670
20671             # continue accumulating tokens
20672
20673             # for keywords we have to use the actual text
20674             if ( $type eq 'k' ) {
20675
20676                 my $tok_fix = $tokens_to_go[$i];
20677
20678                 # but map certain keywords to a common string to allow
20679                 # alignment.
20680                 $tok_fix = $keyword_map{$tok_fix}
20681                   if ( defined( $keyword_map{$tok_fix} ) );
20682                 $patterns[$j] .= $tok_fix;
20683             }
20684
20685             elsif ( $type eq 'b' ) {
20686                 $patterns[$j] .= $type;
20687             }
20688
20689             # handle non-keywords..
20690             else {
20691
20692                 my $type_fix = $type;
20693
20694                 # Mark most things before arrows as a quote to
20695                 # get them to line up. Testfile: mixed.pl.
20696                 #                      $type =~ /^[wnC]$/
20697                 if ( $i < $iend - 1 && $is_w_n_C{$type} ) {
20698                     my $next_type = $types_to_go[ $i + 1 ];
20699                     my $i_next_nonblank =
20700                       ( ( $next_type eq 'b' ) ? $i + 2 : $i + 1 );
20701
20702                     if ( $types_to_go[$i_next_nonblank] eq '=>' ) {
20703                         $type_fix = 'Q';
20704
20705                         # Patch to ignore leading minus before words,
20706                         # by changing pattern 'mQ' into just 'Q',
20707                         # so that we can align things like this:
20708                         #  Button   => "Print letter \"~$_\"",
20709                         #  -command => [ sub { print "$_[0]\n" }, $_ ],
20710                         if ( $patterns[$j] eq 'm' ) { $patterns[$j] = "" }
20711                     }
20712                 }
20713
20714                 # Convert a bareword within braces into a quote for matching.
20715                 # This will allow alignment of expressions like this:
20716                 #    local ( $SIG{'INT'} ) = IGNORE;
20717                 #    local ( $SIG{ALRM} )  = 'POSTMAN';
20718                 if (   $type eq 'w'
20719                     && $i > $ibeg
20720                     && $i < $iend
20721                     && $types_to_go[ $i - 1 ] eq 'L'
20722                     && $types_to_go[ $i + 1 ] eq 'R' )
20723                 {
20724                     $type_fix = 'Q';
20725                 }
20726
20727                 # patch to make numbers and quotes align
20728                 if ( $type eq 'n' ) { $type_fix = 'Q' }
20729
20730                 # patch to ignore any ! in patterns
20731                 if ( $type eq '!' ) { $type_fix = '' }
20732
20733                 $patterns[$j] .= $type_fix;
20734
20735                 # remove any zero-level name at first fat comma
20736                 if ( $depth == 0 && $type eq '=>' ) {
20737                     $container_name{$depth} = "";
20738                 }
20739
20740             }
20741         }
20742
20743         # done with this line .. join text of tokens to make the last field
20744         push( @fields, join( '', @tokens_to_go[ $i_start .. $iend ] ) );
20745         push @field_lengths,
20746           $summed_lengths_to_go[ $iend + 1 ] - $summed_lengths_to_go[$i_start];
20747
20748         return ( \@tokens, \@fields, \@patterns, \@field_lengths );
20749     }
20750
20751 } ## end closure make_alignment_patterns
20752
20753 sub make_paren_name {
20754     my ( $self, $i ) = @_;
20755
20756     # The token at index $i is a '('.
20757     # Create an alignment name for it to avoid incorrect alignments.
20758
20759     # Start with the name of the previous nonblank token...
20760     my $name = "";
20761     my $im   = $i - 1;
20762     return "" if ( $im < 0 );
20763     if ( $types_to_go[$im] eq 'b' ) { $im--; }
20764     return "" if ( $im < 0 );
20765     $name = $tokens_to_go[$im];
20766
20767     # Prepend any sub name to an isolated -> to avoid unwanted alignments
20768     # [test case is test8/penco.pl]
20769     if ( $name eq '->' ) {
20770         $im--;
20771         if ( $im >= 0 && $types_to_go[$im] ne 'b' ) {
20772             $name = $tokens_to_go[$im] . $name;
20773         }
20774     }
20775
20776     # Finally, remove any leading arrows
20777     if ( substr( $name, 0, 2 ) eq '->' ) {
20778         $name = substr( $name, 2 );
20779     }
20780     return $name;
20781 }
20782
20783 {    ## begin closure set_adjusted_indentation
20784
20785     my ( $last_indentation_written, $last_unadjusted_indentation,
20786         $last_leading_token );
20787
20788     sub initialize_adjusted_indentation {
20789         $last_indentation_written    = 0;
20790         $last_unadjusted_indentation = 0;
20791         $last_leading_token          = "";
20792         return;
20793     }
20794
20795     sub set_adjusted_indentation {
20796
20797         # This routine has the final say regarding the actual indentation of
20798         # a line.  It starts with the basic indentation which has been
20799         # defined for the leading token, and then takes into account any
20800         # options that the user has set regarding special indenting and
20801         # outdenting.
20802
20803         # This routine has to resolve a number of complex interacting issues,
20804         # including:
20805         # 1. The various -cti=n type flags, which contain the desired change in
20806         #    indentation for lines ending in commas and semicolons, should be
20807         #    followed,
20808         # 2. qw quotes require special processing and do not fit perfectly
20809         #    with normal containers,
20810         # 3. formatting with -wn can complicate things, especially with qw
20811         #    quotes,
20812         # 4. formatting with the -lp option is complicated, and does not
20813         #    work well with qw quotes and with -wn formatting.
20814         # 5. a number of special situations, such as 'cuddled' formatting.
20815         # 6. This routine is mainly concerned with outdenting closing tokens
20816         #    but note that there is some overlap with the functions of sub
20817         #    undo_ci, which was processed earlier, so care has to be taken to
20818         #    keep them coordinated.
20819
20820         my (
20821             $self,       $ibeg,
20822             $iend,       $rfields,
20823             $rpatterns,  $ri_first,
20824             $ri_last,    $rindentation_list,
20825             $level_jump, $starting_in_quote,
20826             $is_static_block_comment,
20827         ) = @_;
20828
20829         my $rLL                      = $self->[_rLL_];
20830         my $ris_bli_container        = $self->[_ris_bli_container_];
20831         my $rseqno_controlling_my_ci = $self->[_rseqno_controlling_my_ci_];
20832         my $rwant_reduced_ci         = $self->[_rwant_reduced_ci_];
20833         my $rK_weld_left             = $self->[_rK_weld_left_];
20834
20835         # we need to know the last token of this line
20836         my ( $terminal_type, $i_terminal ) = terminal_type_i( $ibeg, $iend );
20837
20838         my $terminal_block_type = $block_type_to_go[$i_terminal];
20839         my $is_outdented_line   = 0;
20840
20841         my $terminal_is_in_list = $self->is_in_list_by_i($i_terminal);
20842
20843         my $type_beg      = $types_to_go[$ibeg];
20844         my $token_beg     = $tokens_to_go[$ibeg];
20845         my $K_beg         = $K_to_go[$ibeg];
20846         my $ibeg_weld_fix = $ibeg;
20847         my $seqno_beg     = $type_sequence_to_go[$ibeg];
20848         my $is_bli_beg    = $seqno_beg ? $ris_bli_container->{$seqno_beg} : 0;
20849
20850         # QW INDENTATION PATCH 3:
20851         my $seqno_qw_closing;
20852         if ( $type_beg eq 'q' && $ibeg == 0 ) {
20853             my $KK = $K_to_go[$ibeg];
20854             $seqno_qw_closing =
20855               $self->[_rending_multiline_qw_seqno_by_K_]->{$KK};
20856         }
20857
20858         my $is_semicolon_terminated = $terminal_type eq ';'
20859           && ( $nesting_depth_to_go[$iend] < $nesting_depth_to_go[$ibeg]
20860             || $seqno_qw_closing );
20861
20862         # NOTE: A future improvement would be to make it semicolon terminated
20863         # even if it does not have a semicolon but is followed by a closing
20864         # block brace. This would undo ci even for something like the
20865         # following, in which the final paren does not have a semicolon because
20866         # it is a possible weld location:
20867
20868         # if ($BOLD_MATH) {
20869         #     (
20870         #         $labels, $comment,
20871         #         join( '', '<B>', &make_math( $mode, '', '', $_ ), '</B>' )
20872         #     )
20873         # }
20874         #
20875
20876         # MOJO: Set a flag if this lines begins with ')->'
20877         my $leading_paren_arrow = (
20878                  $types_to_go[$ibeg] eq '}'
20879               && $tokens_to_go[$ibeg] eq ')'
20880               && (
20881                 ( $ibeg < $i_terminal && $types_to_go[ $ibeg + 1 ] eq '->' )
20882                 || (   $ibeg < $i_terminal - 1
20883                     && $types_to_go[ $ibeg + 1 ] eq 'b'
20884                     && $types_to_go[ $ibeg + 2 ] eq '->' )
20885               )
20886         );
20887
20888         ##########################################################
20889         # Section 1: set a flag and a default indentation
20890         #
20891         # Most lines are indented according to the initial token.
20892         # But it is common to outdent to the level just after the
20893         # terminal token in certain cases...
20894         # adjust_indentation flag:
20895         #       0 - do not adjust
20896         #       1 - outdent
20897         #       2 - vertically align with opening token
20898         #       3 - indent
20899         ##########################################################
20900         my $adjust_indentation         = 0;
20901         my $default_adjust_indentation = $adjust_indentation;
20902
20903         my (
20904             $opening_indentation, $opening_offset,
20905             $is_leading,          $opening_exists
20906         );
20907
20908         # Honor any flag to reduce -ci set by the -bbxi=n option
20909         if ( $seqno_beg && $rwant_reduced_ci->{$seqno_beg} ) {
20910
20911             # if this is an opening, it must be alone on the line ...
20912             if ( $is_closing_type{$type_beg} || $ibeg == $i_terminal ) {
20913                 $adjust_indentation = 1;
20914             }
20915
20916             # ... or a single welded unit (fix for b1173)
20917             elsif ($total_weld_count) {
20918                 my $Kterm      = $K_to_go[$i_terminal];
20919                 my $Kterm_test = $rK_weld_left->{$Kterm};
20920                 if ( defined($Kterm_test) && $Kterm_test >= $K_beg ) {
20921                     $Kterm = $Kterm_test;
20922                 }
20923                 if ( $Kterm == $K_beg ) { $adjust_indentation = 1 }
20924             }
20925         }
20926
20927         # Update the $is_bli flag as we go. It is initially 1.
20928         # We note seeing a leading opening brace by setting it to 2.
20929         # If we get to the closing brace without seeing the opening then we
20930         # turn it off.  This occurs if the opening brace did not get output
20931         # at the start of a line, so we will then indent the closing brace
20932         # in the default way.
20933         if ( $is_bli_beg && $is_bli_beg == 1 ) {
20934             my $K_opening_container = $self->[_K_opening_container_];
20935             my $K_opening           = $K_opening_container->{$seqno_beg};
20936             if ( $K_beg eq $K_opening ) {
20937                 $ris_bli_container->{$seqno_beg} = $is_bli_beg = 2;
20938             }
20939             else { $is_bli_beg = 0 }
20940         }
20941
20942         # QW PATCH for the combination -lp -wn
20943         # For -lp formatting use $ibeg_weld_fix to get around the problem
20944         # that with -lp type formatting the opening and closing tokens to not
20945         # have sequence numbers.
20946         if ( $seqno_qw_closing && $total_weld_count ) {
20947             my $K_next_nonblank = $self->K_next_code($K_beg);
20948             if (   defined($K_next_nonblank)
20949                 && defined( $rK_weld_left->{$K_next_nonblank} ) )
20950             {
20951                 my $itest = $ibeg + ( $K_next_nonblank - $K_beg );
20952                 if ( $itest <= $max_index_to_go ) {
20953                     $ibeg_weld_fix = $itest;
20954                 }
20955             }
20956         }
20957
20958         # if we are at a closing token of some type..
20959         if ( $is_closing_type{$type_beg} || $seqno_qw_closing ) {
20960
20961             # get the indentation of the line containing the corresponding
20962             # opening token
20963             (
20964                 $opening_indentation, $opening_offset,
20965                 $is_leading,          $opening_exists
20966               )
20967               = $self->get_opening_indentation( $ibeg_weld_fix, $ri_first,
20968                 $ri_last, $rindentation_list, $seqno_qw_closing );
20969
20970             # First set the default behavior:
20971             if (
20972
20973                 # default behavior is to outdent closing lines
20974                 # of the form:   ");  };  ];  )->xxx;"
20975                 $is_semicolon_terminated
20976
20977                 # and 'cuddled parens' of the form:   ")->pack("
20978                 # Bug fix for RT #123749]: the types here were
20979                 # incorrectly '(' and ')'.  Corrected to be '{' and '}'
20980                 || (
20981                        $terminal_type eq '{'
20982                     && $type_beg eq '}'
20983                     && ( $nesting_depth_to_go[$iend] + 1 ==
20984                         $nesting_depth_to_go[$ibeg] )
20985                 )
20986
20987                 # remove continuation indentation for any line like
20988                 #       } ... {
20989                 # or without ending '{' and unbalanced, such as
20990                 #       such as '}->{$operator}'
20991                 || (
20992                     $type_beg eq '}'
20993
20994                     && (   $types_to_go[$iend] eq '{'
20995                         || $levels_to_go[$iend] < $levels_to_go[$ibeg] )
20996                 )
20997
20998                 # and when the next line is at a lower indentation level...
20999
21000                 # PATCH #1: and only if the style allows undoing continuation
21001                 # for all closing token types. We should really wait until
21002                 # the indentation of the next line is known and then make
21003                 # a decision, but that would require another pass.
21004
21005                 # PATCH #2: and not if this token is under -xci control
21006                 || (   $level_jump < 0
21007                     && !$some_closing_token_indentation
21008                     && !$rseqno_controlling_my_ci->{$K_beg} )
21009
21010                 # Patch for -wn=2, multiple welded closing tokens
21011                 || (   $i_terminal > $ibeg
21012                     && $is_closing_type{ $types_to_go[$iend] } )
21013
21014                 # Alternate Patch for git #51, isolated closing qw token not
21015                 # outdented if no-delete-old-newlines is set. This works, but
21016                 # a more general patch elsewhere fixes the real problem: ljump.
21017                 # || ( $seqno_qw_closing && $ibeg == $i_terminal )
21018
21019               )
21020             {
21021                 $adjust_indentation = 1;
21022             }
21023
21024             # outdent something like '),'
21025             if (
21026                 $terminal_type eq ','
21027
21028                 # Removed this constraint for -wn
21029                 # OLD: allow just one character before the comma
21030                 # && $i_terminal == $ibeg + 1
21031
21032                 # require LIST environment; otherwise, we may outdent too much -
21033                 # this can happen in calls without parentheses (overload.t);
21034                 && $terminal_is_in_list
21035               )
21036             {
21037                 $adjust_indentation = 1;
21038             }
21039
21040             # undo continuation indentation of a terminal closing token if
21041             # it is the last token before a level decrease.  This will allow
21042             # a closing token to line up with its opening counterpart, and
21043             # avoids an indentation jump larger than 1 level.
21044             if (   $types_to_go[$i_terminal] =~ /^[\}\]\)R]$/
21045                 && $i_terminal == $ibeg
21046                 && defined($K_beg) )
21047             {
21048                 my $K_next_nonblank = $self->K_next_code($K_beg);
21049
21050                 if ( !$is_bli_beg && defined($K_next_nonblank) ) {
21051                     my $lev        = $rLL->[$K_beg]->[_LEVEL_];
21052                     my $level_next = $rLL->[$K_next_nonblank]->[_LEVEL_];
21053
21054                     # and do not undo ci if it was set by the -xci option
21055                     $adjust_indentation = 1
21056                       if ( $level_next < $lev
21057                         && !$rseqno_controlling_my_ci->{$K_beg} );
21058                 }
21059
21060                 # Patch for RT #96101, in which closing brace of anonymous subs
21061                 # was not outdented.  We should look ahead and see if there is
21062                 # a level decrease at the next token (i.e., a closing token),
21063                 # but right now we do not have that information.  For now
21064                 # we see if we are in a list, and this works well.
21065                 # See test files 'sub*.t' for good test cases.
21066                 if (   $block_type_to_go[$ibeg] =~ /$ASUB_PATTERN/
21067                     && $terminal_is_in_list
21068                     && !$rOpts->{'indent-closing-brace'} )
21069                 {
21070                     (
21071                         $opening_indentation, $opening_offset,
21072                         $is_leading,          $opening_exists
21073                       )
21074                       = $self->get_opening_indentation( $ibeg, $ri_first,
21075                         $ri_last, $rindentation_list );
21076                     my $indentation = $leading_spaces_to_go[$ibeg];
21077                     if ( defined($opening_indentation)
21078                         && get_spaces($indentation) >
21079                         get_spaces($opening_indentation) )
21080                     {
21081                         $adjust_indentation = 1;
21082                     }
21083                 }
21084             }
21085
21086             # YVES patch 1 of 2:
21087             # Undo ci of line with leading closing eval brace,
21088             # but not beyond the indention of the line with
21089             # the opening brace.
21090             if (   $block_type_to_go[$ibeg] eq 'eval'
21091                 && !$rOpts->{'line-up-parentheses'}
21092                 && !$rOpts->{'indent-closing-brace'} )
21093             {
21094                 (
21095                     $opening_indentation, $opening_offset,
21096                     $is_leading,          $opening_exists
21097                   )
21098                   = $self->get_opening_indentation( $ibeg, $ri_first, $ri_last,
21099                     $rindentation_list );
21100                 my $indentation = $leading_spaces_to_go[$ibeg];
21101                 if ( defined($opening_indentation)
21102                     && get_spaces($indentation) >
21103                     get_spaces($opening_indentation) )
21104                 {
21105                     $adjust_indentation = 1;
21106                 }
21107             }
21108
21109             # patch for issue git #40: -bli setting has priority
21110             $adjust_indentation = 0 if ($is_bli_beg);
21111
21112             $default_adjust_indentation = $adjust_indentation;
21113
21114             # Now modify default behavior according to user request:
21115             # handle option to indent non-blocks of the form );  };  ];
21116             # But don't do special indentation to something like ')->pack('
21117             if ( !$block_type_to_go[$ibeg] ) {
21118
21119                 # Note that logical padding has already been applied, so we may
21120                 # need to remove some spaces to get a valid hash key.
21121                 my $tok = $tokens_to_go[$ibeg];
21122                 my $cti = $closing_token_indentation{$tok};
21123
21124                 # Fix the value of 'cti' for an isloated non-welded closing qw
21125                 # delimiter.
21126                 if ( $seqno_qw_closing && $ibeg_weld_fix == $ibeg ) {
21127
21128                     # A quote delimiter which is not a container will not have
21129                     # a cti value defined.  In this case use the style of a
21130                     # paren. For example
21131                     #   my @fars = (
21132                     #      qw<
21133                     #        far
21134                     #        farfar
21135                     #        farfars-far
21136                     #      >,
21137                     #   );
21138                     if ( !defined($cti) && length($tok) == 1 ) {
21139
21140                         # something other than ')', '}', ']' ; use flag for ')'
21141                         $cti = $closing_token_indentation{')'};
21142
21143                         # But for now, do not outdent non-container qw
21144                         # delimiters because it would would change existing
21145                         # formatting.
21146                         if ( $tok ne '>' ) { $cti = 3 }
21147                     }
21148
21149                     # A non-welded closing qw cannot currently use -cti=1
21150                     # because that option requires a sequence number to find
21151                     # the opening indentation, and qw quote delimiters are not
21152                     # sequenced items.
21153                     if ( defined($cti) && $cti == 1 ) { $cti = 0 }
21154                 }
21155
21156                 if ( !defined($cti) ) {
21157
21158                     # $cti may not be defined for several reasons.
21159                     # -padding may have been applied so the character
21160                     #  has a length > 1
21161                     # - we may have welded to a closing quote token.
21162                     #   Here is an example (perltidy -wn):
21163                     #       __PACKAGE__->load_components( qw(
21164                     #  >         Core
21165                     #  >
21166                     #  >     ) );
21167                     $adjust_indentation = 0;
21168
21169                 }
21170                 elsif ( $cti == 1 ) {
21171                     if (   $i_terminal <= $ibeg + 1
21172                         || $is_semicolon_terminated )
21173                     {
21174                         $adjust_indentation = 2;
21175                     }
21176                     else {
21177                         $adjust_indentation = 0;
21178                     }
21179                 }
21180                 elsif ( $cti == 2 ) {
21181                     if ($is_semicolon_terminated) {
21182                         $adjust_indentation = 3;
21183                     }
21184                     else {
21185                         $adjust_indentation = 0;
21186                     }
21187                 }
21188                 elsif ( $cti == 3 ) {
21189                     $adjust_indentation = 3;
21190                 }
21191             }
21192
21193             # handle option to indent blocks
21194             else {
21195                 if (
21196                     $rOpts->{'indent-closing-brace'}
21197                     && (
21198                         $i_terminal == $ibeg    #  isolated terminal '}'
21199                         || $is_semicolon_terminated
21200                     )
21201                   )                             #  } xxxx ;
21202                 {
21203                     $adjust_indentation = 3;
21204                 }
21205             }
21206         }
21207
21208         # if at ');', '};', '>;', and '];' of a terminal qw quote
21209         elsif ($rpatterns->[0] =~ /^qb*;$/
21210             && $rfields->[0] =~ /^([\)\}\]\>]);$/ )
21211         {
21212             if ( $closing_token_indentation{$1} == 0 ) {
21213                 $adjust_indentation = 1;
21214             }
21215             else {
21216                 $adjust_indentation = 3;
21217             }
21218         }
21219
21220         # if line begins with a ':', align it with any
21221         # previous line leading with corresponding ?
21222         elsif ( $types_to_go[$ibeg] eq ':' ) {
21223             (
21224                 $opening_indentation, $opening_offset,
21225                 $is_leading,          $opening_exists
21226               )
21227               = $self->get_opening_indentation( $ibeg, $ri_first, $ri_last,
21228                 $rindentation_list );
21229             if ($is_leading) { $adjust_indentation = 2; }
21230         }
21231
21232         ##########################################################
21233         # Section 2: set indentation according to flag set above
21234         #
21235         # Select the indentation object to define leading
21236         # whitespace.  If we are outdenting something like '} } );'
21237         # then we want to use one level below the last token
21238         # ($i_terminal) in order to get it to fully outdent through
21239         # all levels.
21240         ##########################################################
21241         my $indentation;
21242         my $lev;
21243         my $level_end = $levels_to_go[$iend];
21244
21245         if ( $adjust_indentation == 0 ) {
21246             $indentation = $leading_spaces_to_go[$ibeg];
21247             $lev         = $levels_to_go[$ibeg];
21248         }
21249         elsif ( $adjust_indentation == 1 ) {
21250
21251             # Change the indentation to be that of a different token on the line
21252             # Previously, the indentation of the terminal token was used:
21253             # OLD CODING:
21254             # $indentation = $reduced_spaces_to_go[$i_terminal];
21255             # $lev         = $levels_to_go[$i_terminal];
21256
21257             # Generalization for MOJO:
21258             # Use the lowest level indentation of the tokens on the line.
21259             # For example, here we can use the indentation of the ending ';':
21260             #    } until ($selection > 0 and $selection < 10);   # ok to use ';'
21261             # But this will not outdent if we use the terminal indentation:
21262             #    )->then( sub {      # use indentation of the ->, not the {
21263             # Warning: reduced_spaces_to_go[] may be a reference, do not
21264             # do numerical checks with it
21265
21266             my $i_ind = $ibeg;
21267             $indentation = $reduced_spaces_to_go[$i_ind];
21268             $lev         = $levels_to_go[$i_ind];
21269             while ( $i_ind < $i_terminal ) {
21270                 $i_ind++;
21271                 if ( $levels_to_go[$i_ind] < $lev ) {
21272                     $indentation = $reduced_spaces_to_go[$i_ind];
21273                     $lev         = $levels_to_go[$i_ind];
21274                 }
21275             }
21276         }
21277
21278         # handle indented closing token which aligns with opening token
21279         elsif ( $adjust_indentation == 2 ) {
21280
21281             # handle option to align closing token with opening token
21282             $lev = $levels_to_go[$ibeg];
21283
21284             # calculate spaces needed to align with opening token
21285             my $space_count =
21286               get_spaces($opening_indentation) + $opening_offset;
21287
21288             # Indent less than the previous line.
21289             #
21290             # Problem: For -lp we don't exactly know what it was if there
21291             # were recoverable spaces sent to the aligner.  A good solution
21292             # would be to force a flush of the vertical alignment buffer, so
21293             # that we would know.  For now, this rule is used for -lp:
21294             #
21295             # When the last line did not start with a closing token we will
21296             # be optimistic that the aligner will recover everything wanted.
21297             #
21298             # This rule will prevent us from breaking a hierarchy of closing
21299             # tokens, and in a worst case will leave a closing paren too far
21300             # indented, but this is better than frequently leaving it not
21301             # indented enough.
21302             my $last_spaces = get_spaces($last_indentation_written);
21303             if ( !$is_closing_token{$last_leading_token} ) {
21304                 $last_spaces +=
21305                   get_recoverable_spaces($last_indentation_written);
21306             }
21307
21308             # reset the indentation to the new space count if it works
21309             # only options are all or none: nothing in-between looks good
21310             $lev = $levels_to_go[$ibeg];
21311             if ( $space_count < $last_spaces ) {
21312                 if ($rOpts_line_up_parentheses) {
21313                     my $lev = $levels_to_go[$ibeg];
21314                     $indentation =
21315                       new_lp_indentation_item( $space_count, $lev, 0, 0, 0 );
21316                 }
21317                 else {
21318                     $indentation = $space_count;
21319                 }
21320             }
21321
21322             # revert to default if it doesn't work
21323             else {
21324                 $space_count = leading_spaces_to_go($ibeg);
21325                 if ( $default_adjust_indentation == 0 ) {
21326                     $indentation = $leading_spaces_to_go[$ibeg];
21327                 }
21328                 elsif ( $default_adjust_indentation == 1 ) {
21329                     $indentation = $reduced_spaces_to_go[$i_terminal];
21330                     $lev         = $levels_to_go[$i_terminal];
21331                 }
21332             }
21333         }
21334
21335         # Full indentaion of closing tokens (-icb and -icp or -cti=2)
21336         else {
21337
21338             # handle -icb (indented closing code block braces)
21339             # Updated method for indented block braces: indent one full level if
21340             # there is no continuation indentation.  This will occur for major
21341             # structures such as sub, if, else, but not for things like map
21342             # blocks.
21343             #
21344             # Note: only code blocks without continuation indentation are
21345             # handled here (if, else, unless, ..). In the following snippet,
21346             # the terminal brace of the sort block will have continuation
21347             # indentation as shown so it will not be handled by the coding
21348             # here.  We would have to undo the continuation indentation to do
21349             # this, but it probably looks ok as is.  This is a possible future
21350             # update for semicolon terminated lines.
21351             #
21352             #     if ($sortby eq 'date' or $sortby eq 'size') {
21353             #         @files = sort {
21354             #             $file_data{$a}{$sortby} <=> $file_data{$b}{$sortby}
21355             #                 or $a cmp $b
21356             #                 } @files;
21357             #         }
21358             #
21359             if (   $block_type_to_go[$ibeg]
21360                 && $ci_levels_to_go[$i_terminal] == 0 )
21361             {
21362                 my $spaces = get_spaces( $leading_spaces_to_go[$i_terminal] );
21363                 $indentation = $spaces + $rOpts_indent_columns;
21364
21365                 # NOTE: for -lp we could create a new indentation object, but
21366                 # there is probably no need to do it
21367             }
21368
21369             # handle -icp and any -icb block braces which fall through above
21370             # test such as the 'sort' block mentioned above.
21371             else {
21372
21373                 # There are currently two ways to handle -icp...
21374                 # One way is to use the indentation of the previous line:
21375                 # $indentation = $last_indentation_written;
21376
21377                 # The other way is to use the indentation that the previous line
21378                 # would have had if it hadn't been adjusted:
21379                 $indentation = $last_unadjusted_indentation;
21380
21381                 # Current method: use the minimum of the two. This avoids
21382                 # inconsistent indentation.
21383                 if ( get_spaces($last_indentation_written) <
21384                     get_spaces($indentation) )
21385                 {
21386                     $indentation = $last_indentation_written;
21387                 }
21388             }
21389
21390             # use previous indentation but use own level
21391             # to cause list to be flushed properly
21392             $lev = $levels_to_go[$ibeg];
21393         }
21394
21395         # remember indentation except for multi-line quotes, which get
21396         # no indentation
21397         unless ( $ibeg == 0 && $starting_in_quote ) {
21398             $last_indentation_written    = $indentation;
21399             $last_unadjusted_indentation = $leading_spaces_to_go[$ibeg];
21400             $last_leading_token          = $tokens_to_go[$ibeg];
21401
21402             # Patch to make a line which is the end of a qw quote work with the
21403             # -lp option.  Make $token_beg look like a closing token as some
21404             # type even if it is not.  This veriable will become
21405             # $last_leading_token at the end of this loop.  Then, if the -lp
21406             # style is selected, and the next line is also a
21407             # closing token, it will not get more indentation than this line.
21408             # We need to do this because qw quotes (at present) only get
21409             # continuation indentation, not one level of indentation, so we
21410             # need to turn off the -lp indentation.
21411
21412             # ... a picture is worth a thousand words:
21413
21414             # perltidy -wn -gnu (Without this patch):
21415             #   ok(defined(
21416             #       $seqio = $gb->get_Stream_by_batch([qw(J00522 AF303112
21417             #       2981014)])
21418             #             ));
21419
21420             # perltidy -wn -gnu (With this patch):
21421             #  ok(defined(
21422             #      $seqio = $gb->get_Stream_by_batch([qw(J00522 AF303112
21423             #      2981014)])
21424             #  ));
21425             ## if ($seqno_qw_closing) { $last_leading_token = ')' }
21426             if ( $seqno_qw_closing
21427                 && ( length($token_beg) > 1 || $token_beg eq '>' ) )
21428             {
21429                 $last_leading_token = ')';
21430             }
21431         }
21432
21433         # be sure lines with leading closing tokens are not outdented more
21434         # than the line which contained the corresponding opening token.
21435
21436         #############################################################
21437         # updated per bug report in alex_bug.pl: we must not
21438         # mess with the indentation of closing logical braces so
21439         # we must treat something like '} else {' as if it were
21440         # an isolated brace
21441         #############################################################
21442         my $is_isolated_block_brace = $block_type_to_go[$ibeg]
21443           && (
21444             $i_terminal == $ibeg
21445             || $is_if_elsif_else_unless_while_until_for_foreach{
21446                 $block_type_to_go[$ibeg]
21447             }
21448           );
21449
21450         # only do this for a ':; which is aligned with its leading '?'
21451         my $is_unaligned_colon = $types_to_go[$ibeg] eq ':' && !$is_leading;
21452
21453         if (
21454             defined($opening_indentation)
21455             && !$leading_paren_arrow    # MOJO
21456             && !$is_isolated_block_brace
21457             && !$is_unaligned_colon
21458           )
21459         {
21460             if ( get_spaces($opening_indentation) > get_spaces($indentation) ) {
21461                 $indentation = $opening_indentation;
21462             }
21463         }
21464
21465         # remember the indentation of each line of this batch
21466         push @{$rindentation_list}, $indentation;
21467
21468         # outdent lines with certain leading tokens...
21469         if (
21470
21471             # must be first word of this batch
21472             $ibeg == 0
21473
21474             # and ...
21475             && (
21476
21477                 # certain leading keywords if requested
21478                 (
21479                        $rOpts->{'outdent-keywords'}
21480                     && $types_to_go[$ibeg] eq 'k'
21481                     && $outdent_keyword{ $tokens_to_go[$ibeg] }
21482                 )
21483
21484                 # or labels if requested
21485                 || ( $rOpts->{'outdent-labels'} && $types_to_go[$ibeg] eq 'J' )
21486
21487                 # or static block comments if requested
21488                 || (   $types_to_go[$ibeg] eq '#'
21489                     && $rOpts->{'outdent-static-block-comments'}
21490                     && $is_static_block_comment )
21491             )
21492           )
21493
21494         {
21495             my $space_count = leading_spaces_to_go($ibeg);
21496             if ( $space_count > 0 ) {
21497                 $space_count -= $rOpts_continuation_indentation;
21498                 $is_outdented_line = 1;
21499                 if ( $space_count < 0 ) { $space_count = 0 }
21500
21501                 # do not promote a spaced static block comment to non-spaced;
21502                 # this is not normally necessary but could be for some
21503                 # unusual user inputs (such as -ci = -i)
21504                 if ( $types_to_go[$ibeg] eq '#' && $space_count == 0 ) {
21505                     $space_count = 1;
21506                 }
21507
21508                 if ($rOpts_line_up_parentheses) {
21509                     $indentation =
21510                       new_lp_indentation_item( $space_count, $lev, 0, 0, 0 );
21511                 }
21512                 else {
21513                     $indentation = $space_count;
21514                 }
21515             }
21516         }
21517
21518         return ( $indentation, $lev, $level_end, $terminal_type,
21519             $terminal_block_type, $is_semicolon_terminated,
21520             $is_outdented_line );
21521     }
21522 } ## end closure set_adjusted_indentation
21523
21524 sub get_opening_indentation {
21525
21526     # get the indentation of the line which output the opening token
21527     # corresponding to a given closing token in the current output batch.
21528     #
21529     # given:
21530     # $i_closing - index in this line of a closing token ')' '}' or ']'
21531     #
21532     # $ri_first - reference to list of the first index $i for each output
21533     #               line in this batch
21534     # $ri_last - reference to list of the last index $i for each output line
21535     #              in this batch
21536     # $rindentation_list - reference to a list containing the indentation
21537     #            used for each line.
21538     # $qw_seqno - optional sequence number to use if normal seqno not defined
21539     #           (TODO: would be more general to just look this up from index i)
21540     #
21541     # return:
21542     #   -the indentation of the line which contained the opening token
21543     #    which matches the token at index $i_opening
21544     #   -and its offset (number of columns) from the start of the line
21545     #
21546     my ( $self, $i_closing, $ri_first, $ri_last, $rindentation_list, $qw_seqno )
21547       = @_;
21548
21549     # first, see if the opening token is in the current batch
21550     my $i_opening = $mate_index_to_go[$i_closing];
21551     my ( $indent, $offset, $is_leading, $exists );
21552     $exists = 1;
21553     if ( defined($i_opening) && $i_opening >= 0 ) {
21554
21555         # it is..look up the indentation
21556         ( $indent, $offset, $is_leading ) =
21557           lookup_opening_indentation( $i_opening, $ri_first, $ri_last,
21558             $rindentation_list );
21559     }
21560
21561     # if not, it should have been stored in the hash by a previous batch
21562     else {
21563         my $seqno = $type_sequence_to_go[$i_closing];
21564         $seqno = $qw_seqno unless ($seqno);
21565         ( $indent, $offset, $is_leading, $exists ) =
21566           get_saved_opening_indentation($seqno);
21567     }
21568     return ( $indent, $offset, $is_leading, $exists );
21569 }
21570
21571 sub set_vertical_tightness_flags {
21572
21573     my ( $self, $n, $n_last_line, $ibeg, $iend, $ri_first, $ri_last,
21574         $ending_in_quote, $closing_side_comment )
21575       = @_;
21576
21577     # Define vertical tightness controls for the nth line of a batch.
21578     # We create an array of parameters which tell the vertical aligner
21579     # if we should combine this line with the next line to achieve the
21580     # desired vertical tightness.  The array of parameters contains:
21581     #
21582     #   [0] type: 1=opening non-block    2=closing non-block
21583     #             3=opening block brace  4=closing block brace
21584     #
21585     #   [1] flag: if opening: 1=no multiple steps, 2=multiple steps ok
21586     #             if closing: spaces of padding to use
21587     #   [2] sequence number of container
21588     #   [3] valid flag: do not append if this flag is false. Will be
21589     #       true if appropriate -vt flag is set.  Otherwise, Will be
21590     #       made true only for 2 line container in parens with -lp
21591     #
21592     # These flags are used by sub set_leading_whitespace in
21593     # the vertical aligner
21594
21595     my $rvertical_tightness_flags = [ 0, 0, 0, 0, 0, 0 ];
21596
21597     # The vertical tightness mechanism can add whitespace, so whitespace can
21598     # continually increase if we allowed it when the -fws flag is set.
21599     # See case b499 for an example.
21600     return $rvertical_tightness_flags if ($rOpts_freeze_whitespace);
21601
21602     # Uses these parameters:
21603     #   $rOpts_block_brace_tightness
21604     #   $rOpts_block_brace_vertical_tightness
21605     #   $rOpts_stack_closing_block_brace
21606     #   %opening_vertical_tightness
21607     #   %closing_vertical_tightness
21608     #   %opening_token_right
21609     #   %stack_closing_token
21610     #   %stack_opening_token
21611
21612     #--------------------------------------------------------------
21613     # Vertical Tightness Flags Section 1:
21614     # Handle Lines 1 .. n-1 but not the last line
21615     # For non-BLOCK tokens, we will need to examine the next line
21616     # too, so we won't consider the last line.
21617     #--------------------------------------------------------------
21618     if ( $n < $n_last_line ) {
21619
21620         #--------------------------------------------------------------
21621         # Vertical Tightness Flags Section 1a:
21622         # Look for Type 1, last token of this line is a non-block opening token
21623         #--------------------------------------------------------------
21624         my $ibeg_next = $ri_first->[ $n + 1 ];
21625         my $token_end = $tokens_to_go[$iend];
21626         my $iend_next = $ri_last->[ $n + 1 ];
21627         if (
21628                $type_sequence_to_go[$iend]
21629             && !$block_type_to_go[$iend]
21630             && $is_opening_token{$token_end}
21631             && (
21632                 $opening_vertical_tightness{$token_end} > 0
21633
21634                 # allow 2-line method call to be closed up
21635                 || (   $rOpts_line_up_parentheses
21636                     && $token_end eq '('
21637                     && $iend > $ibeg
21638                     && $types_to_go[ $iend - 1 ] ne 'b' )
21639             )
21640           )
21641         {
21642
21643             # avoid multiple jumps in nesting depth in one line if
21644             # requested
21645             my $ovt       = $opening_vertical_tightness{$token_end};
21646             my $iend_next = $ri_last->[ $n + 1 ];
21647             unless (
21648                 $ovt < 2
21649                 && ( $nesting_depth_to_go[ $iend_next + 1 ] !=
21650                     $nesting_depth_to_go[$ibeg_next] )
21651               )
21652             {
21653
21654                 # If -vt flag has not been set, mark this as invalid
21655                 # and aligner will validate it if it sees the closing paren
21656                 # within 2 lines.
21657                 my $valid_flag = $ovt;
21658                 @{$rvertical_tightness_flags} =
21659                   ( 1, $ovt, $type_sequence_to_go[$iend], $valid_flag );
21660             }
21661         }
21662
21663         #--------------------------------------------------------------
21664         # Vertical Tightness Flags Section 1b:
21665         # Look for Type 2, first token of next line is a non-block closing
21666         # token .. and be sure this line does not have a side comment
21667         #--------------------------------------------------------------
21668         my $token_next = $tokens_to_go[$ibeg_next];
21669         if (   $type_sequence_to_go[$ibeg_next]
21670             && !$block_type_to_go[$ibeg_next]
21671             && $is_closing_token{$token_next}
21672             && $types_to_go[$iend] ne '#' )    # for safety, shouldn't happen!
21673         {
21674             my $ovt = $opening_vertical_tightness{$token_next};
21675             my $cvt = $closing_vertical_tightness{$token_next};
21676
21677             # Implement cvt=3: like cvt=0 for assigned structures, like cvt=1
21678             # otherwise.  Added for rt136417.
21679             if ( $cvt == 3 ) {
21680                 my $seqno = $type_sequence_to_go[$ibeg_next];
21681                 $cvt = $self->[_ris_assigned_structure_]->{$seqno} ? 0 : 1;
21682             }
21683
21684             if (
21685
21686                 # Never append a trailing line like   ')->pack(' because it
21687                 # will throw off later alignment.  So this line must start at a
21688                 # deeper level than the next line (fix1 for welding, git #45).
21689                 (
21690                     $nesting_depth_to_go[$ibeg_next] >=
21691                     $nesting_depth_to_go[ $iend_next + 1 ] + 1
21692                 )
21693                 && (
21694                     $cvt == 2
21695                     || (
21696                         !$self->is_in_list_by_i($ibeg_next)
21697                         && (
21698                             $cvt == 1
21699
21700                             # allow closing up 2-line method calls
21701                             || (   $rOpts_line_up_parentheses
21702                                 && $token_next eq ')' )
21703                         )
21704                     )
21705                 )
21706               )
21707             {
21708
21709                 # decide which trailing closing tokens to append..
21710                 my $ok = 0;
21711                 if ( $cvt == 2 || $iend_next == $ibeg_next ) { $ok = 1 }
21712                 else {
21713                     my $str = join( '',
21714                         @types_to_go[ $ibeg_next + 1 .. $ibeg_next + 2 ] );
21715
21716                     # append closing token if followed by comment or ';'
21717                     # or another closing token (fix2 for welding, git #45)
21718                     if ( $str =~ /^b?[\)\]\}R#;]/ ) { $ok = 1 }
21719                 }
21720
21721                 if ($ok) {
21722                     my $valid_flag = $cvt;
21723                     @{$rvertical_tightness_flags} = (
21724                         2,
21725                         $tightness{$token_next} == 2 ? 0 : 1,
21726                         $type_sequence_to_go[$ibeg_next], $valid_flag,
21727                     );
21728                 }
21729             }
21730         }
21731
21732         #--------------------------------------------------------------
21733         # Vertical Tightness Flags Section 1c:
21734         # Implement the Opening Token Right flag (Type 2)..
21735         # If requested, move an isolated trailing opening token to the end of
21736         # the previous line which ended in a comma.  We could do this
21737         # in sub recombine_breakpoints but that would cause problems
21738         # with -lp formatting.  The problem is that indentation will
21739         # quickly move far to the right in nested expressions.  By
21740         # doing it after indentation has been set, we avoid changes
21741         # to the indentation.  Actual movement of the token takes place
21742         # in sub valign_output_step_B.
21743
21744         # Note added 4 May 2021: the man page suggests that the -otr flags
21745         # are mainly for opening tokens following commas.  But this seems
21746         # to have been generalized long ago to include other situations.
21747         # I checked the coding back to 2012 and it is essentially the same
21748         # as here, so it is best to leave this unchanged for now.
21749         #--------------------------------------------------------------
21750         if (
21751             $opening_token_right{ $tokens_to_go[$ibeg_next] }
21752
21753             # previous line is not opening
21754             # (use -sot to combine with it)
21755             && !$is_opening_token{$token_end}
21756
21757             # previous line ended in one of these
21758             # (add other cases if necessary; '=>' and '.' are not necessary
21759             && !$block_type_to_go[$ibeg_next]
21760
21761             # this is a line with just an opening token
21762             && (   $iend_next == $ibeg_next
21763                 || $iend_next == $ibeg_next + 2
21764                 && $types_to_go[$iend_next] eq '#' )
21765
21766             # Fix for case b1060 when both -baoo and -otr are set:
21767             # to avoid blinking, honor the -baoo flag over the -otr flag.
21768             && $token_end ne '||' && $token_end ne '&&'
21769
21770             # Keep break after '=' if -lp. Fixes b964 b1040 b1062 b1083 b1089.
21771             && !( $token_end eq '=' && $rOpts_line_up_parentheses )
21772
21773             # looks bad if we align vertically with the wrong container
21774             && $tokens_to_go[$ibeg] ne $tokens_to_go[$ibeg_next]
21775           )
21776         {
21777             my $valid_flag = 1;
21778             my $spaces     = ( $types_to_go[ $ibeg_next - 1 ] eq 'b' ) ? 1 : 0;
21779             @{$rvertical_tightness_flags} =
21780               ( 2, $spaces, $type_sequence_to_go[$ibeg_next], $valid_flag, );
21781         }
21782
21783         #--------------------------------------------------------------
21784         # Vertical Tightness Flags Section 1d:
21785         # Stacking of opening and closing tokens (Type 2)
21786         #--------------------------------------------------------------
21787         my $stackable;
21788         my $token_beg_next = $tokens_to_go[$ibeg_next];
21789
21790         # patch to make something like 'qw(' behave like an opening paren
21791         # (aran.t)
21792         if ( $types_to_go[$ibeg_next] eq 'q' ) {
21793             if ( $token_beg_next =~ /^qw\s*([\[\(\{])$/ ) {
21794                 $token_beg_next = $1;
21795             }
21796         }
21797
21798         if (   $is_closing_token{$token_end}
21799             && $is_closing_token{$token_beg_next} )
21800         {
21801             $stackable = $stack_closing_token{$token_beg_next}
21802               unless ( $block_type_to_go[$ibeg_next] )
21803               ;    # shouldn't happen; just checking
21804         }
21805         elsif ($is_opening_token{$token_end}
21806             && $is_opening_token{$token_beg_next} )
21807         {
21808             $stackable = $stack_opening_token{$token_beg_next}
21809               unless ( $block_type_to_go[$ibeg_next] )
21810               ;    # shouldn't happen; just checking
21811         }
21812
21813         if ($stackable) {
21814
21815             my $is_semicolon_terminated;
21816             if ( $n + 1 == $n_last_line ) {
21817                 my ( $terminal_type, $i_terminal ) =
21818                   terminal_type_i( $ibeg_next, $iend_next );
21819                 $is_semicolon_terminated = $terminal_type eq ';'
21820                   && $nesting_depth_to_go[$iend_next] <
21821                   $nesting_depth_to_go[$ibeg_next];
21822             }
21823
21824             # this must be a line with just an opening token
21825             # or end in a semicolon
21826             if (
21827                 $is_semicolon_terminated
21828                 || (   $iend_next == $ibeg_next
21829                     || $iend_next == $ibeg_next + 2
21830                     && $types_to_go[$iend_next] eq '#' )
21831               )
21832             {
21833                 my $valid_flag = 1;
21834                 my $spaces = ( $types_to_go[ $ibeg_next - 1 ] eq 'b' ) ? 1 : 0;
21835                 @{$rvertical_tightness_flags} = (
21836                     2, $spaces, $type_sequence_to_go[$ibeg_next], $valid_flag,
21837                 );
21838             }
21839         }
21840     }
21841
21842     #--------------------------------------------------------------
21843     # Vertical Tightness Flags Section 2:
21844     # Handle type 3, opening block braces on last line of the batch
21845     # Check for a last line with isolated opening BLOCK curly
21846     #--------------------------------------------------------------
21847     elsif ($rOpts_block_brace_vertical_tightness
21848         && $ibeg eq $iend
21849         && $types_to_go[$iend] eq '{'
21850         && $block_type_to_go[$iend] =~
21851         /$block_brace_vertical_tightness_pattern/ )
21852     {
21853         @{$rvertical_tightness_flags} =
21854           ( 3, $rOpts_block_brace_vertical_tightness, 0, 1 );
21855     }
21856
21857     #--------------------------------------------------------------
21858     # Vertical Tightness Flags Section 3:
21859     # Handle type 4, a closing block brace on the last line of the batch Check
21860     # for a last line with isolated closing BLOCK curly
21861     # Patch: added a check for any new closing side comment which the
21862     # -csc option may generate. If it exists, there will be a side comment
21863     # so we cannot combine with a brace on the next line.  This issue
21864     # occurs for the combination -scbb and -csc is used.
21865     #--------------------------------------------------------------
21866     elsif ($rOpts_stack_closing_block_brace
21867         && $ibeg eq $iend
21868         && $block_type_to_go[$iend]
21869         && $types_to_go[$iend] eq '}'
21870         && ( !$closing_side_comment || $n < $n_last_line ) )
21871     {
21872         my $spaces = $rOpts_block_brace_tightness == 2 ? 0 : 1;
21873         @{$rvertical_tightness_flags} =
21874           ( 4, $spaces, $type_sequence_to_go[$iend], 1 );
21875     }
21876
21877     # pack in the sequence numbers of the ends of this line
21878     my $seqno_beg = $type_sequence_to_go[$ibeg];
21879     if ( !$seqno_beg && $types_to_go[$ibeg] eq 'q' ) {
21880         $seqno_beg = $self->get_seqno( $ibeg, $ending_in_quote );
21881     }
21882     my $seqno_end = $type_sequence_to_go[$iend];
21883     if ( !$seqno_end && $types_to_go[$iend] eq 'q' ) {
21884         $seqno_end = $self->get_seqno( $iend, $ending_in_quote );
21885     }
21886     $rvertical_tightness_flags->[4] = $seqno_beg;
21887     $rvertical_tightness_flags->[5] = $seqno_end;
21888     return $rvertical_tightness_flags;
21889 }
21890
21891 ##########################################################
21892 # CODE SECTION 14: Code for creating closing side comments
21893 ##########################################################
21894
21895 {    ## begin closure accumulate_csc_text
21896
21897 # These routines are called once per batch when the --closing-side-comments flag
21898 # has been set.
21899
21900     my %block_leading_text;
21901     my %block_opening_line_number;
21902     my $csc_new_statement_ok;
21903     my $csc_last_label;
21904     my %csc_block_label;
21905     my $accumulating_text_for_block;
21906     my $leading_block_text;
21907     my $rleading_block_if_elsif_text;
21908     my $leading_block_text_level;
21909     my $leading_block_text_length_exceeded;
21910     my $leading_block_text_line_length;
21911     my $leading_block_text_line_number;
21912
21913     sub initialize_csc_vars {
21914         %block_leading_text           = ();
21915         %block_opening_line_number    = ();
21916         $csc_new_statement_ok         = 1;
21917         $csc_last_label               = "";
21918         %csc_block_label              = ();
21919         $rleading_block_if_elsif_text = [];
21920         $accumulating_text_for_block  = "";
21921         reset_block_text_accumulator();
21922         return;
21923     }
21924
21925     sub reset_block_text_accumulator {
21926
21927         # save text after 'if' and 'elsif' to append after 'else'
21928         if ($accumulating_text_for_block) {
21929
21930             if ( $accumulating_text_for_block =~ /^(if|elsif)$/ ) {
21931                 push @{$rleading_block_if_elsif_text}, $leading_block_text;
21932             }
21933         }
21934         $accumulating_text_for_block        = "";
21935         $leading_block_text                 = "";
21936         $leading_block_text_level           = 0;
21937         $leading_block_text_length_exceeded = 0;
21938         $leading_block_text_line_number     = 0;
21939         $leading_block_text_line_length     = 0;
21940         return;
21941     }
21942
21943     sub set_block_text_accumulator {
21944         my ( $self, $i ) = @_;
21945         $accumulating_text_for_block = $tokens_to_go[$i];
21946         if ( $accumulating_text_for_block !~ /^els/ ) {
21947             $rleading_block_if_elsif_text = [];
21948         }
21949         $leading_block_text                 = "";
21950         $leading_block_text_level           = $levels_to_go[$i];
21951         $leading_block_text_line_number     = $self->get_output_line_number();
21952         $leading_block_text_length_exceeded = 0;
21953
21954         # this will contain the column number of the last character
21955         # of the closing side comment
21956         $leading_block_text_line_length =
21957           length($csc_last_label) +
21958           length($accumulating_text_for_block) +
21959           length( $rOpts->{'closing-side-comment-prefix'} ) +
21960           $leading_block_text_level * $rOpts_indent_columns + 3;
21961         return;
21962     }
21963
21964     sub accumulate_block_text {
21965         my ( $self, $i ) = @_;
21966
21967         # accumulate leading text for -csc, ignoring any side comments
21968         if (   $accumulating_text_for_block
21969             && !$leading_block_text_length_exceeded
21970             && $types_to_go[$i] ne '#' )
21971         {
21972
21973             my $added_length = $token_lengths_to_go[$i];
21974             $added_length += 1 if $i == 0;
21975             my $new_line_length =
21976               $leading_block_text_line_length + $added_length;
21977
21978             # we can add this text if we don't exceed some limits..
21979             if (
21980
21981                 # we must not have already exceeded the text length limit
21982                 length($leading_block_text) <
21983                 $rOpts_closing_side_comment_maximum_text
21984
21985                 # and either:
21986                 # the new total line length must be below the line length limit
21987                 # or the new length must be below the text length limit
21988                 # (ie, we may allow one token to exceed the text length limit)
21989                 && (
21990                     $new_line_length <
21991                     $maximum_line_length_at_level[$leading_block_text_level]
21992
21993                     || length($leading_block_text) + $added_length <
21994                     $rOpts_closing_side_comment_maximum_text
21995                 )
21996
21997                # UNLESS: we are adding a closing paren before the brace we seek.
21998                # This is an attempt to avoid situations where the ... to be
21999                # added are longer than the omitted right paren, as in:
22000
22001              #   foreach my $item (@a_rather_long_variable_name_here) {
22002              #      &whatever;
22003              #   } ## end foreach my $item (@a_rather_long_variable_name_here...
22004
22005                 || (
22006                     $tokens_to_go[$i] eq ')'
22007                     && (
22008                         (
22009                                $i + 1 <= $max_index_to_go
22010                             && $block_type_to_go[ $i + 1 ] eq
22011                             $accumulating_text_for_block
22012                         )
22013                         || (   $i + 2 <= $max_index_to_go
22014                             && $block_type_to_go[ $i + 2 ] eq
22015                             $accumulating_text_for_block )
22016                     )
22017                 )
22018               )
22019             {
22020
22021                 # add an extra space at each newline
22022                 if ( $i == 0 && $types_to_go[$i] ne 'b' ) {
22023                     $leading_block_text .= ' ';
22024                 }
22025
22026                 # add the token text
22027                 $leading_block_text .= $tokens_to_go[$i];
22028                 $leading_block_text_line_length = $new_line_length;
22029             }
22030
22031             # show that text was truncated if necessary
22032             elsif ( $types_to_go[$i] ne 'b' ) {
22033                 $leading_block_text_length_exceeded = 1;
22034                 $leading_block_text .= '...';
22035             }
22036         }
22037         return;
22038     }
22039
22040     sub accumulate_csc_text {
22041
22042         my ($self) = @_;
22043
22044         # called once per output buffer when -csc is used. Accumulates
22045         # the text placed after certain closing block braces.
22046         # Defines and returns the following for this buffer:
22047
22048         my $block_leading_text = "";    # the leading text of the last '}'
22049         my $rblock_leading_if_elsif_text;
22050         my $i_block_leading_text =
22051           -1;    # index of token owning block_leading_text
22052         my $block_line_count    = 100;    # how many lines the block spans
22053         my $terminal_type       = 'b';    # type of last nonblank token
22054         my $i_terminal          = 0;      # index of last nonblank token
22055         my $terminal_block_type = "";
22056
22057         # update most recent statement label
22058         $csc_last_label = "" unless ($csc_last_label);
22059         if ( $types_to_go[0] eq 'J' ) { $csc_last_label = $tokens_to_go[0] }
22060         my $block_label = $csc_last_label;
22061
22062         # Loop over all tokens of this batch
22063         for my $i ( 0 .. $max_index_to_go ) {
22064             my $type       = $types_to_go[$i];
22065             my $block_type = $block_type_to_go[$i];
22066             my $token      = $tokens_to_go[$i];
22067
22068             # remember last nonblank token type
22069             if ( $type ne '#' && $type ne 'b' ) {
22070                 $terminal_type       = $type;
22071                 $terminal_block_type = $block_type;
22072                 $i_terminal          = $i;
22073             }
22074
22075             my $type_sequence = $type_sequence_to_go[$i];
22076             if ( $block_type && $type_sequence ) {
22077
22078                 if ( $token eq '}' ) {
22079
22080                     # restore any leading text saved when we entered this block
22081                     if ( defined( $block_leading_text{$type_sequence} ) ) {
22082                         ( $block_leading_text, $rblock_leading_if_elsif_text )
22083                           = @{ $block_leading_text{$type_sequence} };
22084                         $i_block_leading_text = $i;
22085                         delete $block_leading_text{$type_sequence};
22086                         $rleading_block_if_elsif_text =
22087                           $rblock_leading_if_elsif_text;
22088                     }
22089
22090                     if ( defined( $csc_block_label{$type_sequence} ) ) {
22091                         $block_label = $csc_block_label{$type_sequence};
22092                         delete $csc_block_label{$type_sequence};
22093                     }
22094
22095                     # if we run into a '}' then we probably started accumulating
22096                     # at something like a trailing 'if' clause..no harm done.
22097                     if (   $accumulating_text_for_block
22098                         && $levels_to_go[$i] <= $leading_block_text_level )
22099                     {
22100                         my $lev = $levels_to_go[$i];
22101                         reset_block_text_accumulator();
22102                     }
22103
22104                     if ( defined( $block_opening_line_number{$type_sequence} ) )
22105                     {
22106                         my $output_line_number =
22107                           $self->get_output_line_number();
22108                         $block_line_count =
22109                           $output_line_number -
22110                           $block_opening_line_number{$type_sequence} + 1;
22111                         delete $block_opening_line_number{$type_sequence};
22112                     }
22113                     else {
22114
22115                         # Error: block opening line undefined for this line..
22116                         # This shouldn't be possible, but it is not a
22117                         # significant problem.
22118                     }
22119                 }
22120
22121                 elsif ( $token eq '{' ) {
22122
22123                     my $line_number = $self->get_output_line_number();
22124                     $block_opening_line_number{$type_sequence} = $line_number;
22125
22126                     # set a label for this block, except for
22127                     # a bare block which already has the label
22128                     # A label can only be used on the next {
22129                     if ( $block_type =~ /:$/ ) { $csc_last_label = "" }
22130                     $csc_block_label{$type_sequence} = $csc_last_label;
22131                     $csc_last_label = "";
22132
22133                     if (   $accumulating_text_for_block
22134                         && $levels_to_go[$i] == $leading_block_text_level )
22135                     {
22136
22137                         if ( $accumulating_text_for_block eq $block_type ) {
22138
22139                             # save any leading text before we enter this block
22140                             $block_leading_text{$type_sequence} = [
22141                                 $leading_block_text,
22142                                 $rleading_block_if_elsif_text
22143                             ];
22144                             $block_opening_line_number{$type_sequence} =
22145                               $leading_block_text_line_number;
22146                             reset_block_text_accumulator();
22147                         }
22148                         else {
22149
22150                             # shouldn't happen, but not a serious error.
22151                             # We were accumulating -csc text for block type
22152                             # $accumulating_text_for_block and unexpectedly
22153                             # encountered a '{' for block type $block_type.
22154                         }
22155                     }
22156                 }
22157             }
22158
22159             if (   $type eq 'k'
22160                 && $csc_new_statement_ok
22161                 && $is_if_elsif_else_unless_while_until_for_foreach{$token}
22162                 && $token =~ /$closing_side_comment_list_pattern/ )
22163             {
22164                 $self->set_block_text_accumulator($i);
22165             }
22166             else {
22167
22168                 # note: ignoring type 'q' because of tricks being played
22169                 # with 'q' for hanging side comments
22170                 if ( $type ne 'b' && $type ne '#' && $type ne 'q' ) {
22171                     $csc_new_statement_ok =
22172                       ( $block_type || $type eq 'J' || $type eq ';' );
22173                 }
22174                 if (   $type eq ';'
22175                     && $accumulating_text_for_block
22176                     && $levels_to_go[$i] == $leading_block_text_level )
22177                 {
22178                     reset_block_text_accumulator();
22179                 }
22180                 else {
22181                     $self->accumulate_block_text($i);
22182                 }
22183             }
22184         }
22185
22186         # Treat an 'else' block specially by adding preceding 'if' and
22187         # 'elsif' text.  Otherwise, the 'end else' is not helpful,
22188         # especially for cuddled-else formatting.
22189         if ( $terminal_block_type =~ /^els/ && $rblock_leading_if_elsif_text ) {
22190             $block_leading_text =
22191               $self->make_else_csc_text( $i_terminal, $terminal_block_type,
22192                 $block_leading_text, $rblock_leading_if_elsif_text );
22193         }
22194
22195         # if this line ends in a label then remember it for the next pass
22196         $csc_last_label = "";
22197         if ( $terminal_type eq 'J' ) {
22198             $csc_last_label = $tokens_to_go[$i_terminal];
22199         }
22200
22201         return ( $terminal_type, $i_terminal, $i_block_leading_text,
22202             $block_leading_text, $block_line_count, $block_label );
22203     }
22204
22205     sub make_else_csc_text {
22206
22207         # create additional -csc text for an 'else' and optionally 'elsif',
22208         # depending on the value of switch
22209         #
22210         #  = 0 add 'if' text to trailing else
22211         #  = 1 same as 0 plus:
22212         #      add 'if' to 'elsif's if can fit in line length
22213         #      add last 'elsif' to trailing else if can fit in one line
22214         #  = 2 same as 1 but do not check if exceed line length
22215         #
22216         # $rif_elsif_text = a reference to a list of all previous closing
22217         # side comments created for this if block
22218         #
22219         my ( $self, $i_terminal, $block_type, $block_leading_text,
22220             $rif_elsif_text )
22221           = @_;
22222         my $csc_text = $block_leading_text;
22223
22224         if (   $block_type eq 'elsif'
22225             && $rOpts_closing_side_comment_else_flag == 0 )
22226         {
22227             return $csc_text;
22228         }
22229
22230         my $count = @{$rif_elsif_text};
22231         return $csc_text unless ($count);
22232
22233         my $if_text = '[ if' . $rif_elsif_text->[0];
22234
22235         # always show the leading 'if' text on 'else'
22236         if ( $block_type eq 'else' ) {
22237             $csc_text .= $if_text;
22238         }
22239
22240         # see if that's all
22241         if ( $rOpts_closing_side_comment_else_flag == 0 ) {
22242             return $csc_text;
22243         }
22244
22245         my $last_elsif_text = "";
22246         if ( $count > 1 ) {
22247             $last_elsif_text = ' [elsif' . $rif_elsif_text->[ $count - 1 ];
22248             if ( $count > 2 ) { $last_elsif_text = ' [...' . $last_elsif_text; }
22249         }
22250
22251         # tentatively append one more item
22252         my $saved_text = $csc_text;
22253         if ( $block_type eq 'else' ) {
22254             $csc_text .= $last_elsif_text;
22255         }
22256         else {
22257             $csc_text .= ' ' . $if_text;
22258         }
22259
22260         # all done if no length checks requested
22261         if ( $rOpts_closing_side_comment_else_flag == 2 ) {
22262             return $csc_text;
22263         }
22264
22265         # undo it if line length exceeded
22266         my $length =
22267           length($csc_text) +
22268           length($block_type) +
22269           length( $rOpts->{'closing-side-comment-prefix'} ) +
22270           $levels_to_go[$i_terminal] * $rOpts_indent_columns + 3;
22271         if (
22272             $length > $maximum_line_length_at_level[$leading_block_text_level] )
22273         {
22274             $csc_text = $saved_text;
22275         }
22276         return $csc_text;
22277     }
22278 } ## end closure accumulate_csc_text
22279
22280 {    ## begin closure balance_csc_text
22281
22282     # Some additional routines for handling the --closing-side-comments option
22283
22284     my %matching_char;
22285
22286     BEGIN {
22287         %matching_char = (
22288             '{' => '}',
22289             '(' => ')',
22290             '[' => ']',
22291             '}' => '{',
22292             ')' => '(',
22293             ']' => '[',
22294         );
22295     }
22296
22297     sub balance_csc_text {
22298
22299         # Append characters to balance a closing side comment so that editors
22300         # such as vim can correctly jump through code.
22301         # Simple Example:
22302         #  input  = ## end foreach my $foo ( sort { $b  ...
22303         #  output = ## end foreach my $foo ( sort { $b  ...})
22304
22305         # NOTE: This routine does not currently filter out structures within
22306         # quoted text because the bounce algorithms in text editors do not
22307         # necessarily do this either (a version of vim was checked and
22308         # did not do this).
22309
22310         # Some complex examples which will cause trouble for some editors:
22311         #  while ( $mask_string =~ /\{[^{]*?\}/g ) {
22312         #  if ( $mask_str =~ /\}\s*els[^\{\}]+\{$/ ) {
22313         #  if ( $1 eq '{' ) {
22314         # test file test1/braces.pl has many such examples.
22315
22316         my ($csc) = @_;
22317
22318         # loop to examine characters one-by-one, RIGHT to LEFT and
22319         # build a balancing ending, LEFT to RIGHT.
22320         for ( my $pos = length($csc) - 1 ; $pos >= 0 ; $pos-- ) {
22321
22322             my $char = substr( $csc, $pos, 1 );
22323
22324             # ignore everything except structural characters
22325             next unless ( $matching_char{$char} );
22326
22327             # pop most recently appended character
22328             my $top = chop($csc);
22329
22330             # push it back plus the mate to the newest character
22331             # unless they balance each other.
22332             $csc = $csc . $top . $matching_char{$char} unless $top eq $char;
22333         }
22334
22335         # return the balanced string
22336         return $csc;
22337     }
22338 } ## end closure balance_csc_text
22339
22340 sub add_closing_side_comment {
22341
22342     my $self = shift;
22343     my $rLL  = $self->[_rLL_];
22344
22345     # add closing side comments after closing block braces if -csc used
22346     my ( $closing_side_comment, $cscw_block_comment );
22347
22348     #---------------------------------------------------------------
22349     # Step 1: loop through all tokens of this line to accumulate
22350     # the text needed to create the closing side comments. Also see
22351     # how the line ends.
22352     #---------------------------------------------------------------
22353
22354     my ( $terminal_type, $i_terminal, $i_block_leading_text,
22355         $block_leading_text, $block_line_count, $block_label )
22356       = $self->accumulate_csc_text();
22357
22358     #---------------------------------------------------------------
22359     # Step 2: make the closing side comment if this ends a block
22360     #---------------------------------------------------------------
22361     my $have_side_comment = $types_to_go[$max_index_to_go] eq '#';
22362
22363     # if this line might end in a block closure..
22364     if (
22365         $terminal_type eq '}'
22366
22367         # ..and either
22368         && (
22369
22370             # the block is long enough
22371             ( $block_line_count >= $rOpts->{'closing-side-comment-interval'} )
22372
22373             # or there is an existing comment to check
22374             || (   $have_side_comment
22375                 && $rOpts->{'closing-side-comment-warnings'} )
22376         )
22377
22378         # .. and if this is one of the types of interest
22379         && $block_type_to_go[$i_terminal] =~
22380         /$closing_side_comment_list_pattern/
22381
22382         # .. but not an anonymous sub
22383         # These are not normally of interest, and their closing braces are
22384         # often followed by commas or semicolons anyway.  This also avoids
22385         # possible erratic output due to line numbering inconsistencies
22386         # in the cases where their closing braces terminate a line.
22387         && $block_type_to_go[$i_terminal] ne 'sub'
22388
22389         # ..and the corresponding opening brace must is not in this batch
22390         # (because we do not need to tag one-line blocks, although this
22391         # should also be caught with a positive -csci value)
22392         && $mate_index_to_go[$i_terminal] < 0
22393
22394         # ..and either
22395         && (
22396
22397             # this is the last token (line doesn't have a side comment)
22398             !$have_side_comment
22399
22400             # or the old side comment is a closing side comment
22401             || $tokens_to_go[$max_index_to_go] =~
22402             /$closing_side_comment_prefix_pattern/
22403         )
22404       )
22405     {
22406
22407         # then make the closing side comment text
22408         if ($block_label) { $block_label .= " " }
22409         my $token =
22410 "$rOpts->{'closing-side-comment-prefix'} $block_label$block_type_to_go[$i_terminal]";
22411
22412         # append any extra descriptive text collected above
22413         if ( $i_block_leading_text == $i_terminal ) {
22414             $token .= $block_leading_text;
22415         }
22416
22417         $token = balance_csc_text($token)
22418           if $rOpts->{'closing-side-comments-balanced'};
22419
22420         $token =~ s/\s*$//;    # trim any trailing whitespace
22421
22422         # handle case of existing closing side comment
22423         if ($have_side_comment) {
22424
22425             # warn if requested and tokens differ significantly
22426             if ( $rOpts->{'closing-side-comment-warnings'} ) {
22427                 my $old_csc = $tokens_to_go[$max_index_to_go];
22428                 my $new_csc = $token;
22429                 $new_csc =~ s/\s+//g;            # trim all whitespace
22430                 $old_csc =~ s/\s+//g;            # trim all whitespace
22431                 $new_csc =~ s/[\]\)\}\s]*$//;    # trim trailing structures
22432                 $old_csc =~ s/[\]\)\}\s]*$//;    # trim trailing structures
22433                 $new_csc =~ s/(\.\.\.)$//;       # trim trailing '...'
22434                 my $new_trailing_dots = $1;
22435                 $old_csc =~ s/(\.\.\.)\s*$//;    # trim trailing '...'
22436
22437                 # Patch to handle multiple closing side comments at
22438                 # else and elsif's.  These have become too complicated
22439                 # to check, so if we see an indication of
22440                 # '[ if' or '[ # elsif', then assume they were made
22441                 # by perltidy.
22442                 if ( $block_type_to_go[$i_terminal] eq 'else' ) {
22443                     if ( $old_csc =~ /\[\s*elsif/ ) { $old_csc = $new_csc }
22444                 }
22445                 elsif ( $block_type_to_go[$i_terminal] eq 'elsif' ) {
22446                     if ( $old_csc =~ /\[\s*if/ ) { $old_csc = $new_csc }
22447                 }
22448
22449                 # if old comment is contained in new comment,
22450                 # only compare the common part.
22451                 if ( length($new_csc) > length($old_csc) ) {
22452                     $new_csc = substr( $new_csc, 0, length($old_csc) );
22453                 }
22454
22455                 # if the new comment is shorter and has been limited,
22456                 # only compare the common part.
22457                 if ( length($new_csc) < length($old_csc)
22458                     && $new_trailing_dots )
22459                 {
22460                     $old_csc = substr( $old_csc, 0, length($new_csc) );
22461                 }
22462
22463                 # any remaining difference?
22464                 if ( $new_csc ne $old_csc ) {
22465
22466                     # just leave the old comment if we are below the threshold
22467                     # for creating side comments
22468                     if ( $block_line_count <
22469                         $rOpts->{'closing-side-comment-interval'} )
22470                     {
22471                         $token = undef;
22472                     }
22473
22474                     # otherwise we'll make a note of it
22475                     else {
22476
22477                         warning(
22478 "perltidy -cscw replaced: $tokens_to_go[$max_index_to_go]\n"
22479                         );
22480
22481                         # save the old side comment in a new trailing block
22482                         # comment
22483                         my $timestamp = "";
22484                         if ( $rOpts->{'timestamp'} ) {
22485                             my ( $day, $month, $year ) = (localtime)[ 3, 4, 5 ];
22486                             $year  += 1900;
22487                             $month += 1;
22488                             $timestamp = "$year-$month-$day";
22489                         }
22490                         $cscw_block_comment =
22491 "## perltidy -cscw $timestamp: $tokens_to_go[$max_index_to_go]";
22492 ## "## perltidy -cscw $year-$month-$day: $tokens_to_go[$max_index_to_go]";
22493                     }
22494                 }
22495                 else {
22496
22497                     # No differences.. we can safely delete old comment if we
22498                     # are below the threshold
22499                     if ( $block_line_count <
22500                         $rOpts->{'closing-side-comment-interval'} )
22501                     {
22502                         $token = undef;
22503                         $self->unstore_token_to_go()
22504                           if ( $types_to_go[$max_index_to_go] eq '#' );
22505                         $self->unstore_token_to_go()
22506                           if ( $types_to_go[$max_index_to_go] eq 'b' );
22507                     }
22508                 }
22509             }
22510
22511             # switch to the new csc (unless we deleted it!)
22512             if ($token) {
22513                 $tokens_to_go[$max_index_to_go] = $token;
22514                 my $K = $K_to_go[$max_index_to_go];
22515                 $rLL->[$K]->[_TOKEN_] = $token;
22516                 $rLL->[$K]->[_TOKEN_LENGTH_] =
22517                   length($token);    # NOTE: length no longer important
22518             }
22519         }
22520
22521         # handle case of NO existing closing side comment
22522         else {
22523
22524             # To avoid inserting a new token in the token arrays, we
22525             # will just return the new side comment so that it can be
22526             # inserted just before it is needed in the call to the
22527             # vertical aligner.
22528             $closing_side_comment = $token;
22529         }
22530     }
22531     return ( $closing_side_comment, $cscw_block_comment );
22532 }
22533
22534 ############################
22535 # CODE SECTION 15: Summarize
22536 ############################
22537
22538 sub wrapup {
22539
22540     # This is the last routine called when a file is formatted.
22541     # Flush buffer and write any informative messages
22542     my $self = shift;
22543
22544     $self->flush();
22545     my $file_writer_object = $self->[_file_writer_object_];
22546     $file_writer_object->decrement_output_line_number()
22547       ;    # fix up line number since it was incremented
22548     we_are_at_the_last_line();
22549     my $added_semicolon_count    = $self->[_added_semicolon_count_];
22550     my $first_added_semicolon_at = $self->[_first_added_semicolon_at_];
22551     my $last_added_semicolon_at  = $self->[_last_added_semicolon_at_];
22552
22553     if ( $added_semicolon_count > 0 ) {
22554         my $first = ( $added_semicolon_count > 1 ) ? "First" : "";
22555         my $what =
22556           ( $added_semicolon_count > 1 ) ? "semicolons were" : "semicolon was";
22557         write_logfile_entry("$added_semicolon_count $what added:\n");
22558         write_logfile_entry(
22559             "  $first at input line $first_added_semicolon_at\n");
22560
22561         if ( $added_semicolon_count > 1 ) {
22562             write_logfile_entry(
22563                 "   Last at input line $last_added_semicolon_at\n");
22564         }
22565         write_logfile_entry("  (Use -nasc to prevent semicolon addition)\n");
22566         write_logfile_entry("\n");
22567     }
22568
22569     my $deleted_semicolon_count    = $self->[_deleted_semicolon_count_];
22570     my $first_deleted_semicolon_at = $self->[_first_deleted_semicolon_at_];
22571     my $last_deleted_semicolon_at  = $self->[_last_deleted_semicolon_at_];
22572     if ( $deleted_semicolon_count > 0 ) {
22573         my $first = ( $deleted_semicolon_count > 1 ) ? "First" : "";
22574         my $what =
22575           ( $deleted_semicolon_count > 1 )
22576           ? "semicolons were"
22577           : "semicolon was";
22578         write_logfile_entry(
22579             "$deleted_semicolon_count unnecessary $what deleted:\n");
22580         write_logfile_entry(
22581             "  $first at input line $first_deleted_semicolon_at\n");
22582
22583         if ( $deleted_semicolon_count > 1 ) {
22584             write_logfile_entry(
22585                 "   Last at input line $last_deleted_semicolon_at\n");
22586         }
22587         write_logfile_entry("  (Use -ndsm to prevent semicolon deletion)\n");
22588         write_logfile_entry("\n");
22589     }
22590
22591     my $embedded_tab_count    = $self->[_embedded_tab_count_];
22592     my $first_embedded_tab_at = $self->[_first_embedded_tab_at_];
22593     my $last_embedded_tab_at  = $self->[_last_embedded_tab_at_];
22594     if ( $embedded_tab_count > 0 ) {
22595         my $first = ( $embedded_tab_count > 1 ) ? "First" : "";
22596         my $what =
22597           ( $embedded_tab_count > 1 )
22598           ? "quotes or patterns"
22599           : "quote or pattern";
22600         write_logfile_entry("$embedded_tab_count $what had embedded tabs:\n");
22601         write_logfile_entry(
22602 "This means the display of this script could vary with device or software\n"
22603         );
22604         write_logfile_entry("  $first at input line $first_embedded_tab_at\n");
22605
22606         if ( $embedded_tab_count > 1 ) {
22607             write_logfile_entry(
22608                 "   Last at input line $last_embedded_tab_at\n");
22609         }
22610         write_logfile_entry("\n");
22611     }
22612
22613     my $first_tabbing_disagreement = $self->[_first_tabbing_disagreement_];
22614     my $last_tabbing_disagreement  = $self->[_last_tabbing_disagreement_];
22615     my $tabbing_disagreement_count = $self->[_tabbing_disagreement_count_];
22616     my $in_tabbing_disagreement    = $self->[_in_tabbing_disagreement_];
22617
22618     if ($first_tabbing_disagreement) {
22619         write_logfile_entry(
22620 "First indentation disagreement seen at input line $first_tabbing_disagreement\n"
22621         );
22622     }
22623
22624     my $first_btd = $self->[_first_brace_tabbing_disagreement_];
22625     if ($first_btd) {
22626         my $msg =
22627 "First closing brace indentation disagreement started at input line $first_btd\n";
22628         write_logfile_entry($msg);
22629
22630         # leave a hint in the .ERR file if there was a brace error
22631         if ( get_saw_brace_error() ) { warning("NOTE: $msg") }
22632     }
22633
22634     my $in_btd = $self->[_in_brace_tabbing_disagreement_];
22635     if ($in_btd) {
22636         my $msg =
22637 "Ending with brace indentation disagreement which started at input line $in_btd\n";
22638         write_logfile_entry($msg);
22639
22640         # leave a hint in the .ERR file if there was a brace error
22641         if ( get_saw_brace_error() ) { warning("NOTE: $msg") }
22642     }
22643
22644     if ($in_tabbing_disagreement) {
22645         my $msg =
22646 "Ending with indentation disagreement which started at input line $in_tabbing_disagreement\n";
22647         write_logfile_entry($msg);
22648     }
22649     else {
22650
22651         if ($last_tabbing_disagreement) {
22652
22653             write_logfile_entry(
22654 "Last indentation disagreement seen at input line $last_tabbing_disagreement\n"
22655             );
22656         }
22657         else {
22658             write_logfile_entry("No indentation disagreement seen\n");
22659         }
22660     }
22661
22662     if ($first_tabbing_disagreement) {
22663         write_logfile_entry(
22664 "Note: Indentation disagreement detection is not accurate for outdenting and -lp.\n"
22665         );
22666     }
22667     write_logfile_entry("\n");
22668
22669     my $vao = $self->[_vertical_aligner_object_];
22670     $vao->report_anything_unusual();
22671
22672     $file_writer_object->report_line_length_errors();
22673
22674     $self->[_converged_] = $file_writer_object->get_convergence_check()
22675       || $rOpts->{'indent-only'};
22676
22677     return;
22678 }
22679
22680 } ## end package Perl::Tidy::Formatter
22681 1;