lib/Perl/Tidy/Formatter.pm

   1 #####################################################################
   2 #
   3 # The Perl::Tidy::Formatter package adds indentation, whitespace, and
   4 # line breaks to the token stream
   5 #
   6 #####################################################################
   7
   8 # Index...
   9 # CODE SECTION 1: Preliminary code, global definitions and sub new
  10 #                 sub new
  11 # CODE SECTION 2: Some Basic Utilities
  12 # CODE SECTION 3: Check and process options
  13 #                 sub check_options
  14 # CODE SECTION 4: Receive lines from the tokenizer
  15 #                 sub write_line
  16 # CODE SECTION 5: Pre-process the entire file
  17 #                 sub finish_formatting
  18 # CODE SECTION 6: Process line-by-line
  19 #                 sub process_all_lines
  20 # CODE SECTION 7: Process lines of code
  21 #                 process_line_of_CODE
  22 # CODE SECTION 8: Utilities for setting breakpoints
  23 #                 sub set_forced_breakpoint
  24 # CODE SECTION 9: Process batches of code
  25 #                 sub grind_batch_of_CODE
  26 # CODE SECTION 10: Code to break long statments
  27 #                  sub break_long_lines
  28 # CODE SECTION 11: Code to break long lists
  29 #                  sub break_lists
  30 # CODE SECTION 12: Code for setting indentation
  31 # CODE SECTION 13: Preparing batch of lines for vertical alignment
  32 #                  sub convey_batch_to_vertical_aligner
  33 # CODE SECTION 14: Code for creating closing side comments
  34 #                  sub add_closing_side_comment
  35 # CODE SECTION 15: Summarize
  36 #                  sub wrapup
  37
  38 #######################################################################
  39 # CODE SECTION 1: Preliminary code and global definitions up to sub new
  40 #######################################################################
  41
  42 package Perl::Tidy::Formatter;
  43 use strict;
  44 use warnings;
  45
  46 # This flag gets switched on during automated testing for extra checking
  47 use constant DEVEL_MODE => 0;
  48
  49 { #<<< A non-indenting brace to contain all lexical variables
  50
  51 use Carp;
  52 our $VERSION = '20220217';
  53
  54 # The Tokenizer will be loaded with the Formatter
  55 ##use Perl::Tidy::Tokenizer;    # for is_keyword()
  56
  57 sub AUTOLOAD {
  58
  59     # Catch any undefined sub calls so that we are sure to get
  60     # some diagnostic information.  This sub should never be called
  61     # except for a programming error.
  62     our $AUTOLOAD;
  63     return if ( $AUTOLOAD =~ /\bDESTROY$/ );
  64     my ( $pkg, $fname, $lno ) = caller();
  65     my $my_package = __PACKAGE__;
  66     print STDERR <<EOM;
  67 ======================================================================
  68 Error detected in package '$my_package', version $VERSION
  69 Received unexpected AUTOLOAD call for sub '$AUTOLOAD'
  70 Called from package: '$pkg'
  71 Called from File '$fname'  at line '$lno'
  72 This error is probably due to a recent programming change
  73 ======================================================================
  74 EOM
  75     exit 1;
  76 }
  77
  78 sub DESTROY {
  79     my $self = shift;
  80     $self->_decrement_count();
  81     return;
  82 }
  83
  84 sub Die {
  85     my ($msg) = @_;
  86     Perl::Tidy::Die($msg);
  87     croak "unexpected return from Perl::Tidy::Die";
  88 }
  89
  90 sub Warn {
  91     my ($msg) = @_;
  92     Perl::Tidy::Warn($msg);
  93     return;
  94 }
  95
  96 sub Fault {
  97     my ($msg) = @_;
  98
  99     # This routine is called for errors that really should not occur
 100     # except if there has been a bug introduced by a recent program change.
 101     # Please add comments at calls to Fault to explain why the call
 102     # should not occur, and where to look to fix it.
 103     my ( $package0, $filename0, $line0, $subroutine0 ) = caller(0);
 104     my ( $package1, $filename1, $line1, $subroutine1 ) = caller(1);
 105     my ( $package2, $filename2, $line2, $subroutine2 ) = caller(2);
 106     my $input_stream_name = get_input_stream_name();
 107
 108     Die(<<EOM);
 109 ==============================================================================
 110 While operating on input stream with name: '$input_stream_name'
 111 A fault was detected at line $line0 of sub '$subroutine1'
 112 in file '$filename1'
 113 which was called from line $line1 of sub '$subroutine2'
 114 Message: '$msg'
 115 This is probably an error introduced by a recent programming change.
 116 Perl::Tidy::Formatter.pm reports VERSION='$VERSION'.
 117 ==============================================================================
 118 EOM
 119
 120     # We shouldn't get here, but this return is to keep Perl-Critic from
 121     # complaining.
 122     return;
 123 }
 124
 125 sub Exit {
 126     my ($msg) = @_;
 127     Perl::Tidy::Exit($msg);
 128     croak "unexpected return from Perl::Tidy::Exit";
 129 }
 130
 131 # Global variables ...
 132 my (
 133
 134     #-----------------------------------------------------------------
 135     # Section 1: Global variables which are either always constant or
 136     # are constant after being configured by user-supplied
 137     # parameters.  They remain constant as a file is being processed.
 138     #-----------------------------------------------------------------
 139
 140     # user parameters and shortcuts
 141     $rOpts,
 142     $rOpts_add_newlines,
 143     $rOpts_add_whitespace,
 144     $rOpts_blank_lines_after_opening_block,
 145     $rOpts_block_brace_tightness,
 146     $rOpts_block_brace_vertical_tightness,
 147     $rOpts_break_after_labels,
 148     $rOpts_break_at_old_attribute_breakpoints,
 149     $rOpts_break_at_old_comma_breakpoints,
 150     $rOpts_break_at_old_keyword_breakpoints,
 151     $rOpts_break_at_old_logical_breakpoints,
 152     $rOpts_break_at_old_semicolon_breakpoints,
 153     $rOpts_break_at_old_ternary_breakpoints,
 154     $rOpts_break_open_paren_list,
 155     $rOpts_closing_side_comments,
 156     $rOpts_closing_side_comment_else_flag,
 157     $rOpts_closing_side_comment_maximum_text,
 158     $rOpts_comma_arrow_breakpoints,
 159     $rOpts_continuation_indentation,
 160     $rOpts_delete_closing_side_comments,
 161     $rOpts_delete_old_whitespace,
 162     $rOpts_delete_side_comments,
 163     $rOpts_extended_continuation_indentation,
 164     $rOpts_format_skipping,
 165     $rOpts_freeze_whitespace,
 166     $rOpts_function_paren_vertical_alignment,
 167     $rOpts_fuzzy_line_length,
 168     $rOpts_ignore_old_breakpoints,
 169     $rOpts_ignore_side_comment_lengths,
 170     $rOpts_indent_closing_brace,
 171     $rOpts_indent_columns,
 172     $rOpts_indent_only,
 173     $rOpts_keep_interior_semicolons,
 174     $rOpts_line_up_parentheses,
 175     $rOpts_logical_padding,
 176     $rOpts_maximum_consecutive_blank_lines,
 177     $rOpts_maximum_fields_per_table,
 178     $rOpts_maximum_line_length,
 179     $rOpts_one_line_block_semicolons,
 180     $rOpts_opening_brace_always_on_right,
 181     $rOpts_outdent_keywords,
 182     $rOpts_outdent_labels,
 183     $rOpts_outdent_long_comments,
 184     $rOpts_outdent_long_quotes,
 185     $rOpts_outdent_static_block_comments,
 186     $rOpts_recombine,
 187     $rOpts_short_concatenation_item_length,
 188     $rOpts_stack_closing_block_brace,
 189     $rOpts_static_block_comments,
 190     $rOpts_sub_alias_list,
 191     $rOpts_tee_block_comments,
 192     $rOpts_tee_pod,
 193     $rOpts_tee_side_comments,
 194     $rOpts_variable_maximum_line_length,
 195     $rOpts_valign,
 196     $rOpts_valign_code,
 197     $rOpts_valign_side_comments,
 198     $rOpts_whitespace_cycle,
 199     $rOpts_extended_line_up_parentheses,
 200
 201     # Static hashes initialized in a BEGIN block
 202     %is_assignment,
 203     %is_if_unless_and_or_last_next_redo_return,
 204     %is_if_elsif_else_unless_while_until_for_foreach,
 205     %is_if_unless_while_until_for_foreach,
 206     %is_last_next_redo_return,
 207     %is_if_unless,
 208     %is_and_or,
 209     %is_chain_operator,
 210     %is_block_without_semicolon,
 211     %ok_to_add_semicolon_for_block_type,
 212     %is_opening_type,
 213     %is_closing_type,
 214     %is_opening_token,
 215     %is_closing_token,
 216     %is_equal_or_fat_comma,
 217     %is_counted_type,
 218     %is_opening_sequence_token,
 219     %is_closing_sequence_token,
 220     %is_container_label_type,
 221
 222     @all_operators,
 223
 224     # Initialized in check_options. These are constants and could
 225     # just as well be initialized in a BEGIN block.
 226     %is_do_follower,
 227     %is_if_brace_follower,
 228     %is_else_brace_follower,
 229     %is_anon_sub_brace_follower,
 230     %is_anon_sub_1_brace_follower,
 231     %is_other_brace_follower,
 232
 233     # Initialized and re-initialized in sub initialize_grep_and_friends;
 234     # These can be modified by grep-alias-list
 235     %is_sort_map_grep,
 236     %is_sort_map_grep_eval,
 237     %is_sort_map_grep_eval_do,
 238     %is_block_with_ci,
 239     %is_keyword_returning_list,
 240     %block_type_map,
 241
 242     # Initialized in sub initialize_whitespace_hashes;
 243     # Some can be modified according to user parameters.
 244     %binary_ws_rules,
 245     %want_left_space,
 246     %want_right_space,
 247
 248     # Configured in sub initialize_bond_strength_hashes
 249     %right_bond_strength,
 250     %left_bond_strength,
 251
 252     # Hashes for -kbb=s and -kba=s
 253     %keep_break_before_type,
 254     %keep_break_after_type,
 255
 256     # Initialized in check_options, modified by prepare_cuddled_block_types:
 257     %want_one_line_block,
 258
 259     # Initialized in sub prepare_cuddled_block_types
 260     $rcuddled_block_types,
 261
 262     # Initialized and configured in check_optioms
 263     %outdent_keyword,
 264     %keyword_paren_inner_tightness,
 265
 266     %want_break_before,
 267
 268     %break_before_container_types,
 269     %container_indentation_options,
 270
 271     %space_after_keyword,
 272
 273     %tightness,
 274     %matching_token,
 275
 276     %opening_vertical_tightness,
 277     %closing_vertical_tightness,
 278     %closing_token_indentation,
 279     $some_closing_token_indentation,
 280
 281     %opening_token_right,
 282     %stack_opening_token,
 283     %stack_closing_token,
 284
 285     %weld_nested_exclusion_rules,
 286     %line_up_parentheses_control_hash,
 287     $line_up_parentheses_control_is_lxpl,
 288
 289     # regex patterns for text identification.
 290     # Most are initialized in a sub make_**_pattern during configuration.
 291     # Most can be configured by user parameters.
 292     $SUB_PATTERN,
 293     $ASUB_PATTERN,
 294     $static_block_comment_pattern,
 295     $static_side_comment_pattern,
 296     $format_skipping_pattern_begin,
 297     $format_skipping_pattern_end,
 298     $non_indenting_brace_pattern,
 299     $bl_exclusion_pattern,
 300     $bl_pattern,
 301     $bli_exclusion_pattern,
 302     $bli_pattern,
 303     $block_brace_vertical_tightness_pattern,
 304     $blank_lines_after_opening_block_pattern,
 305     $blank_lines_before_closing_block_pattern,
 306     $keyword_group_list_pattern,
 307     $keyword_group_list_comment_pattern,
 308     $closing_side_comment_prefix_pattern,
 309     $closing_side_comment_list_pattern,
 310
 311     # Table to efficiently find indentation and max line length
 312     # from level.
 313     @maximum_line_length_at_level,
 314     @maximum_text_length_at_level,
 315     $stress_level_alpha,
 316     $stress_level_beta,
 317
 318     # Total number of sequence items in a weld, for quick checks
 319     $total_weld_count,
 320
 321     #--------------------------------------------------------
 322     # Section 2: Work arrays for the current batch of tokens.
 323     #--------------------------------------------------------
 324
 325     # These are re-initialized for each batch of code
 326     # in sub initialize_batch_variables.
 327     $max_index_to_go,
 328     @block_type_to_go,
 329     @type_sequence_to_go,
 330     @bond_strength_to_go,
 331     @forced_breakpoint_to_go,
 332     @token_lengths_to_go,
 333     @summed_lengths_to_go,
 334     @levels_to_go,
 335     @leading_spaces_to_go,
 336     @reduced_spaces_to_go,
 337     @standard_spaces_to_go,
 338     @mate_index_to_go,
 339     @ci_levels_to_go,
 340     @nesting_depth_to_go,
 341     @nobreak_to_go,
 342     @old_breakpoint_to_go,
 343     @tokens_to_go,
 344     @K_to_go,
 345     @types_to_go,
 346     @inext_to_go,
 347     @iprev_to_go,
 348     @parent_seqno_to_go,
 349
 350 );
 351
 352 BEGIN {
 353
 354     # Index names for token variables.
 355     # Do not combine with other BEGIN blocks (c101).
 356     my $i = 0;
 357     use constant {
 358         _CI_LEVEL_          => $i++,
 359         _CUMULATIVE_LENGTH_ => $i++,
 360         _LINE_INDEX_        => $i++,
 361         _KNEXT_SEQ_ITEM_    => $i++,
 362         _LEVEL_             => $i++,
 363         _TOKEN_             => $i++,
 364         _TOKEN_LENGTH_      => $i++,
 365         _TYPE_              => $i++,
 366         _TYPE_SEQUENCE_     => $i++,
 367
 368         # Number of token variables; must be last in list:
 369         _NVARS => $i++,
 370     };
 371 }
 372
 373 BEGIN {
 374
 375     # Index names for $self variables.
 376     # Do not combine with other BEGIN blocks (c101).
 377     my $i = 0;
 378     use constant {
 379         _rlines_                    => $i++,
 380         _rlines_new_                => $i++,
 381         _rLL_                       => $i++,
 382         _Klimit_                    => $i++,
 383         _rdepth_of_opening_seqno_   => $i++,
 384         _rSS_                       => $i++,
 385         _Iss_opening_               => $i++,
 386         _Iss_closing_               => $i++,
 387         _rblock_type_of_seqno_      => $i++,
 388         _ris_asub_block_            => $i++,
 389         _ris_sub_block_             => $i++,
 390         _K_opening_container_       => $i++,
 391         _K_closing_container_       => $i++,
 392         _K_opening_ternary_         => $i++,
 393         _K_closing_ternary_         => $i++,
 394         _K_first_seq_item_          => $i++,
 395         _rK_phantom_semicolons_     => $i++,
 396         _rtype_count_by_seqno_      => $i++,
 397         _ris_function_call_paren_   => $i++,
 398         _rlec_count_by_seqno_       => $i++,
 399         _ris_broken_container_      => $i++,
 400         _ris_permanently_broken_    => $i++,
 401         _rhas_list_                 => $i++,
 402         _rhas_broken_list_          => $i++,
 403         _rhas_broken_list_with_lec_ => $i++,
 404         _rhas_code_block_           => $i++,
 405         _rhas_broken_code_block_    => $i++,
 406         _rhas_ternary_              => $i++,
 407         _ris_excluded_lp_container_ => $i++,
 408         _rlp_object_by_seqno_       => $i++,
 409         _rwant_reduced_ci_          => $i++,
 410         _rno_xci_by_seqno_          => $i++,
 411         _rbrace_left_               => $i++,
 412         _ris_bli_container_         => $i++,
 413         _rparent_of_seqno_          => $i++,
 414         _rchildren_of_seqno_        => $i++,
 415         _ris_list_by_seqno_         => $i++,
 416         _rbreak_container_          => $i++,
 417         _rshort_nested_             => $i++,
 418         _length_function_           => $i++,
 419         _is_encoded_data_           => $i++,
 420         _fh_tee_                    => $i++,
 421         _sink_object_               => $i++,
 422         _file_writer_object_        => $i++,
 423         _vertical_aligner_object_   => $i++,
 424         _logger_object_             => $i++,
 425         _radjusted_levels_          => $i++,
 426         _this_batch_                => $i++,
 427
 428         _last_output_short_opening_token_ => $i++,
 429
 430         _last_line_leading_type_       => $i++,
 431         _last_line_leading_level_      => $i++,
 432         _last_last_line_leading_level_ => $i++,
 433
 434         _added_semicolon_count_    => $i++,
 435         _first_added_semicolon_at_ => $i++,
 436         _last_added_semicolon_at_  => $i++,
 437
 438         _deleted_semicolon_count_    => $i++,
 439         _first_deleted_semicolon_at_ => $i++,
 440         _last_deleted_semicolon_at_  => $i++,
 441
 442         _embedded_tab_count_    => $i++,
 443         _first_embedded_tab_at_ => $i++,
 444         _last_embedded_tab_at_  => $i++,
 445
 446         _first_tabbing_disagreement_       => $i++,
 447         _last_tabbing_disagreement_        => $i++,
 448         _tabbing_disagreement_count_       => $i++,
 449         _in_tabbing_disagreement_          => $i++,
 450         _first_brace_tabbing_disagreement_ => $i++,
 451         _in_brace_tabbing_disagreement_    => $i++,
 452
 453         _saw_VERSION_in_this_file_ => $i++,
 454         _saw_END_or_DATA_          => $i++,
 455
 456         _rK_weld_left_         => $i++,
 457         _rK_weld_right_        => $i++,
 458         _rweld_len_right_at_K_ => $i++,
 459
 460         _rspecial_side_comment_type_ => $i++,
 461
 462         _rseqno_controlling_my_ci_    => $i++,
 463         _ris_seqno_controlling_ci_    => $i++,
 464         _save_logfile_                => $i++,
 465         _maximum_level_               => $i++,
 466         _maximum_level_at_line_       => $i++,
 467         _maximum_BLOCK_level_         => $i++,
 468         _maximum_BLOCK_level_at_line_ => $i++,
 469
 470         _rKrange_code_without_comments_ => $i++,
 471         _rbreak_before_Kfirst_          => $i++,
 472         _rbreak_after_Klast_            => $i++,
 473         _rwant_container_open_          => $i++,
 474         _converged_                     => $i++,
 475
 476         _rstarting_multiline_qw_seqno_by_K_ => $i++,
 477         _rending_multiline_qw_seqno_by_K_   => $i++,
 478         _rKrange_multiline_qw_by_seqno_     => $i++,
 479         _rmultiline_qw_has_extra_level_     => $i++,
 480
 481         _rcollapsed_length_by_seqno_       => $i++,
 482         _rbreak_before_container_by_seqno_ => $i++,
 483         _ris_essential_old_breakpoint_     => $i++,
 484         _roverride_cab3_                   => $i++,
 485         _ris_assigned_structure_           => $i++,
 486
 487         _LAST_SELF_INDEX_ => $i - 1,
 488     };
 489 }
 490
 491 BEGIN {
 492
 493     # Index names for batch variables.
 494     # Do not combine with other BEGIN blocks (c101).
 495     # These are stored in _this_batch_, which is a sub-array of $self.
 496     my $i = 0;
 497     use constant {
 498         _starting_in_quote_          => $i++,
 499         _ending_in_quote_            => $i++,
 500         _is_static_block_comment_    => $i++,
 501         _ri_first_                   => $i++,
 502         _ri_last_                    => $i++,
 503         _do_not_pad_                 => $i++,
 504         _peak_batch_size_            => $i++,
 505         _max_index_to_go_            => $i++,
 506         _batch_count_                => $i++,
 507         _rix_seqno_controlling_ci_   => $i++,
 508         _batch_CODE_type_            => $i++,
 509         _ri_starting_one_line_block_ => $i++,
 510     };
 511 }
 512
 513 BEGIN {
 514
 515     # Sequence number assigned to the root of sequence tree.
 516     # The minimum of the actual sequences numbers is 4, so we can use 1
 517     use constant SEQ_ROOT => 1;
 518
 519     # Codes for insertion and deletion of blanks
 520     use constant DELETE => 0;
 521     use constant STABLE => 1;
 522     use constant INSERT => 2;
 523
 524     # whitespace codes
 525     use constant WS_YES      => 1;
 526     use constant WS_OPTIONAL => 0;
 527     use constant WS_NO       => -1;
 528
 529     # Token bond strengths.
 530     use constant NO_BREAK    => 10000;
 531     use constant VERY_STRONG => 100;
 532     use constant STRONG      => 2.1;
 533     use constant NOMINAL     => 1.1;
 534     use constant WEAK        => 0.8;
 535     use constant VERY_WEAK   => 0.55;
 536
 537     # values for testing indexes in output array
 538     use constant UNDEFINED_INDEX => -1;
 539
 540     # Maximum number of little messages; probably need not be changed.
 541     use constant MAX_NAG_MESSAGES => 6;
 542
 543     # This is the decimal range of printable characters in ASCII.  It is used to
 544     # make quick preliminary checks before resorting to using a regex.
 545     use constant ORD_PRINTABLE_MIN => 33;
 546     use constant ORD_PRINTABLE_MAX => 126;
 547
 548     # Initialize constant hashes ...
 549     my @q;
 550
 551     @q = qw(
 552       = **= += *= &= <<= &&=
 553       -= /= |= >>= ||= //=
 554       .= %= ^=
 555       x=
 556     );
 557     @is_assignment{@q} = (1) x scalar(@q);
 558
 559     @q = qw(is if unless and or err last next redo return);
 560     @is_if_unless_and_or_last_next_redo_return{@q} = (1) x scalar(@q);
 561
 562     # These block types may have text between the keyword and opening
 563     # curly.  Note: 'else' does not, but must be included to allow trailing
 564     # if/elsif text to be appended.
 565     # patch for SWITCH/CASE: added 'case' and 'when'
 566     @q = qw(if elsif else unless while until for foreach case when catch);
 567     @is_if_elsif_else_unless_while_until_for_foreach{@q} =
 568       (1) x scalar(@q);
 569
 570     @q = qw(if unless while until for foreach);
 571     @is_if_unless_while_until_for_foreach{@q} =
 572       (1) x scalar(@q);
 573
 574     @q = qw(last next redo return);
 575     @is_last_next_redo_return{@q} = (1) x scalar(@q);
 576
 577     # Map related block names into a common name to allow vertical alignment
 578     # used by sub make_alignment_patterns. Note: this is normally unchanged,
 579     # but it contains 'grep' and can be re-initized in
 580     # sub initialize_grep_and_friends in a testing mode.
 581     %block_type_map = (
 582         'unless'  => 'if',
 583         'else'    => 'if',
 584         'elsif'   => 'if',
 585         'when'    => 'if',
 586         'default' => 'if',
 587         'case'    => 'if',
 588         'sort'    => 'map',
 589         'grep'    => 'map',
 590     );
 591
 592     @q = qw(if unless);
 593     @is_if_unless{@q} = (1) x scalar(@q);
 594
 595     @q = qw(and or err);
 596     @is_and_or{@q} = (1) x scalar(@q);
 597
 598     # Identify certain operators which often occur in chains.
 599     # Note: the minus (-) causes a side effect of padding of the first line in
 600     # something like this (by sub set_logical_padding):
 601     #    Checkbutton => 'Transmission checked',
 602     #   -variable    => \$TRANS
 603     # This usually improves appearance so it seems ok.
 604     @q = qw(&& || and or : ? . + - * /);
 605     @is_chain_operator{@q} = (1) x scalar(@q);
 606
 607     # Operators that the user can request break before or after.
 608     # Note that some are keywords
 609     @all_operators = qw(% + - * / x != == >= <= =~ !~ < > | &
 610       = **= += *= &= <<= &&= -= /= |= >>= ||= //= .= %= ^= x=
 611       . : ? && || and or err xor
 612     );
 613
 614     # We can remove semicolons after blocks preceded by these keywords
 615     @q =
 616       qw(BEGIN END CHECK INIT AUTOLOAD DESTROY UNITCHECK continue if elsif else
 617       unless while until for foreach given when default);
 618     @is_block_without_semicolon{@q} = (1) x scalar(@q);
 619
 620     # We will allow semicolons to be added within these block types
 621     # as well as sub and package blocks.
 622     # NOTES:
 623     # 1. Note that these keywords are omitted:
 624     #     switch case given when default sort map grep
 625     # 2. It is also ok to add for sub and package blocks and a labeled block
 626     # 3. But not okay for other perltidy types including:
 627     #     { } ; G t
 628     # 4. Test files: blktype.t, blktype1.t, semicolon.t
 629     @q =
 630       qw( BEGIN END CHECK INIT AUTOLOAD DESTROY UNITCHECK continue if elsif else
 631       unless do while until eval for foreach );
 632     @ok_to_add_semicolon_for_block_type{@q} = (1) x scalar(@q);
 633
 634     # 'L' is token for opening { at hash key
 635     @q = qw< L { ( [ >;
 636     @is_opening_type{@q} = (1) x scalar(@q);
 637
 638     # 'R' is token for closing } at hash key
 639     @q = qw< R } ) ] >;
 640     @is_closing_type{@q} = (1) x scalar(@q);
 641
 642     @q = qw< { ( [ >;
 643     @is_opening_token{@q} = (1) x scalar(@q);
 644
 645     @q = qw< } ) ] >;
 646     @is_closing_token{@q} = (1) x scalar(@q);
 647
 648     @q = qw< { ( [ ? >;
 649     @is_opening_sequence_token{@q} = (1) x scalar(@q);
 650
 651     @q = qw< } ) ] : >;
 652     @is_closing_sequence_token{@q} = (1) x scalar(@q);
 653
 654     # a hash needed by sub break_lists for labeling containers
 655     @q = qw( k => && || ? : . );
 656     @is_container_label_type{@q} = (1) x scalar(@q);
 657
 658     # Braces -bbht etc must follow these. Note: experimentation with
 659     # including a simple comma shows that it adds little and can lead
 660     # to poor formatting in complex lists.
 661     @q = qw( = => );
 662     @is_equal_or_fat_comma{@q} = (1) x scalar(@q);
 663
 664     @q = qw( => ; h f );
 665     push @q, ',';
 666     @is_counted_type{@q} = (1) x scalar(@q);
 667
 668 }
 669
 670 {    ## begin closure to count instances
 671
 672     # methods to count instances
 673     my $_count = 0;
 674     sub get_count        { return $_count; }
 675     sub _increment_count { return ++$_count }
 676     sub _decrement_count { return --$_count }
 677 } ## end closure to count instances
 678
 679 sub new {
 680
 681     my ( $class, @args ) = @_;
 682
 683     # we are given an object with a write_line() method to take lines
 684     my %defaults = (
 685         sink_object        => undef,
 686         diagnostics_object => undef,
 687         logger_object      => undef,
 688         length_function    => sub { return length( $_[0] ) },
 689         is_encoded_data    => "",
 690         fh_tee             => undef,
 691     );
 692     my %args = ( %defaults, @args );
 693
 694     my $length_function    = $args{length_function};
 695     my $is_encoded_data    = $args{is_encoded_data};
 696     my $fh_tee             = $args{fh_tee};
 697     my $logger_object      = $args{logger_object};
 698     my $diagnostics_object = $args{diagnostics_object};
 699
 700     # we create another object with a get_line() and peek_ahead() method
 701     my $sink_object = $args{sink_object};
 702     my $file_writer_object =
 703       Perl::Tidy::FileWriter->new( $sink_object, $rOpts, $logger_object );
 704
 705     # initialize closure variables...
 706     set_logger_object($logger_object);
 707     set_diagnostics_object($diagnostics_object);
 708     initialize_lp_vars();
 709     initialize_csc_vars();
 710     initialize_break_lists();
 711     initialize_undo_ci();
 712     initialize_process_line_of_CODE();
 713     initialize_grind_batch_of_CODE();
 714     initialize_final_indentation_adjustment();
 715     initialize_postponed_breakpoint();
 716     initialize_batch_variables();
 717     initialize_forced_breakpoint_vars();
 718     initialize_write_line();
 719
 720     my $vertical_aligner_object = Perl::Tidy::VerticalAligner->new(
 721         rOpts              => $rOpts,
 722         file_writer_object => $file_writer_object,
 723         logger_object      => $logger_object,
 724         diagnostics_object => $diagnostics_object,
 725         length_function    => $length_function
 726     );
 727
 728     write_logfile_entry("\nStarting tokenization pass...\n");
 729
 730     if ( $rOpts->{'entab-leading-whitespace'} ) {
 731         write_logfile_entry(
 732 "Leading whitespace will be entabbed with $rOpts->{'entab-leading-whitespace'} spaces per tab\n"
 733         );
 734     }
 735     elsif ( $rOpts->{'tabs'} ) {
 736         write_logfile_entry("Indentation will be with a tab character\n");
 737     }
 738     else {
 739         write_logfile_entry(
 740             "Indentation will be with $rOpts->{'indent-columns'} spaces\n");
 741     }
 742
 743     # Initialize the $self array reference.
 744     # To add an item, first add a constant index in the BEGIN block above.
 745     my $self = [];
 746
 747     # Basic data structures...
 748     $self->[_rlines_]     = [];    # = ref to array of lines of the file
 749     $self->[_rlines_new_] = [];    # = ref to array of output lines
 750
 751     # 'rLL' = reference to the continuous liner array of all tokens in a file.
 752     # 'LL' stands for 'Linked List'. Using a linked list was a disaster, but
 753     # 'LL' stuck because it is easy to type.  The 'rLL' array is updated
 754     # by sub 'respace_tokens' during reformatting.  The indexes in 'rLL' begin
 755     # with '$K' by convention.
 756     $self->[_rLL_]    = [];
 757     $self->[_Klimit_] = undef;    # = maximum K index for rLL.
 758
 759     # Indexes into the rLL list
 760     $self->[_K_opening_container_] = {};
 761     $self->[_K_closing_container_] = {};
 762     $self->[_K_opening_ternary_]   = {};
 763     $self->[_K_closing_ternary_]   = {};
 764     $self->[_K_first_seq_item_]    = undef; # K of first token with a sequence #
 765
 766     # Array of phantom semicolons, in case we ever need to undo them
 767     $self->[_rK_phantom_semicolons_] = undef;
 768
 769     # 'rSS' is the 'Signed Sequence' list, a continuous list of all sequence
 770     # numbers with + or - indicating opening or closing. This list represents
 771     # the entire container tree and is invariant under reformatting.  It can be
 772     # used to quickly travel through the tree.  Indexes in the rSS array begin
 773     # with '$I' by convention.  The 'Iss' arrays give the indexes in this list
 774     # of opening and closing sequence numbers.
 775     $self->[_rSS_]         = [];
 776     $self->[_Iss_opening_] = [];
 777     $self->[_Iss_closing_] = [];
 778
 779     # Arrays to help traverse the tree
 780     $self->[_rdepth_of_opening_seqno_] = [];
 781     $self->[_rblock_type_of_seqno_]    = {};
 782     $self->[_ris_asub_block_]          = {};
 783     $self->[_ris_sub_block_]           = {};
 784
 785     # Mostly list characteristics and processing flags
 786     $self->[_rtype_count_by_seqno_]      = {};
 787     $self->[_ris_function_call_paren_]   = {};
 788     $self->[_rlec_count_by_seqno_]       = {};
 789     $self->[_ris_broken_container_]      = {};
 790     $self->[_ris_permanently_broken_]    = {};
 791     $self->[_rhas_list_]                 = {};
 792     $self->[_rhas_broken_list_]          = {};
 793     $self->[_rhas_broken_list_with_lec_] = {};
 794     $self->[_rhas_code_block_]           = {};
 795     $self->[_rhas_broken_code_block_]    = {};
 796     $self->[_rhas_ternary_]              = {};
 797     $self->[_ris_excluded_lp_container_] = {};
 798     $self->[_rlp_object_by_seqno_]       = {};
 799     $self->[_rwant_reduced_ci_]          = {};
 800     $self->[_rno_xci_by_seqno_]          = {};
 801     $self->[_rbrace_left_]               = {};
 802     $self->[_ris_bli_container_]         = {};
 803     $self->[_rparent_of_seqno_]          = {};
 804     $self->[_rchildren_of_seqno_]        = {};
 805     $self->[_ris_list_by_seqno_]         = {};
 806
 807     $self->[_rbreak_container_] = {};                 # prevent one-line blocks
 808     $self->[_rshort_nested_]    = {};                 # blocks not forced open
 809     $self->[_length_function_]  = $length_function;
 810     $self->[_is_encoded_data_]  = $is_encoded_data;
 811
 812     # Some objects...
 813     $self->[_fh_tee_]                  = $fh_tee;
 814     $self->[_sink_object_]             = $sink_object;
 815     $self->[_file_writer_object_]      = $file_writer_object;
 816     $self->[_vertical_aligner_object_] = $vertical_aligner_object;
 817     $self->[_logger_object_]           = $logger_object;
 818
 819     # Reference to the batch being processed
 820     $self->[_this_batch_] = [];
 821
 822     # Memory of processed text...
 823     $self->[_last_last_line_leading_level_]    = 0;
 824     $self->[_last_line_leading_level_]         = 0;
 825     $self->[_last_line_leading_type_]          = '#';
 826     $self->[_last_output_short_opening_token_] = 0;
 827     $self->[_added_semicolon_count_]           = 0;
 828     $self->[_first_added_semicolon_at_]        = 0;
 829     $self->[_last_added_semicolon_at_]         = 0;
 830     $self->[_deleted_semicolon_count_]         = 0;
 831     $self->[_first_deleted_semicolon_at_]      = 0;
 832     $self->[_last_deleted_semicolon_at_]       = 0;
 833     $self->[_embedded_tab_count_]              = 0;
 834     $self->[_first_embedded_tab_at_]           = 0;
 835     $self->[_last_embedded_tab_at_]            = 0;
 836     $self->[_first_tabbing_disagreement_]      = 0;
 837     $self->[_last_tabbing_disagreement_]       = 0;
 838     $self->[_tabbing_disagreement_count_]      = 0;
 839     $self->[_in_tabbing_disagreement_]         = 0;
 840     $self->[_saw_VERSION_in_this_file_]        = !$rOpts->{'pass-version-line'};
 841     $self->[_saw_END_or_DATA_]                 = 0;
 842     $self->[_first_brace_tabbing_disagreement_] = undef;
 843     $self->[_in_brace_tabbing_disagreement_]    = undef;
 844
 845     # Hashes related to container welding...
 846     $self->[_radjusted_levels_] = [];
 847
 848     # Weld data structures
 849     $self->[_rK_weld_left_]         = {};
 850     $self->[_rK_weld_right_]        = {};
 851     $self->[_rweld_len_right_at_K_] = {};
 852
 853     # -xci stuff
 854     $self->[_rseqno_controlling_my_ci_] = {};
 855     $self->[_ris_seqno_controlling_ci_] = {};
 856
 857     $self->[_rspecial_side_comment_type_]  = {};
 858     $self->[_maximum_level_]               = 0;
 859     $self->[_maximum_level_at_line_]       = 0;
 860     $self->[_maximum_BLOCK_level_]         = 0;
 861     $self->[_maximum_BLOCK_level_at_line_] = 0;
 862
 863     $self->[_rKrange_code_without_comments_] = [];
 864     $self->[_rbreak_before_Kfirst_]          = {};
 865     $self->[_rbreak_after_Klast_]            = {};
 866     $self->[_rwant_container_open_]          = {};
 867     $self->[_converged_]                     = 0;
 868
 869     # qw stuff
 870     $self->[_rstarting_multiline_qw_seqno_by_K_] = {};
 871     $self->[_rending_multiline_qw_seqno_by_K_]   = {};
 872     $self->[_rKrange_multiline_qw_by_seqno_]     = {};
 873     $self->[_rmultiline_qw_has_extra_level_]     = {};
 874
 875     $self->[_rcollapsed_length_by_seqno_]       = {};
 876     $self->[_rbreak_before_container_by_seqno_] = {};
 877     $self->[_ris_essential_old_breakpoint_]     = {};
 878     $self->[_roverride_cab3_]                   = {};
 879     $self->[_ris_assigned_structure_]           = {};
 880
 881     # This flag will be updated later by a call to get_save_logfile()
 882     $self->[_save_logfile_] = defined($logger_object);
 883
 884     # Be sure all variables in $self have been initialized above.  To find the
 885     # correspondence of index numbers and array names, copy a list to a file
 886     # and use the unix 'nl' command to number lines 1..
 887     if (DEVEL_MODE) {
 888         my @non_existant;
 889         foreach ( 0 .. _LAST_SELF_INDEX_ ) {
 890             if ( !exists( $self->[$_] ) ) {
 891                 push @non_existant, $_;
 892             }
 893         }
 894         if (@non_existant) {
 895             Fault("These indexes in self not initialized: (@non_existant)\n");
 896         }
 897     }
 898
 899     bless $self, $class;
 900
 901     # Safety check..this is not a class yet
 902     if ( _increment_count() > 1 ) {
 903         confess
 904 "Attempt to create more than 1 object in $class, which is not a true class yet\n";
 905     }
 906     return $self;
 907 }
 908
 909 ######################################
 910 # CODE SECTION 2: Some Basic Utilities
 911 ######################################
 912
 913 sub check_rLL {
 914
 915     # Verify that the rLL array has not been auto-vivified
 916     my ( $self, $msg ) = @_;
 917     my $rLL    = $self->[_rLL_];
 918     my $Klimit = $self->[_Klimit_];
 919     my $num    = @{$rLL};
 920     if (   ( defined($Klimit) && $Klimit != $num - 1 )
 921         || ( !defined($Klimit) && $num > 0 ) )
 922     {
 923
 924         # This fault can occur if the array has been accessed for an index
 925         # greater than $Klimit, which is the last token index.  Just accessing
 926         # the array above index $Klimit, not setting a value, can cause @rLL to
 927         # increase beyond $Klimit.  If this occurs, the problem can be located
 928         # by making calls to this routine at different locations in
 929         # sub 'finish_formatting'.
 930         $Klimit = 'undef' if ( !defined($Klimit) );
 931         $msg    = "" unless $msg;
 932         Fault("$msg ERROR: rLL has num=$num but Klimit='$Klimit'\n");
 933     }
 934     return;
 935 }
 936
 937 sub check_keys {
 938     my ( $rtest, $rvalid, $msg, $exact_match ) = @_;
 939
 940     # Check the keys of a hash:
 941     # $rtest   = ref to hash to test
 942     # $rvalid  = ref to hash with valid keys
 943
 944     # $msg = a message to write in case of error
 945     # $exact_match defines the type of check:
 946     #     = false: test hash must not have unknown key
 947     #     = true:  test hash must have exactly same keys as known hash
 948     my @unknown_keys =
 949       grep { !exists $rvalid->{$_} } keys %{$rtest};
 950     my @missing_keys =
 951       grep { !exists $rtest->{$_} } keys %{$rvalid};
 952     my $error = @unknown_keys;
 953     if ($exact_match) { $error ||= @missing_keys }
 954     if ($error) {
 955         local $" = ')(';
 956         my @expected_keys = sort keys %{$rvalid};
 957         @unknown_keys = sort @unknown_keys;
 958         Fault(<<EOM);
 959 ------------------------------------------------------------------------
 960 Program error detected checking hash keys
 961 Message is: '$msg'
 962 Expected keys: (@expected_keys)
 963 Unknown key(s): (@unknown_keys)
 964 Missing key(s): (@missing_keys)
 965 ------------------------------------------------------------------------
 966 EOM
 967     }
 968     return;
 969 }
 970
 971 sub check_token_array {
 972     my $self = shift;
 973
 974     # Check for errors in the array of tokens. This is only called
 975     # when the DEVEL_MODE flag is set, so this Fault will only occur
 976     # during code development.
 977     my $rLL = $self->[_rLL_];
 978     for ( my $KK = 0 ; $KK < @{$rLL} ; $KK++ ) {
 979         my $nvars = @{ $rLL->[$KK] };
 980         if ( $nvars != _NVARS ) {
 981             my $NVARS = _NVARS;
 982             my $type  = $rLL->[$KK]->[_TYPE_];
 983             $type = '*' unless defined($type);
 984
 985             # The number of variables per token node is _NVARS and was set when
 986             # the array indexes were generated. So if the number of variables
 987             # is different we have done something wrong, like not store all of
 988             # them in sub 'write_line' when they were received from the
 989             # tokenizer.
 990             Fault(
 991 "number of vars for node $KK, type '$type', is $nvars but should be $NVARS"
 992             );
 993         }
 994         foreach my $var ( _TOKEN_, _TYPE_ ) {
 995             if ( !defined( $rLL->[$KK]->[$var] ) ) {
 996                 my $iline = $rLL->[$KK]->[_LINE_INDEX_];
 997
 998                 # This is a simple check that each token has some basic
 999                 # variables.  In other words, that there are no holes in the
1000                 # array of tokens.  Sub 'write_line' pushes tokens into the
1001                 # $rLL array, so this should guarantee no gaps.
1002                 Fault("Undefined variable $var for K=$KK, line=$iline\n");
1003             }
1004         }
1005     }
1006     return;
1007 }
1008
1009 {    ## begin closure check_line_hashes
1010
1011     # This code checks that no autovivification occurs in the 'line' hash
1012
1013     my %valid_line_hash;
1014
1015     BEGIN {
1016
1017         # These keys are defined for each line in the formatter
1018         # Each line must have exactly these quantities
1019         my @valid_line_keys = qw(
1020           _curly_brace_depth
1021           _ending_in_quote
1022           _guessed_indentation_level
1023           _line_number
1024           _line_text
1025           _line_type
1026           _paren_depth
1027           _quote_character
1028           _rK_range
1029           _square_bracket_depth
1030           _starting_in_quote
1031           _ended_in_blank_token
1032           _code_type
1033
1034           _ci_level_0
1035           _level_0
1036           _nesting_blocks_0
1037           _nesting_tokens_0
1038         );
1039
1040         @valid_line_hash{@valid_line_keys} = (1) x scalar(@valid_line_keys);
1041     }
1042
1043     sub check_line_hashes {
1044         my $self   = shift;
1045         my $rlines = $self->[_rlines_];
1046         foreach my $rline ( @{$rlines} ) {
1047             my $iline     = $rline->{_line_number};
1048             my $line_type = $rline->{_line_type};
1049             check_keys( $rline, \%valid_line_hash,
1050                 "Checkpoint: line number =$iline,  line_type=$line_type", 1 );
1051         }
1052         return;
1053     }
1054 } ## end closure check_line_hashes
1055
1056 {    ## begin closure for logger routines
1057     my $logger_object;
1058
1059     # Called once per file to initialize the logger object
1060     sub set_logger_object {
1061         $logger_object = shift;
1062         return;
1063     }
1064
1065     sub get_logger_object {
1066         return $logger_object;
1067     }
1068
1069     sub get_input_stream_name {
1070         my $input_stream_name = "";
1071         if ($logger_object) {
1072             $input_stream_name = $logger_object->get_input_stream_name();
1073         }
1074         return $input_stream_name;
1075     }
1076
1077     # interface to Perl::Tidy::Logger routines
1078     sub warning {
1079         my ($msg) = @_;
1080         if ($logger_object) { $logger_object->warning($msg); }
1081         return;
1082     }
1083
1084     sub complain {
1085         my ($msg) = @_;
1086         if ($logger_object) {
1087             $logger_object->complain($msg);
1088         }
1089         return;
1090     }
1091
1092     sub write_logfile_entry {
1093         my @msg = @_;
1094         if ($logger_object) {
1095             $logger_object->write_logfile_entry(@msg);
1096         }
1097         return;
1098     }
1099
1100     sub get_saw_brace_error {
1101         if ($logger_object) {
1102             return $logger_object->get_saw_brace_error();
1103         }
1104         return;
1105     }
1106
1107     sub we_are_at_the_last_line {
1108         if ($logger_object) {
1109             $logger_object->we_are_at_the_last_line();
1110         }
1111         return;
1112     }
1113
1114 } ## end closure for logger routines
1115
1116 {    ## begin closure for diagnostics routines
1117     my $diagnostics_object;
1118
1119     # Called once per file to initialize the diagnostics object
1120     sub set_diagnostics_object {
1121         $diagnostics_object = shift;
1122         return;
1123     }
1124
1125     sub write_diagnostics {
1126         my ($msg) = @_;
1127         if ($diagnostics_object) {
1128             $diagnostics_object->write_diagnostics($msg);
1129         }
1130         return;
1131     }
1132 } ## end closure for diagnostics routines
1133
1134 sub get_convergence_check {
1135     my ($self) = @_;
1136     return $self->[_converged_];
1137 }
1138
1139 sub get_added_semicolon_count {
1140     my $self = shift;
1141     return $self->[_added_semicolon_count_];
1142 }
1143
1144 sub get_output_line_number {
1145     my ($self) = @_;
1146     my $vao = $self->[_vertical_aligner_object_];
1147     return $vao->get_output_line_number();
1148 }
1149
1150 sub want_blank_line {
1151     my $self = shift;
1152     $self->flush();
1153     my $file_writer_object = $self->[_file_writer_object_];
1154     $file_writer_object->want_blank_line();
1155     return;
1156 }
1157
1158 sub write_unindented_line {
1159     my ( $self, $line ) = @_;
1160     $self->flush();
1161     my $file_writer_object = $self->[_file_writer_object_];
1162     $file_writer_object->write_line($line);
1163     return;
1164 }
1165
1166 sub consecutive_nonblank_lines {
1167     my ($self)             = @_;
1168     my $file_writer_object = $self->[_file_writer_object_];
1169     my $vao                = $self->[_vertical_aligner_object_];
1170     return $file_writer_object->get_consecutive_nonblank_lines() +
1171       $vao->get_cached_line_count();
1172 }
1173
1174 sub max {
1175     my (@vals) = @_;
1176     my $max = shift @vals;
1177     for (@vals) { $max = $_ > $max ? $_ : $max }
1178     return $max;
1179 }
1180
1181 sub min {
1182     my (@vals) = @_;
1183     my $min = shift @vals;
1184     for (@vals) { $min = $_ < $min ? $_ : $min }
1185     return $min;
1186 }
1187
1188 sub split_words {
1189
1190     # given a string containing words separated by whitespace,
1191     # return the list of words
1192     my ($str) = @_;
1193     return unless $str;
1194     $str =~ s/\s+$//;
1195     $str =~ s/^\s+//;
1196     return split( /\s+/, $str );
1197 }
1198
1199 ###########################################
1200 # CODE SECTION 3: Check and process options
1201 ###########################################
1202
1203 sub check_options {
1204
1205     # This routine is called to check the user-supplied run parameters
1206     # and to configure the control hashes to them.
1207     $rOpts = shift;
1208
1209     initialize_whitespace_hashes();
1210     initialize_bond_strength_hashes();
1211
1212     # This function must be called early to get hashes with grep initialized
1213     initialize_grep_and_friends( $rOpts->{'grep-alias-list'} );
1214
1215     # Make needed regex patterns for matching text.
1216     # NOTE: sub_matching_patterns must be made first because later patterns use
1217     # them; see RT #133130.
1218     make_sub_matching_pattern();
1219     make_static_block_comment_pattern();
1220     make_static_side_comment_pattern();
1221     make_closing_side_comment_prefix();
1222     make_closing_side_comment_list_pattern();
1223     $format_skipping_pattern_begin =
1224       make_format_skipping_pattern( 'format-skipping-begin', '#<<<' );
1225     $format_skipping_pattern_end =
1226       make_format_skipping_pattern( 'format-skipping-end', '#>>>' );
1227     make_non_indenting_brace_pattern();
1228
1229     # If closing side comments ARE selected, then we can safely
1230     # delete old closing side comments unless closing side comment
1231     # warnings are requested.  This is a good idea because it will
1232     # eliminate any old csc's which fall below the line count threshold.
1233     # We cannot do this if warnings are turned on, though, because we
1234     # might delete some text which has been added.  So that must
1235     # be handled when comments are created.  And we cannot do this
1236     # with -io because -csc will be skipped altogether.
1237     if ( $rOpts->{'closing-side-comments'} ) {
1238         if (   !$rOpts->{'closing-side-comment-warnings'}
1239             && !$rOpts->{'indent-only'} )
1240         {
1241             $rOpts->{'delete-closing-side-comments'} = 1;
1242         }
1243     }
1244
1245     # If closing side comments ARE NOT selected, but warnings ARE
1246     # selected and we ARE DELETING csc's, then we will pretend to be
1247     # adding with a huge interval.  This will force the comments to be
1248     # generated for comparison with the old comments, but not added.
1249     elsif ( $rOpts->{'closing-side-comment-warnings'} ) {
1250         if ( $rOpts->{'delete-closing-side-comments'} ) {
1251             $rOpts->{'delete-closing-side-comments'}  = 0;
1252             $rOpts->{'closing-side-comments'}         = 1;
1253             $rOpts->{'closing-side-comment-interval'} = 100000000;
1254         }
1255     }
1256
1257     make_bli_pattern();
1258     make_bl_pattern();
1259     make_block_brace_vertical_tightness_pattern();
1260     make_blank_line_pattern();
1261     make_keyword_group_list_pattern();
1262
1263     # Make initial list of desired one line block types
1264     # They will be modified by 'prepare_cuddled_block_types'
1265     # NOTE: this line must come after is_sort_map_grep_eval is
1266     # initialized in sub 'initialize_grep_and_friends'
1267     %want_one_line_block = %is_sort_map_grep_eval;
1268
1269     prepare_cuddled_block_types();
1270     if ( $rOpts->{'dump-cuddled-block-list'} ) {
1271         dump_cuddled_block_list(*STDOUT);
1272         Exit(0);
1273     }
1274
1275     # -xlp implies -lp
1276     if ( $rOpts->{'extended-line-up-parentheses'} ) {
1277         $rOpts->{'line-up-parentheses'} ||= 1;
1278     }
1279
1280     if ( $rOpts->{'line-up-parentheses'} ) {
1281
1282         if (   $rOpts->{'indent-only'}
1283             || !$rOpts->{'add-newlines'}
1284             || !$rOpts->{'delete-old-newlines'} )
1285         {
1286             Warn(<<EOM);
1287 -----------------------------------------------------------------------
1288 Conflict: -lp  conflicts with -io, -fnl, -nanl, or -ndnl; ignoring -lp
1289
1290 The -lp indentation logic requires that perltidy be able to coordinate
1291 arbitrarily large numbers of line breakpoints.  This isn't possible
1292 with these flags.
1293 -----------------------------------------------------------------------
1294 EOM
1295             $rOpts->{'line-up-parentheses'}          = 0;
1296             $rOpts->{'extended-line-up-parentheses'} = 0;
1297         }
1298
1299         if ( $rOpts->{'whitespace-cycle'} ) {
1300             Warn(<<EOM);
1301 Conflict: -wc cannot currently be used with the -lp option; ignoring -wc
1302 EOM
1303             $rOpts->{'whitespace-cycle'} = 0;
1304         }
1305     }
1306
1307     # At present, tabs are not compatible with the line-up-parentheses style
1308     # (it would be possible to entab the total leading whitespace
1309     # just prior to writing the line, if desired).
1310     if ( $rOpts->{'line-up-parentheses'} && $rOpts->{'tabs'} ) {
1311         Warn(<<EOM);
1312 Conflict: -t (tabs) cannot be used with the -lp  option; ignoring -t; see -et.
1313 EOM
1314         $rOpts->{'tabs'} = 0;
1315     }
1316
1317     # Likewise, tabs are not compatible with outdenting..
1318     if ( $rOpts->{'outdent-keywords'} && $rOpts->{'tabs'} ) {
1319         Warn(<<EOM);
1320 Conflict: -t (tabs) cannot be used with the -okw options; ignoring -t; see -et.
1321 EOM
1322         $rOpts->{'tabs'} = 0;
1323     }
1324
1325     if ( $rOpts->{'outdent-labels'} && $rOpts->{'tabs'} ) {
1326         Warn(<<EOM);
1327 Conflict: -t (tabs) cannot be used with the -ola  option; ignoring -t; see -et.
1328 EOM
1329         $rOpts->{'tabs'} = 0;
1330     }
1331
1332     if ( !$rOpts->{'space-for-semicolon'} ) {
1333         $want_left_space{'f'} = -1;
1334     }
1335
1336     if ( $rOpts->{'space-terminal-semicolon'} ) {
1337         $want_left_space{';'} = 1;
1338     }
1339
1340     # We should put an upper bound on any -sil=n value. Otherwise enormous
1341     # files could be created by mistake.
1342     for ( $rOpts->{'starting-indentation-level'} ) {
1343         if ( $_ && $_ > 100 ) {
1344             Warn(<<EOM);
1345 The value --starting-indentation-level=$_ is very large; a mistake? resetting to 0;
1346 EOM
1347             $_ = 0;
1348         }
1349     }
1350
1351     # implement outdenting preferences for keywords
1352     %outdent_keyword = ();
1353     my @okw = split_words( $rOpts->{'outdent-keyword-list'} );
1354     unless (@okw) {
1355         @okw = qw(next last redo goto return);    # defaults
1356     }
1357
1358     # FUTURE: if not a keyword, assume that it is an identifier
1359     foreach (@okw) {
1360         if ( $Perl::Tidy::Tokenizer::is_keyword{$_} ) {
1361             $outdent_keyword{$_} = 1;
1362         }
1363         else {
1364             Warn("ignoring '$_' in -okwl list; not a perl keyword");
1365         }
1366     }
1367
1368     # setup hash for -kpit option
1369     %keyword_paren_inner_tightness = ();
1370     my $kpit_value = $rOpts->{'keyword-paren-inner-tightness'};
1371     if ( defined($kpit_value) && $kpit_value != 1 ) {
1372         my @kpit =
1373           split_words( $rOpts->{'keyword-paren-inner-tightness-list'} );
1374         unless (@kpit) {
1375             @kpit = qw(if elsif unless while until for foreach);    # defaults
1376         }
1377
1378         # we will allow keywords and user-defined identifiers
1379         foreach (@kpit) {
1380             $keyword_paren_inner_tightness{$_} = $kpit_value;
1381         }
1382     }
1383
1384     # implement user whitespace preferences
1385     if ( my @q = split_words( $rOpts->{'want-left-space'} ) ) {
1386         @want_left_space{@q} = (1) x scalar(@q);
1387     }
1388
1389     if ( my @q = split_words( $rOpts->{'want-right-space'} ) ) {
1390         @want_right_space{@q} = (1) x scalar(@q);
1391     }
1392
1393     if ( my @q = split_words( $rOpts->{'nowant-left-space'} ) ) {
1394         @want_left_space{@q} = (-1) x scalar(@q);
1395     }
1396
1397     if ( my @q = split_words( $rOpts->{'nowant-right-space'} ) ) {
1398         @want_right_space{@q} = (-1) x scalar(@q);
1399     }
1400     if ( $rOpts->{'dump-want-left-space'} ) {
1401         dump_want_left_space(*STDOUT);
1402         Exit(0);
1403     }
1404
1405     if ( $rOpts->{'dump-want-right-space'} ) {
1406         dump_want_right_space(*STDOUT);
1407         Exit(0);
1408     }
1409
1410     # default keywords for which space is introduced before an opening paren
1411     # (at present, including them messes up vertical alignment)
1412     my @sak = qw(my local our and or xor err eq ne if else elsif until
1413       unless while for foreach return switch case given when catch);
1414     %space_after_keyword = map { $_ => 1 } @sak;
1415
1416     # first remove any or all of these if desired
1417     if ( my @q = split_words( $rOpts->{'nospace-after-keyword'} ) ) {
1418
1419         # -nsak='*' selects all the above keywords
1420         if ( @q == 1 && $q[0] eq '*' ) { @q = keys(%space_after_keyword) }
1421         @space_after_keyword{@q} = (0) x scalar(@q);
1422     }
1423
1424     # then allow user to add to these defaults
1425     if ( my @q = split_words( $rOpts->{'space-after-keyword'} ) ) {
1426         @space_after_keyword{@q} = (1) x scalar(@q);
1427     }
1428
1429     # implement user break preferences
1430     my $break_after = sub {
1431         my @toks = @_;
1432         foreach my $tok (@toks) {
1433             if ( $tok eq '?' ) { $tok = ':' }    # patch to coordinate ?/:
1434             my $lbs = $left_bond_strength{$tok};
1435             my $rbs = $right_bond_strength{$tok};
1436             if ( defined($lbs) && defined($rbs) && $lbs < $rbs ) {
1437                 ( $right_bond_strength{$tok}, $left_bond_strength{$tok} ) =
1438                   ( $lbs, $rbs );
1439             }
1440         }
1441         return;
1442     };
1443
1444     my $break_before = sub {
1445         my @toks = @_;
1446         foreach my $tok (@toks) {
1447             my $lbs = $left_bond_strength{$tok};
1448             my $rbs = $right_bond_strength{$tok};
1449             if ( defined($lbs) && defined($rbs) && $rbs < $lbs ) {
1450                 ( $right_bond_strength{$tok}, $left_bond_strength{$tok} ) =
1451                   ( $lbs, $rbs );
1452             }
1453         }
1454         return;
1455     };
1456
1457     $break_after->(@all_operators) if ( $rOpts->{'break-after-all-operators'} );
1458     $break_before->(@all_operators)
1459       if ( $rOpts->{'break-before-all-operators'} );
1460
1461     $break_after->( split_words( $rOpts->{'want-break-after'} ) );
1462     $break_before->( split_words( $rOpts->{'want-break-before'} ) );
1463
1464     # make note if breaks are before certain key types
1465     %want_break_before = ();
1466     foreach my $tok ( @all_operators, ',' ) {
1467         $want_break_before{$tok} =
1468           $left_bond_strength{$tok} < $right_bond_strength{$tok};
1469     }
1470
1471     # Coordinate ?/: breaks, which must be similar
1472     if ( !$want_break_before{':'} ) {
1473         $want_break_before{'?'}   = $want_break_before{':'};
1474         $right_bond_strength{'?'} = $right_bond_strength{':'} + 0.01;
1475         $left_bond_strength{'?'}  = NO_BREAK;
1476     }
1477
1478     # Only make a hash entry for the next parameters if values are defined.
1479     # That allows a quick check to be made later.
1480     %break_before_container_types = ();
1481     for ( $rOpts->{'break-before-hash-brace'} ) {
1482         $break_before_container_types{'{'} = $_ if $_ && $_ > 0;
1483     }
1484     for ( $rOpts->{'break-before-square-bracket'} ) {
1485         $break_before_container_types{'['} = $_ if $_ && $_ > 0;
1486     }
1487     for ( $rOpts->{'break-before-paren'} ) {
1488         $break_before_container_types{'('} = $_ if $_ && $_ > 0;
1489     }
1490
1491     #--------------------------------------------------------------
1492     # The combination -lp -iob -vmll -bbx=2 can be unstable (b1266)
1493     #--------------------------------------------------------------
1494     # The -vmll and -lp parameters do not really work well together.
1495     # To avoid instabilities, we will change any -bbx=2 to -bbx=1 (stable).
1496     # NOTE: we could make this more precise by looking at any exclusion
1497     # flags for -lp, and allowing -bbx=2 for excluded types.
1498     if (   $rOpts->{'variable-maximum-line-length'}
1499         && $rOpts->{'ignore-old-breakpoints'}
1500         && $rOpts->{'line-up-parentheses'} )
1501     {
1502         my @changed;
1503         foreach my $key ( keys %break_before_container_types ) {
1504             if ( $break_before_container_types{$key} == 2 ) {
1505                 $break_before_container_types{$key} = 1;
1506                 push @changed, $key;
1507             }
1508         }
1509         if (@changed) {
1510
1511             # we could write a warning here
1512         }
1513     }
1514
1515     #-------------------------------------------------------------------
1516     # The combination -xlp and -vmll can be unstable unless -iscl is set
1517     #-------------------------------------------------------------------
1518     # This is a temporary fix for issue b1302.  See also b1306, b1310.
1519     # FIXME: look for a better fix.
1520     if (   $rOpts->{'variable-maximum-line-length'}
1521         && $rOpts->{'extended-line-up-parentheses'}
1522         && !$rOpts->{'ignore-side-comment-lengths'} )
1523     {
1524         $rOpts->{'ignore-side-comment-lengths'} = 1;
1525
1526         # we could write a warning here
1527     }
1528
1529     #-----------------------------------------------------------
1530     # The combination -lp -vmll can be unstable if -ci<2 (b1267)
1531     #-----------------------------------------------------------
1532     # The -vmll and -lp parameters do not really work well together.
1533     # This is a very crude fix for an unusual parameter combination.
1534     if (   $rOpts->{'variable-maximum-line-length'}
1535         && $rOpts->{'line-up-parentheses'}
1536         && $rOpts->{'continuation-indentation'} < 2 )
1537     {
1538         $rOpts->{'continuation-indentation'} = 2;
1539         ##Warn("Increased -ci=n to n=2 for stability with -lp and -vmll\n");
1540     }
1541
1542     %container_indentation_options = ();
1543     foreach my $pair (
1544         [ 'break-before-hash-brace-and-indent',     '{' ],
1545         [ 'break-before-square-bracket-and-indent', '[' ],
1546         [ 'break-before-paren-and-indent',          '(' ],
1547       )
1548     {
1549         my ( $key, $tok ) = @{$pair};
1550         my $opt = $rOpts->{$key};
1551         if ( defined($opt) && $opt > 0 && $break_before_container_types{$tok} )
1552         {
1553
1554             # (1) -lp is not compatable with opt=2, silently set to opt=0
1555             # (2) opt=0 and 2 give same result if -i=-ci; but opt=0 is faster
1556             if ( $opt == 2 ) {
1557                 if (   $rOpts->{'line-up-parentheses'}
1558                     || $rOpts->{'indent-columns'} ==
1559                     $rOpts->{'continuation-indentation'} )
1560                 {
1561                     $opt = 0;
1562                 }
1563             }
1564             $container_indentation_options{$tok} = $opt;
1565         }
1566     }
1567
1568     # Define here tokens which may follow the closing brace of a do statement
1569     # on the same line, as in:
1570     #   } while ( $something);
1571     my @dof = qw(until while unless if ; : );
1572     push @dof, ',';
1573     @is_do_follower{@dof} = (1) x scalar(@dof);
1574
1575     # What tokens may follow the closing brace of an if or elsif block?
1576     # Not used. Previously used for cuddled else, but no longer needed.
1577     %is_if_brace_follower = ();
1578
1579     # nothing can follow the closing curly of an else { } block:
1580     %is_else_brace_follower = ();
1581
1582     # what can follow a multi-line anonymous sub definition closing curly:
1583     my @asf = qw# ; : => or and  && || ~~ !~~ ) #;
1584     push @asf, ',';
1585     @is_anon_sub_brace_follower{@asf} = (1) x scalar(@asf);
1586
1587     # what can follow a one-line anonymous sub closing curly:
1588     # one-line anonymous subs also have ']' here...
1589     # see tk3.t and PP.pm
1590     my @asf1 = qw#  ; : => or and  && || ) ] ~~ !~~ #;
1591     push @asf1, ',';
1592     @is_anon_sub_1_brace_follower{@asf1} = (1) x scalar(@asf1);
1593
1594     # What can follow a closing curly of a block
1595     # which is not an if/elsif/else/do/sort/map/grep/eval/sub
1596     # Testfiles: 'Toolbar.pm', 'Menubar.pm', bless.t, '3rules.pl'
1597     my @obf = qw#  ; : => or and  && || ) #;
1598     push @obf, ',';
1599     @is_other_brace_follower{@obf} = (1) x scalar(@obf);
1600
1601     $right_bond_strength{'{'} = WEAK;
1602     $left_bond_strength{'{'}  = VERY_STRONG;
1603
1604     # make -l=0  equal to -l=infinite
1605     if ( !$rOpts->{'maximum-line-length'} ) {
1606         $rOpts->{'maximum-line-length'} = 1000000;
1607     }
1608
1609     # make -lbl=0  equal to -lbl=infinite
1610     if ( !$rOpts->{'long-block-line-count'} ) {
1611         $rOpts->{'long-block-line-count'} = 1000000;
1612     }
1613
1614     my $ole = $rOpts->{'output-line-ending'};
1615     if ($ole) {
1616         my %endings = (
1617             dos  => "\015\012",
1618             win  => "\015\012",
1619             mac  => "\015",
1620             unix => "\012",
1621         );
1622
1623         # Patch for RT #99514, a memoization issue.
1624         # Normally, the user enters one of 'dos', 'win', etc, and we change the
1625         # value in the options parameter to be the corresponding line ending
1626         # character.  But, if we are using memoization, on later passes through
1627         # here the option parameter will already have the desired ending
1628         # character rather than the keyword 'dos', 'win', etc.  So
1629         # we must check to see if conversion has already been done and, if so,
1630         # bypass the conversion step.
1631         my %endings_inverted = (
1632             "\015\012" => 'dos',
1633             "\015\012" => 'win',
1634             "\015"     => 'mac',
1635             "\012"     => 'unix',
1636         );
1637
1638         if ( defined( $endings_inverted{$ole} ) ) {
1639
1640             # we already have valid line ending, nothing more to do
1641         }
1642         else {
1643             $ole = lc $ole;
1644             unless ( $rOpts->{'output-line-ending'} = $endings{$ole} ) {
1645                 my $str = join " ", keys %endings;
1646                 Die(<<EOM);
1647 Unrecognized line ending '$ole'; expecting one of: $str
1648 EOM
1649             }
1650             if ( $rOpts->{'preserve-line-endings'} ) {
1651                 Warn("Ignoring -ple; conflicts with -ole\n");
1652                 $rOpts->{'preserve-line-endings'} = undef;
1653             }
1654         }
1655     }
1656
1657     # hashes used to simplify setting whitespace
1658     %tightness = (
1659         '{' => $rOpts->{'brace-tightness'},
1660         '}' => $rOpts->{'brace-tightness'},
1661         '(' => $rOpts->{'paren-tightness'},
1662         ')' => $rOpts->{'paren-tightness'},
1663         '[' => $rOpts->{'square-bracket-tightness'},
1664         ']' => $rOpts->{'square-bracket-tightness'},
1665     );
1666     %matching_token = (
1667         '{' => '}',
1668         '(' => ')',
1669         '[' => ']',
1670         '?' => ':',
1671     );
1672
1673     if ( $rOpts->{'ignore-old-breakpoints'} ) {
1674
1675         my @conflicts;
1676         if ( $rOpts->{'break-at-old-method-breakpoints'} ) {
1677             $rOpts->{'break-at-old-method-breakpoints'} = 0;
1678             push @conflicts, '--break-at-old-method-breakpoints (-bom)';
1679         }
1680         if ( $rOpts->{'break-at-old-comma-breakpoints'} ) {
1681             $rOpts->{'break-at-old-comma-breakpoints'} = 0;
1682             push @conflicts, '--break-at-old-comma-breakpoints (-boc)';
1683         }
1684         if ( $rOpts->{'break-at-old-semicolon-breakpoints'} ) {
1685             $rOpts->{'break-at-old-semicolon-breakpoints'} = 0;
1686             push @conflicts, '--break-at-old-semicolon-breakpoints (-bos)';
1687         }
1688         if ( $rOpts->{'keep-old-breakpoints-before'} ) {
1689             $rOpts->{'keep-old-breakpoints-before'} = "";
1690             push @conflicts, '--keep-old-breakpoints-before (-kbb)';
1691         }
1692         if ( $rOpts->{'keep-old-breakpoints-after'} ) {
1693             $rOpts->{'keep-old-breakpoints-after'} = "";
1694             push @conflicts, '--keep-old-breakpoints-after (-kba)';
1695         }
1696
1697         if (@conflicts) {
1698             my $msg = join( "\n  ",
1699 " Conflict: These conflicts with --ignore-old-breakponts (-iob) will be turned off:",
1700                 @conflicts )
1701               . "\n";
1702             Warn($msg);
1703         }
1704
1705         # Note: These additional parameters are made inactive by -iob.
1706         # They are silently turned off here because they are on by default.
1707         # We would generate unexpected warnings if we issued a warning.
1708         $rOpts->{'break-at-old-keyword-breakpoints'}   = 0;
1709         $rOpts->{'break-at-old-logical-breakpoints'}   = 0;
1710         $rOpts->{'break-at-old-ternary-breakpoints'}   = 0;
1711         $rOpts->{'break-at-old-attribute-breakpoints'} = 0;
1712     }
1713
1714     %keep_break_before_type = ();
1715     initialize_keep_old_breakpoints( $rOpts->{'keep-old-breakpoints-before'},
1716         'kbb', \%keep_break_before_type );
1717
1718     %keep_break_after_type = ();
1719     initialize_keep_old_breakpoints( $rOpts->{'keep-old-breakpoints-after'},
1720         'kba', \%keep_break_after_type );
1721
1722     #------------------------------------------------------------
1723     # Make global vars for frequently used options for efficiency
1724     #------------------------------------------------------------
1725
1726     $rOpts_add_newlines   = $rOpts->{'add-newlines'};
1727     $rOpts_add_whitespace = $rOpts->{'add-whitespace'};
1728     $rOpts_blank_lines_after_opening_block =
1729       $rOpts->{'blank-lines-after-opening-block'};
1730     $rOpts_block_brace_tightness = $rOpts->{'block-brace-tightness'};
1731     $rOpts_block_brace_vertical_tightness =
1732       $rOpts->{'block-brace-vertical-tightness'};
1733     $rOpts_break_after_labels = $rOpts->{'break-after-labels'};
1734     $rOpts_break_at_old_attribute_breakpoints =
1735       $rOpts->{'break-at-old-attribute-breakpoints'};
1736     $rOpts_break_at_old_comma_breakpoints =
1737       $rOpts->{'break-at-old-comma-breakpoints'};
1738     $rOpts_break_at_old_keyword_breakpoints =
1739       $rOpts->{'break-at-old-keyword-breakpoints'};
1740     $rOpts_break_at_old_logical_breakpoints =
1741       $rOpts->{'break-at-old-logical-breakpoints'};
1742     $rOpts_break_at_old_semicolon_breakpoints =
1743       $rOpts->{'break-at-old-semicolon-breakpoints'};
1744     $rOpts_break_at_old_ternary_breakpoints =
1745       $rOpts->{'break-at-old-ternary-breakpoints'};
1746     $rOpts_break_open_paren_list = $rOpts->{'break-open-paren-list'};
1747     $rOpts_closing_side_comments = $rOpts->{'closing-side-comments'};
1748     $rOpts_closing_side_comment_else_flag =
1749       $rOpts->{'closing-side-comment-else-flag'};
1750     $rOpts_closing_side_comment_maximum_text =
1751       $rOpts->{'closing-side-comment-maximum-text'};
1752     $rOpts_comma_arrow_breakpoints  = $rOpts->{'comma-arrow-breakpoints'};
1753     $rOpts_continuation_indentation = $rOpts->{'continuation-indentation'};
1754     $rOpts_delete_closing_side_comments =
1755       $rOpts->{'delete-closing-side-comments'};
1756     $rOpts_delete_old_whitespace = $rOpts->{'delete-old-whitespace'};
1757     $rOpts_extended_continuation_indentation =
1758       $rOpts->{'extended-continuation-indentation'};
1759     $rOpts_delete_side_comments = $rOpts->{'delete-side-comments'};
1760     $rOpts_format_skipping      = $rOpts->{'format-skipping'};
1761     $rOpts_freeze_whitespace    = $rOpts->{'freeze-whitespace'};
1762     $rOpts_function_paren_vertical_alignment =
1763       $rOpts->{'function-paren-vertical-alignment'};
1764     $rOpts_fuzzy_line_length      = $rOpts->{'fuzzy-line-length'};
1765     $rOpts_ignore_old_breakpoints = $rOpts->{'ignore-old-breakpoints'};
1766     $rOpts_ignore_side_comment_lengths =
1767       $rOpts->{'ignore-side-comment-lengths'};
1768     $rOpts_indent_closing_brace     = $rOpts->{'indent-closing-brace'};
1769     $rOpts_indent_columns           = $rOpts->{'indent-columns'};
1770     $rOpts_indent_only              = $rOpts->{'indent-only'};
1771     $rOpts_keep_interior_semicolons = $rOpts->{'keep-interior-semicolons'};
1772     $rOpts_line_up_parentheses      = $rOpts->{'line-up-parentheses'};
1773     $rOpts_extended_line_up_parentheses =
1774       $rOpts->{'extended-line-up-parentheses'};
1775     $rOpts_logical_padding = $rOpts->{'logical-padding'};
1776     $rOpts_maximum_consecutive_blank_lines =
1777       $rOpts->{'maximum-consecutive-blank-lines'};
1778     $rOpts_maximum_fields_per_table  = $rOpts->{'maximum-fields-per-table'};
1779     $rOpts_maximum_line_length       = $rOpts->{'maximum-line-length'};
1780     $rOpts_one_line_block_semicolons = $rOpts->{'one-line-block-semicolons'};
1781     $rOpts_opening_brace_always_on_right =
1782       $rOpts->{'opening-brace-always-on-right'};
1783     $rOpts_outdent_keywords      = $rOpts->{'outdent-keywords'};
1784     $rOpts_outdent_labels        = $rOpts->{'outdent-labels'};
1785     $rOpts_outdent_long_comments = $rOpts->{'outdent-long-comments'};
1786     $rOpts_outdent_long_quotes   = $rOpts->{'outdent-long-quotes'};
1787     $rOpts_outdent_static_block_comments =
1788       $rOpts->{'outdent-static-block-comments'};
1789     $rOpts_recombine = $rOpts->{'recombine'};
1790     $rOpts_short_concatenation_item_length =
1791       $rOpts->{'short-concatenation-item-length'};
1792     $rOpts_stack_closing_block_brace = $rOpts->{'stack-closing-block-brace'};
1793     $rOpts_static_block_comments     = $rOpts->{'static-block-comments'};
1794     $rOpts_sub_alias_list            = $rOpts->{'sub-alias-list'};
1795     $rOpts_tee_block_comments        = $rOpts->{'tee-block-comments'};
1796     $rOpts_tee_pod                   = $rOpts->{'tee-pod'};
1797     $rOpts_tee_side_comments         = $rOpts->{'tee-side-comments'};
1798     $rOpts_valign                    = $rOpts->{'valign'};
1799     $rOpts_valign_code               = $rOpts->{'valign-code'};
1800     $rOpts_valign_side_comments      = $rOpts->{'valign-side-comments'};
1801     $rOpts_variable_maximum_line_length =
1802       $rOpts->{'variable-maximum-line-length'};
1803
1804     # Note that both opening and closing tokens can access the opening
1805     # and closing flags of their container types.
1806     %opening_vertical_tightness = (
1807         '(' => $rOpts->{'paren-vertical-tightness'},
1808         '{' => $rOpts->{'brace-vertical-tightness'},
1809         '[' => $rOpts->{'square-bracket-vertical-tightness'},
1810         ')' => $rOpts->{'paren-vertical-tightness'},
1811         '}' => $rOpts->{'brace-vertical-tightness'},
1812         ']' => $rOpts->{'square-bracket-vertical-tightness'},
1813     );
1814
1815     %closing_vertical_tightness = (
1816         '(' => $rOpts->{'paren-vertical-tightness-closing'},
1817         '{' => $rOpts->{'brace-vertical-tightness-closing'},
1818         '[' => $rOpts->{'square-bracket-vertical-tightness-closing'},
1819         ')' => $rOpts->{'paren-vertical-tightness-closing'},
1820         '}' => $rOpts->{'brace-vertical-tightness-closing'},
1821         ']' => $rOpts->{'square-bracket-vertical-tightness-closing'},
1822     );
1823
1824     # assume flag for '>' same as ')' for closing qw quotes
1825     %closing_token_indentation = (
1826         ')' => $rOpts->{'closing-paren-indentation'},
1827         '}' => $rOpts->{'closing-brace-indentation'},
1828         ']' => $rOpts->{'closing-square-bracket-indentation'},
1829         '>' => $rOpts->{'closing-paren-indentation'},
1830     );
1831
1832     # flag indicating if any closing tokens are indented
1833     $some_closing_token_indentation =
1834          $rOpts->{'closing-paren-indentation'}
1835       || $rOpts->{'closing-brace-indentation'}
1836       || $rOpts->{'closing-square-bracket-indentation'}
1837       || $rOpts->{'indent-closing-brace'};
1838
1839     %opening_token_right = (
1840         '(' => $rOpts->{'opening-paren-right'},
1841         '{' => $rOpts->{'opening-hash-brace-right'},
1842         '[' => $rOpts->{'opening-square-bracket-right'},
1843     );
1844
1845     %stack_opening_token = (
1846         '(' => $rOpts->{'stack-opening-paren'},
1847         '{' => $rOpts->{'stack-opening-hash-brace'},
1848         '[' => $rOpts->{'stack-opening-square-bracket'},
1849     );
1850
1851     %stack_closing_token = (
1852         ')' => $rOpts->{'stack-closing-paren'},
1853         '}' => $rOpts->{'stack-closing-hash-brace'},
1854         ']' => $rOpts->{'stack-closing-square-bracket'},
1855     );
1856
1857     # Create a table of maximum line length vs level for later efficient use.
1858     # We will make the tables very long to be sure it will not be exceeded.
1859     # But we have to choose a fixed length.  A check will be made at the start
1860     # of sub 'finish_formatting' to be sure it is not exceeded.  Note, some of
1861     # my standard test problems have indentation levels of about 150, so this
1862     # should be fairly large.  If the choice of a maximum level ever becomes
1863     # an issue then these table values could be returned in a sub with a simple
1864     # memoization scheme.
1865
1866     # Also create a table of the maximum spaces available for text due to the
1867     # level only.  If a line has continuation indentation, then that space must
1868     # be subtracted from the table value.  This table is used for preliminary
1869     # estimates in welding, extended_ci, BBX, and marking short blocks.
1870     my $level_max = 1000;
1871
1872     # The basic scheme:
1873     foreach my $level ( 0 .. $level_max ) {
1874         my $indent = $level * $rOpts_indent_columns;
1875         $maximum_line_length_at_level[$level] = $rOpts_maximum_line_length;
1876         $maximum_text_length_at_level[$level] =
1877           $rOpts_maximum_line_length - $indent;
1878     }
1879
1880     # Correct the maximum_text_length table if the -wc=n flag is used
1881     $rOpts_whitespace_cycle = $rOpts->{'whitespace-cycle'};
1882     if ($rOpts_whitespace_cycle) {
1883         if ( $rOpts_whitespace_cycle > 0 ) {
1884             foreach my $level ( 0 .. $level_max ) {
1885                 my $level_mod = $level % $rOpts_whitespace_cycle;
1886                 my $indent    = $level_mod * $rOpts_indent_columns;
1887                 $maximum_text_length_at_level[$level] =
1888                   $rOpts_maximum_line_length - $indent;
1889             }
1890         }
1891         else {
1892             $rOpts_whitespace_cycle = $rOpts->{'whitespace-cycle'} = 0;
1893         }
1894     }
1895
1896     # Correct the tables if the -vmll flag is used.  These values override the
1897     # previous values.
1898     if ($rOpts_variable_maximum_line_length) {
1899         foreach my $level ( 0 .. $level_max ) {
1900             $maximum_text_length_at_level[$level] = $rOpts_maximum_line_length;
1901             $maximum_line_length_at_level[$level] =
1902               $rOpts_maximum_line_length + $level * $rOpts_indent_columns;
1903         }
1904     }
1905
1906     # Define two measures of indentation level, alpha and beta, at which some
1907     # formatting features come under stress and need to start shutting down.
1908     # Some combination of the two will be used to shut down different
1909     # formatting features.
1910     # Put a reasonable upper limit on stress level (say 100) in case the
1911     # whitespace-cycle variable is used.
1912     my $stress_level_limit = min( 100, $level_max );
1913
1914     # Find stress_level_alpha, targeted at very short maximum line lengths.
1915     $stress_level_alpha = $stress_level_limit + 1;
1916     foreach my $level_test ( 0 .. $stress_level_limit ) {
1917         my $max_len = $maximum_text_length_at_level[ $level_test + 1 ];
1918         my $excess_inside_space =
1919           $max_len -
1920           $rOpts_continuation_indentation -
1921           $rOpts_indent_columns - 8;
1922         if ( $excess_inside_space <= 0 ) {
1923             $stress_level_alpha = $level_test;
1924             last;
1925         }
1926     }
1927
1928     # Find stress level beta, a stress level targeted at formatting
1929     # at deep levels near the maximum line length.  We start increasing
1930     # from zero and stop at the first level which shows no more space.
1931
1932     # 'const' is a fixed number of spaces for a typical variable.
1933     # Cases b1197-b1204 work ok with const=12 but not with const=8
1934     my $const = 16;
1935     my $denom = max( 1, $rOpts_indent_columns );
1936     $stress_level_beta = 0;
1937     foreach my $level ( 0 .. $stress_level_limit ) {
1938         my $remaining_cycles = max(
1939             0,
1940             (
1941                 $maximum_text_length_at_level[$level] -
1942                   $rOpts_continuation_indentation - $const
1943             ) / $denom
1944         );
1945         last if ( $remaining_cycles <= 3 );    # 2 does not work
1946         $stress_level_beta = $level;
1947     }
1948
1949     initialize_weld_nested_exclusion_rules($rOpts);
1950
1951     %line_up_parentheses_control_hash    = ();
1952     $line_up_parentheses_control_is_lxpl = 1;
1953     my $lpxl = $rOpts->{'line-up-parentheses-exclusion-list'};
1954     my $lpil = $rOpts->{'line-up-parentheses-inclusion-list'};
1955     if ( $lpxl && $lpil ) {
1956         Warn( <<EOM );
1957 You entered values for both -lpxl=s and -lpil=s; the -lpil list will be ignored
1958 EOM
1959     }
1960     if ($lpxl) {
1961         $line_up_parentheses_control_is_lxpl = 1;
1962         initialize_line_up_parentheses_control_hash(
1963             $rOpts->{'line-up-parentheses-exclusion-list'}, 'lpxl' );
1964     }
1965     elsif ($lpil) {
1966         $line_up_parentheses_control_is_lxpl = 0;
1967         initialize_line_up_parentheses_control_hash(
1968             $rOpts->{'line-up-parentheses-inclusion-list'}, 'lpil' );
1969     }
1970
1971     return;
1972 }
1973
1974 use constant ALIGN_GREP_ALIASES => 0;
1975
1976 sub initialize_grep_and_friends {
1977     my ($str) = @_;
1978
1979     # Initialize or re-initialize hashes with 'grep' and grep aliases. This
1980     # must be done after each set of options because new grep aliases may be
1981     # used.
1982
1983     # re-initialize the hash ... this is critical!
1984     %is_sort_map_grep = ();
1985
1986     my @q = qw(sort map grep);
1987     @is_sort_map_grep{@q} = (1) x scalar(@q);
1988
1989     # Note that any 'grep-alias-list' string has been preprocessed to be a
1990     # trimmed, space-separated list.
1991     my @grep_aliases = split /\s+/, $str;
1992     @{is_sort_map_grep}{@grep_aliases} = (1) x scalar(@grep_aliases);
1993
1994     ##@q = qw(sort map grep eval);
1995     %is_sort_map_grep_eval = %is_sort_map_grep;
1996     $is_sort_map_grep_eval{'eval'} = 1;
1997
1998     ##@q = qw(sort map grep eval do);
1999     %is_sort_map_grep_eval_do = %is_sort_map_grep_eval;
2000     $is_sort_map_grep_eval_do{'do'} = 1;
2001
2002     # These block types can take ci.  This is used by the -xci option.
2003     # Note that the 'sub' in this list is an anonymous sub.  To be more correct
2004     # we could remove sub and use ASUB pattern to also handle a
2005     # prototype/signature.  But that would slow things down and would probably
2006     # never be useful.
2007     ##@q = qw( do sub eval sort map grep );
2008     %is_block_with_ci = %is_sort_map_grep_eval_do;
2009     $is_block_with_ci{'sub'} = 1;
2010
2011     %is_keyword_returning_list = ();
2012     @q                         = qw(
2013       grep
2014       keys
2015       map
2016       reverse
2017       sort
2018       split
2019     );
2020     push @q, @grep_aliases;
2021     @is_keyword_returning_list{@q} = (1) x scalar(@q);
2022
2023     # This code enables vertical alignment of grep aliases for testing.  It has
2024     # not been found to be beneficial, so it is off by default.  But it is
2025     # useful for precise testing of the grep alias coding.
2026     if (ALIGN_GREP_ALIASES) {
2027         %block_type_map = (
2028             'unless'  => 'if',
2029             'else'    => 'if',
2030             'elsif'   => 'if',
2031             'when'    => 'if',
2032             'default' => 'if',
2033             'case'    => 'if',
2034             'sort'    => 'map',
2035             'grep'    => 'map',
2036         );
2037         foreach (@q) {
2038             $block_type_map{$_} = 'map' unless ( $_ eq 'map' );
2039         }
2040     }
2041     return;
2042 }
2043
2044 sub initialize_weld_nested_exclusion_rules {
2045     my ($rOpts) = @_;
2046     %weld_nested_exclusion_rules = ();
2047
2048     my $opt_name = 'weld-nested-exclusion-list';
2049     my $str      = $rOpts->{$opt_name};
2050     return unless ($str);
2051     $str =~ s/^\s+//;
2052     $str =~ s/\s+$//;
2053     return unless ($str);
2054
2055     # There are four container tokens.
2056     my %token_keys = (
2057         '(' => '(',
2058         '[' => '[',
2059         '{' => '{',
2060         'q' => 'q',
2061     );
2062
2063     # We are parsing an exclusion list for nested welds. The list is a string
2064     # with spaces separating any number of items.  Each item consists of three
2065     # pieces of information:
2066     # <optional position> <optional type> <type of container>
2067     # <     ^ or .      > <    k or K   > <     ( [ {       >
2068
2069     # The last character is the required container type and must be one of:
2070     # ( = paren
2071     # [ = square bracket
2072     # { = brace
2073
2074     # An optional leading position indicator:
2075     # ^ means the leading token position in the weld
2076     # . means a secondary token position in the weld
2077     #   no position indicator means all positions match
2078
2079     # An optional alphanumeric character between the position and container
2080     # token selects to which the rule applies:
2081     # k = any keyword
2082     # K = any non-keyword
2083     # f = function call
2084     # F = not a function call
2085     # w = function or keyword
2086     # W = not a function or keyword
2087     #     no letter means any preceding type matches
2088
2089     # Examples:
2090     # ^(  - the weld must not start with a paren
2091     # .(  - the second and later tokens may not be parens
2092     # (   - no parens in weld
2093     # ^K(  - exclude a leading paren not preceded by a keyword
2094     # .k(  - exclude a secondary paren preceded by a keyword
2095     # [ {  - exclude all brackets and braces
2096
2097     my @items = split /\s+/, $str;
2098     my $msg1;
2099     my $msg2;
2100     foreach my $item (@items) {
2101         my $item_save = $item;
2102         my $tok       = chop($item);
2103         my $key       = $token_keys{$tok};
2104         if ( !defined($key) ) {
2105             $msg1 .= " '$item_save'";
2106             next;
2107         }
2108         if ( !defined( $weld_nested_exclusion_rules{$key} ) ) {
2109             $weld_nested_exclusion_rules{$key} = [];
2110         }
2111         my $rflags = $weld_nested_exclusion_rules{$key};
2112
2113         # A 'q' means do not weld quotes
2114         if ( $tok eq 'q' ) {
2115             $rflags->[0] = '*';
2116             $rflags->[1] = '*';
2117             next;
2118         }
2119
2120         my $pos    = '*';
2121         my $select = '*';
2122         if ($item) {
2123             if ( $item =~ /^([\^\.])?([kKfFwW])?$/ ) {
2124                 $pos    = $1 if ($1);
2125                 $select = $2 if ($2);
2126             }
2127             else {
2128                 $msg1 .= " '$item_save'";
2129                 next;
2130             }
2131         }
2132
2133         my $err;
2134         if ( $pos eq '^' || $pos eq '*' ) {
2135             if ( defined( $rflags->[0] ) && $rflags->[0] ne $select ) {
2136                 $err = 1;
2137             }
2138             $rflags->[0] = $select;
2139         }
2140         if ( $pos eq '.' || $pos eq '*' ) {
2141             if ( defined( $rflags->[1] ) && $rflags->[1] ne $select ) {
2142                 $err = 1;
2143             }
2144             $rflags->[1] = $select;
2145         }
2146         if ($err) { $msg2 .= " '$item_save'"; }
2147     }
2148     if ($msg1) {
2149         Warn(<<EOM);
2150 Unexpecting symbol(s) encountered in --$opt_name will be ignored:
2151 $msg1
2152 EOM
2153     }
2154     if ($msg2) {
2155         Warn(<<EOM);
2156 Multiple specifications were encountered in the --weld-nested-exclusion-list for:
2157 $msg2
2158 Only the last will be used.
2159 EOM
2160     }
2161     return;
2162 }
2163
2164 sub initialize_line_up_parentheses_control_hash {
2165     my ( $str, $opt_name ) = @_;
2166     return unless ($str);
2167     $str =~ s/^\s+//;
2168     $str =~ s/\s+$//;
2169     return unless ($str);
2170
2171     # The format is space separated items, where each item must consist of a
2172     # string with a token type preceded by an optional text token and followed
2173     # by an integer:
2174     # For example:
2175     #    W(1
2176     #  = (flag1)(key)(flag2), where
2177     #    flag1 = 'W'
2178     #    key = '('
2179     #    flag2 = '1'
2180
2181     my @items = split /\s+/, $str;
2182     my $msg1;
2183     my $msg2;
2184     foreach my $item (@items) {
2185         my $item_save = $item;
2186         my ( $flag1, $key, $flag2 );
2187         if ( $item =~ /^([^\(\]\{]*)?([\(\{\[])(\d)?$/ ) {
2188             $flag1 = $1 if $1;
2189             $key   = $2 if $2;
2190             $flag2 = $3 if $3;
2191         }
2192         else {
2193             $msg1 .= " '$item_save'";
2194             next;
2195         }
2196
2197         if ( !defined($key) ) {
2198             $msg1 .= " '$item_save'";
2199             next;
2200         }
2201
2202         # Check for valid flag1
2203         if    ( !defined($flag1) ) { $flag1 = '*' }
2204         elsif ( $flag1 !~ /^[kKfFwW\*]$/ ) {
2205             $msg1 .= " '$item_save'";
2206             next;
2207         }
2208
2209         # Check for valid flag2
2210         # 0 or blank: ignore container contents
2211         # 1 all containers with sublists match
2212         # 2 all containers with sublists, code blocks or ternary operators match
2213         # ... this could be extended in the future
2214         if    ( !defined($flag2) ) { $flag2 = 0 }
2215         elsif ( $flag2 !~ /^[012]$/ ) {
2216             $msg1 .= " '$item_save'";
2217             next;
2218         }
2219
2220         if ( !defined( $line_up_parentheses_control_hash{$key} ) ) {
2221             $line_up_parentheses_control_hash{$key} = [ $flag1, $flag2 ];
2222             next;
2223         }
2224
2225         # check for multiple conflicting specifications
2226         my $rflags = $line_up_parentheses_control_hash{$key};
2227         my $err;
2228         if ( defined( $rflags->[0] ) && $rflags->[0] ne $flag1 ) {
2229             $err = 1;
2230             $rflags->[0] = $flag1;
2231         }
2232         if ( defined( $rflags->[1] ) && $rflags->[1] ne $flag2 ) {
2233             $err = 1;
2234             $rflags->[1] = $flag2;
2235         }
2236         $msg2 .= " '$item_save'" if ($err);
2237         next;
2238     }
2239     if ($msg1) {
2240         Warn(<<EOM);
2241 Unexpecting symbol(s) encountered in --$opt_name will be ignored:
2242 $msg1
2243 EOM
2244     }
2245     if ($msg2) {
2246         Warn(<<EOM);
2247 Multiple specifications were encountered in the $opt_name at:
2248 $msg2
2249 Only the last will be used.
2250 EOM
2251     }
2252
2253     # Speedup: we can turn off -lp if it is not actually used
2254     if ($line_up_parentheses_control_is_lxpl) {
2255         my $all_off = 1;
2256         foreach my $key (qw# ( { [ #) {
2257             my $rflags = $line_up_parentheses_control_hash{$key};
2258             if ( defined($rflags) ) {
2259                 my ( $flag1, $flag2 ) = @{$rflags};
2260                 if ( $flag1 && $flag1 ne '*' ) { $all_off = 0; last }
2261                 if ($flag2)                    { $all_off = 0; last }
2262             }
2263         }
2264         if ($all_off) {
2265             $rOpts->{'line-up-parentheses'} = "";
2266         }
2267     }
2268
2269     return;
2270 }
2271
2272 use constant DEBUG_KB => 0;
2273
2274 sub initialize_keep_old_breakpoints {
2275     my ( $str, $short_name, $rkeep_break_hash ) = @_;
2276     return unless $str;
2277
2278     my %flags = ();
2279     my @list  = split_words($str);
2280     if ( DEBUG_KB && @list ) {
2281         local $" = ' ';
2282         print <<EOM;
2283 DEBUG_KB entering for '$short_name' with str=$str\n";
2284 list is: @list;
2285 EOM
2286     }
2287
2288     # - pull out any any leading container code, like f( or *{
2289     foreach (@list) {
2290         if ( $_ =~ /^( [ \w\* ] )( [ \{\(\[\}\)\] ] )$/x ) {
2291             $_ = $2;
2292             $flags{$2} = $1;
2293         }
2294     }
2295
2296     my @unknown_types;
2297     foreach my $type (@list) {
2298         if ( !Perl::Tidy::Tokenizer::is_valid_token_type($type) ) {
2299             push @unknown_types, $type;
2300         }
2301     }
2302
2303     if (@unknown_types) {
2304         my $num = @unknown_types;
2305         local $" = ' ';
2306         Warn(<<EOM);
2307 $num unrecognized token types were input with --$short_name :
2308 @unknown_types
2309 EOM
2310     }
2311
2312     @{$rkeep_break_hash}{@list} = (1) x scalar(@list);
2313
2314     foreach my $key ( keys %flags ) {
2315         my $flag = $flags{$key};
2316
2317         if ( length($flag) != 1 ) {
2318             Warn(<<EOM);
2319 Multiple entries given for '$key' in '$short_name'
2320 EOM
2321         }
2322         elsif ( ( $key eq '(' || $key eq ')' ) && $flag !~ /^[kKfFwW\*]$/ ) {
2323             Warn(<<EOM);
2324 Unknown flag '$flag' given for '$key' in '$short_name'
2325 EOM
2326         }
2327         elsif ( ( $key eq '}' || $key eq '}' ) && $flag !~ /^[bB\*]$/ ) {
2328             Warn(<<EOM);
2329 Unknown flag '$flag' given for '$key' in '$short_name'
2330 EOM
2331         }
2332
2333         $rkeep_break_hash->{$key} = $flag;
2334     }
2335
2336     # Temporary patch and warning during changeover from using type to token for
2337     # containers .  This can be eliminated after one or two future releases.
2338     if (   $rkeep_break_hash->{'{'}
2339         && $rkeep_break_hash->{'{'} eq '1'
2340         && !$rkeep_break_hash->{'('}
2341         && !$rkeep_break_hash->{'['} )
2342     {
2343         $rkeep_break_hash->{'('} = 1;
2344         $rkeep_break_hash->{'['} = 1;
2345         Warn(<<EOM);
2346 Sorry, but the format for the -kbb and -kba flags is changing a little.
2347 You entered '{' which currently matches '{' '(' and '[',
2348 but in the future it will only match '{'.
2349 To prevent this message please do one of the following:
2350   use '{ ( [' if you want to match all opening containers, or
2351   use '(' or '[' to match just those containers, or
2352   use '*{' to match only opening braces
2353 EOM
2354     }
2355
2356     if (   $rkeep_break_hash->{'}'}
2357         && $rkeep_break_hash->{'}'} eq '1'
2358         && !$rkeep_break_hash->{')'}
2359         && !$rkeep_break_hash->{']'} )
2360     {
2361         $rkeep_break_hash->{'('} = 1;
2362         $rkeep_break_hash->{'['} = 1;
2363         Warn(<<EOM);
2364 Sorry, but the format for the -kbb and -kba flags is changing a little.
2365 You entered '}' which currently matches each of '}' ')' and ']',
2366 but in the future it will only match '}'.
2367 To prevent this message please do one of the following:
2368   use '} ) ]' if you want to match all closing containers, or
2369   use ')' or ']' to match just those containers, or
2370   use '*}' to match only closing braces
2371 EOM
2372     }
2373
2374     if ( DEBUG_KB && @list ) {
2375         my @tmp = %flags;
2376         local $" = ' ';
2377         print <<EOM;
2378
2379 DEBUG_KB -$short_name flag: $str
2380 final keys:  @list
2381 special flags:  @tmp
2382 EOM
2383
2384     }
2385
2386     return;
2387
2388 }
2389
2390 sub initialize_whitespace_hashes {
2391
2392     # This is called once before formatting begins to initialize these global
2393     # hashes, which control the use of whitespace around tokens:
2394     #
2395     # %binary_ws_rules
2396     # %want_left_space
2397     # %want_right_space
2398     # %space_after_keyword
2399     #
2400     # Many token types are identical to the tokens themselves.
2401     # See the tokenizer for a complete list. Here are some special types:
2402     #   k = perl keyword
2403     #   f = semicolon in for statement
2404     #   m = unary minus
2405     #   p = unary plus
2406     # Note that :: is excluded since it should be contained in an identifier
2407     # Note that '->' is excluded because it never gets space
2408     # parentheses and brackets are excluded since they are handled specially
2409     # curly braces are included but may be overridden by logic, such as
2410     # newline logic.
2411
2412     # NEW_TOKENS: create a whitespace rule here.  This can be as
2413     # simple as adding your new letter to @spaces_both_sides, for
2414     # example.
2415
2416     my @opening_type = qw< L { ( [ >;
2417     @is_opening_type{@opening_type} = (1) x scalar(@opening_type);
2418
2419     my @closing_type = qw< R } ) ] >;
2420     @is_closing_type{@closing_type} = (1) x scalar(@closing_type);
2421
2422     my @spaces_both_sides = qw#
2423       + - * / % ? = . : x < > | & ^ .. << >> ** && .. || // => += -=
2424       .= %= x= &= |= ^= *= <> <= >= == =~ !~ /= != ... <<= >>= ~~ !~~
2425       &&= ||= //= <=> A k f w F n C Y U G v
2426       #;
2427
2428     my @spaces_left_side = qw<
2429       t ! ~ m p { \ h pp mm Z j
2430     >;
2431     push( @spaces_left_side, '#' );    # avoids warning message
2432
2433     my @spaces_right_side = qw<
2434       ; } ) ] R J ++ -- **=
2435     >;
2436     push( @spaces_right_side, ',' );    # avoids warning message
2437
2438     %want_left_space  = ();
2439     %want_right_space = ();
2440     %binary_ws_rules  = ();
2441
2442     # Note that we setting defaults here.  Later in processing
2443     # the values of %want_left_space and  %want_right_space
2444     # may be overridden by any user settings specified by the
2445     # -wls and -wrs parameters.  However the binary_whitespace_rules
2446     # are hardwired and have priority.
2447     @want_left_space{@spaces_both_sides} =
2448       (1) x scalar(@spaces_both_sides);
2449     @want_right_space{@spaces_both_sides} =
2450       (1) x scalar(@spaces_both_sides);
2451     @want_left_space{@spaces_left_side} =
2452       (1) x scalar(@spaces_left_side);
2453     @want_right_space{@spaces_left_side} =
2454       (-1) x scalar(@spaces_left_side);
2455     @want_left_space{@spaces_right_side} =
2456       (-1) x scalar(@spaces_right_side);
2457     @want_right_space{@spaces_right_side} =
2458       (1) x scalar(@spaces_right_side);
2459     $want_left_space{'->'}      = WS_NO;
2460     $want_right_space{'->'}     = WS_NO;
2461     $want_left_space{'**'}      = WS_NO;
2462     $want_right_space{'**'}     = WS_NO;
2463     $want_right_space{'CORE::'} = WS_NO;
2464
2465     # These binary_ws_rules are hardwired and have priority over the above
2466     # settings.  It would be nice to allow adjustment by the user,
2467     # but it would be complicated to specify.
2468     #
2469     # hash type information must stay tightly bound
2470     # as in :  ${xxxx}
2471     $binary_ws_rules{'i'}{'L'} = WS_NO;
2472     $binary_ws_rules{'i'}{'{'} = WS_YES;
2473     $binary_ws_rules{'k'}{'{'} = WS_YES;
2474     $binary_ws_rules{'U'}{'{'} = WS_YES;
2475     $binary_ws_rules{'i'}{'['} = WS_NO;
2476     $binary_ws_rules{'R'}{'L'} = WS_NO;
2477     $binary_ws_rules{'R'}{'{'} = WS_NO;
2478     $binary_ws_rules{'t'}{'L'} = WS_NO;
2479     $binary_ws_rules{'t'}{'{'} = WS_NO;
2480     $binary_ws_rules{'t'}{'='} = WS_OPTIONAL;    # for signatures; fixes b1123
2481     $binary_ws_rules{'}'}{'L'} = WS_NO;
2482     $binary_ws_rules{'}'}{'{'} = WS_OPTIONAL;    # RT#129850; was WS_NO
2483     $binary_ws_rules{'$'}{'L'} = WS_NO;
2484     $binary_ws_rules{'$'}{'{'} = WS_NO;
2485     $binary_ws_rules{'@'}{'L'} = WS_NO;
2486     $binary_ws_rules{'@'}{'{'} = WS_NO;
2487     $binary_ws_rules{'='}{'L'} = WS_YES;
2488     $binary_ws_rules{'J'}{'J'} = WS_YES;
2489
2490     # the following includes ') {'
2491     # as in :    if ( xxx ) { yyy }
2492     $binary_ws_rules{']'}{'L'} = WS_NO;
2493     $binary_ws_rules{']'}{'{'} = WS_NO;
2494     $binary_ws_rules{')'}{'{'} = WS_YES;
2495     $binary_ws_rules{')'}{'['} = WS_NO;
2496     $binary_ws_rules{']'}{'['} = WS_NO;
2497     $binary_ws_rules{']'}{'{'} = WS_NO;
2498     $binary_ws_rules{'}'}{'['} = WS_NO;
2499     $binary_ws_rules{'R'}{'['} = WS_NO;
2500
2501     $binary_ws_rules{']'}{'++'} = WS_NO;
2502     $binary_ws_rules{']'}{'--'} = WS_NO;
2503     $binary_ws_rules{')'}{'++'} = WS_NO;
2504     $binary_ws_rules{')'}{'--'} = WS_NO;
2505
2506     $binary_ws_rules{'R'}{'++'} = WS_NO;
2507     $binary_ws_rules{'R'}{'--'} = WS_NO;
2508
2509     $binary_ws_rules{'i'}{'Q'} = WS_YES;
2510     $binary_ws_rules{'n'}{'('} = WS_YES;    # occurs in 'use package n ()'
2511
2512     $binary_ws_rules{'i'}{'('} = WS_NO;
2513
2514     $binary_ws_rules{'w'}{'('} = WS_NO;
2515     $binary_ws_rules{'w'}{'{'} = WS_YES;
2516     return;
2517
2518 } ## end initialize_whitespace_hashes
2519
2520 # The following hash is used to skip over needless if tests.
2521 # Be sure to update it when adding new checks in its block.
2522 my %is_special_ws_type;
2523
2524 BEGIN {
2525     my @q = qw(k w i C m - Q);
2526     push @q, '#';
2527     @is_special_ws_type{@q} = (1) x scalar(@q);
2528 }
2529
2530 use constant DEBUG_WHITE => 0;
2531
2532 sub set_whitespace_flags {
2533
2534     # This routine is called once per file to set whitespace flags for that
2535     # file.  This routine examines each pair of nonblank tokens and sets a flag
2536     # indicating if white space is needed.
2537     #
2538     # $rwhitespace_flags->[$j] is a flag indicating whether a white space
2539     # BEFORE token $j is needed, with the following values:
2540     #
2541     #             WS_NO      = -1 do not want a space BEFORE token $j
2542     #             WS_OPTIONAL=  0 optional space or $j is a whitespace
2543     #             WS_YES     =  1 want a space BEFORE token $j
2544     #
2545
2546     my $self = shift;
2547
2548     my $rLL                  = $self->[_rLL_];
2549     my $rblock_type_of_seqno = $self->[_rblock_type_of_seqno_];
2550     my $jmax                 = @{$rLL} - 1;
2551
2552     my $rOpts_space_keyword_paren   = $rOpts->{'space-keyword-paren'};
2553     my $rOpts_space_backslash_quote = $rOpts->{'space-backslash-quote'};
2554     my $rOpts_space_function_paren  = $rOpts->{'space-function-paren'};
2555
2556     my $rwhitespace_flags       = [];
2557     my $ris_function_call_paren = {};
2558
2559     return $rwhitespace_flags if ( $jmax < 0 );
2560
2561     my %is_for_foreach = ( 'for' => 1, 'foreach' => 1 );
2562
2563     my ( $rtokh,      $token,      $type );
2564     my ( $rtokh_last, $last_token, $last_type );
2565
2566     my $j_tight_closing_paren = -1;
2567
2568     $rtokh = [ @{ $rLL->[0] } ];
2569     $token = ' ';
2570     $type  = 'b';
2571
2572     $rtokh->[_TOKEN_]         = $token;
2573     $rtokh->[_TYPE_]          = $type;
2574     $rtokh->[_TYPE_SEQUENCE_] = '';
2575     $rtokh->[_LINE_INDEX_]    = 0;
2576
2577     my ($ws);
2578
2579     # This is some logic moved to a sub to avoid deep nesting of if stmts
2580     my $ws_in_container = sub {
2581
2582         my ($j) = @_;
2583         my $ws = WS_YES;
2584         if ( $j + 1 > $jmax ) { return (WS_NO) }
2585
2586         # Patch to count '-foo' as single token so that
2587         # each of  $a{-foo} and $a{foo} and $a{'foo'} do
2588         # not get spaces with default formatting.
2589         my $j_here = $j;
2590         ++$j_here
2591           if ( $token eq '-'
2592             && $last_token eq '{'
2593             && $rLL->[ $j + 1 ]->[_TYPE_] eq 'w' );
2594
2595         # Patch to count a sign separated from a number as a single token, as
2596         # in the following line. Otherwise, it takes two steps to converge:
2597         #    deg2rad(-  0.5)
2598         if (   ( $type eq 'm' || $type eq 'p' )
2599             && $j < $jmax + 1
2600             && $rLL->[ $j + 1 ]->[_TYPE_] eq 'b'
2601             && $rLL->[ $j + 2 ]->[_TYPE_] eq 'n'
2602             && $rLL->[ $j + 2 ]->[_TOKEN_] =~ /^\d/ )
2603         {
2604             $j_here = $j + 2;
2605         }
2606
2607         # $j_next is where a closing token should be if
2608         # the container has a single token
2609         if ( $j_here + 1 > $jmax ) { return (WS_NO) }
2610         my $j_next =
2611           ( $rLL->[ $j_here + 1 ]->[_TYPE_] eq 'b' )
2612           ? $j_here + 2
2613           : $j_here + 1;
2614
2615         if ( $j_next > $jmax ) { return WS_NO }
2616         my $tok_next  = $rLL->[$j_next]->[_TOKEN_];
2617         my $type_next = $rLL->[$j_next]->[_TYPE_];
2618
2619         # for tightness = 1, if there is just one token
2620         # within the matching pair, we will keep it tight
2621         if (
2622             $tok_next eq $matching_token{$last_token}
2623
2624             # but watch out for this: [ [ ]    (misc.t)
2625             && $last_token ne $token
2626
2627             # double diamond is usually spaced
2628             && $token ne '<<>>'
2629
2630           )
2631         {
2632
2633             # remember where to put the space for the closing paren
2634             $j_tight_closing_paren = $j_next;
2635             return (WS_NO);
2636         }
2637         return (WS_YES);
2638     };
2639
2640     # Local hashes to set spaces around container tokens according to their
2641     # sequence numbers.  These are set as keywords are examined.
2642     # They are controlled by the -kpit and -kpitl flags.
2643     my %opening_container_inside_ws;
2644     my %closing_container_inside_ws;
2645     my $set_container_ws_by_keyword = sub {
2646
2647         return unless (%keyword_paren_inner_tightness);
2648
2649         my ( $word, $sequence_number ) = @_;
2650
2651         # We just saw a keyword (or other function name) followed by an opening
2652         # paren. Now check to see if the following paren should have special
2653         # treatment for its inside space.  If so we set a hash value using the
2654         # sequence number as key.
2655         if ( $word && $sequence_number ) {
2656             my $tightness = $keyword_paren_inner_tightness{$word};
2657             if ( defined($tightness) && $tightness != 1 ) {
2658                 my $ws_flag = $tightness == 0 ? WS_YES : WS_NO;
2659                 $opening_container_inside_ws{$sequence_number} = $ws_flag;
2660                 $closing_container_inside_ws{$sequence_number} = $ws_flag;
2661             }
2662         }
2663         return;
2664     };
2665
2666     my ( $ws_1, $ws_2, $ws_3, $ws_4 );
2667
2668     # main loop over all tokens to define the whitespace flags
2669     for ( my $j = 0 ; $j <= $jmax ; $j++ ) {
2670
2671         if ( $rLL->[$j]->[_TYPE_] eq 'b' ) {
2672             $rwhitespace_flags->[$j] = WS_OPTIONAL;
2673             next;
2674         }
2675
2676         $rtokh_last = $rtokh;
2677         $last_token = $token;
2678         $last_type  = $type;
2679
2680         $rtokh = $rLL->[$j];
2681         $token = $rtokh->[_TOKEN_];
2682         $type  = $rtokh->[_TYPE_];
2683
2684         $ws = undef;
2685
2686         #---------------------------------------------------------------
2687         # Whitespace Rules Section 1:
2688         # Handle space on the inside of opening braces.
2689         #---------------------------------------------------------------
2690
2691         #    /^[L\{\(\[]$/
2692         if ( $is_opening_type{$last_type} ) {
2693
2694             my $seqno           = $rtokh->[_TYPE_SEQUENCE_];
2695             my $block_type      = $rblock_type_of_seqno->{$seqno};
2696             my $last_seqno      = $rtokh_last->[_TYPE_SEQUENCE_];
2697             my $last_block_type = $rblock_type_of_seqno->{$last_seqno};
2698
2699             $j_tight_closing_paren = -1;
2700
2701             # let us keep empty matched braces together: () {} []
2702             # except for BLOCKS
2703             if ( $token eq $matching_token{$last_token} ) {
2704                 if ($block_type) {
2705                     $ws = WS_YES;
2706                 }
2707                 else {
2708                     $ws = WS_NO;
2709                 }
2710             }
2711             else {
2712
2713                 # we're considering the right of an opening brace
2714                 # tightness = 0 means always pad inside with space
2715                 # tightness = 1 means pad inside if "complex"
2716                 # tightness = 2 means never pad inside with space
2717
2718                 my $tightness;
2719                 if (   $last_type eq '{'
2720                     && $last_token eq '{'
2721                     && $last_block_type )
2722                 {
2723                     $tightness = $rOpts_block_brace_tightness;
2724                 }
2725                 else { $tightness = $tightness{$last_token} }
2726
2727                #=============================================================
2728                # Patch for test problem <<snippets/fabrice_bug.in>>
2729                # We must always avoid spaces around a bare word beginning
2730                # with ^ as in:
2731                #    my $before = ${^PREMATCH};
2732                # Because all of the following cause an error in perl:
2733                #    my $before = ${ ^PREMATCH };
2734                #    my $before = ${ ^PREMATCH};
2735                #    my $before = ${^PREMATCH };
2736                # So if brace tightness flag is -bt=0 we must temporarily reset
2737                # to bt=1.  Note that here we must set tightness=1 and not 2 so
2738                # that the closing space
2739                # is also avoided (via the $j_tight_closing_paren flag in coding)
2740                 if ( $type eq 'w' && $token =~ /^\^/ ) { $tightness = 1 }
2741
2742                 #=============================================================
2743
2744                 if ( $tightness <= 0 ) {
2745                     $ws = WS_YES;
2746                 }
2747                 elsif ( $tightness > 1 ) {
2748                     $ws = WS_NO;
2749                 }
2750                 else {
2751                     $ws = $ws_in_container->($j);
2752                 }
2753             }
2754
2755             # check for special cases which override the above rules
2756             if ( %opening_container_inside_ws && $last_seqno ) {
2757                 my $ws_override = $opening_container_inside_ws{$last_seqno};
2758                 if ($ws_override) { $ws = $ws_override }
2759             }
2760
2761             $ws_4 = $ws_3 = $ws_2 = $ws_1 = $ws
2762               if DEBUG_WHITE;
2763
2764         } ## end setting space flag inside opening tokens
2765
2766         #---------------------------------------------------------------
2767         # Whitespace Rules Section 2:
2768         # Handle space on inside of closing brace pairs.
2769         #---------------------------------------------------------------
2770
2771         #   /[\}\)\]R]/
2772         if ( $is_closing_type{$type} ) {
2773
2774             my $seqno = $rtokh->[_TYPE_SEQUENCE_];
2775             if ( $j == $j_tight_closing_paren ) {
2776
2777                 $j_tight_closing_paren = -1;
2778                 $ws                    = WS_NO;
2779             }
2780             else {
2781
2782                 if ( !defined($ws) ) {
2783
2784                     my $tightness;
2785                     my $block_type = $rblock_type_of_seqno->{$seqno};
2786                     if ( $type eq '}' && $token eq '}' && $block_type ) {
2787                         $tightness = $rOpts_block_brace_tightness;
2788                     }
2789                     else { $tightness = $tightness{$token} }
2790
2791                     $ws = ( $tightness > 1 ) ? WS_NO : WS_YES;
2792                 }
2793             }
2794
2795             # check for special cases which override the above rules
2796             if ( %closing_container_inside_ws && $seqno ) {
2797                 my $ws_override = $closing_container_inside_ws{$seqno};
2798                 if ($ws_override) { $ws = $ws_override }
2799             }
2800
2801             $ws_4 = $ws_3 = $ws_2 = $ws
2802               if DEBUG_WHITE;
2803         } ## end setting space flag inside closing tokens
2804
2805         #---------------------------------------------------------------
2806         # Whitespace Rules Section 3:
2807         # Handle some special cases.
2808         #---------------------------------------------------------------
2809
2810         #    /^[L\{\(\[]$/
2811         elsif ( $is_opening_type{$type} ) {
2812
2813             if ( $token eq '(' ) {
2814
2815                 my $seqno = $rtokh->[_TYPE_SEQUENCE_];
2816
2817               # This will have to be tweaked as tokenization changes.
2818               # We usually want a space at '} (', for example:
2819               # <<snippets/space1.in>>
2820               #     map { 1 * $_; } ( $y, $M, $w, $d, $h, $m, $s );
2821               #
2822               # But not others:
2823               #     &{ $_->[1] }( delete $_[$#_]{ $_->[0] } );
2824               # At present, the above & block is marked as type L/R so this case
2825               # won't go through here.
2826                 if ( $last_type eq '}' && $last_token ne ')' ) { $ws = WS_YES }
2827
2828                # NOTE: some older versions of Perl had occasional problems if
2829                # spaces are introduced between keywords or functions and opening
2830                # parens.  So the default is not to do this except is certain
2831                # cases.  The current Perl seems to tolerate spaces.
2832
2833                 # Space between keyword and '('
2834                 elsif ( $last_type eq 'k' ) {
2835                     $ws = WS_NO
2836                       unless ( $rOpts_space_keyword_paren
2837                         || $space_after_keyword{$last_token} );
2838
2839                     # Set inside space flag if requested
2840                     $set_container_ws_by_keyword->( $last_token, $seqno );
2841                 }
2842
2843                 # Space between function and '('
2844                 # -----------------------------------------------------
2845                 # 'w' and 'i' checks for something like:
2846                 #   myfun(    &myfun(   ->myfun(
2847                 # -----------------------------------------------------
2848
2849               # Note that at this point an identifier may still have a leading
2850               # arrow, but the arrow will be split off during token respacing.
2851               # After that, the token may become a bare word without leading
2852               # arrow.  The point is, it is best to mark function call parens
2853               # right here before that happens.
2854               # Patch: added 'C' to prevent blinker, case b934, i.e. 'pi()'
2855               # NOTE: this would be the place to allow spaces between repeated
2856               # parens, like () () (), as in case c017, but I decided that would
2857               # not be a good idea.
2858                 elsif (
2859                        ( $last_type =~ /^[wCUG]$/ )
2860                     || ( $last_type =~ /^[wi]$/ && $last_token =~ /^([\&]|->)/ )
2861                   )
2862                 {
2863                     $ws = $rOpts_space_function_paren ? WS_YES : WS_NO;
2864                     $set_container_ws_by_keyword->( $last_token, $seqno );
2865                     $ris_function_call_paren->{$seqno} = 1;
2866                 }
2867
2868                # space between something like $i and ( in <<snippets/space2.in>>
2869                # for $i ( 0 .. 20 ) {
2870                # FIXME: eventually, type 'i' could be split into multiple
2871                # token types so this can be a hardwired rule.
2872                 elsif ( $last_type eq 'i' && $last_token =~ /^[\$\%\@]/ ) {
2873                     $ws = WS_YES;
2874                 }
2875
2876                 # allow constant function followed by '()' to retain no space
2877                 elsif ($last_type eq 'C'
2878                     && $rLL->[ $j + 1 ]->[_TOKEN_] eq ')' )
2879                 {
2880                     $ws = WS_NO;
2881                 }
2882             }
2883
2884             # patch for SWITCH/CASE: make space at ']{' optional
2885             # since the '{' might begin a case or when block
2886             elsif ( ( $token eq '{' && $type ne 'L' ) && $last_token eq ']' ) {
2887                 $ws = WS_OPTIONAL;
2888             }
2889
2890             # keep space between 'sub' and '{' for anonymous sub definition
2891             if ( $type eq '{' ) {
2892                 if ( $last_token eq 'sub' ) {
2893                     $ws = WS_YES;
2894                 }
2895
2896                 # this is needed to avoid no space in '){'
2897                 if ( $last_token eq ')' && $token eq '{' ) { $ws = WS_YES }
2898
2899                 # avoid any space before the brace or bracket in something like
2900                 #  @opts{'a','b',...}
2901                 if ( $last_type eq 'i' && $last_token =~ /^\@/ ) {
2902                     $ws = WS_NO;
2903                 }
2904             }
2905         } ## end if ( $is_opening_type{$type} ) {
2906
2907         # Special checks for certain other types ...
2908         # the hash '%is_special_ws_type' significantly speeds up this routine,
2909         # but be sure to update it if a new check is added.
2910         # Currently has types: qw(k w i C m - Q #)
2911         elsif ( $is_special_ws_type{$type} ) {
2912             if ( $type eq 'i' ) {
2913
2914                 # never a space before ->
2915                 if ( substr( $token, 0, 2 ) eq '->' ) {
2916                     $ws = WS_NO;
2917                 }
2918             }
2919
2920             elsif ( $type eq 'k' ) {
2921
2922                 # Keywords 'for', 'foreach' are special cases for -kpit since
2923                 # the opening paren does not always immediately follow the
2924                 # keyword. So we have to search forward for the paren in this
2925                 # case.  I have limited the search to 10 tokens ahead, just in
2926                 # case somebody has a big file and no opening paren.  This
2927                 # should be enough for all normal code. Added the level check
2928                 # to fix b1236.
2929                 if (   $is_for_foreach{$token}
2930                     && %keyword_paren_inner_tightness
2931                     && defined( $keyword_paren_inner_tightness{$token} )
2932                     && $j < $jmax )
2933                 {
2934                     my $level = $rLL->[$j]->[_LEVEL_];
2935                     my $jp    = $j;
2936                     for ( my $inc = 1 ; $inc < 10 ; $inc++ ) {
2937                         $jp++;
2938                         last if ( $jp > $jmax );
2939                         last if ( $rLL->[$jp]->[_LEVEL_] != $level );    # b1236
2940                         next unless ( $rLL->[$jp]->[_TOKEN_] eq '(' );
2941                         my $seqno_p = $rLL->[$jp]->[_TYPE_SEQUENCE_];
2942                         $set_container_ws_by_keyword->( $token, $seqno_p );
2943                         last;
2944                     }
2945                 }
2946             }
2947
2948             # retain any space between '-' and bare word
2949             elsif ( $type eq 'w' || $type eq 'C' ) {
2950                 $ws = WS_OPTIONAL if $last_type eq '-';
2951
2952                 # never a space before ->
2953                 if ( substr( $token, 0, 2 ) eq '->' ) {
2954                     $ws = WS_NO;
2955                 }
2956             }
2957
2958             # retain any space between '-' and bare word; for example
2959             # avoid space between 'USER' and '-' here: <<snippets/space2.in>>
2960             #   $myhash{USER-NAME}='steve';
2961             elsif ( $type eq 'm' || $type eq '-' ) {
2962                 $ws = WS_OPTIONAL if ( $last_type eq 'w' );
2963             }
2964
2965             # always space before side comment
2966             elsif ( $type eq '#' ) { $ws = WS_YES if $j > 0 }
2967
2968             # space_backslash_quote; RT #123774  <<snippets/rt123774.in>>
2969             # allow a space between a backslash and single or double quote
2970             # to avoid fooling html formatters
2971             elsif ( $last_type eq '\\' && $type eq 'Q' && $token =~ /^[\"\']/ )
2972             {
2973                 if ($rOpts_space_backslash_quote) {
2974                     if ( $rOpts_space_backslash_quote == 1 ) {
2975                         $ws = WS_OPTIONAL;
2976                     }
2977                     elsif ( $rOpts_space_backslash_quote == 2 ) { $ws = WS_YES }
2978                     else { }    # shouldnt happen
2979                 }
2980                 else {
2981                     $ws = WS_NO;
2982                 }
2983             }
2984         } ## end elsif ( $is_special_ws_type{$type} ...
2985
2986         # always preserver whatever space was used after a possible
2987         # filehandle (except _) or here doc operator
2988         if (
2989             $type ne '#'
2990             && ( ( $last_type eq 'Z' && $last_token ne '_' )
2991                 || $last_type eq 'h' )
2992           )
2993         {
2994             $ws = WS_OPTIONAL;
2995         }
2996
2997         $ws_4 = $ws_3 = $ws
2998           if DEBUG_WHITE;
2999
3000         if ( !defined($ws) ) {
3001
3002             #---------------------------------------------------------------
3003             # Whitespace Rules Section 4:
3004             # Use the binary rule table.
3005             #---------------------------------------------------------------
3006             $ws   = $binary_ws_rules{$last_type}{$type};
3007             $ws_4 = $ws if DEBUG_WHITE;
3008
3009             #---------------------------------------------------------------
3010             # Whitespace Rules Section 5:
3011             # Apply default rules not covered above.
3012             #---------------------------------------------------------------
3013
3014            # If we fall through to here, look at the pre-defined hash tables for
3015            # the two tokens, and:
3016            #  if (they are equal) use the common value
3017            #  if (either is zero or undef) use the other
3018            #  if (either is -1) use it
3019            # That is,
3020            # left  vs right
3021            #  1    vs    1     -->  1
3022            #  0    vs    0     -->  0
3023            # -1    vs   -1     --> -1
3024            #
3025            #  0    vs   -1     --> -1
3026            #  0    vs    1     -->  1
3027            #  1    vs    0     -->  1
3028            # -1    vs    0     --> -1
3029            #
3030            # -1    vs    1     --> -1
3031            #  1    vs   -1     --> -1
3032             if ( !defined($ws) ) {
3033                 my $wl = $want_left_space{$type};
3034                 my $wr = $want_right_space{$last_type};
3035                 if ( !defined($wl) ) {
3036                     $ws = defined($wr) ? $wr : 0;
3037                 }
3038                 elsif ( !defined($wr) ) {
3039                     $ws = $wl;
3040                 }
3041                 else {
3042                     $ws =
3043                       ( ( $wl == $wr ) || ( $wl == -1 ) || !$wr ) ? $wl : $wr;
3044                 }
3045             }
3046         }
3047
3048         # Treat newline as a whitespace. Otherwise, we might combine
3049         # 'Send' and '-recipients' here according to the above rules:
3050         # <<snippets/space3.in>>
3051         #    my $msg = new Fax::Send
3052         #      -recipients => $to,
3053         #      -data => $data;
3054         if (   $ws == 0
3055             && $rtokh->[_LINE_INDEX_] != $rtokh_last->[_LINE_INDEX_] )
3056         {
3057             $ws = 1;
3058         }
3059
3060         $rwhitespace_flags->[$j] = $ws;
3061
3062         if (DEBUG_WHITE) {
3063             my $str = substr( $last_token, 0, 15 );
3064             $str .= ' ' x ( 16 - length($str) );
3065             if ( !defined($ws_1) ) { $ws_1 = "*" }
3066             if ( !defined($ws_2) ) { $ws_2 = "*" }
3067             if ( !defined($ws_3) ) { $ws_3 = "*" }
3068             if ( !defined($ws_4) ) { $ws_4 = "*" }
3069             print STDOUT
3070 "NEW WHITE:  i=$j $str $last_type $type $ws_1 : $ws_2 : $ws_3 : $ws_4 : $ws \n";
3071
3072             # reset for next pass
3073             $ws_1 = $ws_2 = $ws_3 = $ws_4 = undef;
3074         }
3075     } ## end main loop
3076
3077     if ( $rOpts->{'tight-secret-operators'} ) {
3078         new_secret_operator_whitespace( $rLL, $rwhitespace_flags );
3079     }
3080     $self->[_ris_function_call_paren_] = $ris_function_call_paren;
3081     return $rwhitespace_flags;
3082
3083 } ## end sub set_whitespace_flags
3084
3085 sub dump_want_left_space {
3086     my $fh = shift;
3087     local $" = "\n";
3088     $fh->print(<<EOM);
3089 These values are the main control of whitespace to the left of a token type;
3090 They may be altered with the -wls parameter.
3091 For a list of token types, use perltidy --dump-token-types (-dtt)
3092  1 means the token wants a space to its left
3093 -1 means the token does not want a space to its left
3094 ------------------------------------------------------------------------
3095 EOM
3096     foreach my $key ( sort keys %want_left_space ) {
3097         $fh->print("$key\t$want_left_space{$key}\n");
3098     }
3099     return;
3100 }
3101
3102 sub dump_want_right_space {
3103     my $fh = shift;
3104     local $" = "\n";
3105     $fh->print(<<EOM);
3106 These values are the main control of whitespace to the right of a token type;
3107 They may be altered with the -wrs parameter.
3108 For a list of token types, use perltidy --dump-token-types (-dtt)
3109  1 means the token wants a space to its right
3110 -1 means the token does not want a space to its right
3111 ------------------------------------------------------------------------
3112 EOM
3113     foreach my $key ( sort keys %want_right_space ) {
3114         $fh->print("$key\t$want_right_space{$key}\n");
3115     }
3116     return;
3117 }
3118
3119 {    ## begin closure is_essential_whitespace
3120
3121     my %is_sort_grep_map;
3122     my %is_for_foreach;
3123     my %is_digraph;
3124     my %is_trigraph;
3125     my %essential_whitespace_filter_l1;
3126     my %essential_whitespace_filter_r1;
3127     my %essential_whitespace_filter_l2;
3128     my %essential_whitespace_filter_r2;
3129     my %is_type_with_space_before_bareword;
3130     my %is_special_variable_char;
3131
3132     BEGIN {
3133
3134         my @q;
3135
3136         # NOTE: This hash is like the global %is_sort_map_grep, but it ignores
3137         # grep aliases on purpose, since here we are looking parens, not braces
3138         @q = qw(sort grep map);
3139         @is_sort_grep_map{@q} = (1) x scalar(@q);
3140
3141         @q = qw(for foreach);
3142         @is_for_foreach{@q} = (1) x scalar(@q);
3143
3144         @q = qw(
3145           .. :: << >> ** && .. || // -> => += -= .= %= &= |= ^= *= <>
3146           <= >= == =~ !~ != ++ -- /= x= ~~ ~. |. &. ^.
3147         );
3148         @is_digraph{@q} = (1) x scalar(@q);
3149
3150         @q = qw( ... **= <<= >>= &&= ||= //= <=> !~~ &.= |.= ^.= <<~);
3151         @is_trigraph{@q} = (1) x scalar(@q);
3152
3153         # These are used as a speedup filters for sub is_essential_whitespace.
3154
3155         # Filter 1:
3156         # These left side token types USUALLY do not require a space:
3157         @q = qw( ; { } [ ] L R );
3158         push @q, ',';
3159         push @q, ')';
3160         push @q, '(';
3161         @essential_whitespace_filter_l1{@q} = (1) x scalar(@q);
3162
3163         # BUT some might if followed by these right token types
3164         @q = qw( pp mm << <<= h );
3165         @essential_whitespace_filter_r1{@q} = (1) x scalar(@q);
3166
3167         # Filter 2:
3168         # These right side filters usually do not require a space
3169         @q = qw( ; ] R } );
3170         push @q, ',';
3171         push @q, ')';
3172         @essential_whitespace_filter_r2{@q} = (1) x scalar(@q);
3173
3174         # BUT some might if followed by these left token types
3175         @q = qw( h Z );
3176         @essential_whitespace_filter_l2{@q} = (1) x scalar(@q);
3177
3178         # Keep a space between certain types and any bareword:
3179         # Q: keep a space between a quote and a bareword to prevent the
3180         #    bareword from becoming a quote modifier.
3181         # &: do not remove space between an '&' and a bare word because
3182         #    it may turn into a function evaluation, like here
3183         #    between '&' and 'O_ACCMODE', producing a syntax error [File.pm]
3184         #      $opts{rdonly} = (($opts{mode} & O_ACCMODE) == O_RDONLY);
3185         @q = qw( Q & );
3186         @is_type_with_space_before_bareword{@q} = (1) x scalar(@q);
3187
3188         # These are the only characters which can (currently) form special
3189         # variables, like $^W: (issue c066, c068).
3190         @q =
3191           qw{ ? A B C D E F G H I J K L M N O P Q R S T U V W X Y Z [ \ ] ^ _ };
3192         @{is_special_variable_char}{@q} = (1) x scalar(@q);
3193
3194     }
3195
3196     sub is_essential_whitespace {
3197
3198         # Essential whitespace means whitespace which cannot be safely deleted
3199         # without risking the introduction of a syntax error.
3200         # We are given three tokens and their types:
3201         # ($tokenl, $typel) is the token to the left of the space in question
3202         # ($tokenr, $typer) is the token to the right of the space in question
3203         # ($tokenll, $typell) is previous nonblank token to the left of $tokenl
3204         #
3205         # Note1: This routine should almost never need to be changed.  It is
3206         # for avoiding syntax problems rather than for formatting.
3207
3208         # Note2: The -mangle option causes large numbers of calls to this
3209         # routine and therefore is a good test. So if a change is made, be sure
3210         # to use nytprof to profile with both old and reviesed coding using the
3211         # -mangle option and check differences.
3212
3213         my ( $tokenll, $typell, $tokenl, $typel, $tokenr, $typer ) = @_;
3214
3215         # This is potentially a very slow routine but the following quick
3216         # filters typically catch and handle over 90% of the calls.
3217
3218         # Filter 1: usually no space required after common types ; , [ ] { } ( )
3219         return
3220           if ( $essential_whitespace_filter_l1{$typel}
3221             && !$essential_whitespace_filter_r1{$typer} );
3222
3223         # Filter 2: usually no space before common types ; ,
3224         return
3225           if ( $essential_whitespace_filter_r2{$typer}
3226             && !$essential_whitespace_filter_l2{$typel} );
3227
3228         # Filter 3: Handle side comments: a space is only essential if the left
3229         # token ends in '$' For example, we do not want to create $#foo below:
3230
3231         #   sub t086
3232         #       ( #foo)))
3233         #       $ #foo)))
3234         #       a #foo)))
3235         #       ) #foo)))
3236         #       { ... }
3237
3238         # Also, I prefer not to put a ? and # together because ? used to be
3239         # a pattern delmiter and spacing was used if guessing was needed.
3240
3241         if ( $typer eq '#' ) {
3242
3243             return 1
3244               if ( $tokenl
3245                 && ( $typel eq '?' || substr( $tokenl, -1 ) eq '$' ) );
3246             return;
3247         }
3248
3249         my $tokenr_is_bareword   = $tokenr =~ /^\w/ && $tokenr !~ /^\d/;
3250         my $tokenr_is_open_paren = $tokenr eq '(';
3251         my $token_joined         = $tokenl . $tokenr;
3252         my $tokenl_is_dash       = $tokenl eq '-';
3253
3254         my $result =
3255
3256           # never combine two bare words or numbers
3257           # examples:  and ::ok(1)
3258           #            return ::spw(...)
3259           #            for bla::bla:: abc
3260           # example is "%overload:: and" in files Dumpvalue.pm or colonbug.pl
3261           #            $input eq"quit" to make $inputeq"quit"
3262           #            my $size=-s::SINK if $file;  <==OK but we won't do it
3263           # don't join something like: for bla::bla:: abc
3264           # example is "%overload:: and" in files Dumpvalue.pm or colonbug.pl
3265           (      ( $tokenl =~ /([\'\w]|\:\:)$/ && $typel ne 'CORE::' )
3266               && ( $tokenr =~ /^([\'\w]|\:\:)/ ) )
3267
3268           # do not combine a number with a concatenation dot
3269           # example: pom.caputo:
3270           # $vt100_compatible ? "\e[0;0H" : ('-' x 78 . "\n");
3271           || $typel eq 'n' && $tokenr eq '.'
3272           || $typer eq 'n' && $tokenl eq '.'
3273
3274           # cases of a space before a bareword...
3275           || (
3276             $tokenr_is_bareword && (
3277
3278                 # do not join a minus with a bare word, because you might form
3279                 # a file test operator.  Example from Complex.pm:
3280                 # if (CORE::abs($z - i) < $eps);
3281                 # "z-i" would be taken as a file test.
3282                 $tokenl_is_dash && length($tokenr) == 1
3283
3284                 # and something like this could become ambiguous without space
3285                 # after the '-':
3286                 #   use constant III=>1;
3287                 #   $a = $b - III;
3288                 # and even this:
3289                 #   $a = - III;
3290                 || $tokenl_is_dash && $typer =~ /^[wC]$/
3291
3292                 # keep space between types Q & and a bareword
3293                 || $is_type_with_space_before_bareword{$typel}
3294
3295                 # +-: binary plus and minus before a bareword could get
3296                 # converted into unary plus and minus on next pass through the
3297                 # tokenizer. This can lead to blinkers: cases b660 b670 b780
3298                 # b781 b787 b788 b790 So we keep a space unless the +/- clearly
3299                 # follows an operator
3300                 || ( ( $typel eq '+' || $typel eq '-' )
3301                     && $typell !~ /^[niC\)\}\]R]$/ )
3302
3303                 # keep a space between a token ending in '$' and any word;
3304                 # this caused trouble:  "die @$ if $@"
3305                 ##|| $typel eq 'i' && $tokenl =~ /\$$/
3306                 || $typel eq 'i' && substr( $tokenl, -1, 1 ) eq '$'
3307
3308                 # don't combine $$ or $# with any alphanumeric
3309                 # (testfile mangle.t with --mangle)
3310                 ##|| $tokenl =~ /^\$[\$\#]$/
3311                 || $tokenl eq '$$'
3312                 || $tokenl eq '$#'
3313
3314             )
3315           )    ## end $tokenr_is_bareword
3316
3317           # OLD, not used
3318           # '= -' should not become =- or you will get a warning
3319           # about reversed -=
3320           # || ($tokenr eq '-')
3321
3322           # do not join a bare word with a minus, like between 'Send' and
3323           # '-recipients' here <<snippets/space3.in>>
3324           #   my $msg = new Fax::Send
3325           #     -recipients => $to,
3326           #     -data => $data;
3327           # This is the safest thing to do. If we had the token to the right of
3328           # the minus we could do a better check.
3329           #
3330           # And do not combine a bareword and a quote, like this:
3331           #    oops "Your login, $Bad_Login, is not valid";
3332           # It can cause a syntax error if oops is a sub
3333           || $typel eq 'w' && ( $tokenr eq '-' || $typer eq 'Q' )
3334
3335           # perl is very fussy about spaces before <<
3336           || substr( $tokenr, 0, 2 ) eq '<<'
3337           ##|| $tokenr =~ /^\<\</
3338
3339           # avoid combining tokens to create new meanings. Example:
3340           #     $a+ +$b must not become $a++$b
3341           || ( $is_digraph{$token_joined} )
3342           || $is_trigraph{$token_joined}
3343
3344           # another example: do not combine these two &'s:
3345           #     allow_options & &OPT_EXECCGI
3346           || $is_digraph{ $tokenl . substr( $tokenr, 0, 1 ) }
3347
3348           # retain any space after possible filehandle
3349           # (testfiles prnterr1.t with --extrude and mangle.t with --mangle)
3350           || $typel eq 'Z'
3351
3352           # Added 'Y' here 16 Jan 2021 to prevent -mangle option from removing
3353           # space after type Y. Otherwise, it will get parsed as type 'Z' later
3354           # and any space would have to be added back manually if desired.
3355           || $typel eq 'Y'
3356
3357           # Perl is sensitive to whitespace after the + here:
3358           #  $b = xvals $a + 0.1 * yvals $a;
3359           || $typell eq 'Z' && $typel =~ /^[\/\?\+\-\*]$/
3360
3361           || (
3362             $tokenr_is_open_paren && (
3363
3364                 # keep paren separate in 'use Foo::Bar ()'
3365                 ( $typel eq 'w' && $typell eq 'k' && $tokenll eq 'use' )
3366
3367                 # OLD: keep any space between filehandle and paren:
3368                 # file mangle.t with --mangle:
3369                 # NEW: this test is no longer necessary here (moved above)
3370                 ## || $typel eq 'Y'
3371
3372                 # must have space between grep and left paren; "grep(" will fail
3373                 || $is_sort_grep_map{$tokenl}
3374
3375                 # don't stick numbers next to left parens, as in:
3376                 #use Mail::Internet 1.28 (); (see Entity.pm, Head.pm, Test.pm)
3377                 || $typel eq 'n'
3378             )
3379           )    ## end $tokenr_is_open_paren
3380
3381           # retain any space after here doc operator ( hereerr.t)
3382           || $typel eq 'h'
3383
3384           # be careful with a space around ++ and --, to avoid ambiguity as to
3385           # which token it applies
3386           ##|| $typer =~ /^(pp|mm)$/     && $tokenl !~ /^[\;\{\(\[]/
3387           || ( $typer eq 'pp' || $typer eq 'mm' ) && $tokenl !~ /^[\;\{\(\[]/
3388           || ( $typel eq '++' || $typel eq '--' )
3389           && $tokenr !~ /^[\;\}\)\]]/
3390           ##|| $typel =~ /^(\+\+|\-\-)$/ && $tokenr !~ /^[\;\}\)\]]/
3391
3392           # need space after foreach my; for example, this will fail in
3393           # older versions of Perl:
3394           # foreach my$ft(@filetypes)...
3395           || (
3396             $tokenl eq 'my'
3397
3398             && substr( $tokenr, 0, 1 ) eq '$'
3399             ##&& $tokenr =~ /^\$/
3400
3401             #  /^(for|foreach)$/
3402             && $is_for_foreach{$tokenll}
3403           )
3404
3405           # Keep space after like $^ if needed to avoid forming a different
3406           # special variable (issue c068). For example:
3407           #       my $aa = $^ ? "none" : "ok";
3408           || ( $typel eq 'i'
3409             && length($tokenl) == 2
3410             && substr( $tokenl, 1, 1 ) eq '^'
3411             && $is_special_variable_char{ substr( $tokenr, 0, 1 ) } )
3412
3413           # We must be sure that a space between a ? and a quoted string
3414           # remains if the space before the ? remains.  [Loca.pm, lockarea]
3415           # ie,
3416           #    $b=join $comma ? ',' : ':', @_;  # ok
3417           #    $b=join $comma?',' : ':', @_;    # ok!
3418           #    $b=join $comma ?',' : ':', @_;   # error!
3419           # Not really required:
3420           ## || ( ( $typel eq '?' ) && ( $typer eq 'Q' ) )
3421
3422           # Space stacked labels...
3423           # Not really required: Perl seems to accept non-spaced labels.
3424           ## || $typel eq 'J' && $typer eq 'J'
3425
3426           ;    # the value of this long logic sequence is the result we want
3427         return $result;
3428     }
3429 } ## end closure is_essential_whitespace
3430
3431 {    ## begin closure new_secret_operator_whitespace
3432
3433     my %secret_operators;
3434     my %is_leading_secret_token;
3435
3436     BEGIN {
3437
3438         # token lists for perl secret operators as compiled by Philippe Bruhat
3439         # at: https://metacpan.org/module/perlsecret
3440         %secret_operators = (
3441             'Goatse'             => [qw#= ( ) =#],        #=( )=
3442             'Venus1'             => [qw#0 +#],            # 0+
3443             'Venus2'             => [qw#+ 0#],            # +0
3444             'Enterprise'         => [qw#) x ! !#],        # ()x!!
3445             'Kite1'              => [qw#~ ~ <>#],         # ~~<>
3446             'Kite2'              => [qw#~~ <>#],          # ~~<>
3447             'Winking Fat Comma'  => [ ( ',', '=>' ) ],    # ,=>
3448             'Bang bang         ' => [qw#! !#],            # !!
3449         );
3450
3451         # The following operators and constants are not included because they
3452         # are normally kept tight by perltidy:
3453         # ~~ <~>
3454         #
3455
3456         # Make a lookup table indexed by the first token of each operator:
3457         # first token => [list, list, ...]
3458         foreach my $value ( values(%secret_operators) ) {
3459             my $tok = $value->[0];
3460             push @{ $is_leading_secret_token{$tok} }, $value;
3461         }
3462     }
3463
3464     sub new_secret_operator_whitespace {
3465
3466         my ( $rlong_array, $rwhitespace_flags ) = @_;
3467
3468         # Loop over all tokens in this line
3469         my ( $token, $type );
3470         my $jmax = @{$rlong_array} - 1;
3471         foreach my $j ( 0 .. $jmax ) {
3472
3473             $token = $rlong_array->[$j]->[_TOKEN_];
3474             $type  = $rlong_array->[$j]->[_TYPE_];
3475
3476             # Skip unless this token might start a secret operator
3477             next if ( $type eq 'b' );
3478             next unless ( $is_leading_secret_token{$token} );
3479
3480             #      Loop over all secret operators with this leading token
3481             foreach my $rpattern ( @{ $is_leading_secret_token{$token} } ) {
3482                 my $jend = $j - 1;
3483                 foreach my $tok ( @{$rpattern} ) {
3484                     $jend++;
3485                     $jend++
3486
3487                       if ( $jend <= $jmax
3488                         && $rlong_array->[$jend]->[_TYPE_] eq 'b' );
3489                     if (   $jend > $jmax
3490                         || $tok ne $rlong_array->[$jend]->[_TOKEN_] )
3491                     {
3492                         $jend = undef;
3493                         last;
3494                     }
3495                 }
3496
3497                 if ($jend) {
3498
3499                     # set flags to prevent spaces within this operator
3500                     foreach my $jj ( $j + 1 .. $jend ) {
3501                         $rwhitespace_flags->[$jj] = WS_NO;
3502                     }
3503                     $j = $jend;
3504                     last;
3505                 }
3506             }    ##      End Loop over all operators
3507         }    ## End loop over all tokens
3508         return;
3509     }    # End sub
3510 } ## end closure new_secret_operator_whitespace
3511
3512 {    ## begin closure set_bond_strengths
3513
3514     # These routines and variables are involved in deciding where to break very
3515     # long lines.
3516
3517     my %is_good_keyword_breakpoint;
3518     my %is_lt_gt_le_ge;
3519     my %is_container_token;
3520
3521     my %binary_bond_strength_nospace;
3522     my %binary_bond_strength;
3523     my %nobreak_lhs;
3524     my %nobreak_rhs;
3525
3526     my @bias_tokens;
3527     my %bias_hash;
3528     my %bias;
3529     my $delta_bias;
3530
3531     sub initialize_bond_strength_hashes {
3532
3533         my @q;
3534         @q = qw(if unless while until for foreach);
3535         @is_good_keyword_breakpoint{@q} = (1) x scalar(@q);
3536
3537         @q = qw(lt gt le ge);
3538         @is_lt_gt_le_ge{@q} = (1) x scalar(@q);
3539
3540         @q = qw/ ( [ { } ] ) /;
3541         @is_container_token{@q} = (1) x scalar(@q);
3542
3543         # The decision about where to break a line depends upon a "bond
3544         # strength" between tokens.  The LOWER the bond strength, the MORE
3545         # likely a break.  A bond strength may be any value but to simplify
3546         # things there are several pre-defined strength levels:
3547
3548         #    NO_BREAK    => 10000;
3549         #    VERY_STRONG => 100;
3550         #    STRONG      => 2.1;
3551         #    NOMINAL     => 1.1;
3552         #    WEAK        => 0.8;
3553         #    VERY_WEAK   => 0.55;
3554
3555         # The strength values are based on trial-and-error, and need to be
3556         # tweaked occasionally to get desired results.  Some comments:
3557         #
3558         #   1. Only relative strengths are important.  small differences
3559         #      in strengths can make big formatting differences.
3560         #   2. Each indentation level adds one unit of bond strength.
3561         #   3. A value of NO_BREAK makes an unbreakable bond
3562         #   4. A value of VERY_WEAK is the strength of a ','
3563         #   5. Values below NOMINAL are considered ok break points.
3564         #   6. Values above NOMINAL are considered poor break points.
3565         #
3566         # The bond strengths should roughly follow precedence order where
3567         # possible.  If you make changes, please check the results very
3568         # carefully on a variety of scripts.  Testing with the -extrude
3569         # options is particularly helpful in exercising all of the rules.
3570
3571         # Wherever possible, bond strengths are defined in the following
3572         # tables.  There are two main stages to setting bond strengths and
3573         # two types of tables:
3574         #
3575         # The first stage involves looking at each token individually and
3576         # defining left and right bond strengths, according to if we want
3577         # to break to the left or right side, and how good a break point it
3578         # is.  For example tokens like =, ||, && make good break points and
3579         # will have low strengths, but one might want to break on either
3580         # side to put them at the end of one line or beginning of the next.
3581         #
3582         # The second stage involves looking at certain pairs of tokens and
3583         # defining a bond strength for that particular pair.  This second
3584         # stage has priority.
3585
3586         #---------------------------------------------------------------
3587         # Bond Strength BEGIN Section 1.
3588         # Set left and right bond strengths of individual tokens.
3589         #---------------------------------------------------------------
3590
3591         # NOTE: NO_BREAK's set in this section first are HINTS which will
3592         # probably not be honored. Essential NO_BREAKS's should be set in
3593         # BEGIN Section 2 or hardwired in the NO_BREAK coding near the end
3594         # of this subroutine.
3595
3596         # Note that we are setting defaults in this section.  The user
3597         # cannot change bond strengths but can cause the left and right
3598         # bond strengths of any token type to be swapped through the use of
3599         # the -wba and -wbb flags. In this way the user can determine if a
3600         # breakpoint token should appear at the end of one line or the
3601         # beginning of the next line.
3602
3603         %right_bond_strength          = ();
3604         %left_bond_strength           = ();
3605         %binary_bond_strength_nospace = ();
3606         %binary_bond_strength         = ();
3607         %nobreak_lhs                  = ();
3608         %nobreak_rhs                  = ();
3609
3610         # The hash keys in this section are token types, plus the text of
3611         # certain keywords like 'or', 'and'.
3612
3613         # no break around possible filehandle
3614         $left_bond_strength{'Z'}  = NO_BREAK;
3615         $right_bond_strength{'Z'} = NO_BREAK;
3616
3617         # never put a bare word on a new line:
3618         # example print (STDERR, "bla"); will fail with break after (
3619         $left_bond_strength{'w'} = NO_BREAK;
3620
3621         # blanks always have infinite strength to force breaks after
3622         # real tokens
3623         $right_bond_strength{'b'} = NO_BREAK;
3624
3625         # try not to break on exponentation
3626         @q                       = qw# ** .. ... <=> #;
3627         @left_bond_strength{@q}  = (STRONG) x scalar(@q);
3628         @right_bond_strength{@q} = (STRONG) x scalar(@q);
3629
3630         # The comma-arrow has very low precedence but not a good break point
3631         $left_bond_strength{'=>'}  = NO_BREAK;
3632         $right_bond_strength{'=>'} = NOMINAL;
3633
3634         # ok to break after label
3635         $left_bond_strength{'J'}  = NO_BREAK;
3636         $right_bond_strength{'J'} = NOMINAL;
3637         $left_bond_strength{'j'}  = STRONG;
3638         $right_bond_strength{'j'} = STRONG;
3639         $left_bond_strength{'A'}  = STRONG;
3640         $right_bond_strength{'A'} = STRONG;
3641
3642         $left_bond_strength{'->'}  = STRONG;
3643         $right_bond_strength{'->'} = VERY_STRONG;
3644
3645         $left_bond_strength{'CORE::'}  = NOMINAL;
3646         $right_bond_strength{'CORE::'} = NO_BREAK;
3647
3648         # breaking AFTER modulus operator is ok:
3649         @q = qw< % >;
3650         @left_bond_strength{@q} = (STRONG) x scalar(@q);
3651         @right_bond_strength{@q} =
3652           ( 0.1 * NOMINAL + 0.9 * STRONG ) x scalar(@q);
3653
3654         # Break AFTER math operators * and /
3655         @q                       = qw< * / x  >;
3656         @left_bond_strength{@q}  = (STRONG) x scalar(@q);
3657         @right_bond_strength{@q} = (NOMINAL) x scalar(@q);
3658
3659         # Break AFTER weakest math operators + and -
3660         # Make them weaker than * but a bit stronger than '.'
3661         @q = qw< + - >;
3662         @left_bond_strength{@q} = (STRONG) x scalar(@q);
3663         @right_bond_strength{@q} =
3664           ( 0.91 * NOMINAL + 0.09 * WEAK ) x scalar(@q);
3665
3666         # Define left strength of unary plus and minus (fixes case b511)
3667         $left_bond_strength{p} = $left_bond_strength{'+'};
3668         $left_bond_strength{m} = $left_bond_strength{'-'};
3669
3670         # And make right strength of unary plus and minus very high.
3671         # Fixes cases b670 b790
3672         $right_bond_strength{p} = NO_BREAK;
3673         $right_bond_strength{m} = NO_BREAK;
3674
3675         # breaking BEFORE these is just ok:
3676         @q                       = qw# >> << #;
3677         @right_bond_strength{@q} = (STRONG) x scalar(@q);
3678         @left_bond_strength{@q}  = (NOMINAL) x scalar(@q);
3679
3680         # breaking before the string concatenation operator seems best
3681         # because it can be hard to see at the end of a line
3682         $right_bond_strength{'.'} = STRONG;
3683         $left_bond_strength{'.'}  = 0.9 * NOMINAL + 0.1 * WEAK;
3684
3685         @q                       = qw< } ] ) R >;
3686         @left_bond_strength{@q}  = (STRONG) x scalar(@q);
3687         @right_bond_strength{@q} = (NOMINAL) x scalar(@q);
3688
3689         # make these a little weaker than nominal so that they get
3690         # favored for end-of-line characters
3691         @q = qw< != == =~ !~ ~~ !~~ >;
3692         @left_bond_strength{@q} = (STRONG) x scalar(@q);
3693         @right_bond_strength{@q} =
3694           ( 0.9 * NOMINAL + 0.1 * WEAK ) x scalar(@q);
3695
3696         # break AFTER these
3697         @q = qw# < >  | & >= <= #;
3698         @left_bond_strength{@q} = (VERY_STRONG) x scalar(@q);
3699         @right_bond_strength{@q} =
3700           ( 0.8 * NOMINAL + 0.2 * WEAK ) x scalar(@q);
3701
3702         # breaking either before or after a quote is ok
3703         # but bias for breaking before a quote
3704         $left_bond_strength{'Q'}  = NOMINAL;
3705         $right_bond_strength{'Q'} = NOMINAL + 0.02;
3706         $left_bond_strength{'q'}  = NOMINAL;
3707         $right_bond_strength{'q'} = NOMINAL;
3708
3709         # starting a line with a keyword is usually ok
3710         $left_bond_strength{'k'} = NOMINAL;
3711
3712         # we usually want to bond a keyword strongly to what immediately
3713         # follows, rather than leaving it stranded at the end of a line
3714         $right_bond_strength{'k'} = STRONG;
3715
3716         $left_bond_strength{'G'}  = NOMINAL;
3717         $right_bond_strength{'G'} = STRONG;
3718
3719         # assignment operators
3720         @q = qw(
3721           = **= += *= &= <<= &&=
3722           -= /= |= >>= ||= //=
3723           .= %= ^=
3724           x=
3725         );
3726
3727         # Default is to break AFTER various assignment operators
3728         @left_bond_strength{@q} = (STRONG) x scalar(@q);
3729         @right_bond_strength{@q} =
3730           ( 0.4 * WEAK + 0.6 * VERY_WEAK ) x scalar(@q);
3731
3732         # Default is to break BEFORE '&&' and '||' and '//'
3733         # set strength of '||' to same as '=' so that chains like
3734         # $a = $b || $c || $d   will break before the first '||'
3735         $right_bond_strength{'||'} = NOMINAL;
3736         $left_bond_strength{'||'}  = $right_bond_strength{'='};
3737
3738         # same thing for '//'
3739         $right_bond_strength{'//'} = NOMINAL;
3740         $left_bond_strength{'//'}  = $right_bond_strength{'='};
3741
3742         # set strength of && a little higher than ||
3743         $right_bond_strength{'&&'} = NOMINAL;
3744         $left_bond_strength{'&&'}  = $left_bond_strength{'||'} + 0.1;
3745
3746         $left_bond_strength{';'}  = VERY_STRONG;
3747         $right_bond_strength{';'} = VERY_WEAK;
3748         $left_bond_strength{'f'}  = VERY_STRONG;
3749
3750         # make right strength of for ';' a little less than '='
3751         # to make for contents break after the ';' to avoid this:
3752         #   for ( $j = $number_of_fields - 1 ; $j < $item_count ; $j +=
3753         #     $number_of_fields )
3754         # and make it weaker than ',' and 'and' too
3755         $right_bond_strength{'f'} = VERY_WEAK - 0.03;
3756
3757         # The strengths of ?/: should be somewhere between
3758         # an '=' and a quote (NOMINAL),
3759         # make strength of ':' slightly less than '?' to help
3760         # break long chains of ? : after the colons
3761         $left_bond_strength{':'}  = 0.4 * WEAK + 0.6 * NOMINAL;
3762         $right_bond_strength{':'} = NO_BREAK;
3763         $left_bond_strength{'?'}  = $left_bond_strength{':'} + 0.01;
3764         $right_bond_strength{'?'} = NO_BREAK;
3765
3766         $left_bond_strength{','}  = VERY_STRONG;
3767         $right_bond_strength{','} = VERY_WEAK;
3768
3769         # remaining digraphs and trigraphs not defined above
3770         @q                       = qw( :: <> ++ --);
3771         @left_bond_strength{@q}  = (WEAK) x scalar(@q);
3772         @right_bond_strength{@q} = (STRONG) x scalar(@q);
3773
3774         # Set bond strengths of certain keywords
3775         # make 'or', 'err', 'and' slightly weaker than a ','
3776         $left_bond_strength{'and'}  = VERY_WEAK - 0.01;
3777         $left_bond_strength{'or'}   = VERY_WEAK - 0.02;
3778         $left_bond_strength{'err'}  = VERY_WEAK - 0.02;
3779         $left_bond_strength{'xor'}  = VERY_WEAK - 0.01;
3780         $right_bond_strength{'and'} = NOMINAL;
3781         $right_bond_strength{'or'}  = NOMINAL;
3782         $right_bond_strength{'err'} = NOMINAL;
3783         $right_bond_strength{'xor'} = NOMINAL;
3784
3785         #---------------------------------------------------------------
3786         # Bond Strength BEGIN Section 2.
3787         # Set binary rules for bond strengths between certain token types.
3788         #---------------------------------------------------------------
3789
3790         #  We have a little problem making tables which apply to the
3791         #  container tokens.  Here is a list of container tokens and
3792         #  their types:
3793         #
3794         #   type    tokens // meaning
3795         #      {    {, [, ( // indent
3796         #      }    }, ], ) // outdent
3797         #      [    [ // left non-structural [ (enclosing an array index)
3798         #      ]    ] // right non-structural square bracket
3799         #      (    ( // left non-structural paren
3800         #      )    ) // right non-structural paren
3801         #      L    { // left non-structural curly brace (enclosing a key)
3802         #      R    } // right non-structural curly brace
3803         #
3804         #  Some rules apply to token types and some to just the token
3805         #  itself.  We solve the problem by combining type and token into a
3806         #  new hash key for the container types.
3807         #
3808         #  If a rule applies to a token 'type' then we need to make rules
3809         #  for each of these 'type.token' combinations:
3810         #  Type    Type.Token
3811         #  {       {{, {[, {(
3812         #  [       [[
3813         #  (       ((
3814         #  L       L{
3815         #  }       }}, }], })
3816         #  ]       ]]
3817         #  )       ))
3818         #  R       R}
3819         #
3820         #  If a rule applies to a token then we need to make rules for
3821         #  these 'type.token' combinations:
3822         #  Token   Type.Token
3823         #  {       {{, L{
3824         #  [       {[, [[
3825         #  (       {(, ((
3826         #  }       }}, R}
3827         #  ]       }], ]]
3828         #  )       }), ))
3829
3830         # allow long lines before final { in an if statement, as in:
3831         #    if (..........
3832         #      ..........)
3833         #    {
3834         #
3835         # Otherwise, the line before the { tends to be too short.
3836
3837         $binary_bond_strength{'))'}{'{{'} = VERY_WEAK + 0.03;
3838         $binary_bond_strength{'(('}{'{{'} = NOMINAL;
3839
3840         # break on something like '} (', but keep this stronger than a ','
3841         # example is in 'howe.pl'
3842         $binary_bond_strength{'R}'}{'(('} = 0.8 * VERY_WEAK + 0.2 * WEAK;
3843         $binary_bond_strength{'}}'}{'(('} = 0.8 * VERY_WEAK + 0.2 * WEAK;
3844
3845         # keep matrix and hash indices together
3846         # but make them a little below STRONG to allow breaking open
3847         # something like {'some-word'}{'some-very-long-word'} at the }{
3848         # (bracebrk.t)
3849         $binary_bond_strength{']]'}{'[['} = 0.9 * STRONG + 0.1 * NOMINAL;
3850         $binary_bond_strength{']]'}{'L{'} = 0.9 * STRONG + 0.1 * NOMINAL;
3851         $binary_bond_strength{'R}'}{'[['} = 0.9 * STRONG + 0.1 * NOMINAL;
3852         $binary_bond_strength{'R}'}{'L{'} = 0.9 * STRONG + 0.1 * NOMINAL;
3853
3854         # increase strength to the point where a break in the following
3855         # will be after the opening paren rather than at the arrow:
3856         #    $a->$b($c);
3857         $binary_bond_strength{'i'}{'->'} = 1.45 * STRONG;
3858
3859     # Note that the following alternative strength would make the break at the
3860     # '->' rather than opening the '('.  Both have advantages and disadvantages.
3861     # $binary_bond_strength{'i'}{'->'} = 0.5*STRONG + 0.5 * NOMINAL; #
3862
3863         $binary_bond_strength{'))'}{'->'} = 0.1 * STRONG + 0.9 * NOMINAL;
3864         $binary_bond_strength{']]'}{'->'} = 0.1 * STRONG + 0.9 * NOMINAL;
3865         $binary_bond_strength{'})'}{'->'} = 0.1 * STRONG + 0.9 * NOMINAL;
3866         $binary_bond_strength{'}]'}{'->'} = 0.1 * STRONG + 0.9 * NOMINAL;
3867         $binary_bond_strength{'}}'}{'->'} = 0.1 * STRONG + 0.9 * NOMINAL;
3868         $binary_bond_strength{'R}'}{'->'} = 0.1 * STRONG + 0.9 * NOMINAL;
3869
3870         $binary_bond_strength{'))'}{'[['} = 0.2 * STRONG + 0.8 * NOMINAL;
3871         $binary_bond_strength{'})'}{'[['} = 0.2 * STRONG + 0.8 * NOMINAL;
3872         $binary_bond_strength{'))'}{'{['} = 0.2 * STRONG + 0.8 * NOMINAL;
3873         $binary_bond_strength{'})'}{'{['} = 0.2 * STRONG + 0.8 * NOMINAL;
3874
3875         #---------------------------------------------------------------
3876         # Binary NO_BREAK rules
3877         #---------------------------------------------------------------
3878
3879         # use strict requires that bare word and => not be separated
3880         $binary_bond_strength{'C'}{'=>'} = NO_BREAK;
3881         $binary_bond_strength{'U'}{'=>'} = NO_BREAK;
3882
3883         # Never break between a bareword and a following paren because
3884         # perl may give an error.  For example, if a break is placed
3885         # between 'to_filehandle' and its '(' the following line will
3886         # give a syntax error [Carp.pm]: my( $no) =fileno(
3887         # to_filehandle( $in)) ;
3888         $binary_bond_strength{'C'}{'(('} = NO_BREAK;
3889         $binary_bond_strength{'C'}{'{('} = NO_BREAK;
3890         $binary_bond_strength{'U'}{'(('} = NO_BREAK;
3891         $binary_bond_strength{'U'}{'{('} = NO_BREAK;
3892
3893         # use strict requires that bare word within braces not start new
3894         # line
3895         $binary_bond_strength{'L{'}{'w'} = NO_BREAK;
3896
3897         $binary_bond_strength{'w'}{'R}'} = NO_BREAK;
3898
3899         # The following two rules prevent a syntax error caused by breaking up
3900         # a construction like '{-y}'.  The '-' quotes the 'y' and prevents
3901         # it from being taken as a transliteration. We have to keep
3902         # token types 'L m w' together to prevent this error.
3903         $binary_bond_strength{'L{'}{'m'}        = NO_BREAK;
3904         $binary_bond_strength_nospace{'m'}{'w'} = NO_BREAK;
3905
3906         # keep 'bareword-' together, but only if there is no space between
3907         # the word and dash. Do not keep together if there is a space.
3908         # example 'use perl6-alpha'
3909         $binary_bond_strength_nospace{'w'}{'m'} = NO_BREAK;
3910
3911         # use strict requires that bare word and => not be separated
3912         $binary_bond_strength{'w'}{'=>'} = NO_BREAK;
3913
3914         # use strict does not allow separating type info from trailing { }
3915         # testfile is readmail.pl
3916         $binary_bond_strength{'t'}{'L{'} = NO_BREAK;
3917         $binary_bond_strength{'i'}{'L{'} = NO_BREAK;
3918
3919         # As a defensive measure, do not break between a '(' and a
3920         # filehandle.  In some cases, this can cause an error.  For
3921         # example, the following program works:
3922         #    my $msg="hi!\n";
3923         #    print
3924         #    ( STDOUT
3925         #    $msg
3926         #    );
3927         #
3928         # But this program fails:
3929         #    my $msg="hi!\n";
3930         #    print
3931         #    (
3932         #    STDOUT
3933         #    $msg
3934         #    );
3935         #
3936         # This is normally only a problem with the 'extrude' option
3937         $binary_bond_strength{'(('}{'Y'} = NO_BREAK;
3938         $binary_bond_strength{'{('}{'Y'} = NO_BREAK;
3939
3940         # never break between sub name and opening paren
3941         $binary_bond_strength{'w'}{'(('} = NO_BREAK;
3942         $binary_bond_strength{'w'}{'{('} = NO_BREAK;
3943
3944         # keep '}' together with ';'
3945         $binary_bond_strength{'}}'}{';'} = NO_BREAK;
3946
3947         # Breaking before a ++ can cause perl to guess wrong. For
3948         # example the following line will cause a syntax error
3949         # with -extrude if we break between '$i' and '++' [fixstyle2]
3950         #   print( ( $i++ & 1 ) ? $_ : ( $change{$_} || $_ ) );
3951         $nobreak_lhs{'++'} = NO_BREAK;
3952
3953         # Do not break before a possible file handle
3954         $nobreak_lhs{'Z'} = NO_BREAK;
3955
3956         # use strict hates bare words on any new line.  For
3957         # example, a break before the underscore here provokes the
3958         # wrath of use strict:
3959         # if ( -r $fn && ( -s _ || $AllowZeroFilesize)) {
3960         $nobreak_rhs{'F'}      = NO_BREAK;
3961         $nobreak_rhs{'CORE::'} = NO_BREAK;
3962
3963         # To prevent the tokenizer from switching between types 'w' and 'G' we
3964         # need to avoid breaking between type 'G' and the following code block
3965         # brace. Fixes case b929.
3966         $nobreak_rhs{G} = NO_BREAK;
3967
3968         #---------------------------------------------------------------
3969         # Bond Strength BEGIN Section 3.
3970         # Define tables and values for applying a small bias to the above
3971         # values.
3972         #---------------------------------------------------------------
3973         # Adding a small 'bias' to strengths is a simple way to make a line
3974         # break at the first of a sequence of identical terms.  For
3975         # example, to force long string of conditional operators to break
3976         # with each line ending in a ':', we can add a small number to the
3977         # bond strength of each ':' (colon.t)
3978         @bias_tokens = qw( : && || f and or . );       # tokens which get bias
3979         %bias_hash   = map { $_ => 0 } @bias_tokens;
3980         $delta_bias  = 0.0001;    # a very small strength level
3981         return;
3982
3983     } ## end sub initialize_bond_strength_hashes
3984
3985     use constant DEBUG_BOND => 0;
3986
3987     sub set_bond_strengths {
3988
3989         my ($self) = @_;
3990
3991         my $rK_weld_right = $self->[_rK_weld_right_];
3992         my $rK_weld_left  = $self->[_rK_weld_left_];
3993
3994         # patch-its always ok to break at end of line
3995         $nobreak_to_go[$max_index_to_go] = 0;
3996
3997         # we start a new set of bias values for each line
3998         %bias = %bias_hash;
3999
4000         my $code_bias = -.01;    # bias for closing block braces
4001
4002         my $type         = 'b';
4003         my $token        = ' ';
4004         my $token_length = 1;
4005         my $last_type;
4006         my $last_nonblank_type  = $type;
4007         my $last_nonblank_token = $token;
4008         my $list_str            = $left_bond_strength{'?'};
4009
4010         my ( $bond_str_1, $bond_str_2, $bond_str_3, $bond_str_4 );
4011
4012         my ( $block_type, $i_next, $i_next_nonblank, $next_nonblank_token,
4013             $next_nonblank_type, $next_token, $next_type,
4014             $total_nesting_depth, );
4015
4016         # main loop to compute bond strengths between each pair of tokens
4017         foreach my $i ( 0 .. $max_index_to_go ) {
4018             $last_type = $type;
4019             if ( $type ne 'b' ) {
4020                 $last_nonblank_type  = $type;
4021                 $last_nonblank_token = $token;
4022             }
4023             $type = $types_to_go[$i];
4024
4025             # strength on both sides of a blank is the same
4026             if ( $type eq 'b' && $last_type ne 'b' ) {
4027                 $bond_strength_to_go[$i] = $bond_strength_to_go[ $i - 1 ];
4028                 $nobreak_to_go[$i] ||= $nobreak_to_go[ $i - 1 ]; # fix for b1257
4029                 next;
4030             }
4031
4032             $token               = $tokens_to_go[$i];
4033             $token_length        = $token_lengths_to_go[$i];
4034             $block_type          = $block_type_to_go[$i];
4035             $i_next              = $i + 1;
4036             $next_type           = $types_to_go[$i_next];
4037             $next_token          = $tokens_to_go[$i_next];
4038             $total_nesting_depth = $nesting_depth_to_go[$i_next];
4039             $i_next_nonblank     = ( ( $next_type eq 'b' ) ? $i + 2 : $i + 1 );
4040             $next_nonblank_type  = $types_to_go[$i_next_nonblank];
4041             $next_nonblank_token = $tokens_to_go[$i_next_nonblank];
4042
4043             my $seqno               = $type_sequence_to_go[$i];
4044             my $next_nonblank_seqno = $type_sequence_to_go[$i_next_nonblank];
4045
4046             # We are computing the strength of the bond between the current
4047             # token and the NEXT token.
4048
4049             #---------------------------------------------------------------
4050             # Bond Strength Section 1:
4051             # First Approximation.
4052             # Use minimum of individual left and right tabulated bond
4053             # strengths.
4054             #---------------------------------------------------------------
4055             my $bsr = $right_bond_strength{$type};
4056             my $bsl = $left_bond_strength{$next_nonblank_type};
4057
4058             # define right bond strengths of certain keywords
4059             if ( $type eq 'k' && defined( $right_bond_strength{$token} ) ) {
4060                 $bsr = $right_bond_strength{$token};
4061             }
4062             elsif ( $token eq 'ne' or $token eq 'eq' ) {
4063                 $bsr = NOMINAL;
4064             }
4065
4066             # set terminal bond strength to the nominal value
4067             # this will cause good preceding breaks to be retained
4068             if ( $i_next_nonblank > $max_index_to_go ) {
4069                 $bsl = NOMINAL;
4070             }
4071
4072             # define right bond strengths of certain keywords
4073             if ( $next_nonblank_type eq 'k'
4074                 && defined( $left_bond_strength{$next_nonblank_token} ) )
4075             {
4076                 $bsl = $left_bond_strength{$next_nonblank_token};
4077             }
4078             elsif ($next_nonblank_token eq 'ne'
4079                 or $next_nonblank_token eq 'eq' )
4080             {
4081                 $bsl = NOMINAL;
4082             }
4083             elsif ( $is_lt_gt_le_ge{$next_nonblank_token} ) {
4084                 $bsl = 0.9 * NOMINAL + 0.1 * STRONG;
4085             }
4086
4087             # Use the minimum of the left and right strengths.  Note: it might
4088             # seem that we would want to keep a NO_BREAK if either token has
4089             # this value.  This didn't work, for example because in an arrow
4090             # list, it prevents the comma from separating from the following
4091             # bare word (which is probably quoted by its arrow).  So necessary
4092             # NO_BREAK's have to be handled as special cases in the final
4093             # section.
4094             if ( !defined($bsr) ) { $bsr = VERY_STRONG }
4095             if ( !defined($bsl) ) { $bsl = VERY_STRONG }
4096             my $bond_str = ( $bsr < $bsl ) ? $bsr : $bsl;
4097             $bond_str_1 = $bond_str if (DEBUG_BOND);
4098
4099             #---------------------------------------------------------------
4100             # Bond Strength Section 2:
4101             # Apply hardwired rules..
4102             #---------------------------------------------------------------
4103
4104             # Patch to put terminal or clauses on a new line: Weaken the bond
4105             # at an || followed by die or similar keyword to make the terminal
4106             # or clause fall on a new line, like this:
4107             #
4108             #   my $class = shift
4109             #     || die "Cannot add broadcast:  No class identifier found";
4110             #
4111             # Otherwise the break will be at the previous '=' since the || and
4112             # = have the same starting strength and the or is biased, like
4113             # this:
4114             #
4115             # my $class =
4116             #   shift || die "Cannot add broadcast:  No class identifier found";
4117             #
4118             # In any case if the user places a break at either the = or the ||
4119             # it should remain there.
4120             if ( $type eq '||' || $type eq 'k' && $token eq 'or' ) {
4121                 if ( $next_nonblank_token =~ /^(die|confess|croak|warn)$/ ) {
4122                     if ( $want_break_before{$token} && $i > 0 ) {
4123                         $bond_strength_to_go[ $i - 1 ] -= $delta_bias;
4124
4125                         # keep bond strength of a token and its following blank
4126                         # the same
4127                         if ( $types_to_go[ $i - 1 ] eq 'b' && $i > 2 ) {
4128                             $bond_strength_to_go[ $i - 2 ] -= $delta_bias;
4129                         }
4130                     }
4131                     else {
4132                         $bond_str -= $delta_bias;
4133                     }
4134                 }
4135             }
4136
4137             # good to break after end of code blocks
4138             if ( $type eq '}' && $block_type && $next_nonblank_type ne ';' ) {
4139
4140                 $bond_str = 0.5 * WEAK + 0.5 * VERY_WEAK + $code_bias;
4141                 $code_bias += $delta_bias;
4142             }
4143
4144             if ( $type eq 'k' ) {
4145
4146                 # allow certain control keywords to stand out
4147                 if (   $next_nonblank_type eq 'k'
4148                     && $is_last_next_redo_return{$token} )
4149                 {
4150                     $bond_str = 0.45 * WEAK + 0.55 * VERY_WEAK;
4151                 }
4152
4153                 # Don't break after keyword my.  This is a quick fix for a
4154                 # rare problem with perl. An example is this line from file
4155                 # Container.pm:
4156
4157                 # foreach my $question( Debian::DebConf::ConfigDb::gettree(
4158                 # $this->{'question'} ) )
4159
4160                 if ( $token eq 'my' ) {
4161                     $bond_str = NO_BREAK;
4162                 }
4163
4164             }
4165
4166             # good to break before 'if', 'unless', etc
4167             if ( $is_if_brace_follower{$next_nonblank_token} ) {
4168                 $bond_str = VERY_WEAK;
4169             }
4170
4171             if ( $next_nonblank_type eq 'k' && $type ne 'CORE::' ) {
4172
4173                 if ( $is_keyword_returning_list{$next_nonblank_token} ) {
4174                     $bond_str = $list_str if ( $bond_str > $list_str );
4175                 }
4176
4177                 # keywords like 'unless', 'if', etc, within statements
4178                 # make good breaks
4179                 if ( $is_good_keyword_breakpoint{$next_nonblank_token} ) {
4180                     $bond_str = VERY_WEAK / 1.05;
4181                 }
4182             }
4183
4184             # try not to break before a comma-arrow
4185             elsif ( $next_nonblank_type eq '=>' ) {
4186                 if ( $bond_str < STRONG ) { $bond_str = STRONG }
4187             }
4188
4189             #---------------------------------------------------------------
4190             # Additional hardwired NOBREAK rules
4191             #---------------------------------------------------------------
4192
4193             # map1.t -- correct for a quirk in perl
4194             if (   $token eq '('
4195                 && $next_nonblank_type eq 'i'
4196                 && $last_nonblank_type eq 'k'
4197                 && $is_sort_map_grep{$last_nonblank_token} )
4198
4199               #     /^(sort|map|grep)$/ )
4200             {
4201                 $bond_str = NO_BREAK;
4202             }
4203
4204             # extrude.t: do not break before paren at:
4205             #    -l pid_filename(
4206             if ( $last_nonblank_type eq 'F' && $next_nonblank_token eq '(' ) {
4207                 $bond_str = NO_BREAK;
4208             }
4209
4210             # OLD COMMENT: In older version of perl, use strict can cause
4211             # problems with breaks before bare words following opening parens.
4212             # For example, this will fail under older versions if a break is
4213             # made between '(' and 'MAIL':
4214
4215             # use strict; open( MAIL, "a long filename or command"); close MAIL;
4216
4217             # NEW COMMENT: Third fix for b1213:
4218             # This option does not seem to be needed any longer, and it can
4219             # cause instabilities.  It can be turned off, but to minimize
4220             # changes to existing formatting it is retained only in the case
4221             # where the previous token was 'open' and there was no line break.
4222             # Even this could eventually be removed if it causes instability.
4223             if ( $type eq '{' ) {
4224
4225                 if (   $token eq '('
4226                     && $next_nonblank_type eq 'w'
4227                     && $last_nonblank_type eq 'k'
4228                     && $last_nonblank_token eq 'open'
4229                     && !$old_breakpoint_to_go[$i] )
4230                 {
4231                     $bond_str = NO_BREAK;
4232                 }
4233             }
4234
4235             # Do not break between a possible filehandle and a ? or / and do
4236             # not introduce a break after it if there is no blank
4237             # (extrude.t)
4238             elsif ( $type eq 'Z' ) {
4239
4240                 # don't break..
4241                 if (
4242
4243                     # if there is no blank and we do not want one. Examples:
4244                     #    print $x++    # do not break after $x
4245                     #    print HTML"HELLO"   # break ok after HTML
4246                     (
4247                            $next_type ne 'b'
4248                         && defined( $want_left_space{$next_type} )
4249                         && $want_left_space{$next_type} == WS_NO
4250                     )
4251
4252                     # or we might be followed by the start of a quote,
4253                     # and this is not an existing breakpoint; fixes c039.
4254                     || !$old_breakpoint_to_go[$i]
4255                     && substr( $next_nonblank_token, 0, 1 ) eq '/'
4256
4257                   )
4258                 {
4259                     $bond_str = NO_BREAK;
4260                 }
4261             }
4262
4263             # Breaking before a ? before a quote can cause trouble if
4264             # they are not separated by a blank.
4265             # Example: a syntax error occurs if you break before the ? here
4266             #  my$logic=join$all?' && ':' || ',@regexps;
4267             # From: Professional_Perl_Programming_Code/multifind.pl
4268             if ( $next_nonblank_type eq '?' ) {
4269                 $bond_str = NO_BREAK
4270                   if ( $types_to_go[ $i_next_nonblank + 1 ] eq 'Q' );
4271             }
4272
4273             # Breaking before a . followed by a number
4274             # can cause trouble if there is no intervening space
4275             # Example: a syntax error occurs if you break before the .2 here
4276             #  $str .= pack($endian.2, ensurrogate($ord));
4277             # From: perl58/Unicode.pm
4278             elsif ( $next_nonblank_type eq '.' ) {
4279                 $bond_str = NO_BREAK
4280                   if ( $types_to_go[ $i_next_nonblank + 1 ] eq 'n' );
4281             }
4282
4283             # Fix for c039
4284             elsif ( $type eq 'w' ) {
4285                 $bond_str = NO_BREAK
4286                   if ( !$old_breakpoint_to_go[$i]
4287                     && substr( $next_nonblank_token, 0, 1 ) eq '/' );
4288             }
4289
4290             $bond_str_2 = $bond_str if (DEBUG_BOND);
4291
4292             #---------------------------------------------------------------
4293             # End of hardwired rules
4294             #---------------------------------------------------------------
4295
4296             #---------------------------------------------------------------
4297             # Bond Strength Section 3:
4298             # Apply table rules. These have priority over the above
4299             # hardwired rules.
4300             #---------------------------------------------------------------
4301
4302             my $tabulated_bond_str;
4303             my $ltype = $type;
4304             my $rtype = $next_nonblank_type;
4305             if ( $seqno && $is_container_token{$token} ) {
4306                 $ltype = $type . $token;
4307             }
4308
4309             if (   $next_nonblank_seqno
4310                 && $is_container_token{$next_nonblank_token} )
4311             {
4312                 $rtype = $next_nonblank_type . $next_nonblank_token;
4313
4314                 # Alternate Fix #1 for issue b1299.  This version makes the
4315                 # decision as soon as possible.  See Alternate Fix #2 also.
4316                 # Do not separate a bareword identifier from its paren: b1299
4317                 # This is currently needed for stability because if the bareword
4318                 # gets separated from a preceding '->' and following '(' then
4319                 # the tokenizer may switch from type 'i' to type 'w'.  This
4320                 # patch will prevent this by keeping it adjacent to its '('.
4321 ##              if (   $next_nonblank_token eq '('
4322 ##                  && $ltype eq 'i'
4323 ##                  && substr( $token, 0, 1 ) =~ /^\w$/ )
4324 ##              {
4325 ##                  $ltype = 'w';
4326 ##              }
4327             }
4328
4329             # apply binary rules which apply regardless of space between tokens
4330             if ( $binary_bond_strength{$ltype}{$rtype} ) {
4331                 $bond_str           = $binary_bond_strength{$ltype}{$rtype};
4332                 $tabulated_bond_str = $bond_str;
4333             }
4334
4335             # apply binary rules which apply only if no space between tokens
4336             if ( $binary_bond_strength_nospace{$ltype}{$next_type} ) {
4337                 $bond_str           = $binary_bond_strength{$ltype}{$next_type};
4338                 $tabulated_bond_str = $bond_str;
4339             }
4340
4341             if ( $nobreak_rhs{$ltype} || $nobreak_lhs{$rtype} ) {
4342                 $bond_str           = NO_BREAK;
4343                 $tabulated_bond_str = $bond_str;
4344             }
4345
4346             $bond_str_3 = $bond_str if (DEBUG_BOND);
4347
4348             # If the hardwired rules conflict with the tabulated bond
4349             # strength then there is an inconsistency that should be fixed
4350             DEBUG_BOND
4351               && $tabulated_bond_str
4352               && $bond_str_1
4353               && $bond_str_1 != $bond_str_2
4354               && $bond_str_2 != $tabulated_bond_str
4355               && do {
4356                 print STDERR
4357 "BOND_TABLES: ltype=$ltype rtype=$rtype $bond_str_1->$bond_str_2->$bond_str_3\n";
4358               };
4359
4360            #-----------------------------------------------------------------
4361            # Bond Strength Section 4:
4362            # Modify strengths of certain tokens which often occur in sequence
4363            # by adding a small bias to each one in turn so that the breaks
4364            # occur from left to right.
4365            #
4366            # Note that we only changing strengths by small amounts here,
4367            # and usually increasing, so we should not be altering any NO_BREAKs.
4368            # Other routines which check for NO_BREAKs will use a tolerance
4369            # of one to avoid any problem.
4370            #-----------------------------------------------------------------
4371
4372             # The bias tables use special keys:
4373             #   $type - if not keyword
4374             #   $token - if keyword, but map some keywords together
4375             my $left_key =
4376               $type eq 'k' ? $token eq 'err' ? 'or' : $token : $type;
4377             my $right_key =
4378                 $next_nonblank_type eq 'k'
4379               ? $next_nonblank_token eq 'err'
4380                   ? 'or'
4381                   : $next_nonblank_token
4382               : $next_nonblank_type;
4383
4384             if ( $type eq ',' ) {
4385
4386                 # add any bias set by sub break_lists at old comma break points
4387                 $bond_str += $bond_strength_to_go[$i];
4388
4389             }
4390
4391             # bias left token
4392             elsif ( defined( $bias{$left_key} ) ) {
4393                 if ( !$want_break_before{$left_key} ) {
4394                     $bias{$left_key} += $delta_bias;
4395                     $bond_str += $bias{$left_key};
4396                 }
4397             }
4398
4399             # bias right token
4400             if ( defined( $bias{$right_key} ) ) {
4401                 if ( $want_break_before{$right_key} ) {
4402
4403                     # for leading '.' align all but 'short' quotes; the idea
4404                     # is to not place something like "\n" on a single line.
4405                     if ( $right_key eq '.' ) {
4406                         unless (
4407                             $last_nonblank_type eq '.'
4408                             && ( $token_length <=
4409                                 $rOpts_short_concatenation_item_length )
4410                             && ( !$is_closing_token{$token} )
4411                           )
4412                         {
4413                             $bias{$right_key} += $delta_bias;
4414                         }
4415                     }
4416                     else {
4417                         $bias{$right_key} += $delta_bias;
4418                     }
4419                     $bond_str += $bias{$right_key};
4420                 }
4421             }
4422
4423             $bond_str_4 = $bond_str if (DEBUG_BOND);
4424
4425             #---------------------------------------------------------------
4426             # Bond Strength Section 5:
4427             # Fifth Approximation.
4428             # Take nesting depth into account by adding the nesting depth
4429             # to the bond strength.
4430             #---------------------------------------------------------------
4431             my $strength;
4432
4433             if ( defined($bond_str) && !$nobreak_to_go[$i] ) {
4434                 if ( $total_nesting_depth > 0 ) {
4435                     $strength = $bond_str + $total_nesting_depth;
4436                 }
4437                 else {
4438                     $strength = $bond_str;
4439                 }
4440             }
4441             else {
4442                 $strength = NO_BREAK;
4443
4444                 # For critical code such as lines with here targets we must
4445                 # be absolutely sure that we do not allow a break.  So for
4446                 # these the nobreak flag exceeds 1 as a signal. Otherwise we
4447                 # can run into trouble when small tolerances are added.
4448                 $strength += 1 if ( $nobreak_to_go[$i] > 1 );
4449             }
4450
4451             #---------------------------------------------------------------
4452             # Bond Strength Section 6:
4453             # Sixth Approximation. Welds.
4454             #---------------------------------------------------------------
4455
4456             # Do not allow a break within welds
4457             if ( $total_weld_count && $seqno ) {
4458                 my $KK = $K_to_go[$i];
4459                 if ( $rK_weld_right->{$KK} ) {
4460                     $strength = NO_BREAK;
4461                 }
4462
4463                 # But encourage breaking after opening welded tokens
4464                 elsif ($rK_weld_left->{$KK}
4465                     && $is_opening_token{$token} )
4466                 {
4467                     $strength -= 1;
4468                 }
4469             }
4470
4471             # always break after side comment
4472             if ( $type eq '#' ) { $strength = 0 }
4473
4474             $bond_strength_to_go[$i] = $strength;
4475
4476             # Fix for case c001: be sure NO_BREAK's are enforced by later
4477             # routines, except at a '?' because '?' as quote delimiter is
4478             # deprecated.
4479             if ( $strength >= NO_BREAK && $next_nonblank_type ne '?' ) {
4480                 $nobreak_to_go[$i] ||= 1;
4481             }
4482
4483             DEBUG_BOND && do {
4484                 my $str = substr( $token, 0, 15 );
4485                 $str .= ' ' x ( 16 - length($str) );
4486                 print STDOUT
4487 "BOND:  i=$i $str $type $next_nonblank_type depth=$total_nesting_depth strength=$bond_str_1 -> $bond_str_2 -> $bond_str_3 -> $bond_str_4 $bond_str -> $strength \n";
4488
4489                 # reset for next pass
4490                 $bond_str_1 = $bond_str_2 = $bond_str_3 = $bond_str_4 = undef;
4491             };
4492
4493         } ## end main loop
4494         return;
4495     } ## end sub set_bond_strengths
4496 } ## end closure set_bond_strengths
4497
4498 sub bad_pattern {
4499
4500     # See if a pattern will compile. We have to use a string eval here,
4501     # but it should be safe because the pattern has been constructed
4502     # by this program.
4503     my ($pattern) = @_;
4504     eval "'##'=~/$pattern/";
4505     return $@;
4506 }
4507
4508 {    ## begin closure prepare_cuddled_block_types
4509
4510     my %no_cuddle;
4511
4512     # Add keywords here which really should not be cuddled
4513     BEGIN {
4514         my @q = qw(if unless for foreach while);
4515         @no_cuddle{@q} = (1) x scalar(@q);
4516     }
4517
4518     sub prepare_cuddled_block_types {
4519
4520         # the cuddled-else style, if used, is controlled by a hash that
4521         # we construct here
4522
4523         # Include keywords here which should not be cuddled
4524
4525         my $cuddled_string = "";
4526         if ( $rOpts->{'cuddled-else'} ) {
4527
4528             # set the default
4529             $cuddled_string = 'elsif else continue catch finally'
4530               unless ( $rOpts->{'cuddled-block-list-exclusive'} );
4531
4532             # This is the old equivalent but more complex version
4533             # $cuddled_string = 'if-elsif-else unless-elsif-else -continue ';
4534
4535             # Add users other blocks to be cuddled
4536             my $cuddled_block_list = $rOpts->{'cuddled-block-list'};
4537             if ($cuddled_block_list) {
4538                 $cuddled_string .= " " . $cuddled_block_list;
4539             }
4540
4541         }
4542
4543         # If we have a cuddled string of the form
4544         #  'try-catch-finally'
4545
4546         # we want to prepare a hash of the form
4547
4548         # $rcuddled_block_types = {
4549         #    'try' => {
4550         #        'catch'   => 1,
4551         #        'finally' => 1
4552         #    },
4553         # };
4554
4555         # use -dcbl to dump this hash
4556
4557         # Multiple such strings are input as a space or comma separated list
4558
4559         # If we get two lists with the same leading type, such as
4560         #   -cbl = "-try-catch-finally  -try-catch-otherwise"
4561         # then they will get merged as follows:
4562         # $rcuddled_block_types = {
4563         #    'try' => {
4564         #        'catch'     => 1,
4565         #        'finally'   => 2,
4566         #        'otherwise' => 1,
4567         #    },
4568         # };
4569         # This will allow either type of chain to be followed.
4570
4571         $cuddled_string =~ s/,/ /g;    # allow space or comma separated lists
4572         my @cuddled_strings = split /\s+/, $cuddled_string;
4573
4574         $rcuddled_block_types = {};
4575
4576         # process each dash-separated string...
4577         my $string_count = 0;
4578         foreach my $string (@cuddled_strings) {
4579             next unless $string;
4580             my @words = split /-+/, $string;    # allow multiple dashes
4581
4582             # we could look for and report possible errors here...
4583             next unless ( @words > 0 );
4584
4585            # allow either '-continue' or *-continue' for arbitrary starting type
4586             my $start = '*';
4587
4588             # a single word without dashes is a secondary block type
4589             if ( @words > 1 ) {
4590                 $start = shift @words;
4591             }
4592
4593             # always make an entry for the leading word. If none follow, this
4594             # will still prevent a wildcard from matching this word.
4595             if ( !defined( $rcuddled_block_types->{$start} ) ) {
4596                 $rcuddled_block_types->{$start} = {};
4597             }
4598
4599             # The count gives the original word order in case we ever want it.
4600             $string_count++;
4601             my $word_count = 0;
4602             foreach my $word (@words) {
4603                 next unless $word;
4604                 if ( $no_cuddle{$word} ) {
4605                     Warn(
4606 "## Ignoring keyword '$word' in -cbl; does not seem right\n"
4607                     );
4608                     next;
4609                 }
4610                 $word_count++;
4611                 $rcuddled_block_types->{$start}->{$word} =
4612                   1;    #"$string_count.$word_count";
4613
4614                 # git#9: Remove this word from the list of desired one-line
4615                 # blocks
4616                 $want_one_line_block{$word} = 0;
4617             }
4618         }
4619         return;
4620     }
4621 }    ## begin closure prepare_cuddled_block_types
4622
4623 sub dump_cuddled_block_list {
4624     my ($fh) = @_;
4625
4626     # ORIGINAL METHOD: Here is the format of the cuddled block type hash
4627     # which controls this routine
4628     #    my $rcuddled_block_types = {
4629     #        'if' => {
4630     #            'else'  => 1,
4631     #            'elsif' => 1
4632     #        },
4633     #        'try' => {
4634     #            'catch'   => 1,
4635     #            'finally' => 1
4636     #        },
4637     #    };
4638
4639     # SIMPLFIED METHOD: the simplified method uses a wildcard for
4640     # the starting block type and puts all cuddled blocks together:
4641     #    my $rcuddled_block_types = {
4642     #        '*' => {
4643     #            'else'  => 1,
4644     #            'elsif' => 1
4645     #            'catch'   => 1,
4646     #            'finally' => 1
4647     #        },
4648     #    };
4649
4650     # Both methods work, but the simplified method has proven to be adequate and
4651     # easier to manage.
4652
4653     my $cuddled_string = $rOpts->{'cuddled-block-list'};
4654     $cuddled_string = '' unless $cuddled_string;
4655
4656     my $flags = "";
4657     $flags .= "-ce" if ( $rOpts->{'cuddled-else'} );
4658     $flags .= " -cbl='$cuddled_string'";
4659
4660     unless ( $rOpts->{'cuddled-else'} ) {
4661         $flags .= "\nNote: You must specify -ce to generate a cuddled hash";
4662     }
4663
4664     $fh->print(<<EOM);
4665 ------------------------------------------------------------------------
4666 Hash of cuddled block types prepared for a run with these parameters:
4667   $flags
4668 ------------------------------------------------------------------------
4669 EOM
4670
4671     use Data::Dumper;
4672     $fh->print( Dumper($rcuddled_block_types) );
4673
4674     $fh->print(<<EOM);
4675 ------------------------------------------------------------------------
4676 EOM
4677     return;
4678 }
4679
4680 sub make_static_block_comment_pattern {
4681
4682     # create the pattern used to identify static block comments
4683     $static_block_comment_pattern = '^\s*##';
4684
4685     # allow the user to change it
4686     if ( $rOpts->{'static-block-comment-prefix'} ) {
4687         my $prefix = $rOpts->{'static-block-comment-prefix'};
4688         $prefix =~ s/^\s*//;
4689         my $pattern = $prefix;
4690
4691         # user may give leading caret to force matching left comments only
4692         if ( $prefix !~ /^\^#/ ) {
4693             if ( $prefix !~ /^#/ ) {
4694                 Die(
4695 "ERROR: the -sbcp prefix is '$prefix' but must begin with '#' or '^#'\n"
4696                 );
4697             }
4698             $pattern = '^\s*' . $prefix;
4699         }
4700         if ( bad_pattern($pattern) ) {
4701             Die(
4702 "ERROR: the -sbc prefix '$prefix' causes the invalid regex '$pattern'\n"
4703             );
4704         }
4705         $static_block_comment_pattern = $pattern;
4706     }
4707     return;
4708 }
4709
4710 sub make_format_skipping_pattern {
4711     my ( $opt_name, $default ) = @_;
4712     my $param = $rOpts->{$opt_name};
4713     unless ($param) { $param = $default }
4714     $param =~ s/^\s*//;
4715     if ( $param !~ /^#/ ) {
4716         Die("ERROR: the $opt_name parameter '$param' must begin with '#'\n");
4717     }
4718     my $pattern = '^' . $param . '\s';
4719     if ( bad_pattern($pattern) ) {
4720         Die(
4721 "ERROR: the $opt_name parameter '$param' causes the invalid regex '$pattern'\n"
4722         );
4723     }
4724     return $pattern;
4725 }
4726
4727 sub make_non_indenting_brace_pattern {
4728
4729     # Create the pattern used to identify static side comments.
4730     # Note that we are ending the pattern in a \s. This will allow
4731     # the pattern to be followed by a space and some text, or a newline.
4732     # The pattern is used in sub 'non_indenting_braces'
4733     $non_indenting_brace_pattern = '^#<<<\s';
4734
4735     # allow the user to change it
4736     if ( $rOpts->{'non-indenting-brace-prefix'} ) {
4737         my $prefix = $rOpts->{'non-indenting-brace-prefix'};
4738         $prefix =~ s/^\s*//;
4739         if ( $prefix !~ /^#/ ) {
4740             Die("ERROR: the -nibp parameter '$prefix' must begin with '#'\n");
4741         }
4742         my $pattern = '^' . $prefix . '\s';
4743         if ( bad_pattern($pattern) ) {
4744             Die(
4745 "ERROR: the -nibp prefix '$prefix' causes the invalid regex '$pattern'\n"
4746             );
4747         }
4748         $non_indenting_brace_pattern = $pattern;
4749     }
4750     return;
4751 }
4752
4753 sub make_closing_side_comment_list_pattern {
4754
4755     # turn any input list into a regex for recognizing selected block types
4756     $closing_side_comment_list_pattern = '^\w+';
4757     if ( defined( $rOpts->{'closing-side-comment-list'} )
4758         && $rOpts->{'closing-side-comment-list'} )
4759     {
4760         $closing_side_comment_list_pattern =
4761           make_block_pattern( '-cscl', $rOpts->{'closing-side-comment-list'} );
4762     }
4763     return;
4764 }
4765
4766 sub make_sub_matching_pattern {
4767
4768     # Patterns for standardizing matches to block types for regular subs and
4769     # anonymous subs. Examples
4770     #  'sub process' is a named sub
4771     #  'sub ::m' is a named sub
4772     #  'sub' is an anonymous sub
4773     #  'sub:' is a label, not a sub
4774     #  'sub :' is a label, not a sub   ( block type will be <sub:> )
4775     #   sub'_ is a named sub           ( block type will be <sub '_> )
4776     #  'substr' is a keyword
4777     # So note that named subs always have a space after 'sub'
4778     $SUB_PATTERN  = '^sub\s';    # match normal sub
4779     $ASUB_PATTERN = '^sub$';     # match anonymous sub
4780
4781     # Note (see also RT #133130): These patterns are used by
4782     # sub make_block_pattern, which is used for making most patterns.
4783     # So this sub needs to be called before other pattern-making routines.
4784
4785     if ( $rOpts->{'sub-alias-list'} ) {
4786
4787         # Note that any 'sub-alias-list' has been preprocessed to
4788         # be a trimmed, space-separated list which includes 'sub'
4789         # for example, it might be 'sub method fun'
4790         my $sub_alias_list = $rOpts->{'sub-alias-list'};
4791         $sub_alias_list =~ s/\s+/\|/g;
4792         $SUB_PATTERN    =~ s/sub/\($sub_alias_list\)/;
4793         $ASUB_PATTERN   =~ s/sub/\($sub_alias_list\)/;
4794     }
4795     return;
4796 }
4797
4798 sub make_bl_pattern {
4799
4800     # Set defaults lists to retain historical default behavior for -bl:
4801     my $bl_list_string           = '*';
4802     my $bl_exclusion_list_string = 'sort map grep eval asub';
4803
4804     if ( defined( $rOpts->{'brace-left-list'} )
4805         && $rOpts->{'brace-left-list'} )
4806     {
4807         $bl_list_string = $rOpts->{'brace-left-list'};
4808     }
4809     if ( $bl_list_string =~ /\bsub\b/ ) {
4810         $rOpts->{'opening-sub-brace-on-new-line'} ||=
4811           $rOpts->{'opening-brace-on-new-line'};
4812     }
4813     if ( $bl_list_string =~ /\basub\b/ ) {
4814         $rOpts->{'opening-anonymous-sub-brace-on-new-line'} ||=
4815           $rOpts->{'opening-brace-on-new-line'};
4816     }
4817
4818     $bl_pattern = make_block_pattern( '-bll', $bl_list_string );
4819
4820     # for -bl, a list with '*' turns on -sbl and -asbl
4821     if ( $bl_pattern =~ /\.\*/ ) {
4822         $rOpts->{'opening-sub-brace-on-new-line'} ||=
4823           $rOpts->{'opening-brace-on-new-line'};
4824         $rOpts->{'opening-anonymous-sub-brace-on-new-line'} ||=
4825           $rOpts->{'opening-anonymous-brace-on-new-line'};
4826     }
4827
4828     if ( defined( $rOpts->{'brace-left-exclusion-list'} )
4829         && $rOpts->{'brace-left-exclusion-list'} )
4830     {
4831         $bl_exclusion_list_string = $rOpts->{'brace-left-exclusion-list'};
4832         if ( $bl_exclusion_list_string =~ /\bsub\b/ ) {
4833             $rOpts->{'opening-sub-brace-on-new-line'} = 0;
4834         }
4835         if ( $bl_exclusion_list_string =~ /\basub\b/ ) {
4836             $rOpts->{'opening-anonymous-sub-brace-on-new-line'} = 0;
4837         }
4838     }
4839
4840     $bl_exclusion_pattern =
4841       make_block_pattern( '-blxl', $bl_exclusion_list_string );
4842     return;
4843 }
4844
4845 sub make_bli_pattern {
4846
4847     # default list of block types for which -bli would apply
4848     my $bli_list_string = 'if else elsif unless while for foreach do : sub';
4849     my $bli_exclusion_list_string = ' ';
4850
4851     if ( defined( $rOpts->{'brace-left-and-indent-list'} )
4852         && $rOpts->{'brace-left-and-indent-list'} )
4853     {
4854         $bli_list_string = $rOpts->{'brace-left-and-indent-list'};
4855     }
4856
4857     $bli_pattern = make_block_pattern( '-blil', $bli_list_string );
4858
4859     if ( defined( $rOpts->{'brace-left-and-indent-exclusion-list'} )
4860         && $rOpts->{'brace-left-and-indent-exclusion-list'} )
4861     {
4862         $bli_exclusion_list_string =
4863           $rOpts->{'brace-left-and-indent-exclusion-list'};
4864     }
4865     $bli_exclusion_pattern =
4866       make_block_pattern( '-blixl', $bli_exclusion_list_string );
4867     return;
4868 }
4869
4870 sub make_keyword_group_list_pattern {
4871
4872     # turn any input list into a regex for recognizing selected block types.
4873     # Here are the defaults:
4874     $keyword_group_list_pattern         = '^(our|local|my|use|require|)$';
4875     $keyword_group_list_comment_pattern = '';
4876     if ( defined( $rOpts->{'keyword-group-blanks-list'} )
4877         && $rOpts->{'keyword-group-blanks-list'} )
4878     {
4879         my @words = split /\s+/, $rOpts->{'keyword-group-blanks-list'};
4880         my @keyword_list;
4881         my @comment_list;
4882         foreach my $word (@words) {
4883             if ( $word =~ /^(BC|SBC)$/ ) {
4884                 push @comment_list, $word;
4885                 if ( $word eq 'SBC' ) { push @comment_list, 'SBCX' }
4886             }
4887             else {
4888                 push @keyword_list, $word;
4889             }
4890         }
4891         $keyword_group_list_pattern =
4892           make_block_pattern( '-kgbl', $rOpts->{'keyword-group-blanks-list'} );
4893         $keyword_group_list_comment_pattern =
4894           make_block_pattern( '-kgbl', join( ' ', @comment_list ) );
4895     }
4896     return;
4897 }
4898
4899 sub make_block_brace_vertical_tightness_pattern {
4900
4901     # turn any input list into a regex for recognizing selected block types
4902     $block_brace_vertical_tightness_pattern =
4903       '^((if|else|elsif|unless|while|for|foreach|do|\w+:)$|sub)';
4904     if ( defined( $rOpts->{'block-brace-vertical-tightness-list'} )
4905         && $rOpts->{'block-brace-vertical-tightness-list'} )
4906     {
4907         $block_brace_vertical_tightness_pattern =
4908           make_block_pattern( '-bbvtl',
4909             $rOpts->{'block-brace-vertical-tightness-list'} );
4910     }
4911     return;
4912 }
4913
4914 sub make_blank_line_pattern {
4915
4916     $blank_lines_before_closing_block_pattern = $SUB_PATTERN;
4917     my $key = 'blank-lines-before-closing-block-list';
4918     if ( defined( $rOpts->{$key} ) && $rOpts->{$key} ) {
4919         $blank_lines_before_closing_block_pattern =
4920           make_block_pattern( '-blbcl', $rOpts->{$key} );
4921     }
4922
4923     $blank_lines_after_opening_block_pattern = $SUB_PATTERN;
4924     $key = 'blank-lines-after-opening-block-list';
4925     if ( defined( $rOpts->{$key} ) && $rOpts->{$key} ) {
4926         $blank_lines_after_opening_block_pattern =
4927           make_block_pattern( '-blaol', $rOpts->{$key} );
4928     }
4929     return;
4930 }
4931
4932 sub make_block_pattern {
4933
4934     #  given a string of block-type keywords, return a regex to match them
4935     #  The only tricky part is that labels are indicated with a single ':'
4936     #  and the 'sub' token text may have additional text after it (name of
4937     #  sub).
4938     #
4939     #  Example:
4940     #
4941     #   input string: "if else elsif unless while for foreach do : sub";
4942     #   pattern:  '^((if|else|elsif|unless|while|for|foreach|do|\w+:)$|sub)';
4943
4944     #  Minor Update:
4945     #
4946     #  To distinguish between anonymous subs and named subs, use 'sub' to
4947     #   indicate a named sub, and 'asub' to indicate an anonymous sub
4948
4949     my ( $abbrev, $string ) = @_;
4950     my @list  = split_words($string);
4951     my @words = ();
4952     my %seen;
4953     for my $i (@list) {
4954         if ( $i eq '*' ) { my $pattern = '^.*'; return $pattern }
4955         next if $seen{$i};
4956         $seen{$i} = 1;
4957         if ( $i eq 'sub' ) {
4958         }
4959         elsif ( $i eq 'asub' ) {
4960         }
4961         elsif ( $i eq ';' ) {
4962             push @words, ';';
4963         }
4964         elsif ( $i eq '{' ) {
4965             push @words, '\{';
4966         }
4967         elsif ( $i eq ':' ) {
4968             push @words, '\w+:';
4969         }
4970         elsif ( $i =~ /^\w/ ) {
4971             push @words, $i;
4972         }
4973         else {
4974             Warn("unrecognized block type $i after $abbrev, ignoring\n");
4975         }
4976     }
4977
4978     # Fix 2 for c091, prevent the pattern from matching an empty string
4979     # '1 ' is an impossible block name.
4980     if ( !@words ) { push @words, "1 " }
4981
4982     my $pattern      = '(' . join( '|', @words ) . ')$';
4983     my $sub_patterns = "";
4984     if ( $seen{'sub'} ) {
4985         $sub_patterns .= '|' . $SUB_PATTERN;
4986     }
4987     if ( $seen{'asub'} ) {
4988         $sub_patterns .= '|' . $ASUB_PATTERN;
4989     }
4990     if ($sub_patterns) {
4991         $pattern = '(' . $pattern . $sub_patterns . ')';
4992     }
4993     $pattern = '^' . $pattern;
4994     return $pattern;
4995 }
4996
4997 sub make_static_side_comment_pattern {
4998
4999     # create the pattern used to identify static side comments
5000     $static_side_comment_pattern = '^##';
5001
5002     # allow the user to change it
5003     if ( $rOpts->{'static-side-comment-prefix'} ) {
5004         my $prefix = $rOpts->{'static-side-comment-prefix'};
5005         $prefix =~ s/^\s*//;
5006         my $pattern = '^' . $prefix;
5007         if ( bad_pattern($pattern) ) {
5008             Die(
5009 "ERROR: the -sscp prefix '$prefix' causes the invalid regex '$pattern'\n"
5010             );
5011         }
5012         $static_side_comment_pattern = $pattern;
5013     }
5014     return;
5015 }
5016
5017 sub make_closing_side_comment_prefix {
5018
5019     # Be sure we have a valid closing side comment prefix
5020     my $csc_prefix = $rOpts->{'closing-side-comment-prefix'};
5021     my $csc_prefix_pattern;
5022     if ( !defined($csc_prefix) ) {
5023         $csc_prefix         = '## end';
5024         $csc_prefix_pattern = '^##\s+end';
5025     }
5026     else {
5027         my $test_csc_prefix = $csc_prefix;
5028         if ( $test_csc_prefix !~ /^#/ ) {
5029             $test_csc_prefix = '#' . $test_csc_prefix;
5030         }
5031
5032         # make a regex to recognize the prefix
5033         my $test_csc_prefix_pattern = $test_csc_prefix;
5034
5035         # escape any special characters
5036         $test_csc_prefix_pattern =~ s/([^#\s\w])/\\$1/g;
5037
5038         $test_csc_prefix_pattern = '^' . $test_csc_prefix_pattern;
5039
5040         # allow exact number of intermediate spaces to vary
5041         $test_csc_prefix_pattern =~ s/\s+/\\s\+/g;
5042
5043         # make sure we have a good pattern
5044         # if we fail this we probably have an error in escaping
5045         # characters.
5046
5047         if ( bad_pattern($test_csc_prefix_pattern) ) {
5048
5049             # shouldn't happen..must have screwed up escaping, above
5050             if (DEVEL_MODE) {
5051                 Fault(<<EOM);
5052 Program Error: the -cscp prefix '$csc_prefix' caused the invalid regex '$csc_prefix_pattern'
5053 EOM
5054             }
5055
5056             # just warn and keep going with defaults
5057             Warn(
5058 "Error: the -cscp prefix '$csc_prefix' caused the invalid regex '$csc_prefix_pattern'\n"
5059             );
5060             Warn("Please consider using a simpler -cscp prefix\n");
5061             Warn("Using default -cscp instead; please check output\n");
5062         }
5063         else {
5064             $csc_prefix         = $test_csc_prefix;
5065             $csc_prefix_pattern = $test_csc_prefix_pattern;
5066         }
5067     }
5068     $rOpts->{'closing-side-comment-prefix'} = $csc_prefix;
5069     $closing_side_comment_prefix_pattern = $csc_prefix_pattern;
5070     return;
5071 }
5072
5073 ##################################################
5074 # CODE SECTION 4: receive lines from the tokenizer
5075 ##################################################
5076
5077 {    ## begin closure write_line
5078
5079     my $nesting_depth;
5080
5081     # Variables used by sub check_sequence_numbers:
5082     my $last_seqno;
5083     my %saw_opening_seqno;
5084     my %saw_closing_seqno;
5085     my $initial_seqno;
5086
5087     sub initialize_write_line {
5088
5089         $nesting_depth = undef;
5090
5091         $last_seqno        = SEQ_ROOT;
5092         %saw_opening_seqno = ();
5093         %saw_closing_seqno = ();
5094
5095         return;
5096     }
5097
5098     sub check_sequence_numbers {
5099
5100         # Routine for checking sequence numbers.  This only needs to be
5101         # done occasionally in DEVEL_MODE to be sure everything is working
5102         # correctly.
5103         my ( $rtokens, $rtoken_type, $rtype_sequence, $input_line_no ) = @_;
5104         my $jmax = @{$rtokens} - 1;
5105         return unless ( $jmax >= 0 );
5106         foreach my $j ( 0 .. $jmax ) {
5107             my $seqno = $rtype_sequence->[$j];
5108             my $token = $rtokens->[$j];
5109             my $type  = $rtoken_type->[$j];
5110             my $err_msg =
5111 "Error at j=$j, line number $input_line_no, seqno='$seqno', type='$type', tok='$token':\n";
5112
5113             if ( !$seqno ) {
5114
5115            # Sequence numbers are generated for opening tokens, so every opening
5116            # token should be sequenced.  Closing tokens will be unsequenced
5117            # if they do not have a matching opening token.
5118                 if (   $is_opening_sequence_token{$token}
5119                     && $type ne 'q'
5120                     && $type ne 'Q' )
5121                 {
5122                     Fault(
5123                         <<EOM
5124 $err_msg Unexpected opening token without sequence number
5125 EOM
5126                     );
5127                 }
5128             }
5129             else {
5130
5131                 # Save starting seqno to identify sequence method:
5132                 # New method starts with 2 and has continuous numbering
5133                 # Old method starts with >2 and may have gaps
5134                 if ( !defined($initial_seqno) ) { $initial_seqno = $seqno }
5135
5136                 if ( $is_opening_sequence_token{$token} ) {
5137
5138                     # New method should have continuous numbering
5139                     if ( $initial_seqno == 2 && $seqno != $last_seqno + 1 ) {
5140                         Fault(
5141                             <<EOM
5142 $err_msg Unexpected opening sequence number: previous seqno=$last_seqno, but seqno= $seqno
5143 EOM
5144                         );
5145                     }
5146                     $last_seqno = $seqno;
5147
5148                     # Numbers must be unique
5149                     if ( $saw_opening_seqno{$seqno} ) {
5150                         my $lno = $saw_opening_seqno{$seqno};
5151                         Fault(
5152                             <<EOM
5153 $err_msg Already saw an opening tokens at line $lno with this sequence number
5154 EOM
5155                         );
5156                     }
5157                     $saw_opening_seqno{$seqno} = $input_line_no;
5158                 }
5159
5160                 # only one closing item per seqno
5161                 elsif ( $is_closing_sequence_token{$token} ) {
5162                     if ( $saw_closing_seqno{$seqno} ) {
5163                         my $lno = $saw_closing_seqno{$seqno};
5164                         Fault(
5165                             <<EOM
5166 $err_msg Already saw a closing token with this seqno  at line $lno
5167 EOM
5168                         );
5169                     }
5170                     $saw_closing_seqno{$seqno} = $input_line_no;
5171
5172                     # Every closing seqno must have an opening seqno
5173                     if ( !$saw_opening_seqno{$seqno} ) {
5174                         Fault(
5175                             <<EOM
5176 $err_msg Saw a closing token but no opening token with this seqno
5177 EOM
5178                         );
5179                     }
5180                 }
5181
5182                 # Sequenced items must be opening or closing
5183                 else {
5184                     Fault(
5185                         <<EOM
5186 $err_msg Unexpected token type with a sequence number
5187 EOM
5188                     );
5189                 }
5190             }
5191         }
5192         return;
5193     }
5194
5195     sub write_line {
5196
5197         # This routine receives lines one-by-one from the tokenizer and stores
5198         # them in a format suitable for further processing.  After the last
5199         # line has been sent, the tokenizer will call sub 'finish_formatting'
5200         # to do the actual formatting.
5201
5202         my ( $self, $line_of_tokens_old ) = @_;
5203         my $rLL        = $self->[_rLL_];
5204         my $Klimit     = $self->[_Klimit_];
5205         my $rlines_new = $self->[_rlines_];
5206
5207         my $K_opening_container     = $self->[_K_opening_container_];
5208         my $K_closing_container     = $self->[_K_closing_container_];
5209         my $rdepth_of_opening_seqno = $self->[_rdepth_of_opening_seqno_];
5210         my $rblock_type_of_seqno    = $self->[_rblock_type_of_seqno_];
5211         my $rSS                     = $self->[_rSS_];
5212         my $Iss_opening             = $self->[_Iss_opening_];
5213         my $Iss_closing             = $self->[_Iss_closing_];
5214
5215         my $Kfirst;
5216         my $line_of_tokens = {};
5217         foreach (
5218             qw(
5219             _curly_brace_depth
5220             _ending_in_quote
5221             _guessed_indentation_level
5222             _line_number
5223             _line_text
5224             _line_type
5225             _paren_depth
5226             _quote_character
5227             _square_bracket_depth
5228             _starting_in_quote
5229             )
5230           )
5231         {
5232             $line_of_tokens->{$_} = $line_of_tokens_old->{$_};
5233         }
5234
5235         # Data needed by Logger
5236         $line_of_tokens->{_level_0}          = 0;
5237         $line_of_tokens->{_ci_level_0}       = 0;
5238         $line_of_tokens->{_nesting_blocks_0} = "";
5239         $line_of_tokens->{_nesting_tokens_0} = "";
5240
5241         # Needed to avoid trimming quotes
5242         $line_of_tokens->{_ended_in_blank_token} = undef;
5243
5244         my $line_type   = $line_of_tokens_old->{_line_type};
5245         my $line_number = $line_of_tokens_old->{_line_number};
5246         my $CODE_type   = "";
5247         my $tee_output;
5248
5249         # Handle line of non-code
5250         if ( $line_type ne 'CODE' ) {
5251             $tee_output ||= $rOpts_tee_pod
5252               && substr( $line_type, 0, 3 ) eq 'POD';
5253         }
5254
5255         # Handle line of code
5256         else {
5257
5258             my $rtokens         = $line_of_tokens_old->{_rtokens};
5259             my $rtoken_type     = $line_of_tokens_old->{_rtoken_type};
5260             my $rblock_type     = $line_of_tokens_old->{_rblock_type};
5261             my $rcontainer_type = $line_of_tokens_old->{_rcontainer_type};
5262             my $rcontainer_environment =
5263               $line_of_tokens_old->{_rcontainer_environment};
5264             my $rtype_sequence  = $line_of_tokens_old->{_rtype_sequence};
5265             my $rlevels         = $line_of_tokens_old->{_rlevels};
5266             my $rslevels        = $line_of_tokens_old->{_rslevels};
5267             my $rci_levels      = $line_of_tokens_old->{_rci_levels};
5268             my $rnesting_blocks = $line_of_tokens_old->{_rnesting_blocks};
5269             my $rnesting_tokens = $line_of_tokens_old->{_rnesting_tokens};
5270
5271             my $jmax = @{$rtokens} - 1;
5272             if ( $jmax >= 0 ) {
5273                 $Kfirst = defined($Klimit) ? $Klimit + 1 : 0;
5274
5275                 DEVEL_MODE
5276                   && check_sequence_numbers( $rtokens, $rtoken_type,
5277                     $rtype_sequence, $line_number );
5278
5279                 # Find the starting nesting depth ...
5280                 # It must be the value of variable 'level' of the first token
5281                 # because the nesting depth is used as a token tag in the
5282                 # vertical aligner and is compared to actual levels.
5283                 # So vertical alignment problems will occur with any other
5284                 # starting value.
5285                 if ( !defined($nesting_depth) ) {
5286                     $nesting_depth = $rlevels->[0];
5287                     $nesting_depth = 0 if ( $nesting_depth < 0 );
5288                     $rdepth_of_opening_seqno->[SEQ_ROOT] = $nesting_depth - 1;
5289                 }
5290
5291                 foreach my $j ( 0 .. $jmax ) {
5292
5293                     # Do not clip the 'level' variable yet. We will do this
5294                     # later, in sub 'store_token_to_go'. The reason is that in
5295                     # files with level errors, the logic in 'weld_cuddled_else'
5296                     # uses a stack logic that will give bad welds if we clip
5297                     # levels here.
5298                     ## if ( $rlevels->[$j] < 0 ) { $rlevels->[$j] = 0 }
5299
5300                     # Handle tokens with sequence numbers ...
5301                     my $seqno = $rtype_sequence->[$j];
5302                     if ($seqno) {
5303                         my $token = $rtokens->[$j];
5304                         my $sign  = 1;
5305                         if ( $is_opening_token{$token} ) {
5306                             $K_opening_container->{$seqno} = @{$rLL};
5307                             $rdepth_of_opening_seqno->[$seqno] = $nesting_depth;
5308                             $nesting_depth++;
5309
5310                             # Save a sequenced block type at its opening token.
5311                             # Note that unsequenced block types can occur in
5312                             # unbalanced code with errors but are ignored here.
5313                             if ( $rblock_type->[$j] ) {
5314                                 my $block_type = $rblock_type->[$j];
5315                                 $rblock_type_of_seqno->{$seqno} = $block_type;
5316                                 if ( substr( $block_type, 0, 3 ) eq 'sub'
5317                                     || $rOpts_sub_alias_list )
5318                                 {
5319                                     if ( $block_type =~ /$ASUB_PATTERN/ ) {
5320                                         $self->[_ris_asub_block_]->{$seqno} = 1;
5321                                     }
5322                                     elsif ( $block_type =~ /$SUB_PATTERN/ ) {
5323                                         $self->[_ris_sub_block_]->{$seqno} = 1;
5324                                     }
5325                                 }
5326                             }
5327                         }
5328                         elsif ( $is_closing_token{$token} ) {
5329
5330                             # The opening depth should always be defined, and
5331                             # it should equal $nesting_depth-1.  To protect
5332                             # against unforseen error conditions, however, we
5333                             # will check this and fix things if necessary.  For
5334                             # a test case see issue c055.
5335                             my $opening_depth =
5336                               $rdepth_of_opening_seqno->[$seqno];
5337                             if ( !defined($opening_depth) ) {
5338                                 $opening_depth = $nesting_depth - 1;
5339                                 $opening_depth = 0 if ( $opening_depth < 0 );
5340                                 $rdepth_of_opening_seqno->[$seqno] =
5341                                   $opening_depth;
5342
5343                                 # This is not fatal but should not happen.  The
5344                                 # tokenizer generates sequence numbers
5345                                 # incrementally upon encountering each new
5346                                 # opening token, so every positive sequence
5347                                 # number should correspond to an opening token.
5348                                 if (DEVEL_MODE) {
5349                                     Fault(<<EOM);
5350 No opening token seen for closing token = '$token' at seq=$seqno at depth=$opening_depth
5351 EOM
5352                                 }
5353                             }
5354                             $K_closing_container->{$seqno} = @{$rLL};
5355                             $nesting_depth                 = $opening_depth;
5356                             $sign                          = -1;
5357                         }
5358                         elsif ( $token eq '?' ) {
5359                         }
5360                         elsif ( $token eq ':' ) {
5361                             $sign = -1;
5362                         }
5363
5364                         # The only sequenced types output by the tokenizer are
5365                         # the opening & closing containers and the ternary
5366                         # types. So we would only get here if the tokenizer has
5367                         # been changed to mark some other tokens with sequence
5368                         # numbers, or if an error has been introduced in a
5369                         # hash such as %is_opening_container
5370                         else {
5371                             if (DEVEL_MODE) {
5372                                 Fault(<<EOM);
5373 Unexpected sequenced token '$token' of type '$rtoken_type->[$j]', sequence=$seqno arrived from tokenizer.
5374 Expecting only opening or closing container tokens or ternary tokens with sequence numbers.
5375 EOM
5376                             }
5377                         }
5378
5379                         if ( $sign > 0 ) {
5380                             $Iss_opening->[$seqno] = @{$rSS};
5381
5382                             # For efficiency, we find the maximum level of
5383                             # opening tokens of any type.  The actual maximum
5384                             # level will be that of their contents which is 1
5385                             # greater.  That will be fixed in sub
5386                             # 'finish_formatting'.
5387                             my $level = $rlevels->[$j];
5388                             if ( $level > $self->[_maximum_level_] ) {
5389                                 $self->[_maximum_level_]         = $level;
5390                                 $self->[_maximum_level_at_line_] = $line_number;
5391                             }
5392                         }
5393                         else { $Iss_closing->[$seqno] = @{$rSS} }
5394                         push @{$rSS}, $sign * $seqno;
5395
5396                     }
5397
5398                     my @tokary;
5399                     @tokary[
5400                       _TOKEN_, _TYPE_,     _TYPE_SEQUENCE_,
5401                       _LEVEL_, _CI_LEVEL_, _LINE_INDEX_,
5402                       ]
5403                       = (
5404                         $rtokens->[$j],    $rtoken_type->[$j],
5405                         $seqno,            $rlevels->[$j],
5406                         $rci_levels->[$j], $line_number - 1,
5407                       );
5408                     push @{$rLL}, \@tokary;
5409                 } ## end foreach my $j ( 0 .. $jmax )
5410
5411                 $Klimit = @{$rLL} - 1;
5412
5413                 # Need to remember if we can trim the input line
5414                 $line_of_tokens->{_ended_in_blank_token} =
5415                   $rtoken_type->[$jmax] eq 'b';
5416
5417                 $line_of_tokens->{_level_0}          = $rlevels->[0];
5418                 $line_of_tokens->{_ci_level_0}       = $rci_levels->[0];
5419                 $line_of_tokens->{_nesting_blocks_0} = $rnesting_blocks->[0];
5420                 $line_of_tokens->{_nesting_tokens_0} = $rnesting_tokens->[0];
5421             } ## end if ( $jmax >= 0 )
5422
5423             $tee_output ||=
5424                  $rOpts_tee_block_comments
5425               && $jmax == 0
5426               && $rLL->[$Kfirst]->[_TYPE_] eq '#';
5427
5428             $tee_output ||=
5429                  $rOpts_tee_side_comments
5430               && defined($Kfirst)
5431               && $Klimit > $Kfirst
5432               && $rLL->[$Klimit]->[_TYPE_] eq '#';
5433
5434         } ## end if ( $line_type eq 'CODE')
5435
5436         # Finish storing line variables
5437         if ($tee_output) {
5438             my $fh_tee    = $self->[_fh_tee_];
5439             my $line_text = $line_of_tokens_old->{_line_text};
5440             $fh_tee->print($line_text) if ($fh_tee);
5441         }
5442
5443         $line_of_tokens->{_rK_range}  = [ $Kfirst, $Klimit ];
5444         $line_of_tokens->{_code_type} = $CODE_type;
5445         $self->[_Klimit_]             = $Klimit;
5446
5447         push @{$rlines_new}, $line_of_tokens;
5448         return;
5449     }
5450 } ## end closure write_line
5451
5452 #############################################
5453 # CODE SECTION 5: Pre-process the entire file
5454 #############################################
5455
5456 sub finish_formatting {
5457
5458     my ( $self, $severe_error ) = @_;
5459
5460     # The file has been tokenized and is ready to be formatted.
5461     # All of the relevant data is stored in $self, ready to go.
5462
5463     # Check the maximum level. If it is extremely large we will give up and
5464     # output the file verbatim.  Note that the actual maximum level is 1
5465     # greater than the saved value, so we fix that here.
5466     $self->[_maximum_level_] += 1;
5467     my $maximum_level       = $self->[_maximum_level_];
5468     my $maximum_table_index = $#maximum_line_length_at_level;
5469     if ( !$severe_error && $maximum_level >= $maximum_table_index ) {
5470         $severe_error ||= 1;
5471         Warn(<<EOM);
5472 The maximum indentation level, $maximum_level, exceeds the builtin limit of $maximum_table_index.
5473 Something may be wrong; formatting will be skipped.
5474 EOM
5475     }
5476
5477     # output file verbatim if severe error or no formatting requested
5478     if ( $severe_error || $rOpts->{notidy} ) {
5479         $self->dump_verbatim();
5480         $self->wrapup();
5481         return;
5482     }
5483
5484     # Update the 'save_logfile' flag based to include any tokenization errors.
5485     # We can save time by skipping logfile calls if it is not going to be saved.
5486     my $logger_object = $self->[_logger_object_];
5487     if ($logger_object) {
5488         $self->[_save_logfile_] = $logger_object->get_save_logfile();
5489     }
5490
5491     $self->set_CODE_type();
5492
5493     # Verify that the line hash does not have any unknown keys.
5494     $self->check_line_hashes() if (DEVEL_MODE);
5495
5496     # Make a pass through all tokens, adding or deleting any whitespace as
5497     # required.  Also make any other changes, such as adding semicolons.
5498     # All token changes must be made here so that the token data structure
5499     # remains fixed for the rest of this iteration.
5500     $self->respace_tokens();
5501
5502     $self->set_excluded_lp_containers();
5503
5504     $self->find_multiline_qw();
5505
5506     $self->keep_old_line_breaks();
5507
5508     # Implement any welding needed for the -wn or -cb options
5509     $self->weld_containers();
5510
5511     $self->collapsed_lengths()
5512       if ( $rOpts_line_up_parentheses && $rOpts_extended_line_up_parentheses );
5513
5514     # Locate small nested blocks which should not be broken
5515     $self->mark_short_nested_blocks();
5516
5517     $self->adjust_indentation_levels();
5518
5519     # Verify that the main token array looks OK.  If this ever causes a fault
5520     # then place similar checks before the sub calls above to localize the
5521     # problem.
5522     $self->check_rLL("Before 'process_all_lines'") if (DEVEL_MODE);
5523
5524     # Finishes formatting and write the result to the line sink.
5525     # Eventually this call should just change the 'rlines' data according to the
5526     # new line breaks and then return so that we can do an internal iteration
5527     # before continuing with the next stages of formatting.
5528     $self->process_all_lines();
5529
5530     # A final routine to tie up any loose ends
5531     $self->wrapup();
5532     return;
5533 }
5534
5535 sub set_CODE_type {
5536     my ($self) = @_;
5537
5538     # This routine performs two tasks:
5539
5540     # TASK 1: Examine each line of code and set a flag '$CODE_type' to describe
5541     # any special processing that it requires.
5542
5543     # TASK 2: Delete side comments if requested.
5544
5545     my $rLL                  = $self->[_rLL_];
5546     my $Klimit               = $self->[_Klimit_];
5547     my $rlines               = $self->[_rlines_];
5548     my $rblock_type_of_seqno = $self->[_rblock_type_of_seqno_];
5549
5550     my $rOpts_format_skipping_begin = $rOpts->{'format-skipping-begin'};
5551     my $rOpts_format_skipping_end   = $rOpts->{'format-skipping-end'};
5552     my $rOpts_static_block_comment_prefix =
5553       $rOpts->{'static-block-comment-prefix'};
5554
5555     # Remember indexes of lines with side comments
5556     my @ix_side_comments;
5557
5558     my $In_format_skipping_section = 0;
5559     my $Saw_VERSION_in_this_file   = 0;
5560     my $has_side_comment           = 0;
5561     my ( $Kfirst, $Klast );
5562     my $CODE_type;
5563
5564     #------------------------------
5565     # TASK 1: Loop to set CODE_type
5566     #------------------------------
5567
5568     # Possible CODE_types
5569     # 'VB'  = Verbatim - line goes out verbatim (a quote)
5570     # 'FS'  = Format Skipping - line goes out verbatim
5571     # 'BL'  = Blank Line
5572     # 'HSC' = Hanging Side Comment - fix this hanging side comment
5573     # 'SBCX'= Static Block Comment Without Leading Space
5574     # 'SBC' = Static Block Comment
5575     # 'BC'  = Block Comment - an ordinary full line comment
5576     # 'IO'  = Indent Only - line goes out unchanged except for indentation
5577     # 'NIN' = No Internal Newlines - line does not get broken
5578     # 'VER' = VERSION statement
5579     # ''    = ordinary line of code with no restructions
5580
5581     my $ix_line = -1;
5582     foreach my $line_of_tokens ( @{$rlines} ) {
5583         $ix_line++;
5584         my $input_line_no = $line_of_tokens->{_line_number};
5585         my $line_type     = $line_of_tokens->{_line_type};
5586
5587         my $Last_line_had_side_comment = $has_side_comment;
5588         if ($has_side_comment) {
5589             push @ix_side_comments, $ix_line - 1;
5590         }
5591         $has_side_comment = 0;
5592
5593         next unless ( $line_type eq 'CODE' );
5594
5595         my $Klast_prev = $Klast;
5596
5597         my $rK_range = $line_of_tokens->{_rK_range};
5598         ( $Kfirst, $Klast ) = @{$rK_range};
5599
5600         my $last_CODE_type = $CODE_type;
5601         $CODE_type = "";
5602
5603         my $input_line = $line_of_tokens->{_line_text};
5604         my $jmax       = defined($Kfirst) ? $Klast - $Kfirst : -1;
5605
5606         my $is_block_comment = 0;
5607         if ( $jmax >= 0 && $rLL->[$Klast]->[_TYPE_] eq '#' ) {
5608             if   ( $jmax == 0 ) { $is_block_comment = 1; }
5609             else                { $has_side_comment = 1 }
5610         }
5611
5612         # Write line verbatim if we are in a formatting skip section
5613         if ($In_format_skipping_section) {
5614
5615             # Note: extra space appended to comment simplifies pattern matching
5616             if (
5617                 $is_block_comment
5618
5619                 # optional fast pre-check
5620                 && ( substr( $rLL->[$Kfirst]->[_TOKEN_], 0, 4 ) eq '#>>>'
5621                     || $rOpts_format_skipping_end )
5622
5623                 && ( $rLL->[$Kfirst]->[_TOKEN_] . " " ) =~
5624                 /$format_skipping_pattern_end/
5625               )
5626             {
5627                 $In_format_skipping_section = 0;
5628                 write_logfile_entry(
5629                     "Line $input_line_no: Exiting format-skipping section\n");
5630             }
5631             $CODE_type = 'FS';
5632             goto NEXT;
5633         }
5634
5635         # Check for a continued quote..
5636         if ( $line_of_tokens->{_starting_in_quote} ) {
5637
5638             # A line which is entirely a quote or pattern must go out
5639             # verbatim.  Note: the \n is contained in $input_line.
5640             if ( $jmax <= 0 ) {
5641                 if ( ( $input_line =~ "\t" ) ) {
5642                     my $input_line_number = $line_of_tokens->{_line_number};
5643                     $self->note_embedded_tab($input_line_number);
5644                 }
5645                 $CODE_type = 'VB';
5646                 goto NEXT;
5647             }
5648         }
5649
5650         # See if we are entering a formatting skip section
5651         if (
5652             $is_block_comment
5653
5654             # optional fast pre-check
5655             && ( substr( $rLL->[$Kfirst]->[_TOKEN_], 0, 4 ) eq '#<<<'
5656                 || $rOpts_format_skipping_begin )
5657
5658             && $rOpts_format_skipping
5659             && ( $rLL->[$Kfirst]->[_TOKEN_] . " " ) =~
5660             /$format_skipping_pattern_begin/
5661           )
5662         {
5663             $In_format_skipping_section = 1;
5664             write_logfile_entry(
5665                 "Line $input_line_no: Entering format-skipping section\n");
5666             $CODE_type = 'FS';
5667             goto NEXT;
5668         }
5669
5670         # ignore trailing blank tokens (they will get deleted later)
5671         if ( $jmax > 0 && $rLL->[$Klast]->[_TYPE_] eq 'b' ) {
5672             $jmax--;
5673         }
5674
5675         # blank line..
5676         if ( $jmax < 0 ) {
5677             $CODE_type = 'BL';
5678             goto NEXT;
5679         }
5680
5681         # Handle comments
5682         if ($is_block_comment) {
5683
5684             # see if this is a static block comment (starts with ## by default)
5685             my $is_static_block_comment = 0;
5686             my $no_leading_space        = substr( $input_line, 0, 1 ) eq '#';
5687             if (
5688
5689                 # optional fast pre-check
5690                 (
5691                     substr( $rLL->[$Kfirst]->[_TOKEN_], 0, 2 ) eq '##'
5692                     || $rOpts_static_block_comment_prefix
5693                 )
5694
5695                 && $rOpts_static_block_comments
5696                 && $input_line =~ /$static_block_comment_pattern/
5697               )
5698             {
5699                 $is_static_block_comment = 1;
5700             }
5701
5702             # Check for comments which are line directives
5703             # Treat exactly as static block comments without leading space
5704             # reference: perlsyn, near end, section Plain Old Comments (Not!)
5705             # example: '# line 42 "new_filename.plx"'
5706             if (
5707                    $no_leading_space
5708                 && $input_line =~ /^\#   \s*
5709                            line \s+ (\d+)   \s*
5710                            (?:\s("?)([^"]+)\2)? \s*
5711                            $/x
5712               )
5713             {
5714                 $is_static_block_comment = 1;
5715             }
5716
5717             # look for hanging side comment ...
5718             if (
5719                 $Last_line_had_side_comment    # last line had side comment
5720                 && !$no_leading_space          # there is some leading space
5721                 && !
5722                 $is_static_block_comment    # do not make static comment hanging
5723               )
5724             {
5725
5726                 #  continuing an existing HSC chain?
5727                 if ( $last_CODE_type eq 'HSC' ) {
5728                     $has_side_comment = 1;
5729                     $CODE_type        = 'HSC';
5730                     goto NEXT;
5731                 }
5732
5733                 #  starting a new HSC chain?
5734                 elsif (
5735
5736                     $rOpts->{'hanging-side-comments'}    # user is allowing
5737                                                          # hanging side comments
5738                                                          # like this
5739
5740                     && ( defined($Klast_prev) && $Klast_prev > 1 )
5741
5742                     # and the previous side comment was not static (issue c070)
5743                     && !(
5744                            $rOpts->{'static-side-comments'}
5745                         && $rLL->[$Klast_prev]->[_TOKEN_] =~
5746                         /$static_side_comment_pattern/
5747                     )
5748
5749                   )
5750                 {
5751
5752                     # and it is not a closing side comment (issue c070).
5753                     my $K_penult = $Klast_prev - 1;
5754                     $K_penult -= 1 if ( $rLL->[$K_penult]->[_TYPE_] eq 'b' );
5755                     my $follows_csc =
5756                       (      $rLL->[$K_penult]->[_TOKEN_] eq '}'
5757                           && $rLL->[$K_penult]->[_TYPE_] eq '}'
5758                           && $rLL->[$Klast_prev]->[_TOKEN_] =~
5759                           /$closing_side_comment_prefix_pattern/ );
5760
5761                     if ( !$follows_csc ) {
5762                         $has_side_comment = 1;
5763                         $CODE_type        = 'HSC';
5764                         goto NEXT;
5765                     }
5766                 }
5767             }
5768
5769             if ($is_static_block_comment) {
5770                 $CODE_type = $no_leading_space ? 'SBCX' : 'SBC';
5771                 goto NEXT;
5772             }
5773             elsif ($Last_line_had_side_comment
5774                 && !$rOpts_maximum_consecutive_blank_lines
5775                 && $rLL->[$Kfirst]->[_LEVEL_] > 0 )
5776             {
5777                 # Emergency fix to keep a block comment from becoming a hanging
5778                 # side comment.  This fix is for the case that blank lines
5779                 # cannot be inserted.  There is related code in sub
5780                 # 'process_line_of_CODE'
5781                 $CODE_type = 'SBCX';
5782                 goto NEXT;
5783             }
5784             else {
5785                 $CODE_type = 'BC';
5786                 goto NEXT;
5787             }
5788         }
5789
5790         # End of comments. Handle a line of normal code:
5791
5792         if ($rOpts_indent_only) {
5793             $CODE_type = 'IO';
5794             goto NEXT;
5795         }
5796
5797         if ( !$rOpts_add_newlines ) {
5798             $CODE_type = 'NIN';
5799             goto NEXT;
5800         }
5801
5802         #   Patch needed for MakeMaker.  Do not break a statement
5803         #   in which $VERSION may be calculated.  See MakeMaker.pm;
5804         #   this is based on the coding in it.
5805         #   The first line of a file that matches this will be eval'd:
5806         #       /([\$*])(([\w\:\']*)\bVERSION)\b.*\=/
5807         #   Examples:
5808         #     *VERSION = \'1.01';
5809         #     ( $VERSION ) = '$Revision: 1.74 $ ' =~ /\$Revision:\s+([^\s]+)/;
5810         #   We will pass such a line straight through without breaking
5811         #   it unless -npvl is used.
5812
5813         #   Patch for problem reported in RT #81866, where files
5814         #   had been flattened into a single line and couldn't be
5815         #   tidied without -npvl.  There are two parts to this patch:
5816         #   First, it is not done for a really long line (80 tokens for now).
5817         #   Second, we will only allow up to one semicolon
5818         #   before the VERSION.  We need to allow at least one semicolon
5819         #   for statements like this:
5820         #      require Exporter;  our $VERSION = $Exporter::VERSION;
5821         #   where both statements must be on a single line for MakeMaker
5822
5823         my $is_VERSION_statement = 0;
5824         if (  !$Saw_VERSION_in_this_file
5825             && $jmax < 80
5826             && $input_line =~
5827             /^[^;]*;?[^;]*([\$*])(([\w\:\']*)\bVERSION)\b.*\=/ )
5828         {
5829             $Saw_VERSION_in_this_file = 1;
5830             write_logfile_entry("passing VERSION line; -npvl deactivates\n");
5831
5832             # This code type has lower priority than others
5833             $CODE_type = 'VER';
5834             goto NEXT;
5835         }
5836
5837       NEXT:
5838         $line_of_tokens->{_code_type} = $CODE_type;
5839     }
5840
5841     if ($has_side_comment) {
5842         push @ix_side_comments, $ix_line;
5843     }
5844
5845     return
5846       if ( !$rOpts_delete_side_comments
5847         && !$rOpts_delete_closing_side_comments );
5848
5849     #-------------------------------------
5850     # TASK 2: Loop to delete side comments
5851     #-------------------------------------
5852
5853     # Handle any requested side comment deletions. It is easier to get
5854     # this done here rather than farther down the pipeline because IO
5855     # lines take a different route, and because lines with deleted HSC
5856     # become BL lines.  We have already handled any tee requests in sub
5857     # getline, so it is safe to delete side comments now.
5858
5859     # Also, we can get this done efficiently here.
5860
5861     foreach my $ix (@ix_side_comments) {
5862         my $line_of_tokens = $rlines->[$ix];
5863         my $line_type      = $line_of_tokens->{_line_type};
5864
5865         # This fault shouldn't happen because we only saved CODE lines with
5866         # side comments in the TASK 1 loop above.
5867         if ( $line_type ne 'CODE' ) {
5868             if (DEVEL_MODE) {
5869                 Fault(<<EOM);
5870 Hit unexpected line_type = '$line_type' while deleting side comments, should be 'CODE'
5871 EOM
5872             }
5873             next;
5874         }
5875
5876         my $CODE_type = $line_of_tokens->{_code_type};
5877         my $rK_range  = $line_of_tokens->{_rK_range};
5878         my ( $Kfirst, $Klast ) = @{$rK_range};
5879         my $delete_side_comment =
5880              $rOpts_delete_side_comments
5881           && defined($Kfirst)
5882           && $rLL->[$Klast]->[_TYPE_] eq '#'
5883           && ( $Klast > $Kfirst || $CODE_type eq 'HSC' )
5884           && (!$CODE_type
5885             || $CODE_type eq 'HSC'
5886             || $CODE_type eq 'IO'
5887             || $CODE_type eq 'NIN' );
5888
5889         if (
5890                $rOpts_delete_closing_side_comments
5891             && !$delete_side_comment
5892             && defined($Kfirst)
5893             && $Klast > $Kfirst
5894             && $rLL->[$Klast]->[_TYPE_] eq '#'
5895             && (  !$CODE_type
5896                 || $CODE_type eq 'HSC'
5897                 || $CODE_type eq 'IO'
5898                 || $CODE_type eq 'NIN' )
5899           )
5900         {
5901             my $token  = $rLL->[$Klast]->[_TOKEN_];
5902             my $K_m    = $Klast - 1;
5903             my $type_m = $rLL->[$K_m]->[_TYPE_];
5904             if ( $type_m eq 'b' && $K_m > $Kfirst ) { $K_m-- }
5905             my $seqno_m = $rLL->[$K_m]->[_TYPE_SEQUENCE_];
5906             if ($seqno_m) {
5907                 my $block_type_m = $rblock_type_of_seqno->{$seqno_m};
5908                 if (   $block_type_m
5909                     && $token        =~ /$closing_side_comment_prefix_pattern/
5910                     && $block_type_m =~ /$closing_side_comment_list_pattern/ )
5911                 {
5912                     $delete_side_comment = 1;
5913                 }
5914             }
5915         } ## end if ( $rOpts_delete_closing_side_comments...)
5916
5917         if ($delete_side_comment) {
5918
5919             # We are actually just changing the side comment to a blank.
5920             # This may produce multiple blanks in a row, but sub respace_tokens
5921             # will check for this and fix it.
5922             $rLL->[$Klast]->[_TYPE_]  = 'b';
5923             $rLL->[$Klast]->[_TOKEN_] = ' ';
5924
5925             # The -io option outputs the line text, so we have to update
5926             # the line text so that the comment does not reappear.
5927             if ( $CODE_type eq 'IO' ) {
5928                 my $line = "";
5929                 foreach my $KK ( $Kfirst .. $Klast - 1 ) {
5930                     $line .= $rLL->[$KK]->[_TOKEN_];
5931                 }
5932                 $line =~ s/\s+$//;
5933                 $line_of_tokens->{_line_text} = $line . "\n";
5934             }
5935
5936             # If we delete a hanging side comment the line becomes blank.
5937             if ( $CODE_type eq 'HSC' ) { $line_of_tokens->{_code_type} = 'BL' }
5938         }
5939     }
5940
5941     return;
5942 }
5943
5944 sub dump_verbatim {
5945     my $self   = shift;
5946     my $rlines = $self->[_rlines_];
5947     foreach my $line ( @{$rlines} ) {
5948         my $input_line = $line->{_line_text};
5949         $self->write_unindented_line($input_line);
5950     }
5951     return;
5952 }
5953
5954 my %wU;
5955 my %wiq;
5956 my %is_wit;
5957 my %is_sigil;
5958 my %is_nonlist_keyword;
5959 my %is_nonlist_type;
5960 my %is_special_check_type;
5961 my %is_s_y_m_slash;
5962 my %is_unexpected_equals;
5963
5964 BEGIN {
5965
5966     # added 'U' to fix cases b1125 b1126 b1127
5967     my @q = qw(w U);
5968     @{wU}{@q} = (1) x scalar(@q);
5969
5970     @q = qw(w i q Q G C Z);
5971     @{wiq}{@q} = (1) x scalar(@q);
5972
5973     @q = qw(w i t);
5974     @{is_wit}{@q} = (1) x scalar(@q);
5975
5976     @q = qw($ & % * @);
5977     @{is_sigil}{@q} = (1) x scalar(@q);
5978
5979     # Parens following these keywords will not be marked as lists. Note that
5980     # 'for' is not included and is handled separately, by including 'f' in the
5981     # hash %is_counted_type, since it may or may not be a c-style for loop.
5982     @q = qw( if elsif unless and or );
5983     @is_nonlist_keyword{@q} = (1) x scalar(@q);
5984
5985     # Parens following these types will not be marked as lists
5986     @q = qw( && || );
5987     @is_nonlist_type{@q} = (1) x scalar(@q);
5988
5989     @q = qw( s y m / );
5990     @is_s_y_m_slash{@q} = (1) x scalar(@q);
5991
5992     @q = qw( = == != );
5993     @is_unexpected_equals{@q} = (1) x scalar(@q);
5994
5995 }
5996
5997 sub respace_tokens {
5998
5999     my $self = shift;
6000     return if $rOpts->{'indent-only'};
6001
6002     # This routine is called once per file to do as much formatting as possible
6003     # before new line breaks are set.
6004
6005     # This routine makes all necessary and possible changes to the tokenization
6006     # after the initial tokenization of the file. This is a tedious routine,
6007     # but basically it consists of inserting and deleting whitespace between
6008     # nonblank tokens according to the selected parameters. In a few cases
6009     # non-space characters are added, deleted or modified.
6010
6011     # The goal of this routine is to create a new token array which only needs
6012     # the definition of new line breaks and padding to complete formatting.  In
6013     # a few cases we have to cheat a little to achieve this goal.  In
6014     # particular, we may not know if a semicolon will be needed, because it
6015     # depends on how the line breaks go.  To handle this, we include the
6016     # semicolon as a 'phantom' which can be displayed as normal or as an empty
6017     # string.
6018
6019     # Method: The old tokens are copied one-by-one, with changes, from the old
6020     # linear storage array $rLL to a new array $rLL_new.
6021
6022     my $rLL             = $self->[_rLL_];
6023     my $Klimit_old      = $self->[_Klimit_];
6024     my $rlines          = $self->[_rlines_];
6025     my $length_function = $self->[_length_function_];
6026     my $is_encoded_data = $self->[_is_encoded_data_];
6027
6028     my $rLL_new = [];    # This is the new array
6029     my $rtoken_vars;
6030     my $Ktoken_vars;                   # the old K value of $rtoken_vars
6031     my ( $Kfirst_old, $Klast_old );    # Range of old line
6032     my $Klast_old_code;                # K of last token if side comment
6033     my $Kmax = @{$rLL} - 1;
6034
6035     my $CODE_type = "";
6036     my $line_type = "";
6037
6038     # Set the whitespace flags, which indicate the token spacing preference.
6039     my $rwhitespace_flags = $self->set_whitespace_flags();
6040
6041     # we will be setting token lengths as we go
6042     my $cumulative_length = 0;
6043
6044     my %seqno_stack;
6045     my %K_old_opening_by_seqno = ();    # Note: old K index
6046     my $depth_next             = 0;
6047     my $depth_next_max         = 0;
6048
6049     # Note that $K_opening_container and $K_closing_container have values
6050     # defined in sub get_line() for the previous K indexes.  They were needed
6051     # in case option 'indent-only' was set, and we didn't get here. We no longer
6052     # need those and will eliminate them now to avoid any possible mixing of
6053     # old and new values.
6054     my $K_opening_container = $self->[_K_opening_container_] = {};
6055     my $K_closing_container = $self->[_K_closing_container_] = {};
6056
6057     my $K_closing_ternary         = $self->[_K_closing_ternary_];
6058     my $K_opening_ternary         = $self->[_K_opening_ternary_];
6059     my $rK_phantom_semicolons     = $self->[_rK_phantom_semicolons_];
6060     my $rchildren_of_seqno        = $self->[_rchildren_of_seqno_];
6061     my $rhas_broken_code_block    = $self->[_rhas_broken_code_block_];
6062     my $rhas_broken_list          = $self->[_rhas_broken_list_];
6063     my $rhas_broken_list_with_lec = $self->[_rhas_broken_list_with_lec_];
6064     my $rhas_code_block           = $self->[_rhas_code_block_];
6065     my $rhas_list                 = $self->[_rhas_list_];
6066     my $rhas_ternary              = $self->[_rhas_ternary_];
6067     my $ris_assigned_structure    = $self->[_ris_assigned_structure_];
6068     my $ris_broken_container      = $self->[_ris_broken_container_];
6069     my $ris_excluded_lp_container = $self->[_ris_excluded_lp_container_];
6070     my $ris_list_by_seqno         = $self->[_ris_list_by_seqno_];
6071     my $ris_permanently_broken    = $self->[_ris_permanently_broken_];
6072     my $rlec_count_by_seqno       = $self->[_rlec_count_by_seqno_];
6073     my $roverride_cab3            = $self->[_roverride_cab3_];
6074     my $rparent_of_seqno          = $self->[_rparent_of_seqno_];
6075     my $rtype_count_by_seqno      = $self->[_rtype_count_by_seqno_];
6076     my $rblock_type_of_seqno      = $self->[_rblock_type_of_seqno_];
6077
6078     my $last_nonblank_code_type       = ';';
6079     my $last_nonblank_code_token      = ';';
6080     my $last_nonblank_block_type      = '';
6081     my $last_last_nonblank_code_type  = ';';
6082     my $last_last_nonblank_code_token = ';';
6083
6084     my %K_first_here_doc_by_seqno;
6085
6086     my $set_permanently_broken = sub {
6087         my ($seqno) = @_;
6088         while ( defined($seqno) ) {
6089             $ris_permanently_broken->{$seqno} = 1;
6090             $seqno = $rparent_of_seqno->{$seqno};
6091         }
6092         return;
6093     };
6094     my $store_token = sub {
6095         my ($item) = @_;
6096
6097         # This will be the index of this item in the new array
6098         my $KK_new = @{$rLL_new};
6099
6100         my $type       = $item->[_TYPE_];
6101         my $is_blank   = $type eq 'b';
6102         my $block_type = "";
6103
6104         # Do not output consecutive blanks. This situation should have been
6105         # prevented earlier, but it is worth checking because later routines
6106         # make this assumption.
6107         if ( $is_blank && $KK_new && $rLL_new->[-1]->[_TYPE_] eq 'b' ) {
6108             return;
6109         }
6110
6111         # check for a sequenced item (i.e., container or ?/:)
6112         my $type_sequence = $item->[_TYPE_SEQUENCE_];
6113         my $token         = $item->[_TOKEN_];
6114         if ($type_sequence) {
6115
6116             if ( $is_opening_token{$token} ) {
6117
6118                 $K_opening_container->{$type_sequence} = $KK_new;
6119                 $block_type = $rblock_type_of_seqno->{$type_sequence};
6120
6121                 # Fix for case b1100: Count a line ending in ', [' as having
6122                 # a line-ending comma.  Otherwise, these commas can be hidden
6123                 # with something like --opening-square-bracket-right
6124                 if (   $last_nonblank_code_type eq ','
6125                     && $Ktoken_vars == $Klast_old_code
6126                     && $Ktoken_vars > $Kfirst_old )
6127                 {
6128                     $rlec_count_by_seqno->{$type_sequence}++;
6129                 }
6130
6131                 if (   $last_nonblank_code_type eq '='
6132                     || $last_nonblank_code_type eq '=>' )
6133                 {
6134                     $ris_assigned_structure->{$type_sequence} =
6135                       $last_nonblank_code_type;
6136                 }
6137
6138                 my $seqno_parent = $seqno_stack{ $depth_next - 1 };
6139                 $seqno_parent = SEQ_ROOT unless defined($seqno_parent);
6140                 push @{ $rchildren_of_seqno->{$seqno_parent} }, $type_sequence;
6141                 $rparent_of_seqno->{$type_sequence}     = $seqno_parent;
6142                 $seqno_stack{$depth_next}               = $type_sequence;
6143                 $K_old_opening_by_seqno{$type_sequence} = $Ktoken_vars;
6144                 $depth_next++;
6145
6146                 if ( $depth_next > $depth_next_max ) {
6147                     $depth_next_max = $depth_next;
6148                 }
6149             }
6150             elsif ( $is_closing_token{$token} ) {
6151
6152                 $K_closing_container->{$type_sequence} = $KK_new;
6153                 $block_type = $rblock_type_of_seqno->{$type_sequence};
6154
6155                 # Do not include terminal commas in counts
6156                 if (   $last_nonblank_code_type eq ','
6157                     || $last_nonblank_code_type eq '=>' )
6158                 {
6159                     my $seqno = $seqno_stack{ $depth_next - 1 };
6160                     if ($seqno) {
6161                         $rtype_count_by_seqno->{$seqno}
6162                           ->{$last_nonblank_code_type}--;
6163
6164                         if (   $Ktoken_vars == $Kfirst_old
6165                             && $last_nonblank_code_type eq ','
6166                             && $rlec_count_by_seqno->{$seqno} )
6167                         {
6168                             $rlec_count_by_seqno->{$seqno}--;
6169                         }
6170                     }
6171                 }
6172
6173                 # Update the stack...
6174                 $depth_next--;
6175             }
6176             else {
6177
6178                 # For ternary, note parent but do not include as child
6179                 my $seqno_parent = $seqno_stack{ $depth_next - 1 };
6180                 $seqno_parent = SEQ_ROOT unless defined($seqno_parent);
6181                 $rparent_of_seqno->{$type_sequence} = $seqno_parent;
6182
6183                 # These are not yet used but could be useful
6184                 if ( $token eq '?' ) {
6185                     $K_opening_ternary->{$type_sequence} = $KK_new;
6186                 }
6187                 elsif ( $token eq ':' ) {
6188                     $K_closing_ternary->{$type_sequence} = $KK_new;
6189                 }
6190                 else {
6191
6192                     # We really shouldn't arrive here, just being cautious:
6193                     # The only sequenced types output by the tokenizer are the
6194                     # opening & closing containers and the ternary types. Each
6195                     # of those was checked above. So we would only get here
6196                     # if the tokenizer has been changed to mark some other
6197                     # tokens with sequence numbers.
6198                     if (DEVEL_MODE) {
6199                         my $type = $item->[_TYPE_];
6200                         Fault(
6201 "Unexpected token type with sequence number: type='$type', seqno='$type_sequence'"
6202                         );
6203                     }
6204                 }
6205             }
6206         }
6207
6208         # Find the length of this token.  Later it may be adjusted if phantom
6209         # or ignoring side comment lengths.
6210         my $token_length =
6211             $is_encoded_data
6212           ? $length_function->($token)
6213           : length($token);
6214
6215         # handle comments
6216         my $is_comment = $type eq '#';
6217         if ($is_comment) {
6218
6219             # trim comments if necessary
6220             my $ord = ord( substr( $token, -1, 1 ) );
6221             if (
6222                 $ord > 0
6223                 && (   $ord < ORD_PRINTABLE_MIN
6224                     || $ord > ORD_PRINTABLE_MAX )
6225                 && $token =~ s/\s+$//
6226               )
6227             {
6228                 $token_length = $length_function->($token);
6229                 $item->[_TOKEN_] = $token;
6230             }
6231
6232             # Mark length of side comments as just 1 if sc lengths are ignored
6233             if ( $rOpts_ignore_side_comment_lengths
6234                 && ( !$CODE_type || $CODE_type eq 'HSC' ) )
6235             {
6236                 $token_length = 1;
6237             }
6238             my $seqno = $seqno_stack{ $depth_next - 1 };
6239             if ( defined($seqno)
6240                 && !$ris_permanently_broken->{$seqno} )
6241             {
6242                 $set_permanently_broken->($seqno);
6243             }
6244         }
6245
6246         $item->[_TOKEN_LENGTH_] = $token_length;
6247
6248         # and update the cumulative length
6249         $cumulative_length += $token_length;
6250
6251         # Save the length sum to just AFTER this token
6252         $item->[_CUMULATIVE_LENGTH_] = $cumulative_length;
6253
6254         if ( !$is_blank && !$is_comment ) {
6255
6256             # Remember the most recent two non-blank, non-comment tokens.
6257             # NOTE: the phantom semicolon code may change the output stack
6258             # without updating these values.  Phantom semicolons are considered
6259             # the same as blanks for now, but future needs might change that.
6260             # See the related note in sub '$add_phantom_semicolon'.
6261             $last_last_nonblank_code_type  = $last_nonblank_code_type;
6262             $last_last_nonblank_code_token = $last_nonblank_code_token;
6263
6264             $last_nonblank_code_type  = $type;
6265             $last_nonblank_code_token = $token;
6266             $last_nonblank_block_type = $block_type;
6267
6268             # count selected types
6269             if ( $is_counted_type{$type} ) {
6270                 my $seqno = $seqno_stack{ $depth_next - 1 };
6271                 if ( defined($seqno) ) {
6272                     $rtype_count_by_seqno->{$seqno}->{$type}++;
6273
6274                     # Count line-ending commas for -bbx
6275                     if ( $type eq ',' && $Ktoken_vars == $Klast_old_code ) {
6276                         $rlec_count_by_seqno->{$seqno}++;
6277                     }
6278
6279                     # Remember index of first here doc target
6280                     if ( $type eq 'h' && !$K_first_here_doc_by_seqno{$seqno} ) {
6281                         $K_first_here_doc_by_seqno{$seqno} = $KK_new;
6282                     }
6283                 }
6284             }
6285         }
6286
6287         # For reference, here is how to get the parent sequence number.
6288         # This is not used because it is slower than finding it on the fly
6289         # in sub parent_seqno_by_K:
6290
6291         # my $seqno_parent =
6292         #     $type_sequence && $is_opening_token{$token}
6293         #   ? $seqno_stack{ $depth_next - 2 }
6294         #   : $seqno_stack{ $depth_next - 1 };
6295         # my $KK = @{$rLL_new};
6296         # $rseqno_of_parent_by_K->{$KK} = $seqno_parent;
6297
6298         # and finally, add this item to the new array
6299         push @{$rLL_new}, $item;
6300         return;
6301     };
6302
6303     my $store_token_and_space = sub {
6304         my ( $item, $want_space ) = @_;
6305
6306         # store a token with preceding space if requested and needed
6307
6308         # First store the space
6309         if (   $want_space
6310             && @{$rLL_new}
6311             && $rLL_new->[-1]->[_TYPE_] ne 'b'
6312             && $rOpts_add_whitespace )
6313         {
6314             my $rcopy = [ @{$item} ];
6315             $rcopy->[_TYPE_]          = 'b';
6316             $rcopy->[_TOKEN_]         = ' ';
6317             $rcopy->[_TYPE_SEQUENCE_] = '';
6318
6319             $rcopy->[_LINE_INDEX_] =
6320               $rLL_new->[-1]->[_LINE_INDEX_];
6321
6322             # Patch 23-Jan-2021 to fix -lp blinkers:
6323             # The level and ci_level of newly created spaces should be the same
6324             # as the previous token.  Otherwise the coding for the -lp option
6325             # can create a blinking state in some rare cases.
6326             $rcopy->[_LEVEL_] =
6327               $rLL_new->[-1]->[_LEVEL_];
6328             $rcopy->[_CI_LEVEL_] =
6329               $rLL_new->[-1]->[_CI_LEVEL_];
6330
6331             $store_token->($rcopy);
6332         }
6333
6334         # then the token
6335         $store_token->($item);
6336         return;
6337     };
6338
6339     my $add_phantom_semicolon = sub {
6340
6341         my ($KK) = @_;
6342
6343         my $Kp = $self->K_previous_nonblank( undef, $rLL_new );
6344         return unless ( defined($Kp) );
6345
6346         # we are only adding semicolons for certain block types
6347         my $type_sequence = $rLL->[$KK]->[_TYPE_SEQUENCE_];
6348         return unless ($type_sequence);
6349         my $block_type = $rblock_type_of_seqno->{$type_sequence};
6350         return unless ($block_type);
6351         return
6352           unless ( $ok_to_add_semicolon_for_block_type{$block_type}
6353             || $block_type =~ /^(sub|package)/
6354             || $block_type =~ /^\w+\:$/ );
6355
6356         my $type_p          = $rLL_new->[$Kp]->[_TYPE_];
6357         my $token_p         = $rLL_new->[$Kp]->[_TOKEN_];
6358         my $type_sequence_p = $rLL_new->[$Kp]->[_TYPE_SEQUENCE_];
6359
6360         # Do not add a semicolon if...
6361         return
6362           if (
6363
6364             # it would follow a comment (and be isolated)
6365             $type_p eq '#'
6366
6367             # it follows a code block ( because they are not always wanted
6368             # there and may add clutter)
6369             || $type_sequence_p && $rblock_type_of_seqno->{$type_sequence_p}
6370
6371             # it would follow a label
6372             || $type_p eq 'J'
6373
6374             # it would be inside a 'format' statement (and cause syntax error)
6375             || (   $type_p eq 'k'
6376                 && $token_p =~ /format/ )
6377
6378           );
6379
6380         # Do not add a semicolon if it would impede a weld with an immediately
6381         # following closing token...like this
6382         #   { ( some code ) }
6383         #                  ^--No semicolon can go here
6384
6385         # look at the previous token... note use of the _NEW rLL array here,
6386         # but sequence numbers are invariant.
6387         my $seqno_inner = $rLL_new->[$Kp]->[_TYPE_SEQUENCE_];
6388
6389         # If it is also a CLOSING token we have to look closer...
6390         if (
6391                $seqno_inner
6392             && $is_closing_token{$token_p}
6393
6394             # we only need to look if there is just one inner container..
6395             && defined( $rchildren_of_seqno->{$type_sequence} )
6396             && @{ $rchildren_of_seqno->{$type_sequence} } == 1
6397           )
6398         {
6399
6400             # Go back and see if the corresponding two OPENING tokens are also
6401             # together.  Note that we are using the OLD K indexing here:
6402             my $K_outer_opening = $K_old_opening_by_seqno{$type_sequence};
6403             if ( defined($K_outer_opening) ) {
6404                 my $K_nxt = $self->K_next_nonblank($K_outer_opening);
6405                 if ( defined($K_nxt) ) {
6406                     my $seqno_nxt = $rLL->[$K_nxt]->[_TYPE_SEQUENCE_];
6407
6408                     # Is the next token after the outer opening the same as
6409                     # our inner closing (i.e. same sequence number)?
6410                     # If so, do not insert a semicolon here.
6411                     return if ( $seqno_nxt && $seqno_nxt == $seqno_inner );
6412                 }
6413             }
6414         }
6415
6416         # We will insert an empty semicolon here as a placeholder.  Later, if
6417         # it becomes the last token on a line, we will bring it to life.  The
6418         # advantage of doing this is that (1) we just have to check line
6419         # endings, and (2) the phantom semicolon has zero width and therefore
6420         # won't cause needless breaks of one-line blocks.
6421         my $Ktop = -1;
6422         if (   $rLL_new->[$Ktop]->[_TYPE_] eq 'b'
6423             && $want_left_space{';'} == WS_NO )
6424         {
6425
6426             # convert the blank into a semicolon..
6427             # be careful: we are working on the new stack top
6428             # on a token which has been stored.
6429             my $rcopy = copy_token_as_type( $rLL_new->[$Ktop], 'b', ' ' );
6430
6431             # Convert the existing blank to:
6432             #   a phantom semicolon for one_line_block option = 0 or 1
6433             #   a real semicolon    for one_line_block option = 2
6434             my $tok     = '';
6435             my $len_tok = 0;
6436             if ( $rOpts_one_line_block_semicolons == 2 ) {
6437                 $tok     = ';';
6438                 $len_tok = 1;
6439             }
6440
6441             $rLL_new->[$Ktop]->[_TOKEN_]        = $tok;
6442             $rLL_new->[$Ktop]->[_TOKEN_LENGTH_] = $len_tok;
6443             $rLL_new->[$Ktop]->[_TYPE_]         = ';';
6444
6445             # NOTE: we are changing the output stack without updating variables
6446             # $last_nonblank_code_type, etc. Future needs might require that
6447             # those variables be updated here.  For now, it seems ok to skip
6448             # this.
6449
6450             # Save list of new K indexes of phantom semicolons.
6451             # This will be needed if we want to undo them for iterations in
6452             # future coding.
6453             push @{$rK_phantom_semicolons}, @{$rLL_new} - 1;
6454
6455             # Then store a new blank
6456             $store_token->($rcopy);
6457         }
6458         else {
6459
6460             # Patch for issue c078: keep line indexes in order.  If the top
6461             # token is a space that we are keeping (due to '-wls=';') then
6462             # we have to check that old line indexes stay in order.
6463             # In very rare
6464             # instances in which side comments have been deleted and converted
6465             # into blanks, we may have filtered down multiple blanks into just
6466             # one. In that case the top blank may have a higher line number
6467             # than the previous nonblank token. Although the line indexes of
6468             # blanks are not really significant, we need to keep them in order
6469             # in order to pass error checks.
6470             if ( $rLL_new->[$Ktop]->[_TYPE_] eq 'b' ) {
6471                 my $old_top_ix = $rLL_new->[$Ktop]->[_LINE_INDEX_];
6472                 my $new_top_ix = $rLL_new->[$Kp]->[_LINE_INDEX_];
6473                 if ( $new_top_ix < $old_top_ix ) {
6474                     $rLL_new->[$Ktop]->[_LINE_INDEX_] = $new_top_ix;
6475                 }
6476             }
6477
6478             my $rcopy = copy_token_as_type( $rLL_new->[$Kp], ';', '' );
6479             $store_token->($rcopy);
6480             push @{$rK_phantom_semicolons}, @{$rLL_new} - 1;
6481         }
6482         return;
6483     };
6484
6485     my $check_Q = sub {
6486
6487         # Check that a quote looks okay
6488         # This sub works but needs to by sync'd with the log file output
6489         # before it can be used.
6490         my ( $KK, $Kfirst, $line_number ) = @_;
6491         my $token = $rLL->[$KK]->[_TOKEN_];
6492         $self->note_embedded_tab($line_number) if ( $token =~ "\t" );
6493
6494         # The remainder of this routine looks for something like
6495         #        '$var = s/xxx/yyy/;'
6496         # in case it should have been '$var =~ s/xxx/yyy/;'
6497
6498         # Start by looking for a token begining with one of: s y m / tr
6499         return
6500           unless ( $is_s_y_m_slash{ substr( $token, 0, 1 ) }
6501             || substr( $token, 0, 2 ) eq 'tr' );
6502
6503         # ... and preceded by one of: = == !=
6504         my $Kp = $self->K_previous_nonblank( undef, $rLL_new );
6505         return unless ( defined($Kp) );
6506         my $previous_nonblank_type = $rLL_new->[$Kp]->[_TYPE_];
6507         return unless ( $is_unexpected_equals{$previous_nonblank_type} );
6508         my $previous_nonblank_token = $rLL_new->[$Kp]->[_TOKEN_];
6509
6510         my $previous_nonblank_type_2  = 'b';
6511         my $previous_nonblank_token_2 = "";
6512         my $Kpp = $self->K_previous_nonblank( $Kp, $rLL_new );
6513         if ( defined($Kpp) ) {
6514             $previous_nonblank_type_2  = $rLL_new->[$Kpp]->[_TYPE_];
6515             $previous_nonblank_token_2 = $rLL_new->[$Kpp]->[_TOKEN_];
6516         }
6517
6518         my $next_nonblank_token = "";
6519         my $Kn                  = $KK + 1;
6520         if ( $Kn <= $Kmax && $rLL->[$Kn]->[_TYPE_] eq 'b' ) { $Kn += 1 }
6521         if ( $Kn <= $Kmax ) {
6522             $next_nonblank_token = $rLL->[$Kn]->[_TOKEN_];
6523         }
6524
6525         my $token_0 = $rLL->[$Kfirst]->[_TOKEN_];
6526         my $type_0  = $rLL->[$Kfirst]->[_TYPE_];
6527
6528         if (
6529             ##$token =~ /^(s|tr|y|m|\/)/
6530             ##&& $previous_nonblank_token =~ /^(=|==|!=)$/
6531             1
6532
6533             # preceded by simple scalar
6534             && $previous_nonblank_type_2 eq 'i'
6535             && $previous_nonblank_token_2 =~ /^\$/
6536
6537             # followed by some kind of termination
6538             # (but give complaint if we can not see far enough ahead)
6539             && $next_nonblank_token =~ /^[; \)\}]$/
6540
6541             # scalar is not declared
6542             && !( $type_0 eq 'k' && $token_0 =~ /^(my|our|local)$/ )
6543           )
6544         {
6545             my $lno   = 1 + $rLL_new->[$Kp]->[_LINE_INDEX_];
6546             my $guess = substr( $previous_nonblank_token, 0, 1 ) . '~';
6547             complain(
6548 "Line $lno: Note: be sure you want '$previous_nonblank_token' instead of '$guess' here\n"
6549             );
6550         }
6551         return;
6552     };
6553
6554     #-------------------------------------------
6555     # Main loop to respace all lines of the file
6556     #-------------------------------------------
6557     my $last_K_out;
6558
6559     foreach my $line_of_tokens ( @{$rlines} ) {
6560
6561         my $input_line_number = $line_of_tokens->{_line_number};
6562         my $last_line_type    = $line_type;
6563         $line_type = $line_of_tokens->{_line_type};
6564         next unless ( $line_type eq 'CODE' );
6565         my $last_CODE_type = $CODE_type;
6566         $CODE_type = $line_of_tokens->{_code_type};
6567         my $rK_range = $line_of_tokens->{_rK_range};
6568         my ( $Kfirst, $Klast ) = @{$rK_range};
6569         next unless defined($Kfirst);
6570         ( $Kfirst_old, $Klast_old ) = ( $Kfirst, $Klast );
6571         $Klast_old_code = $Klast_old;
6572
6573         # Be sure an old K value is defined for sub $store_token
6574         $Ktoken_vars = $Kfirst;
6575
6576         # Check for correct sequence of token indexes...
6577         # An error here means that sub write_line() did not correctly
6578         # package the tokenized lines as it received them.  If we
6579         # get a fault here it has not output a continuous sequence
6580         # of K values.  Or a line of CODE may have been mismarked as
6581         # something else.  There is no good way to continue after such an
6582         # error.
6583         # FIXME: Calling Fault will produce zero output; it would be best to
6584         # find a way to dump the input file.
6585         if ( defined($last_K_out) ) {
6586             if ( $Kfirst != $last_K_out + 1 ) {
6587                 Fault(
6588                     "Program Bug: last K out was $last_K_out but Kfirst=$Kfirst"
6589                 );
6590             }
6591         }
6592         else {
6593
6594             # The first token should always have been given index 0 by sub
6595             # write_line()
6596             if ( $Kfirst != 0 ) {
6597                 Fault("Program Bug: first K is $Kfirst but should be 0");
6598             }
6599         }
6600         $last_K_out = $Klast;
6601
6602         # Handle special lines of code
6603         if ( $CODE_type && $CODE_type ne 'NIN' && $CODE_type ne 'VER' ) {
6604
6605             # CODE_types are as follows.
6606             # 'BL' = Blank Line
6607             # 'VB' = Verbatim - line goes out verbatim
6608             # 'FS' = Format Skipping - line goes out verbatim, no blanks
6609             # 'IO' = Indent Only - only indentation may be changed
6610             # 'NIN' = No Internal Newlines - line does not get broken
6611             # 'HSC'=Hanging Side Comment - fix this hanging side comment
6612             # 'BC'=Block Comment - an ordinary full line comment
6613             # 'SBC'=Static Block Comment - a block comment which does not get
6614             #      indented
6615             # 'SBCX'=Static Block Comment Without Leading Space
6616             # 'VER'=VERSION statement
6617             # '' or (undefined) - no restructions
6618
6619             # For a hanging side comment we insert an empty quote before
6620             # the comment so that it becomes a normal side comment and
6621             # will be aligned by the vertical aligner
6622             if ( $CODE_type eq 'HSC' ) {
6623
6624                 # Safety Check: This must be a line with one token (a comment)
6625                 my $rtoken_vars = $rLL->[$Kfirst];
6626                 if ( $Kfirst == $Klast && $rtoken_vars->[_TYPE_] eq '#' ) {
6627
6628                     # Note that even if the flag 'noadd-whitespace' is set, we
6629                     # will make an exception here and allow a blank to be
6630                     # inserted to push the comment to the right.  We can think
6631                     # of this as an adjustment of indentation rather than
6632                     # whitespace between tokens. This will also prevent the
6633                     # hanging side comment from getting converted to a block
6634                     # comment if whitespace gets deleted, as for example with
6635                     # the -extrude and -mangle options.
6636                     my $rcopy = copy_token_as_type( $rtoken_vars, 'q', '' );
6637                     $store_token->($rcopy);
6638                     $rcopy = copy_token_as_type( $rtoken_vars, 'b', ' ' );
6639                     $store_token->($rcopy);
6640                     $store_token->($rtoken_vars);
6641                     next;
6642                 }
6643                 else {
6644
6645                     # This line was mis-marked by sub scan_comment.  Catch in
6646                     # DEVEL_MODE, otherwise try to repair and keep going.
6647                     Fault(
6648                         "Program bug. A hanging side comment has been mismarked"
6649                     ) if (DEVEL_MODE);
6650
6651                     $CODE_type = "";
6652                     $line_of_tokens->{_code_type} = $CODE_type;
6653                 }
6654             }
6655
6656             if ( $CODE_type eq 'BL' ) {
6657                 my $seqno = $seqno_stack{ $depth_next - 1 };
6658                 if (   defined($seqno)
6659                     && !$ris_permanently_broken->{$seqno}
6660                     && $rOpts_maximum_consecutive_blank_lines )
6661                 {
6662                     $set_permanently_broken->($seqno);
6663                 }
6664             }
6665
6666             # Copy tokens unchanged
6667             foreach my $KK ( $Kfirst .. $Klast ) {
6668                 $Ktoken_vars = $KK;
6669                 $store_token->( $rLL->[$KK] );
6670             }
6671             next;
6672         }
6673
6674         # Handle normal line..
6675
6676         # Define index of last token before any side comment for comma counts
6677         my $type_end = $rLL->[$Klast_old_code]->[_TYPE_];
6678         if ( ( $type_end eq '#' || $type_end eq 'b' )
6679             && $Klast_old_code > $Kfirst_old )
6680         {
6681             $Klast_old_code--;
6682             if (   $rLL->[$Klast_old_code]->[_TYPE_] eq 'b'
6683                 && $Klast_old_code > $Kfirst_old )
6684             {
6685                 $Klast_old_code--;
6686             }
6687         }
6688
6689         # Insert any essential whitespace between lines
6690         # if last line was normal CODE.
6691         # Patch for rt #125012: use K_previous_code rather than '_nonblank'
6692         # because comments may disappear.
6693         if ( $last_line_type eq 'CODE' ) {
6694             my $type_next  = $rLL->[$Kfirst]->[_TYPE_];
6695             my $token_next = $rLL->[$Kfirst]->[_TOKEN_];
6696             if (
6697                 is_essential_whitespace(
6698                     $last_last_nonblank_code_token,
6699                     $last_last_nonblank_code_type,
6700                     $last_nonblank_code_token,
6701                     $last_nonblank_code_type,
6702                     $token_next,
6703                     $type_next,
6704                 )
6705               )
6706             {
6707
6708                 # Copy this first token as blank, but use previous line number
6709                 my $rcopy = copy_token_as_type( $rLL->[$Kfirst], 'b', ' ' );
6710                 $rcopy->[_LINE_INDEX_] =
6711                   $rLL_new->[-1]->[_LINE_INDEX_];
6712
6713                 # The level and ci_level of newly created spaces should be the
6714                 # same as the previous token. Otherwise blinking states can
6715                 # be created if the -lp mode is used. See similar coding in
6716                 # sub 'store_token_and_space'.  Fixes cases b1109 b1110.
6717                 $rcopy->[_LEVEL_] =
6718                   $rLL_new->[-1]->[_LEVEL_];
6719                 $rcopy->[_CI_LEVEL_] =
6720                   $rLL_new->[-1]->[_CI_LEVEL_];
6721
6722                 $store_token->($rcopy);
6723             }
6724         }
6725
6726         #-------------------------------------------------------
6727         # Loop to copy all tokens on this line, with any changes
6728         #-------------------------------------------------------
6729         my $type_sequence;
6730         for ( my $KK = $Kfirst ; $KK <= $Klast ; $KK++ ) {
6731             $Ktoken_vars = $KK;
6732             $rtoken_vars = $rLL->[$KK];
6733             my $token              = $rtoken_vars->[_TOKEN_];
6734             my $type               = $rtoken_vars->[_TYPE_];
6735             my $last_type_sequence = $type_sequence;
6736             $type_sequence = $rtoken_vars->[_TYPE_SEQUENCE_];
6737
6738             # Handle a blank space ...
6739             if ( $type eq 'b' ) {
6740
6741                 # Delete it if not wanted by whitespace rules
6742                 # or we are deleting all whitespace
6743                 # Note that whitespace flag is a flag indicating whether a
6744                 # white space BEFORE the token is needed
6745                 next if ( $KK >= $Klast );    # skip terminal blank
6746                 my $Knext = $KK + 1;
6747
6748                 if ($rOpts_freeze_whitespace) {
6749                     $store_token->($rtoken_vars);
6750                     next;
6751                 }
6752
6753                 my $ws = $rwhitespace_flags->[$Knext];
6754                 if (   $ws == -1
6755                     || $rOpts_delete_old_whitespace )
6756                 {
6757
6758                     my $token_next = $rLL->[$Knext]->[_TOKEN_];
6759                     my $type_next  = $rLL->[$Knext]->[_TYPE_];
6760
6761                     my $do_not_delete = is_essential_whitespace(
6762                         $last_last_nonblank_code_token,
6763                         $last_last_nonblank_code_type,
6764                         $last_nonblank_code_token,
6765                         $last_nonblank_code_type,
6766                         $token_next,
6767                         $type_next,
6768                     );
6769
6770                     # Note that repeated blanks will get filtered out here
6771                     next unless ($do_not_delete);
6772                 }
6773
6774                 # make it just one character
6775                 $rtoken_vars->[_TOKEN_] = ' ';
6776                 $store_token->($rtoken_vars);
6777                 next;
6778             }
6779
6780             # Handle a nonblank token...
6781
6782             if ($type_sequence) {
6783
6784                 # Insert a tentative missing semicolon if the next token is
6785                 # a closing block brace
6786                 if (
6787                        $type eq '}'
6788                     && $token eq '}'
6789
6790                     # not preceded by a ';'
6791                     && $last_nonblank_code_type ne ';'
6792
6793                     # and this is not a VERSION stmt (is all one line, we
6794                     # are not inserting semicolons on one-line blocks)
6795                     && $CODE_type ne 'VER'
6796
6797                     # and we are allowed to add semicolons
6798                     && $rOpts->{'add-semicolons'}
6799                   )
6800                 {
6801                     $add_phantom_semicolon->($KK);
6802                 }
6803             }
6804
6805             # Modify certain tokens here for whitespace
6806             # The following is not yet done, but could be:
6807             #   sub (x x x)
6808             #     ( $type =~ /^[wit]$/ )
6809             elsif ( $is_wit{$type} ) {
6810
6811                 # change '$  var'  to '$var' etc
6812                 # change '@    '   to '@'
6813                 # Examples: <<snippets/space1.in>>
6814                 my $ord = ord( substr( $token, 1, 1 ) );
6815                 if (
6816
6817                     # quick test for possible blank at second char
6818                     $ord > 0 && ( $ord < ORD_PRINTABLE_MIN
6819                         || $ord > ORD_PRINTABLE_MAX )
6820                   )
6821                 {
6822                     my ( $sigil, $word ) = split /\s+/, $token, 2;
6823
6824                     # $sigil =~ /^[\$\&\%\*\@]$/ )
6825                     if ( $is_sigil{$sigil} ) {
6826                         $token = $sigil;
6827                         $token .= $word if ( defined($word) );    # fix c104
6828                         $rtoken_vars->[_TOKEN_] = $token;
6829                     }
6830                 }
6831
6832                 # Split identifiers with leading arrows, inserting blanks
6833                 # if necessary.  It is easier and safer here than in the
6834                 # tokenizer.  For example '->new' becomes two tokens, '->'
6835                 # and 'new' with a possible blank between.
6836                 #
6837                 # Note: there is a related patch in sub set_whitespace_flags
6838                 elsif (length($token) > 2
6839                     && substr( $token, 0, 2 ) eq '->'
6840                     && $token =~ /^\-\>(.*)$/
6841                     && $1 )
6842                 {
6843
6844                     my $token_save = $1;
6845                     my $type_save  = $type;
6846
6847                     # Change '-> new'  to '->new'
6848                     $token_save =~ s/^\s+//g;
6849
6850                     # store a blank to left of arrow if necessary
6851                     my $Kprev = $self->K_previous_nonblank($KK);
6852                     if (   defined($Kprev)
6853                         && $rLL->[$Kprev]->[_TYPE_] ne 'b'
6854                         && $rOpts_add_whitespace
6855                         && $want_left_space{'->'} == WS_YES )
6856                     {
6857                         my $rcopy =
6858                           copy_token_as_type( $rtoken_vars, 'b', ' ' );
6859                         $store_token->($rcopy);
6860                     }
6861
6862                     # then store the arrow
6863                     my $rcopy = copy_token_as_type( $rtoken_vars, '->', '->' );
6864                     $store_token->($rcopy);
6865
6866                     # store a blank after the arrow if requested
6867                     # added for issue git #33
6868                     if ( $want_right_space{'->'} == WS_YES ) {
6869                         my $rcopy =
6870                           copy_token_as_type( $rtoken_vars, 'b', ' ' );
6871                         $store_token->($rcopy);
6872                     }
6873
6874                     # then reset the current token to be the remainder,
6875                     # and reset the whitespace flag according to the arrow
6876                     $token = $rtoken_vars->[_TOKEN_] = $token_save;
6877                     $type  = $rtoken_vars->[_TYPE_]  = $type_save;
6878                     $store_token->($rtoken_vars);
6879                     next;
6880                 }
6881
6882                 # Trim certain spaces in identifiers
6883                 if ( $type eq 'i' ) {
6884
6885                     if (
6886                         (
6887                             substr( $token, 0, 3 ) eq 'sub'
6888                             || $rOpts_sub_alias_list
6889                         )
6890                         && $token =~ /$SUB_PATTERN/
6891                       )
6892                     {
6893
6894                         # -spp = 0 : no space before opening prototype paren
6895                         # -spp = 1 : stable (follow input spacing)
6896                         # -spp = 2 : always space before opening prototype paren
6897                         my $spp = $rOpts->{'space-prototype-paren'};
6898                         if ( defined($spp) ) {
6899                             if    ( $spp == 0 ) { $token =~ s/\s+\(/\(/; }
6900                             elsif ( $spp == 2 ) { $token =~ s/\(/ (/; }
6901                         }
6902
6903                         # one space max, and no tabs
6904                         $token =~ s/\s+/ /g;
6905                         $rtoken_vars->[_TOKEN_] = $token;
6906                     }
6907
6908                     # clean up spaces in package identifiers, like
6909                     #   "package        Bob::Dog;"
6910                     elsif ( substr( $token, 0, 7 ) eq 'package'
6911                         && $token =~ /^package\s/ )
6912                     {
6913                         $token =~ s/\s+/ /g;
6914                         $rtoken_vars->[_TOKEN_] = $token;
6915                     }
6916
6917                     # trim identifiers of trailing blanks which can occur
6918                     # under some unusual circumstances, such as if the
6919                     # identifier 'witch' has trailing blanks on input here:
6920                     #
6921                     # sub
6922                     # witch
6923                     # ()   # prototype may be on new line ...
6924                     # ...
6925                     my $ord = ord( substr( $token, -1, 1 ) );
6926                     if (
6927
6928                         # quick check for possible ending space
6929                         $ord > 0 && ( $ord < ORD_PRINTABLE_MIN
6930                             || $ord > ORD_PRINTABLE_MAX )
6931                       )
6932                     {
6933                         $token =~ s/\s+$//g;
6934                         $rtoken_vars->[_TOKEN_] = $token;
6935                     }
6936                 }
6937             }
6938
6939             # handle semicolons
6940             elsif ( $type eq ';' ) {
6941
6942                 # Remove unnecessary semicolons, but not after bare
6943                 # blocks, where it could be unsafe if the brace is
6944                 # mistokenized.
6945                 if (
6946                     $rOpts->{'delete-semicolons'}
6947                     && (
6948                         (
6949                                $last_nonblank_block_type
6950                             && $last_nonblank_code_type eq '}'
6951                             && (
6952                                 $is_block_without_semicolon{
6953                                     $last_nonblank_block_type}
6954                                 || $last_nonblank_block_type =~ /$SUB_PATTERN/
6955                                 || $last_nonblank_block_type =~ /^\w+:$/
6956                             )
6957                         )
6958                         || $last_nonblank_code_type eq ';'
6959                     )
6960                   )
6961                 {
6962
6963                     # This looks like a deletable semicolon, but even if a
6964                     # semicolon can be deleted it is not necessarily best to do
6965                     # so.  We apply these additional rules for deletion:
6966                     # - Always ok to delete a ';' at the end of a line
6967                     # - Never delete a ';' before a '#' because it would
6968                     #   promote it to a block comment.
6969                     # - If a semicolon is not at the end of line, then only
6970                     #   delete if it is followed by another semicolon or closing
6971                     #   token.  This includes the comment rule.  It may take
6972                     #   two passes to get to a final state, but it is a little
6973                     #   safer.  For example, keep the first semicolon here:
6974                     #      eval { sub bubba { ok(0) }; ok(0) } || ok(1);
6975                     #   It is not required but adds some clarity.
6976                     my $ok_to_delete = 1;
6977                     if ( $KK < $Klast ) {
6978                         my $Kn = $self->K_next_nonblank($KK);
6979                         if ( defined($Kn) && $Kn <= $Klast ) {
6980                             my $next_nonblank_token_type =
6981                               $rLL->[$Kn]->[_TYPE_];
6982                             $ok_to_delete = $next_nonblank_token_type eq ';'
6983                               || $next_nonblank_token_type eq '}';
6984                         }
6985                     }
6986
6987                     # do not delete only nonblank token in a file
6988                     else {
6989                         my $Kp = $self->K_previous_code( undef, $rLL_new );
6990                         my $Kn = $self->K_next_nonblank($KK);
6991                         $ok_to_delete = defined($Kn) || defined($Kp);
6992                     }
6993
6994                     if ($ok_to_delete) {
6995                         $self->note_deleted_semicolon($input_line_number);
6996                         next;
6997                     }
6998                     else {
6999                         write_logfile_entry("Extra ';'\n");
7000                     }
7001                 }
7002             }
7003
7004             # Old patch to add space to something like "x10".
7005             # Note: This is now done in the Tokenizer, but this code remains
7006             # for reference.
7007             elsif ( $type eq 'n' ) {
7008                 if ( substr( $token, 0, 1 ) eq 'x' && $token =~ /^x\d+/ ) {
7009                     $token =~ s/x/x /;
7010                     $rtoken_vars->[_TOKEN_] = $token;
7011                     if (DEVEL_MODE) {
7012                         Fault(<<EOM);
7013 Near line $input_line_number, Unexpected need to split a token '$token' - this should now be done by the Tokenizer
7014 EOM
7015                     }
7016                 }
7017             }
7018
7019             # check for a qw quote
7020             elsif ( $type eq 'q' ) {
7021
7022                 # trim blanks from right of qw quotes
7023                 # (To avoid trimming qw quotes use -ntqw; the tokenizer handles
7024                 # this)
7025                 $token =~ s/\s*$//;
7026                 $rtoken_vars->[_TOKEN_] = $token;
7027                 $self->note_embedded_tab($input_line_number)
7028                   if ( $token =~ "\t" );
7029                 $store_token_and_space->(
7030                     $rtoken_vars, $rwhitespace_flags->[$KK] == WS_YES
7031                 );
7032                 next;
7033             } ## end if ( $type eq 'q' )
7034
7035             # change 'LABEL   :'   to 'LABEL:'
7036             elsif ( $type eq 'J' ) {
7037                 $token =~ s/\s+//g;
7038                 $rtoken_vars->[_TOKEN_] = $token;
7039             }
7040
7041             # check a quote for problems
7042             elsif ( $type eq 'Q' ) {
7043                 $check_Q->( $KK, $Kfirst, $input_line_number );
7044             }
7045
7046             # Store this token with possible previous blank
7047             $store_token_and_space->(
7048                 $rtoken_vars, $rwhitespace_flags->[$KK] == WS_YES
7049             );
7050
7051         }    # End token loop
7052     }    # End line loop
7053
7054     # Walk backwards through the tokens, making forward links to sequence items.
7055     if ( @{$rLL_new} ) {
7056         my $KNEXT;
7057         for ( my $KK = @{$rLL_new} - 1 ; $KK >= 0 ; $KK-- ) {
7058             $rLL_new->[$KK]->[_KNEXT_SEQ_ITEM_] = $KNEXT;
7059             if ( $rLL_new->[$KK]->[_TYPE_SEQUENCE_] ) { $KNEXT = $KK }
7060         }
7061         $self->[_K_first_seq_item_] = $KNEXT;
7062     }
7063
7064     # Find and remember lists by sequence number
7065     foreach my $seqno ( keys %{$K_opening_container} ) {
7066         my $K_opening = $K_opening_container->{$seqno};
7067         next unless defined($K_opening);
7068
7069         # code errors may leave undefined closing tokens
7070         my $K_closing = $K_closing_container->{$seqno};
7071         next unless defined($K_closing);
7072
7073         my $lx_open   = $rLL_new->[$K_opening]->[_LINE_INDEX_];
7074         my $lx_close  = $rLL_new->[$K_closing]->[_LINE_INDEX_];
7075         my $line_diff = $lx_close - $lx_open;
7076         $ris_broken_container->{$seqno} = $line_diff;
7077
7078         # See if this is a list
7079         my $is_list;
7080         my $rtype_count = $rtype_count_by_seqno->{$seqno};
7081         if ($rtype_count) {
7082             my $comma_count     = $rtype_count->{','};
7083             my $fat_comma_count = $rtype_count->{'=>'};
7084             my $semicolon_count = $rtype_count->{';'} || $rtype_count->{'f'};
7085
7086             # We will define a list to be a container with one or more commas
7087             # and no semicolons. Note that we have included the semicolons
7088             # in a 'for' container in the simicolon count to keep c-style for
7089             # statements from being formatted as lists.
7090             if ( ( $comma_count || $fat_comma_count ) && !$semicolon_count ) {
7091                 $is_list = 1;
7092
7093                 # We need to do one more check for a perenthesized list:
7094                 # At an opening paren following certain tokens, such as 'if',
7095                 # we do not want to format the contents as a list.
7096                 if ( $rLL_new->[$K_opening]->[_TOKEN_] eq '(' ) {
7097                     my $Kp = $self->K_previous_code( $K_opening, $rLL_new );
7098                     if ( defined($Kp) ) {
7099                         my $type_p = $rLL_new->[$Kp]->[_TYPE_];
7100                         if ( $type_p eq 'k' ) {
7101                             my $token_p = $rLL_new->[$Kp]->[_TOKEN_];
7102                             $is_list = 0 if ( $is_nonlist_keyword{$token_p} );
7103                         }
7104                         else {
7105                             $is_list = 0 if ( $is_nonlist_type{$type_p} );
7106                         }
7107                     }
7108                 }
7109             }
7110         }
7111
7112         # Look for a block brace marked as uncertain.  If the tokenizer thinks
7113         # its guess is uncertain for the type of a brace following an unknown
7114         # bareword then it adds a trailing space as a signal.  We can fix the
7115         # type here now that we have had a better look at the contents of the
7116         # container. This fixes case b1085. To find the corresponding code in
7117         # Tokenizer.pm search for 'b1085' with an editor.
7118         my $block_type = $rblock_type_of_seqno->{$seqno};
7119         if ( $block_type && substr( $block_type, -1, 1 ) eq ' ' ) {
7120
7121             # Always remove the trailing space
7122             $block_type =~ s/\s+$//;
7123
7124             # Try to filter out parenless sub calls
7125             my ( $Knn1, $Knn2 );
7126             my ( $type_nn1, $type_nn2 ) = ( 'b', 'b' );
7127             $Knn1 = $self->K_next_nonblank( $K_opening, $rLL_new );
7128             $Knn2 = $self->K_next_nonblank( $Knn1, $rLL_new ) if defined($Knn1);
7129             $type_nn1 = $rLL_new->[$Knn1]->[_TYPE_] if ( defined($Knn1) );
7130             $type_nn2 = $rLL_new->[$Knn2]->[_TYPE_] if ( defined($Knn2) );
7131
7132             #   if ( $type_nn1 =~ /^[wU]$/ && $type_nn2 =~ /^[wiqQGCZ]$/ ) {
7133             if ( $wU{$type_nn1} && $wiq{$type_nn2} ) {
7134                 $is_list = 0;
7135             }
7136
7137             # Convert to a hash brace if it looks like it holds a list
7138             if ($is_list) {
7139
7140                 $block_type = "";
7141
7142                 $rLL_new->[$K_opening]->[_CI_LEVEL_] = 1;
7143                 $rLL_new->[$K_closing]->[_CI_LEVEL_] = 1;
7144             }
7145
7146             $rblock_type_of_seqno->{$seqno} = $block_type;
7147         }
7148
7149         # Handle a list container
7150         if ( $is_list && !$block_type ) {
7151             $ris_list_by_seqno->{$seqno} = $seqno;
7152             my $seqno_parent = $rparent_of_seqno->{$seqno};
7153             my $depth        = 0;
7154             while ( defined($seqno_parent) && $seqno_parent ne SEQ_ROOT ) {
7155                 $depth++;
7156
7157                 # for $rhas_list we need to save the minimum depth
7158                 if (  !$rhas_list->{$seqno_parent}
7159                     || $rhas_list->{$seqno_parent} > $depth )
7160                 {
7161                     $rhas_list->{$seqno_parent} = $depth;
7162                 }
7163
7164                 if ($line_diff) {
7165                     $rhas_broken_list->{$seqno_parent} = 1;
7166
7167                     # Patch1: We need to mark broken lists with non-terminal
7168                     # line-ending commas for the -bbx=2 parameter. This insures
7169                     # that the list will stay broken.  Otherwise the flag
7170                     # -bbx=2 can be unstable.  This fixes case b789 and b938.
7171
7172                     # Patch2: Updated to also require either one fat comma or
7173                     # one more line-ending comma.  Fixes cases b1069 b1070
7174                     # b1072 b1076.
7175                     if (
7176                         $rlec_count_by_seqno->{$seqno}
7177                         && (   $rlec_count_by_seqno->{$seqno} > 1
7178                             || $rtype_count_by_seqno->{$seqno}->{'=>'} )
7179                       )
7180                     {
7181                         $rhas_broken_list_with_lec->{$seqno_parent} = 1;
7182                     }
7183                 }
7184                 $seqno_parent = $rparent_of_seqno->{$seqno_parent};
7185             }
7186         }
7187
7188         # Handle code blocks ...
7189         # The -lp option needs to know if a container holds a code block
7190         elsif ( $block_type && $rOpts_line_up_parentheses ) {
7191             my $seqno_parent = $rparent_of_seqno->{$seqno};
7192             while ( defined($seqno_parent) && $seqno_parent ne SEQ_ROOT ) {
7193                 $rhas_code_block->{$seqno_parent}        = 1;
7194                 $rhas_broken_code_block->{$seqno_parent} = $line_diff;
7195                 $seqno_parent = $rparent_of_seqno->{$seqno_parent};
7196             }
7197         }
7198     }
7199
7200     # Find containers with ternaries, needed for -lp formatting.
7201     foreach my $seqno ( keys %{$K_opening_ternary} ) {
7202         my $seqno_parent = $rparent_of_seqno->{$seqno};
7203         while ( defined($seqno_parent) && $seqno_parent ne SEQ_ROOT ) {
7204             $rhas_ternary->{$seqno_parent} = 1;
7205             $seqno_parent = $rparent_of_seqno->{$seqno_parent};
7206         }
7207     }
7208
7209     # Turn off -lp for containers with here-docs with text within a container,
7210     # since they have their own fixed indentation.  Fixes case b1081.
7211     if ($rOpts_line_up_parentheses) {
7212         foreach my $seqno ( keys %K_first_here_doc_by_seqno ) {
7213             my $Kh      = $K_first_here_doc_by_seqno{$seqno};
7214             my $Kc      = $K_closing_container->{$seqno};
7215             my $line_Kh = $rLL_new->[$Kh]->[_LINE_INDEX_];
7216             my $line_Kc = $rLL_new->[$Kc]->[_LINE_INDEX_];
7217             next if ( $line_Kh == $line_Kc );
7218             $ris_excluded_lp_container->{$seqno} = 1;
7219         }
7220     }
7221
7222     # Set a flag to turn off -cab=3 in complex structures.  Otherwise,
7223     # instability can occur.  When it is overridden the behavior of the closest
7224     # match, -cab=2, will be used instead.  This fixes cases b1096 b1113.
7225     if ( $rOpts_comma_arrow_breakpoints == 3 ) {
7226         foreach my $seqno ( keys %{$K_opening_container} ) {
7227
7228             my $rtype_count = $rtype_count_by_seqno->{$seqno};
7229             next unless ( $rtype_count && $rtype_count->{'=>'} );
7230
7231             # override -cab=3 if this contains a sub-list
7232             if ( $rhas_list->{$seqno} ) {
7233                 $roverride_cab3->{$seqno} = 1;
7234             }
7235
7236             # or if this is a sub-list of its parent container
7237             else {
7238                 my $seqno_parent = $rparent_of_seqno->{$seqno};
7239                 if ( defined($seqno_parent)
7240                     && $ris_list_by_seqno->{$seqno_parent} )
7241                 {
7242                     $roverride_cab3->{$seqno} = 1;
7243                 }
7244             }
7245         }
7246     }
7247
7248     # Reset memory to be the new array
7249     $self->[_rLL_] = $rLL_new;
7250     my $Klimit;
7251     if ( @{$rLL_new} ) { $Klimit = @{$rLL_new} - 1 }
7252     $self->[_Klimit_] = $Klimit;
7253
7254     # During development, verify that the new array still looks okay.
7255     DEVEL_MODE && $self->check_token_array();
7256
7257     # reset the token limits of each line
7258     $self->resync_lines_and_tokens();
7259
7260     return;
7261 }
7262
7263 sub copy_token_as_type {
7264
7265     # This provides a quick way to create a new token by
7266     # slightly modifying an existing token.
7267     my ( $rold_token, $type, $token ) = @_;
7268     if ( $type eq 'b' ) {
7269         $token = " " unless defined($token);
7270     }
7271     elsif ( $type eq 'q' ) {
7272         $token = '' unless defined($token);
7273     }
7274     elsif ( $type eq '->' ) {
7275         $token = '->' unless defined($token);
7276     }
7277     elsif ( $type eq ';' ) {
7278         $token = ';' unless defined($token);
7279     }
7280     else {
7281
7282         # Unexpected type ... this sub will work as long as both $token and
7283         # $type are defined, but we should catch any unexpected types during
7284         # development.
7285         if (DEVEL_MODE) {
7286             Fault(<<EOM);
7287 sub 'copy_token_as_type' received token type '$type' but expects just one of: 'b' 'q' '->' or ';'
7288 EOM
7289         }
7290         else {
7291             # shouldn't happen
7292         }
7293     }
7294
7295     my @rnew_token = @{$rold_token};
7296     $rnew_token[_TYPE_]          = $type;
7297     $rnew_token[_TOKEN_]         = $token;
7298     $rnew_token[_TYPE_SEQUENCE_] = '';
7299     return \@rnew_token;
7300 }
7301
7302 sub Debug_dump_tokens {
7303
7304     # a debug routine, not normally used
7305     my ( $self, $msg ) = @_;
7306     my $rLL   = $self->[_rLL_];
7307     my $nvars = @{$rLL};
7308     print STDERR "$msg\n";
7309     print STDERR "ntokens=$nvars\n";
7310     print STDERR "K\t_TOKEN_\t_TYPE_\n";
7311     my $K = 0;
7312
7313     foreach my $item ( @{$rLL} ) {
7314         print STDERR "$K\t$item->[_TOKEN_]\t$item->[_TYPE_]\n";
7315         $K++;
7316     }
7317     return;
7318 }
7319
7320 sub K_next_code {
7321     my ( $self, $KK, $rLL ) = @_;
7322
7323     # return the index K of the next nonblank, non-comment token
7324     return unless ( defined($KK) && $KK >= 0 );
7325
7326     # use the standard array unless given otherwise
7327     $rLL = $self->[_rLL_] unless ( defined($rLL) );
7328     my $Num  = @{$rLL};
7329     my $Knnb = $KK + 1;
7330     while ( $Knnb < $Num ) {
7331         if ( !defined( $rLL->[$Knnb] ) ) {
7332
7333             # We seem to have encountered a gap in our array.
7334             # This shouldn't happen because sub write_line() pushed
7335             # items into the $rLL array.
7336             Fault("Undefined entry for k=$Knnb") if (DEVEL_MODE);
7337             return;
7338         }
7339         if (   $rLL->[$Knnb]->[_TYPE_] ne 'b'
7340             && $rLL->[$Knnb]->[_TYPE_] ne '#' )
7341         {
7342             return $Knnb;
7343         }
7344         $Knnb++;
7345     }
7346     return;
7347 }
7348
7349 sub K_next_nonblank {
7350     my ( $self, $KK, $rLL ) = @_;
7351
7352     # return the index K of the next nonblank token, or
7353     # return undef if none
7354     return unless ( defined($KK) && $KK >= 0 );
7355
7356     # The third arg allows this routine to be used on any array.  This is
7357     # useful in sub respace_tokens when we are copying tokens from an old $rLL
7358     # to a new $rLL array.  But usually the third arg will not be given and we
7359     # will just use the $rLL array in $self.
7360     $rLL = $self->[_rLL_] unless ( defined($rLL) );
7361     my $Num  = @{$rLL};
7362     my $Knnb = $KK + 1;
7363     return unless ( $Knnb < $Num );
7364     return $Knnb if ( $rLL->[$Knnb]->[_TYPE_] ne 'b' );
7365     return unless ( ++$Knnb < $Num );
7366     return $Knnb if ( $rLL->[$Knnb]->[_TYPE_] ne 'b' );
7367
7368     # Backup loop. Very unlikely to get here; it means we have neighboring
7369     # blanks in the token stream.
7370     $Knnb++;
7371     while ( $Knnb < $Num ) {
7372
7373         # Safety check, this fault shouldn't happen:  The $rLL array is the
7374         # main array of tokens, so all entries should be used.  It is
7375         # initialized in sub write_line, and then re-initialized by sub
7376         # $store_token() within sub respace_tokens.  Tokens are pushed on
7377         # so there shouldn't be any gaps.
7378         if ( !defined( $rLL->[$Knnb] ) ) {
7379             Fault("Undefined entry for k=$Knnb") if (DEVEL_MODE);
7380             return;
7381         }
7382         if ( $rLL->[$Knnb]->[_TYPE_] ne 'b' ) { return $Knnb }
7383         $Knnb++;
7384     }
7385     return;
7386 }
7387
7388 sub K_previous_code {
7389
7390     # return the index K of the previous nonblank, non-comment token
7391     # Call with $KK=undef to start search at the top of the array
7392     my ( $self, $KK, $rLL ) = @_;
7393
7394     # use the standard array unless given otherwise
7395     $rLL = $self->[_rLL_] unless ( defined($rLL) );
7396     my $Num = @{$rLL};
7397     if    ( !defined($KK) ) { $KK = $Num }
7398     elsif ( $KK > $Num ) {
7399
7400         # This fault can be caused by a programming error in which a bad $KK is
7401         # given.  The caller should make the first call with KK_new=undef to
7402         # avoid this error.
7403         Fault(
7404 "Program Bug: K_previous_nonblank_new called with K=$KK which exceeds $Num"
7405         ) if (DEVEL_MODE);
7406         return;
7407     }
7408     my $Kpnb = $KK - 1;
7409     while ( $Kpnb >= 0 ) {
7410         if (   $rLL->[$Kpnb]->[_TYPE_] ne 'b'
7411             && $rLL->[$Kpnb]->[_TYPE_] ne '#' )
7412         {
7413             return $Kpnb;
7414         }
7415         $Kpnb--;
7416     }
7417     return;
7418 }
7419
7420 sub K_previous_nonblank {
7421
7422     # return index of previous nonblank token before item K;
7423     # Call with $KK=undef to start search at the top of the array
7424     my ( $self, $KK, $rLL ) = @_;
7425
7426     # use the standard array unless given otherwise
7427     $rLL = $self->[_rLL_] unless ( defined($rLL) );
7428     my $Num = @{$rLL};
7429     if    ( !defined($KK) ) { $KK = $Num }
7430     elsif ( $KK > $Num ) {
7431
7432         # This fault can be caused by a programming error in which a bad $KK is
7433         # given.  The caller should make the first call with KK_new=undef to
7434         # avoid this error.
7435         Fault(
7436 "Program Bug: K_previous_nonblank_new called with K=$KK which exceeds $Num"
7437         ) if (DEVEL_MODE);
7438         return;
7439     }
7440     my $Kpnb = $KK - 1;
7441     return unless ( $Kpnb >= 0 );
7442     return $Kpnb if ( $rLL->[$Kpnb]->[_TYPE_] ne 'b' );
7443     return unless ( --$Kpnb >= 0 );
7444     return $Kpnb if ( $rLL->[$Kpnb]->[_TYPE_] ne 'b' );
7445
7446     # Backup loop. We should not get here unless some routine
7447     # slipped repeated blanks into the token stream.
7448     return unless ( --$Kpnb >= 0 );
7449     while ( $Kpnb >= 0 ) {
7450         if ( $rLL->[$Kpnb]->[_TYPE_] ne 'b' ) { return $Kpnb }
7451         $Kpnb--;
7452     }
7453     return;
7454 }
7455
7456 sub parent_seqno_by_K {
7457
7458     # Return the sequence number of the parent container of token K, if any.
7459
7460     my ( $self, $KK ) = @_;
7461     my $rLL = $self->[_rLL_];
7462
7463     # The task is to jump forward to the next container token
7464     # and use the sequence number of either it or its parent.
7465
7466     # For example, consider the following with seqno=5 of the '[' and ']'
7467     # being called with index K of the first token of each line:
7468
7469     #                                              # result
7470     #    push @tests,                              # -
7471     #      [                                       # -
7472     #        sub { 99 },   'do {&{%s} for 1,2}',   # 5
7473     #        '(&{})(&{})', undef,                  # 5
7474     #        [ 2, 2, 0 ],  0                       # 5
7475     #      ];                                      # -
7476
7477     # NOTE: The ending parent will be SEQ_ROOT for a balanced file.  For
7478     # unbalanced files, last sequence number will either be undefined or it may
7479     # be at a deeper level.  In either case we will just return SEQ_ROOT to
7480     # have a defined value and allow formatting to proceed.
7481     my $parent_seqno  = SEQ_ROOT;
7482     my $type_sequence = $rLL->[$KK]->[_TYPE_SEQUENCE_];
7483     if ($type_sequence) {
7484         $parent_seqno = $self->[_rparent_of_seqno_]->{$type_sequence};
7485     }
7486     else {
7487         my $Kt = $rLL->[$KK]->[_KNEXT_SEQ_ITEM_];
7488         if ( defined($Kt) ) {
7489             $type_sequence = $rLL->[$Kt]->[_TYPE_SEQUENCE_];
7490             my $type = $rLL->[$Kt]->[_TYPE_];
7491
7492             # if next container token is closing, it is the parent seqno
7493             if ( $is_closing_type{$type} ) {
7494                 $parent_seqno = $type_sequence;
7495             }
7496
7497             # otherwise we want its parent container
7498             else {
7499                 $parent_seqno = $self->[_rparent_of_seqno_]->{$type_sequence};
7500             }
7501         }
7502     }
7503     $parent_seqno = SEQ_ROOT unless ( defined($parent_seqno) );
7504     return $parent_seqno;
7505 }
7506
7507 sub is_in_block_by_i {
7508     my ( $self, $i ) = @_;
7509
7510     # returns true if
7511     #     token at i is contained in a BLOCK
7512     #     or is at root level
7513     #     or there is some kind of error (i.e. unbalanced file)
7514     # returns false otherwise
7515     return 1 if ( $i < 0 );    # shouldn't happen, bad call
7516     my $seqno = $parent_seqno_to_go[$i];
7517     return 1 if ( !$seqno || $seqno eq SEQ_ROOT );
7518     return 1 if ( $self->[_rblock_type_of_seqno_]->{$seqno} );
7519     return;
7520 }
7521
7522 sub is_in_list_by_i {
7523     my ( $self, $i ) = @_;
7524
7525     # returns true if token at i is contained in a LIST
7526     # returns false otherwise
7527     my $seqno = $parent_seqno_to_go[$i];
7528     return unless ( $seqno && $seqno ne SEQ_ROOT );
7529     if ( $self->[_ris_list_by_seqno_]->{$seqno} ) {
7530         return 1;
7531     }
7532     return;
7533 }
7534
7535 sub is_list_by_K {
7536
7537     # Return true if token K is in a list
7538     my ( $self, $KK ) = @_;
7539
7540     my $parent_seqno = $self->parent_seqno_by_K($KK);
7541     return unless defined($parent_seqno);
7542     return $self->[_ris_list_by_seqno_]->{$parent_seqno};
7543 }
7544
7545 sub is_list_by_seqno {
7546
7547     # Return true if the immediate contents of a container appears to be a
7548     # list.
7549     my ( $self, $seqno ) = @_;
7550     return unless defined($seqno);
7551     return $self->[_ris_list_by_seqno_]->{$seqno};
7552 }
7553
7554 sub resync_lines_and_tokens {
7555
7556     my $self   = shift;
7557     my $rLL    = $self->[_rLL_];
7558     my $Klimit = $self->[_Klimit_];
7559     my $rlines = $self->[_rlines_];
7560     my @Krange_code_without_comments;
7561     my @Klast_valign_code;
7562
7563     # Re-construct the arrays of tokens associated with the original input lines
7564     # since they have probably changed due to inserting and deleting blanks
7565     # and a few other tokens.
7566
7567     # This is the next token and its line index:
7568     my $Knext = 0;
7569     my $Kmax  = defined($Klimit) ? $Klimit : -1;
7570
7571     # Verify that old line indexes are in still order.  If this error occurs,
7572     # check locations where sub 'respace_tokens' creates new tokens (like
7573     # blank spaces).  It must have set a bad old line index.
7574     if ( DEVEL_MODE && defined($Klimit) ) {
7575         my $iline = $rLL->[0]->[_LINE_INDEX_];
7576         for ( my $KK = 1 ; $KK <= $Klimit ; $KK++ ) {
7577             my $iline_last = $iline;
7578             $iline = $rLL->[$KK]->[_LINE_INDEX_];
7579             if ( $iline < $iline_last ) {
7580                 my $KK_m    = $KK - 1;
7581                 my $token_m = $rLL->[$KK_m]->[_TOKEN_];
7582                 my $token   = $rLL->[$KK]->[_TOKEN_];
7583                 my $type_m  = $rLL->[$KK_m]->[_TYPE_];
7584                 my $type    = $rLL->[$KK]->[_TYPE_];
7585                 Fault(<<EOM);
7586 Line indexes out of order at index K=$KK:
7587 at KK-1 =$KK_m: old line=$iline_last, type='$type_m', token='$token_m'
7588 at KK   =$KK: old line=$iline, type='$type', token='$token',
7589 EOM
7590             }
7591         }
7592     }
7593
7594     my $iline = -1;
7595     foreach my $line_of_tokens ( @{$rlines} ) {
7596         $iline++;
7597         my $line_type = $line_of_tokens->{_line_type};
7598         if ( $line_type eq 'CODE' ) {
7599
7600             # Get the old number of tokens on this line
7601             my $rK_range_old = $line_of_tokens->{_rK_range};
7602             my ( $Kfirst_old, $Klast_old ) = @{$rK_range_old};
7603             my $Kdiff_old = 0;
7604             if ( defined($Kfirst_old) ) {
7605                 $Kdiff_old = $Klast_old - $Kfirst_old;
7606             }
7607
7608             # Find the range of NEW K indexes for the line:
7609             # $Kfirst = index of first token on line
7610             # $Klast  = index of last token on line
7611             my ( $Kfirst, $Klast );
7612
7613             my $Knext_beg = $Knext;    # this will be $Kfirst if we find tokens
7614
7615             # Optimization: Although the actual K indexes may be completely
7616             # changed after respacing, the number of tokens on any given line
7617             # will often be nearly unchanged.  So we will see if we can start
7618             # our search by guessing that the new line has the same number
7619             # of tokens as the old line.
7620             my $Knext_guess = $Knext + $Kdiff_old;
7621             if (   $Knext_guess > $Knext
7622                 && $Knext_guess < $Kmax
7623                 && $rLL->[$Knext_guess]->[_LINE_INDEX_] <= $iline )
7624             {
7625
7626                 # the guess is good, so we can start our search here
7627                 $Knext = $Knext_guess + 1;
7628             }
7629
7630             while ($Knext <= $Kmax
7631                 && $rLL->[$Knext]->[_LINE_INDEX_] <= $iline )
7632             {
7633                 $Knext++;
7634             }
7635
7636             if ( $Knext > $Knext_beg ) {
7637
7638                 $Klast = $Knext - 1;
7639
7640                 # Delete any terminal blank token
7641                 if ( $rLL->[$Klast]->[_TYPE_] eq 'b' ) { $Klast -= 1 }
7642
7643                 if ( $Klast < $Knext_beg ) {
7644                     $Klast = undef;
7645                 }
7646                 else {
7647
7648                     $Kfirst = $Knext_beg;
7649
7650                     # Save ranges of non-comment code. This will be used by
7651                     # sub keep_old_line_breaks.
7652                     if ( $rLL->[$Kfirst]->[_TYPE_] ne '#' ) {
7653                         push @Krange_code_without_comments, [ $Kfirst, $Klast ];
7654                     }
7655
7656                     # Only save ending K indexes of code types which are blank
7657                     # or 'VER'.  These will be used for a convergence check.
7658                     # See related code in sub 'convey_batch_to_vertical_aligner'
7659                     my $CODE_type = $line_of_tokens->{_code_type};
7660                     if (  !$CODE_type
7661                         || $CODE_type eq 'VER' )
7662                     {
7663                         push @Klast_valign_code, $Klast;
7664                     }
7665                 }
7666             }
7667
7668             # It is only safe to trim the actual line text if the input
7669             # line had a terminal blank token. Otherwise, we may be
7670             # in a quote.
7671             if ( $line_of_tokens->{_ended_in_blank_token} ) {
7672                 $line_of_tokens->{_line_text} =~ s/\s+$//;
7673             }
7674             $line_of_tokens->{_rK_range} = [ $Kfirst, $Klast ];
7675
7676             # Deleting semicolons can create new empty code lines
7677             # which should be marked as blank
7678             if ( !defined($Kfirst) ) {
7679                 my $CODE_type = $line_of_tokens->{_code_type};
7680                 if ( !$CODE_type ) {
7681                     $line_of_tokens->{_code_type} = 'BL';
7682                 }
7683             }
7684         }
7685     }
7686
7687     # There shouldn't be any nodes beyond the last one.  This routine is
7688     # relinking lines and tokens after the tokens have been respaced.  A fault
7689     # here indicates some kind of bug has been introduced into the above loops.
7690     # There is not good way to keep going; we better stop here.
7691     # FIXME: This will produce zero output. it would be best to find a way to
7692     # dump the input file.
7693     if ( $Knext <= $Kmax ) {
7694
7695         Fault("unexpected tokens at end of file when reconstructing lines");
7696     }
7697     $self->[_rKrange_code_without_comments_] = \@Krange_code_without_comments;
7698
7699     # Setup the convergence test in the FileWriter based on line-ending indexes
7700     my $file_writer_object = $self->[_file_writer_object_];
7701     $file_writer_object->setup_convergence_test( \@Klast_valign_code );
7702
7703     # Mark essential old breakpoints if combination -iob -lp is used.  These
7704     # two options do not work well together, but we can avoid turning -iob off
7705     # by ignoring -iob at certain essential line breaks.
7706     # Fixes cases b1021 b1023 b1034 b1048 b1049 b1050 b1056 b1058
7707     if ( $rOpts_ignore_old_breakpoints && $rOpts_line_up_parentheses ) {
7708         my %is_assignment_or_fat_comma = %is_assignment;
7709         $is_assignment_or_fat_comma{'=>'} = 1;
7710         my $ris_essential_old_breakpoint =
7711           $self->[_ris_essential_old_breakpoint_];
7712         my $iline = -1;
7713         my ( $Kfirst, $Klast );
7714         foreach my $line_of_tokens ( @{$rlines} ) {
7715             $iline++;
7716             my $line_type = $line_of_tokens->{_line_type};
7717             if ( $line_type ne 'CODE' ) {
7718                 ( $Kfirst, $Klast ) = ( undef, undef );
7719                 next;
7720             }
7721             my ( $Kfirst_prev, $Klast_prev ) = ( $Kfirst, $Klast );
7722             ( $Kfirst, $Klast ) = @{ $line_of_tokens->{_rK_range} };
7723
7724             next unless defined($Klast_prev);
7725             next unless defined($Kfirst);
7726             my $type_last  = $rLL->[$Klast_prev]->[_TOKEN_];
7727             my $type_first = $rLL->[$Kfirst]->[_TOKEN_];
7728             next
7729               unless ( $is_assignment_or_fat_comma{$type_last}
7730                 || $is_assignment_or_fat_comma{$type_first} );
7731             $ris_essential_old_breakpoint->{$Klast_prev} = 1;
7732         }
7733     }
7734     return;
7735 }
7736
7737 sub keep_old_line_breaks {
7738
7739     # Called once per file to find and mark any old line breaks which
7740     # should be kept.  We will be translating the input hashes into
7741     # token indexes.
7742
7743     # A flag is set as follows:
7744     # = 1 make a hard break (flush the current batch)
7745     #     best for something like leading commas (-kbb=',')
7746     # = 2 make a soft break (keep building current batch)
7747     #     best for something like leading ->
7748
7749     my ($self) = @_;
7750
7751     my $rLL = $self->[_rLL_];
7752     my $rKrange_code_without_comments =
7753       $self->[_rKrange_code_without_comments_];
7754     my $rbreak_before_Kfirst = $self->[_rbreak_before_Kfirst_];
7755     my $rbreak_after_Klast   = $self->[_rbreak_after_Klast_];
7756     my $rwant_container_open = $self->[_rwant_container_open_];
7757     my $K_opening_container  = $self->[_K_opening_container_];
7758     my $ris_broken_container = $self->[_ris_broken_container_];
7759     my $ris_list_by_seqno    = $self->[_ris_list_by_seqno_];
7760
7761     # This code moved here from sub break_lists to fix b1120
7762     if ( $rOpts->{'break-at-old-method-breakpoints'} ) {
7763         foreach my $item ( @{$rKrange_code_without_comments} ) {
7764             my ( $Kfirst, $Klast ) = @{$item};
7765             my $type  = $rLL->[$Kfirst]->[_TYPE_];
7766             my $token = $rLL->[$Kfirst]->[_TOKEN_];
7767
7768             # leading '->' use a value of 2 which causes a soft
7769             # break rather than a hard break
7770             if ( $type eq '->' ) {
7771                 $rbreak_before_Kfirst->{$Kfirst} = 2;
7772             }
7773
7774             # leading ')->' use a special flag to insure that both
7775             # opening and closing parens get opened
7776             # Fix for b1120: only for parens, not braces
7777             elsif ( $token eq ')' ) {
7778                 my $Kn = $self->K_next_nonblank($Kfirst);
7779                 next
7780                   unless ( defined($Kn)
7781                     && $Kn <= $Klast
7782                     && $rLL->[$Kn]->[_TYPE_] eq '->' );
7783                 my $seqno = $rLL->[$Kfirst]->[_TYPE_SEQUENCE_];
7784                 next unless ($seqno);
7785
7786                 # Note: in previous versions there was a fix here to avoid
7787                 # instability between conflicting -bom and -pvt or -pvtc flags.
7788                 # The fix skipped -bom for a small line difference.  But this
7789                 # was troublesome, and instead the fix has been moved to
7790                 # sub set_vertical_tightness_flags where priority is given to
7791                 # the -bom flag over -pvt and -pvtc flags.  Both opening and
7792                 # closing paren flags are involved because even though -bom only
7793                 # requests breaking before the closing paren, automated logic
7794                 # opens the opening paren when the closing paren opens.
7795                 # Relevant cases are b977, b1215, b1270, b1303
7796
7797                 $rwant_container_open->{$seqno} = 1;
7798             }
7799         }
7800     }
7801
7802     return unless ( %keep_break_before_type || %keep_break_after_type );
7803
7804     my $check_for_break = sub {
7805         my ( $KK, $rkeep_break_hash, $rbreak_hash ) = @_;
7806         my $seqno = $rLL->[$KK]->[_TYPE_SEQUENCE_];
7807
7808         # non-container tokens use the type as the key
7809         if ( !$seqno ) {
7810             my $type = $rLL->[$KK]->[_TYPE_];
7811             if ( $rkeep_break_hash->{$type} ) {
7812                 $rbreak_hash->{$KK} = 1;
7813             }
7814         }
7815
7816         # container tokens use the token as the key
7817         else {
7818             my $token = $rLL->[$KK]->[_TOKEN_];
7819             my $flag  = $rkeep_break_hash->{$token};
7820             if ($flag) {
7821
7822                 my $match = $flag eq '1' || $flag eq '*';
7823
7824                 # check for special matching codes
7825                 if ( !$match ) {
7826                     if ( $token eq '(' || $token eq ')' ) {
7827                         $match = $self->match_paren_flag( $KK, $flag );
7828                     }
7829                     elsif ( $token eq '{' || $token eq '}' ) {
7830
7831                         # These tentative codes 'b' and 'B' for brace types are
7832                         # placeholders for possible future brace types. They
7833                         # are not documented and may be changed.
7834                         my $block_type =
7835                           $self->[_rblock_type_of_seqno_]->{$seqno};
7836                         if    ( $flag eq 'b' ) { $match = $block_type }
7837                         elsif ( $flag eq 'B' ) { $match = !$block_type }
7838                         else {
7839                             # unknown code - no match
7840                         }
7841                     }
7842                 }
7843                 $rbreak_hash->{$KK} = 1 if ($match);
7844             }
7845         }
7846     };
7847
7848     foreach my $item ( @{$rKrange_code_without_comments} ) {
7849         my ( $Kfirst, $Klast ) = @{$item};
7850         $check_for_break->(
7851             $Kfirst, \%keep_break_before_type, $rbreak_before_Kfirst
7852         );
7853         $check_for_break->(
7854             $Klast, \%keep_break_after_type, $rbreak_after_Klast
7855         );
7856     }
7857     return;
7858 }
7859
7860 sub weld_containers {
7861
7862     # Called once per file to do any welding operations requested by --weld*
7863     # flags.
7864     my ($self) = @_;
7865
7866     # This count is used to eliminate needless calls for weld checks elsewere
7867     $total_weld_count = 0;
7868
7869     return if ( $rOpts->{'indent-only'} );
7870     return unless ($rOpts_add_newlines);
7871
7872     # Important: sub 'weld_cuddled_blocks' must be called before
7873     # sub 'weld_nested_containers'. This is because the cuddled option needs to
7874     # use the original _LEVEL_ values of containers, but the weld nested
7875     # containers changes _LEVEL_ of welded containers.
7876
7877     # Here is a good test case to be sure that both cuddling and welding
7878     # are working and not interfering with each other: <<snippets/ce_wn1.in>>
7879
7880     #   perltidy -wn -ce
7881
7882    # if ($BOLD_MATH) { (
7883    #     $labels, $comment,
7884    #     join( '', '<B>', &make_math( $mode, '', '', $_ ), '</B>' )
7885    # ) } else { (
7886    #     &process_math_in_latex( $mode, $math_style, $slevel, "\\mbox{$text}" ),
7887    #     $after
7888    # ) }
7889
7890     $self->weld_cuddled_blocks() if ( %{$rcuddled_block_types} );
7891
7892     if ( $rOpts->{'weld-nested-containers'} ) {
7893
7894         $self->weld_nested_containers();
7895
7896         $self->weld_nested_quotes();
7897     }
7898
7899     #-------------------------------------------------------------
7900     # All welding is done. Finish setting up weld data structures.
7901     #-------------------------------------------------------------
7902
7903     my $rLL                  = $self->[_rLL_];
7904     my $rK_weld_left         = $self->[_rK_weld_left_];
7905     my $rK_weld_right        = $self->[_rK_weld_right_];
7906     my $rweld_len_right_at_K = $self->[_rweld_len_right_at_K_];
7907
7908     my @K_multi_weld;
7909     my @keys = keys %{$rK_weld_right};
7910     $total_weld_count = @keys;
7911
7912     # First pass to process binary welds.
7913     # This loop is processed in unsorted order for efficiency.
7914     foreach my $Kstart (@keys) {
7915         my $Kend = $rK_weld_right->{$Kstart};
7916
7917         # An error here would be due to an incorrect initialization introduced
7918         # in one of the above weld routines, like sub weld_nested.
7919         if ( $Kend <= $Kstart ) {
7920             Fault("Bad weld link: Kend=$Kend <= Kstart=$Kstart\n")
7921               if (DEVEL_MODE);
7922             next;
7923         }
7924
7925         # Set weld values for all tokens this welded pair
7926         foreach ( $Kstart + 1 .. $Kend ) {
7927             $rK_weld_left->{$_} = $Kstart;
7928         }
7929         foreach my $Kx ( $Kstart .. $Kend - 1 ) {
7930             $rK_weld_right->{$Kx} = $Kend;
7931             $rweld_len_right_at_K->{$Kx} =
7932               $rLL->[$Kend]->[_CUMULATIVE_LENGTH_] -
7933               $rLL->[$Kx]->[_CUMULATIVE_LENGTH_];
7934         }
7935
7936         # Remember the leftmost index of welds which continue to the right
7937         if ( defined( $rK_weld_right->{$Kend} )
7938             && !defined( $rK_weld_left->{$Kstart} ) )
7939         {
7940             push @K_multi_weld, $Kstart;
7941         }
7942     }
7943
7944     # Second pass to process chains of welds (these are rare).
7945     # This has to be processed in sorted order.
7946     if (@K_multi_weld) {
7947         my $Kend = -1;
7948         foreach my $Kstart ( sort { $a <=> $b } @K_multi_weld ) {
7949
7950             # Skip any interior K which was originally missing a left link
7951             next if ( $Kstart <= $Kend );
7952
7953             # Find the end of this chain
7954             $Kend = $rK_weld_right->{$Kstart};
7955             my $Knext = $rK_weld_right->{$Kend};
7956             while ( defined($Knext) ) {
7957                 $Kend  = $Knext;
7958                 $Knext = $rK_weld_right->{$Kend};
7959             }
7960
7961             # Set weld values this chain
7962             foreach ( $Kstart + 1 .. $Kend ) {
7963                 $rK_weld_left->{$_} = $Kstart;
7964             }
7965             foreach my $Kx ( $Kstart .. $Kend - 1 ) {
7966                 $rK_weld_right->{$Kx} = $Kend;
7967                 $rweld_len_right_at_K->{$Kx} =
7968                   $rLL->[$Kend]->[_CUMULATIVE_LENGTH_] -
7969                   $rLL->[$Kx]->[_CUMULATIVE_LENGTH_];
7970             }
7971         }
7972     }
7973
7974     return;
7975 }
7976
7977 sub cumulative_length_before_K {
7978     my ( $self, $KK ) = @_;
7979     my $rLL = $self->[_rLL_];
7980     return ( $KK <= 0 ) ? 0 : $rLL->[ $KK - 1 ]->[_CUMULATIVE_LENGTH_];
7981 }
7982
7983 sub weld_cuddled_blocks {
7984     my ($self) = @_;
7985
7986     # Called once per file to handle cuddled formatting
7987
7988     my $rK_weld_left         = $self->[_rK_weld_left_];
7989     my $rK_weld_right        = $self->[_rK_weld_right_];
7990     my $rblock_type_of_seqno = $self->[_rblock_type_of_seqno_];
7991
7992     # This routine implements the -cb flag by finding the appropriate
7993     # closing and opening block braces and welding them together.
7994     return unless ( %{$rcuddled_block_types} );
7995
7996     my $rLL = $self->[_rLL_];
7997     return unless ( defined($rLL) && @{$rLL} );
7998     my $rbreak_container = $self->[_rbreak_container_];
7999
8000     my $K_opening_container = $self->[_K_opening_container_];
8001     my $K_closing_container = $self->[_K_closing_container_];
8002
8003     my $length_to_opening_seqno = sub {
8004         my ($seqno) = @_;
8005         my $KK      = $K_opening_container->{$seqno};
8006         my $lentot  = $KK <= 0 ? 0 : $rLL->[ $KK - 1 ]->[_CUMULATIVE_LENGTH_];
8007         return $lentot;
8008     };
8009     my $length_to_closing_seqno = sub {
8010         my ($seqno) = @_;
8011         my $KK      = $K_closing_container->{$seqno};
8012         my $lentot  = $KK <= 0 ? 0 : $rLL->[ $KK - 1 ]->[_CUMULATIVE_LENGTH_];
8013         return $lentot;
8014     };
8015
8016     my $is_broken_block = sub {
8017
8018         # a block is broken if the input line numbers of the braces differ
8019         # we can only cuddle between broken blocks
8020         my ($seqno) = @_;
8021         my $K_opening = $K_opening_container->{$seqno};
8022         return unless ( defined($K_opening) );
8023         my $K_closing = $K_closing_container->{$seqno};
8024         return unless ( defined($K_closing) );
8025         return $rbreak_container->{$seqno}
8026           || $rLL->[$K_closing]->[_LINE_INDEX_] !=
8027           $rLL->[$K_opening]->[_LINE_INDEX_];
8028     };
8029
8030     # A stack to remember open chains at all levels: This is a hash rather than
8031     # an array for safety because negative levels can occur in files with
8032     # errors.  This allows us to keep processing with negative levels.
8033     # $in_chain{$level} = [$chain_type, $type_sequence];
8034     my %in_chain;
8035     my $CBO = $rOpts->{'cuddled-break-option'};
8036
8037     # loop over structure items to find cuddled pairs
8038     my $level = 0;
8039     my $KNEXT = $self->[_K_first_seq_item_];
8040     while ( defined($KNEXT) ) {
8041         my $KK = $KNEXT;
8042         $KNEXT = $rLL->[$KNEXT]->[_KNEXT_SEQ_ITEM_];
8043         my $rtoken_vars   = $rLL->[$KK];
8044         my $type_sequence = $rtoken_vars->[_TYPE_SEQUENCE_];
8045         if ( !$type_sequence ) {
8046             next if ( $KK == 0 );    # first token in file may not be container
8047
8048             # A fault here implies that an error was made in the little loop at
8049             # the bottom of sub 'respace_tokens' which set the values of
8050             # _KNEXT_SEQ_ITEM_.  Or an error has been introduced in the
8051             # loop control lines above.
8052             Fault("sequence = $type_sequence not defined at K=$KK")
8053               if (DEVEL_MODE);
8054             next;
8055         }
8056
8057         # NOTE: we must use the original levels here. They can get changed
8058         # by sub 'weld_nested_containers', so this routine must be called
8059         # before sub 'weld_nested_containers'.
8060         my $last_level = $level;
8061         $level = $rtoken_vars->[_LEVEL_];
8062
8063         if    ( $level < $last_level ) { $in_chain{$last_level} = undef }
8064         elsif ( $level > $last_level ) { $in_chain{$level}      = undef }
8065
8066         # We are only looking at code blocks
8067         my $token = $rtoken_vars->[_TOKEN_];
8068         my $type  = $rtoken_vars->[_TYPE_];
8069         next unless ( $type eq $token );
8070
8071         if ( $token eq '{' ) {
8072
8073             my $block_type = $rblock_type_of_seqno->{$type_sequence};
8074             if ( !$block_type ) {
8075
8076                 # patch for unrecognized block types which may not be labeled
8077                 my $Kp = $self->K_previous_nonblank($KK);
8078                 while ( $Kp && $rLL->[$Kp]->[_TYPE_] eq '#' ) {
8079                     $Kp = $self->K_previous_nonblank($Kp);
8080                 }
8081                 next unless $Kp;
8082                 $block_type = $rLL->[$Kp]->[_TOKEN_];
8083             }
8084             if ( $in_chain{$level} ) {
8085
8086                 # we are in a chain and are at an opening block brace.
8087                 # See if we are welding this opening brace with the previous
8088                 # block brace.  Get their identification numbers:
8089                 my $closing_seqno = $in_chain{$level}->[1];
8090                 my $opening_seqno = $type_sequence;
8091
8092                 # The preceding block must be on multiple lines so that its
8093                 # closing brace will start a new line.
8094                 if ( !$is_broken_block->($closing_seqno) ) {
8095                     next unless ( $CBO == 2 );
8096                     $rbreak_container->{$closing_seqno} = 1;
8097                 }
8098
8099                 # we will let the trailing block be either broken or intact
8100                 ## && $is_broken_block->($opening_seqno);
8101
8102                 # We can weld the closing brace to its following word ..
8103                 my $Ko = $K_closing_container->{$closing_seqno};
8104                 my $Kon;
8105                 if ( defined($Ko) ) {
8106                     $Kon = $self->K_next_nonblank($Ko);
8107                 }
8108
8109                 # ..unless it is a comment
8110                 if ( defined($Kon) && $rLL->[$Kon]->[_TYPE_] ne '#' ) {
8111
8112                     # OK to weld these two tokens...
8113                     $rK_weld_right->{$Ko} = $Kon;
8114                     $rK_weld_left->{$Kon} = $Ko;
8115
8116                     # Set flag that we want to break the next container
8117                     # so that the cuddled line is balanced.
8118                     $rbreak_container->{$opening_seqno} = 1
8119                       if ($CBO);
8120                 }
8121
8122             }
8123             else {
8124
8125                 # We are not in a chain. Start a new chain if we see the
8126                 # starting block type.
8127                 if ( $rcuddled_block_types->{$block_type} ) {
8128                     $in_chain{$level} = [ $block_type, $type_sequence ];
8129                 }
8130                 else {
8131                     $block_type = '*';
8132                     $in_chain{$level} = [ $block_type, $type_sequence ];
8133                 }
8134             }
8135         }
8136         elsif ( $token eq '}' ) {
8137             if ( $in_chain{$level} ) {
8138
8139                 # We are in a chain at a closing brace.  See if this chain
8140                 # continues..
8141                 my $Knn = $self->K_next_code($KK);
8142                 next unless $Knn;
8143
8144                 my $chain_type          = $in_chain{$level}->[0];
8145                 my $next_nonblank_token = $rLL->[$Knn]->[_TOKEN_];
8146                 if (
8147                     $rcuddled_block_types->{$chain_type}->{$next_nonblank_token}
8148                   )
8149                 {
8150
8151                     # Note that we do not weld yet because we must wait until
8152                     # we we are sure that an opening brace for this follows.
8153                     $in_chain{$level}->[1] = $type_sequence;
8154                 }
8155                 else { $in_chain{$level} = undef }
8156             }
8157         }
8158     }
8159     return;
8160 }
8161
8162 sub find_nested_pairs {
8163     my $self = shift;
8164
8165     # This routine is called once per file to do preliminary work needed for
8166     # the --weld-nested option.  This information is also needed for adding
8167     # semicolons.
8168
8169     my $rLL = $self->[_rLL_];
8170     return unless ( defined($rLL) && @{$rLL} );
8171     my $Num = @{$rLL};
8172
8173     my $K_opening_container  = $self->[_K_opening_container_];
8174     my $K_closing_container  = $self->[_K_closing_container_];
8175     my $rblock_type_of_seqno = $self->[_rblock_type_of_seqno_];
8176
8177     # We define an array of pairs of nested containers
8178     my @nested_pairs;
8179
8180     # Names of calling routines can either be marked as 'i' or 'w',
8181     # and they may invoke a sub call with an '->'. We will consider
8182     # any consecutive string of such types as a single unit when making
8183     # weld decisions.  We also allow a leading !
8184     my $is_name_type = {
8185         'i'  => 1,
8186         'w'  => 1,
8187         'U'  => 1,
8188         '->' => 1,
8189         '!'  => 1,
8190     };
8191
8192     # Loop over all closing container tokens
8193     foreach my $inner_seqno ( keys %{$K_closing_container} ) {
8194         my $K_inner_closing = $K_closing_container->{$inner_seqno};
8195
8196         # See if it is immediately followed by another, outer closing token
8197         my $K_outer_closing = $K_inner_closing + 1;
8198         $K_outer_closing += 1
8199           if ( $K_outer_closing < $Num
8200             && $rLL->[$K_outer_closing]->[_TYPE_] eq 'b' );
8201
8202         next unless ( $K_outer_closing < $Num );
8203         my $outer_seqno = $rLL->[$K_outer_closing]->[_TYPE_SEQUENCE_];
8204         next unless ($outer_seqno);
8205         my $token_outer_closing = $rLL->[$K_outer_closing]->[_TOKEN_];
8206         next unless ( $is_closing_token{$token_outer_closing} );
8207
8208         # Now we have to check the opening tokens.
8209         my $K_outer_opening = $K_opening_container->{$outer_seqno};
8210         my $K_inner_opening = $K_opening_container->{$inner_seqno};
8211         next unless defined($K_outer_opening) && defined($K_inner_opening);
8212
8213         my $inner_blocktype = $rblock_type_of_seqno->{$inner_seqno};
8214         my $outer_blocktype = $rblock_type_of_seqno->{$outer_seqno};
8215
8216         # Verify that the inner opening token is the next container after the
8217         # outer opening token.
8218         my $K_io_check = $rLL->[$K_outer_opening]->[_KNEXT_SEQ_ITEM_];
8219         next unless defined($K_io_check);
8220         if ( $K_io_check != $K_inner_opening ) {
8221
8222             # The inner opening container does not immediately follow the outer
8223             # opening container, but we may still allow a weld if they are
8224             # separated by a sub signature.  For example, we may have something
8225             # like this, where $K_io_check may be at the first 'x' instead of
8226             # 'io'.  So we need to hop over the signature and see if we arrive
8227             # at 'io'.
8228
8229             #            oo               io
8230             #             |     x       x |
8231             #   $obj->then( sub ( $code ) {
8232             #       ...
8233             #       return $c->render(text => '', status => $code);
8234             #   } );
8235             #   | |
8236             #  ic oc
8237
8238             next if ( !$inner_blocktype || $inner_blocktype ne 'sub' );
8239             next if $rLL->[$K_io_check]->[_TOKEN_] ne '(';
8240             my $seqno_signature = $rLL->[$K_io_check]->[_TYPE_SEQUENCE_];
8241             next unless defined($seqno_signature);
8242             my $K_signature_closing = $K_closing_container->{$seqno_signature};
8243             next unless defined($K_signature_closing);
8244             my $K_test = $rLL->[$K_signature_closing]->[_KNEXT_SEQ_ITEM_];
8245             next
8246               unless ( defined($K_test) && $K_test == $K_inner_opening );
8247
8248             # OK, we have arrived at 'io' in the above diagram.  We should put
8249             # a limit on the length or complexity of the signature here.  There
8250             # is no perfect way to do this, one way is to put a limit on token
8251             # count.  For consistency with older versions, we should allow a
8252             # signature with a single variable to weld, but not with
8253             # multiple variables.  A single variable as in 'sub ($code) {' can
8254             # have a $Kdiff of 2 to 4, depending on spacing.
8255
8256             # But two variables like 'sub ($v1,$v2) {' can have a diff of 4 to
8257             # 7, depending on spacing. So to keep formatting consistent with
8258             # previous versions, we will also avoid welding if there is a comma
8259             # in the signature.
8260
8261             my $Kdiff = $K_signature_closing - $K_io_check;
8262             next if ( $Kdiff > 4 );
8263
8264             my $saw_comma;
8265             foreach my $KK ( $K_io_check + 1 .. $K_signature_closing - 1 ) {
8266                 if ( $rLL->[$KK]->[_TYPE_] eq ',' ) { $saw_comma = 1; last }
8267             }
8268             next if ($saw_comma);
8269         }
8270
8271         # Yes .. this is a possible nesting pair.
8272         # They can be separated by a small amount.
8273         my $K_diff = $K_inner_opening - $K_outer_opening;
8274
8275         # Count nonblank characters separating them.
8276         if ( $K_diff < 0 ) { next }    # Shouldn't happen
8277         my $Kn             = $K_outer_opening;
8278         my $nonblank_count = 0;
8279         my $type;
8280         my $is_name;
8281
8282         # Here is an example of a long identifier chain which counts as a
8283         # single nonblank here (this spans about 10 K indexes):
8284         #     if ( !Boucherot::SetOfConnections->new->handler->execute(
8285         #        ^--K_o_o                                             ^--K_i_o
8286         #       @array) )
8287         my $Kn_first = $K_outer_opening;
8288         my $Kn_last_nonblank;
8289         my $saw_comment;
8290         for (
8291             my $Kn = $K_outer_opening + 1 ;
8292             $Kn <= $K_inner_opening ;
8293             $Kn += 1
8294           )
8295         {
8296             next if ( $rLL->[$Kn]->[_TYPE_] eq 'b' );
8297             if ( !$nonblank_count )        { $Kn_first = $Kn }
8298             if ( $Kn eq $K_inner_opening ) { $nonblank_count++; last; }
8299             $Kn_last_nonblank = $Kn;
8300
8301             # skip chain of identifier tokens
8302             my $last_type    = $type;
8303             my $last_is_name = $is_name;
8304             $type = $rLL->[$Kn]->[_TYPE_];
8305             if ( $type eq '#' ) { $saw_comment = 1; last }
8306             $is_name = $is_name_type->{$type};
8307             next if ( $is_name && $last_is_name );
8308
8309             $nonblank_count++;
8310             last if ( $nonblank_count > 2 );
8311         }
8312
8313         # Do not weld across a comment .. fix for c058.
8314         next if ($saw_comment);
8315
8316         # Patch for b1104: do not weld to a paren preceded by sort/map/grep
8317         # because the special line break rules may cause a blinking state
8318         if (   defined($Kn_last_nonblank)
8319             && $rLL->[$K_inner_opening]->[_TOKEN_] eq '('
8320             && $rLL->[$Kn_last_nonblank]->[_TYPE_] eq 'k' )
8321         {
8322             my $token = $rLL->[$Kn_last_nonblank]->[_TOKEN_];
8323
8324             # Turn off welding at sort/map/grep (
8325             if ( $is_sort_map_grep{$token} ) { $nonblank_count = 10 }
8326         }
8327
8328         if (
8329
8330             # adjacent opening containers, like: do {{
8331             $nonblank_count == 1
8332
8333             # short item following opening paren, like:  fun( yyy (
8334             || (   $nonblank_count == 2
8335                 && $rLL->[$K_outer_opening]->[_TOKEN_] eq '(' )
8336
8337             # anonymous sub + prototype or sig:  )->then( sub ($code) {
8338             # ... but it seems best not to stack two structural blocks, like
8339             # this
8340             #    sub make_anon_with_my_sub { sub {
8341             # because it probably hides the structure a little too much.
8342             || (   $inner_blocktype
8343                 && $inner_blocktype eq 'sub'
8344                 && $rLL->[$Kn_first]->[_TOKEN_] eq 'sub'
8345                 && !$outer_blocktype )
8346           )
8347         {
8348             push @nested_pairs,
8349               [ $inner_seqno, $outer_seqno, $K_inner_closing ];
8350         }
8351         next;
8352     }
8353
8354     # The weld routine expects the pairs in order in the form
8355     #   [$seqno_inner, $seqno_outer]
8356     # And they must be in the same order as the inner closing tokens
8357     # (otherwise, welds of three or more adjacent tokens will not work).  The K
8358     # value of this inner closing token has temporarily been stored for
8359     # sorting.
8360     @nested_pairs =
8361
8362       # Drop the K index after sorting (it would cause trouble downstream)
8363       map { [ $_->[0], $_->[1] ] }
8364
8365       # Sort on the K values
8366       sort { $a->[2] <=> $b->[2] } @nested_pairs;
8367
8368     return \@nested_pairs;
8369 }
8370
8371 sub match_paren_flag {
8372
8373     # Decide if this paren is excluded by user request:
8374     #   undef matches no parens
8375     #   '*' matches all parens
8376     #   'k' matches only if the previous nonblank token is a perl builtin
8377     #       keyword (such as 'if', 'while'),
8378     #   'K' matches if 'k' does not, meaning if the previous token is not a
8379     #       keyword.
8380     #   'f' matches if the previous token is a function other than a keyword.
8381     #   'F' matches if 'f' does not.
8382     #   'w' matches if either 'k' or 'f' match.
8383     #   'W' matches if 'w' does not.
8384     my ( $self, $KK, $flag ) = @_;
8385
8386     return 0 unless ( defined($flag) );
8387     return 0 if $flag eq '0';
8388     return 1 if $flag eq '1';
8389     return 1 if $flag eq '*';
8390     return 0 unless ( defined($KK) );
8391
8392     my $rLL         = $self->[_rLL_];
8393     my $rtoken_vars = $rLL->[$KK];
8394     my $seqno       = $rtoken_vars->[_TYPE_SEQUENCE_];
8395     return 0 unless ($seqno);
8396     my $token     = $rtoken_vars->[_TOKEN_];
8397     my $K_opening = $KK;
8398     if ( !$is_opening_token{$token} ) {
8399         $K_opening = $self->[_K_opening_container_]->{$seqno};
8400     }
8401     return unless ( defined($K_opening) );
8402
8403     my ( $is_f, $is_k, $is_w );
8404     my $Kp = $self->K_previous_nonblank($K_opening);
8405     if ( defined($Kp) ) {
8406         my $type_p = $rLL->[$Kp]->[_TYPE_];
8407
8408         # keyword?
8409         $is_k = $type_p eq 'k';
8410
8411         # function call?
8412         $is_f = $self->[_ris_function_call_paren_]->{$seqno};
8413
8414         # either keyword or function call?
8415         $is_w = $is_k || $is_f;
8416     }
8417     my $match;
8418     if    ( $flag eq 'k' ) { $match = $is_k }
8419     elsif ( $flag eq 'K' ) { $match = !$is_k }
8420     elsif ( $flag eq 'f' ) { $match = $is_f }
8421     elsif ( $flag eq 'F' ) { $match = !$is_f }
8422     elsif ( $flag eq 'w' ) { $match = $is_w }
8423     elsif ( $flag eq 'W' ) { $match = !$is_w }
8424     return $match;
8425 }
8426
8427 sub is_excluded_weld {
8428
8429     # decide if this weld is excluded by user request
8430     my ( $self, $KK, $is_leading ) = @_;
8431     my $rLL         = $self->[_rLL_];
8432     my $rtoken_vars = $rLL->[$KK];
8433     my $token       = $rtoken_vars->[_TOKEN_];
8434     my $rflags      = $weld_nested_exclusion_rules{$token};
8435     return 0 unless ( defined($rflags) );
8436     my $flag = $is_leading ? $rflags->[0] : $rflags->[1];
8437     return 0 unless ( defined($flag) );
8438     return 1 if $flag eq '*';
8439     return $self->match_paren_flag( $KK, $flag );
8440 }
8441
8442 # hashes to simplify welding logic
8443 my %type_ok_after_bareword;
8444 my %is_ternary;
8445 my %has_tight_paren;
8446
8447 BEGIN {
8448
8449     # types needed for welding RULE 6
8450     my @q = qw# => -> { ( [ #;
8451     @type_ok_after_bareword{@q} = (1) x scalar(@q);
8452
8453     @q = qw( ? : );
8454     @is_ternary{@q} = (1) x scalar(@q);
8455
8456     # these types do not 'like' to be separated from a following paren
8457     @q = qw(w i q Q G C Z U);
8458     @{has_tight_paren}{@q} = (1) x scalar(@q);
8459 }
8460
8461 use constant DEBUG_WELD => 0;
8462
8463 sub setup_new_weld_measurements {
8464
8465     # Define quantities to check for excess line lengths when welded.
8466     # Called by sub 'weld_nested_containers' and sub 'weld_nested_quotes'
8467
8468     my ( $self, $Kouter_opening, $Kinner_opening ) = @_;
8469
8470     # Given indexes of outer and inner opening containers to be welded:
8471     #   $Kouter_opening, $Kinner_opening
8472
8473     # Returns these variables:
8474     #   $new_weld_ok = true (new weld ok) or false (do not start new weld)
8475     #   $starting_indent = starting indentation
8476     #   $starting_lentot = starting cumulative length
8477     #   $msg = diagnostic message for debugging
8478
8479     my $rLL    = $self->[_rLL_];
8480     my $rlines = $self->[_rlines_];
8481
8482     my $starting_level;
8483     my $starting_ci;
8484     my $starting_lentot;
8485     my $maximum_text_length;
8486     my $msg = "";
8487
8488     my $iline_oo = $rLL->[$Kouter_opening]->[_LINE_INDEX_];
8489     my $rK_range = $rlines->[$iline_oo]->{_rK_range};
8490     my ( $Kfirst, $Klast ) = @{$rK_range};
8491
8492     #-------------------------------------------------------------------------
8493     # We now define a reference index, '$Kref', from which to start measuring
8494     # This choice turns out to be critical for keeping welds stable during
8495     # iterations, so we go through a number of STEPS...
8496     #-------------------------------------------------------------------------
8497
8498     # STEP 1: Our starting guess is to use measure from the first token of the
8499     # current line.  This is usually a good guess.
8500     my $Kref = $Kfirst;
8501
8502     # STEP 2: See if we should go back a little farther
8503     my $Kprev = $self->K_previous_nonblank($Kfirst);
8504     if ( defined($Kprev) ) {
8505
8506         # Avoid measuring from between an opening paren and a previous token
8507         # which should stay close to it ... fixes b1185
8508         my $token_oo  = $rLL->[$Kouter_opening]->[_TOKEN_];
8509         my $type_prev = $rLL->[$Kprev]->[_TYPE_];
8510         if (   $Kouter_opening == $Kfirst
8511             && $token_oo eq '('
8512             && $has_tight_paren{$type_prev} )
8513         {
8514             $Kref = $Kprev;
8515         }
8516
8517         # Back up and count length from a token like '=' or '=>' if -lp
8518         # is used (this fixes b520)
8519         # ...or if a break is wanted before there
8520         elsif ($rOpts_line_up_parentheses
8521             || $want_break_before{$type_prev} )
8522         {
8523
8524             # If there are other sequence items between the start of this line
8525             # and the opening token in question, then do not include tokens on
8526             # the previous line in length calculations.  This check added to
8527             # fix case b1174 which had a '?' on the line
8528             my $no_previous_seq_item = $Kref == $Kouter_opening
8529               || $rLL->[$Kref]->[_KNEXT_SEQ_ITEM_] == $Kouter_opening;
8530
8531             if ( $no_previous_seq_item
8532                 && substr( $type_prev, 0, 1 ) eq '=' )
8533             {
8534                 $Kref = $Kprev;
8535
8536                 # Fix for b1144 and b1112: backup to the first nonblank
8537                 # character before the =>, or to the start of its line.
8538                 if ( $type_prev eq '=>' ) {
8539                     my $iline_prev = $rLL->[$Kprev]->[_LINE_INDEX_];
8540                     my $rK_range   = $rlines->[$iline_prev]->{_rK_range};
8541                     my ( $Kfirst, $Klast ) = @{$rK_range};
8542                     for ( my $KK = $Kref - 1 ; $KK >= $Kfirst ; $KK-- ) {
8543                         next if ( $rLL->[$KK]->[_TYPE_] eq 'b' );
8544                         $Kref = $KK;
8545                         last;
8546                     }
8547                 }
8548             }
8549         }
8550     }
8551
8552     # STEP 3: Now look ahead for a ternary and, if found, use it.
8553     # This fixes case b1182.
8554     # Also look for a ')' at the same level and, if found, use it.
8555     # This fixes case b1224.
8556     if ( $Kref < $Kouter_opening ) {
8557         my $Knext    = $rLL->[$Kref]->[_KNEXT_SEQ_ITEM_];
8558         my $level_oo = $rLL->[$Kouter_opening]->[_LEVEL_];
8559         while ( $Knext < $Kouter_opening ) {
8560             if ( $rLL->[$Knext]->[_LEVEL_] == $level_oo ) {
8561                 if (   $is_ternary{ $rLL->[$Knext]->[_TYPE_] }
8562                     || $rLL->[$Knext]->[_TOKEN_] eq ')' )
8563                 {
8564                     $Kref = $Knext;
8565                     last;
8566                 }
8567             }
8568             $Knext = $rLL->[$Knext]->[_KNEXT_SEQ_ITEM_];
8569         }
8570     }
8571
8572     # Define the starting measurements we will need
8573     $starting_lentot =
8574       $Kref <= 0 ? 0 : $rLL->[ $Kref - 1 ]->[_CUMULATIVE_LENGTH_];
8575     $starting_level = $rLL->[$Kref]->[_LEVEL_];
8576     $starting_ci    = $rLL->[$Kref]->[_CI_LEVEL_];
8577
8578     $maximum_text_length = $maximum_text_length_at_level[$starting_level] -
8579       $starting_ci * $rOpts_continuation_indentation;
8580
8581     # STEP 4: Switch to using the outer opening token as the reference
8582     # point if a line break before it would make a longer line.
8583     # Fixes case b1055 and is also an alternate fix for b1065.
8584     my $starting_level_oo = $rLL->[$Kouter_opening]->[_LEVEL_];
8585     if ( $Kref < $Kouter_opening ) {
8586         my $starting_ci_oo = $rLL->[$Kouter_opening]->[_CI_LEVEL_];
8587         my $lentot_oo = $rLL->[ $Kouter_opening - 1 ]->[_CUMULATIVE_LENGTH_];
8588         my $maximum_text_length_oo =
8589           $maximum_text_length_at_level[$starting_level_oo] -
8590           $starting_ci_oo * $rOpts_continuation_indentation;
8591
8592         # The excess length to any cumulative length K = lenK is either
8593         #     $excess = $lenk - ($lentot    + $maximum_text_length),     or
8594         #     $excess = $lenk - ($lentot_oo + $maximum_text_length_oo),
8595         # so the worst case (maximum excess) corresponds to the configuration
8596         # with minimum value of the sum: $lentot + $maximum_text_length
8597         if ( $lentot_oo + $maximum_text_length_oo <
8598             $starting_lentot + $maximum_text_length )
8599         {
8600             $Kref                = $Kouter_opening;
8601             $starting_level      = $starting_level_oo;
8602             $starting_ci         = $starting_ci_oo;
8603             $starting_lentot     = $lentot_oo;
8604             $maximum_text_length = $maximum_text_length_oo;
8605         }
8606     }
8607
8608     my $new_weld_ok = 1;
8609
8610     # STEP 5, fix b1020: Avoid problem areas with the -wn -lp combination.  The
8611     # combination -wn -lp -dws -naws does not work well and can cause blinkers.
8612     # It will probably only occur in stress testing.  For this situation we
8613     # will only start a new weld if we start at a 'good' location.
8614     # - Added 'if' to fix case b1032.
8615     # - Require blank before certain previous characters to fix b1111.
8616     # - Add ';' to fix case b1139
8617     # - Convert from '$ok_to_weld' to '$new_weld_ok' to fix b1162.
8618     # - relaxed constraints for b1227
8619     if (   $starting_ci
8620         && $rOpts_line_up_parentheses
8621         && $rOpts_delete_old_whitespace
8622         && !$rOpts_add_whitespace
8623         && defined($Kprev) )
8624     {
8625         my $type_first  = $rLL->[$Kfirst]->[_TYPE_];
8626         my $token_first = $rLL->[$Kfirst]->[_TOKEN_];
8627         my $type_prev   = $rLL->[$Kprev]->[_TYPE_];
8628         my $type_pp     = 'b';
8629         if ( $Kprev >= 0 ) { $type_pp = $rLL->[ $Kprev - 1 ]->[_TYPE_] }
8630         unless (
8631                $type_prev =~ /^[\,\.\;]/
8632             || $type_prev =~ /^[=\{\[\(\L]/
8633             && ( $type_pp eq 'b' || $type_pp eq '}' || $type_first eq 'k' )
8634             || $type_first =~ /^[=\,\.\;\{\[\(\L]/
8635             || $type_first eq '||'
8636             || (
8637                 $type_first eq 'k'
8638                 && (   $token_first eq 'if'
8639                     || $token_first eq 'or' )
8640             )
8641           )
8642         {
8643             $msg =
8644 "Skipping weld: poor break with -lp and ci at type_first='$type_first' type_prev='$type_prev' type_pp=$type_pp\n";
8645             $new_weld_ok = 0;
8646         }
8647     }
8648     return ( $new_weld_ok, $maximum_text_length, $starting_lentot, $msg );
8649 }
8650
8651 sub excess_line_length_for_Krange {
8652     my ( $self, $Kfirst, $Klast ) = @_;
8653
8654     # returns $excess_length =
8655     #   by how many characters a line composed of tokens $Kfirst .. $Klast will
8656     #   exceed the allowed line length
8657
8658     my $rLL = $self->[_rLL_];
8659     my $length_before_Kfirst =
8660       $Kfirst <= 0
8661       ? 0
8662       : $rLL->[ $Kfirst - 1 ]->[_CUMULATIVE_LENGTH_];
8663
8664     # backup before a side comment if necessary
8665     my $Kend = $Klast;
8666     if (   $rOpts_ignore_side_comment_lengths
8667         && $rLL->[$Klast]->[_TYPE_] eq '#' )
8668     {
8669         my $Kprev = $self->K_previous_nonblank($Klast);
8670         if ( defined($Kprev) && $Kprev >= $Kfirst ) { $Kend = $Kprev }
8671     }
8672
8673     # get the length of the text
8674     my $length = $rLL->[$Kend]->[_CUMULATIVE_LENGTH_] - $length_before_Kfirst;
8675
8676     # get the size of the text window
8677     my $level           = $rLL->[$Kfirst]->[_LEVEL_];
8678     my $ci_level        = $rLL->[$Kfirst]->[_CI_LEVEL_];
8679     my $max_text_length = $maximum_text_length_at_level[$level] -
8680       $ci_level * $rOpts_continuation_indentation;
8681
8682     my $excess_length = $length - $max_text_length;
8683
8684     DEBUG_WELD
8685       && print
8686 "Kfirst=$Kfirst, Klast=$Klast, Kend=$Kend, level=$level, ci=$ci_level, max_text_length=$max_text_length, length=$length\n";
8687     return ($excess_length);
8688 }
8689
8690 sub weld_nested_containers {
8691     my ($self) = @_;
8692
8693     # Called once per file for option '--weld-nested-containers'
8694
8695     my $rK_weld_left  = $self->[_rK_weld_left_];
8696     my $rK_weld_right = $self->[_rK_weld_right_];
8697
8698     # This routine implements the -wn flag by "welding together"
8699     # the nested closing and opening tokens which were previously
8700     # identified by sub 'find_nested_pairs'.  "welding" simply
8701     # involves setting certain hash values which will be checked
8702     # later during formatting.
8703
8704     my $rLL                       = $self->[_rLL_];
8705     my $rlines                    = $self->[_rlines_];
8706     my $K_opening_container       = $self->[_K_opening_container_];
8707     my $K_closing_container       = $self->[_K_closing_container_];
8708     my $rblock_type_of_seqno      = $self->[_rblock_type_of_seqno_];
8709     my $ris_excluded_lp_container = $self->[_ris_excluded_lp_container_];
8710     my $ris_asub_block            = $self->[_ris_asub_block_];
8711     my $rOpts_asbl = $rOpts->{'opening-anonymous-sub-brace-on-new-line'};
8712
8713     # Find nested pairs of container tokens for any welding.
8714     my $rnested_pairs = $self->find_nested_pairs();
8715
8716     # Return unless there are nested pairs to weld
8717     return unless defined($rnested_pairs) && @{$rnested_pairs};
8718
8719     my $rOpts_break_at_old_method_breakpoints =
8720       $rOpts->{'break-at-old-method-breakpoints'};
8721
8722     # This array will hold the sequence numbers of the tokens to be welded.
8723     my @welds;
8724
8725     # Variables needed for estimating line lengths
8726     my $maximum_text_length;    # maximum spaces available for text
8727     my $starting_lentot;        # cumulative text to start of current line
8728
8729     my $iline_outer_opening   = -1;
8730     my $weld_count_this_start = 0;
8731
8732     # OLD: $single_line_tol added to fix cases b1180 b1181
8733     #       = $rOpts_continuation_indentation > $rOpts_indent_columns ? 1 : 0;
8734     # NEW: $single_line_tol=0;  fixes b1212 and b1180-1181 work now
8735     my $single_line_tol = 0;
8736
8737     my $multiline_tol = $single_line_tol + 1 +
8738       max( $rOpts_indent_columns, $rOpts_continuation_indentation );
8739
8740     # Define a welding cutoff level: do not start a weld if the inside
8741     # container level equals or exceeds this level.
8742
8743     # We use the minimum of two criteria, either of which may be more
8744     # restrictive.  The 'alpha' value is more restrictive in (b1206, b1252) and
8745     # the 'beta' value is more restrictive in other cases (b1243).
8746
8747     my $weld_cutoff_level = min( $stress_level_alpha, $stress_level_beta + 3 );
8748
8749     # The vertical tightness flags can throw off line length calculations.
8750     # This patch was added to fix instability issue b1284.
8751     # It works to always use a tol of 1 for 1 line block length tests, but
8752     # this restricted value keeps test case wn6.wn working as before.
8753     # It may be necessary to include '[' and '{' here in the future.
8754     my $one_line_tol = $opening_vertical_tightness{'('} ? 1 : 0;
8755
8756     my $length_to_opening_seqno = sub {
8757         my ($seqno) = @_;
8758         my $KK      = $K_opening_container->{$seqno};
8759         my $lentot  = defined($KK)
8760           && $KK > 0 ? $rLL->[ $KK - 1 ]->[_CUMULATIVE_LENGTH_] : 0;
8761         return $lentot;
8762     };
8763
8764     my $length_to_closing_seqno = sub {
8765         my ($seqno) = @_;
8766         my $KK      = $K_closing_container->{$seqno};
8767         my $lentot  = defined($KK)
8768           && $KK > 0 ? $rLL->[ $KK - 1 ]->[_CUMULATIVE_LENGTH_] : 0;
8769         return $lentot;
8770     };
8771
8772     # Abbreviations:
8773     #  _oo=outer opening, i.e. first of  { {
8774     #  _io=inner opening, i.e. second of { {
8775     #  _oc=outer closing, i.e. second of } {
8776     #  _ic=inner closing, i.e. first of  } }
8777
8778     my $previous_pair;
8779
8780     # Main loop over nested pairs...
8781     # We are working from outermost to innermost pairs so that
8782     # level changes will be complete when we arrive at the inner pairs.
8783     while ( my $item = pop( @{$rnested_pairs} ) ) {
8784         my ( $inner_seqno, $outer_seqno ) = @{$item};
8785
8786         my $Kouter_opening = $K_opening_container->{$outer_seqno};
8787         my $Kinner_opening = $K_opening_container->{$inner_seqno};
8788         my $Kouter_closing = $K_closing_container->{$outer_seqno};
8789         my $Kinner_closing = $K_closing_container->{$inner_seqno};
8790
8791         # RULE: do not weld if inner container has <= 3 tokens unless the next
8792         # token is a heredoc (so we know there will be multiple lines)
8793         if ( $Kinner_closing - $Kinner_opening <= 4 ) {
8794             my $Knext_nonblank = $self->K_next_nonblank($Kinner_opening);
8795             next unless defined($Knext_nonblank);
8796             my $type = $rLL->[$Knext_nonblank]->[_TYPE_];
8797             next unless ( $type eq 'h' );
8798         }
8799
8800         my $outer_opening = $rLL->[$Kouter_opening];
8801         my $inner_opening = $rLL->[$Kinner_opening];
8802         my $outer_closing = $rLL->[$Kouter_closing];
8803         my $inner_closing = $rLL->[$Kinner_closing];
8804
8805         # RULE: do not weld to a hash brace.  The reason is that it has a very
8806         # strong bond strength to the next token, so a line break after it
8807         # may not work.  Previously we allowed welding to something like @{
8808         # but that caused blinking states (cases b751, b779).
8809         if ( $inner_opening->[_TYPE_] eq 'L' ) {
8810             next;
8811         }
8812
8813         # RULE: do not weld to a square bracket which does not contain commas
8814         if ( $inner_opening->[_TYPE_] eq '[' ) {
8815             my $rtype_count = $self->[_rtype_count_by_seqno_]->{$inner_seqno};
8816             next unless ($rtype_count);
8817             my $comma_count = $rtype_count->{','};
8818             next unless ($comma_count);
8819
8820             # Do not weld if there is text before a '[' such as here:
8821             #      curr_opt ( @beg [2,5] )
8822             # It will not break into the desired sandwich structure.
8823             # This fixes case b109, 110.
8824             my $Kdiff = $Kinner_opening - $Kouter_opening;
8825             next if ( $Kdiff > 2 );
8826             next
8827               if ( $Kdiff == 2
8828                 && $rLL->[ $Kouter_opening + 1 ]->[_TYPE_] ne 'b' );
8829
8830         }
8831
8832         # RULE: Avoid welding under stress.  The idea is that we need to have a
8833         # little space* within a welded container to avoid instability.  Note
8834         # that after each weld the level values are reduced, so long multiple
8835         # welds can still be made.  This rule will seldom be a limiting factor
8836         # in actual working code. Fixes b1206, b1243.
8837         my $inner_level = $inner_opening->[_LEVEL_];
8838         if ( $inner_level >= $weld_cutoff_level ) { next }
8839
8840         # Set flag saying if this pair starts a new weld
8841         my $starting_new_weld = !( @welds && $outer_seqno == $welds[-1]->[0] );
8842
8843         # Set flag saying if this pair is adjacent to the previous nesting pair
8844         # (even if previous pair was rejected as a weld)
8845         my $touch_previous_pair =
8846           defined($previous_pair) && $outer_seqno == $previous_pair->[0];
8847         $previous_pair = $item;
8848
8849         my $do_not_weld_rule = 0;
8850         my $Msg              = "";
8851         my $is_one_line_weld;
8852
8853         my $iline_oo = $outer_opening->[_LINE_INDEX_];
8854         my $iline_io = $inner_opening->[_LINE_INDEX_];
8855         my $iline_ic = $inner_closing->[_LINE_INDEX_];
8856         my $iline_oc = $outer_closing->[_LINE_INDEX_];
8857         my $token_oo = $outer_opening->[_TOKEN_];
8858         my $token_io = $inner_opening->[_TOKEN_];
8859
8860         my $is_multiline_weld =
8861              $iline_oo == $iline_io
8862           && $iline_ic == $iline_oc
8863           && $iline_io != $iline_ic;
8864
8865         if (DEBUG_WELD) {
8866             my $len_oo = $rLL->[$Kouter_opening]->[_CUMULATIVE_LENGTH_];
8867             my $len_io = $rLL->[$Kinner_opening]->[_CUMULATIVE_LENGTH_];
8868             $Msg .= <<EOM;
8869 Pair seqo=$outer_seqno seqi=$inner_seqno  lines: loo=$iline_oo lio=$iline_io lic=$iline_ic loc=$iline_oc
8870 Koo=$Kouter_opening Kio=$Kinner_opening Kic=$Kinner_closing Koc=$Kouter_closing lenoo=$len_oo lenio=$len_io
8871 tokens '$token_oo' .. '$token_io'
8872 EOM
8873         }
8874
8875         # DO-NOT-WELD RULE 0:
8876         # Avoid a new paren-paren weld if inner parens are 'sheared' (separated
8877         # by one line).  This can produce instabilities (fixes b1250 b1251
8878         # 1256).
8879         if (  !$is_multiline_weld
8880             && $iline_ic == $iline_io + 1
8881             && $token_oo eq '('
8882             && $token_io eq '(' )
8883         {
8884             if (DEBUG_WELD) {
8885                 $Msg .= "RULE 0: Not welding due to sheared inner parens\n";
8886                 print $Msg;
8887             }
8888             next;
8889         }
8890
8891         # If this pair is not adjacent to the previous pair (skipped or not),
8892         # then measure lengths from the start of line of oo.
8893         if (
8894             !$touch_previous_pair
8895
8896             # Also do this if restarting at a new line; fixes case b965, s001
8897             || ( !$weld_count_this_start && $iline_oo > $iline_outer_opening )
8898           )
8899         {
8900
8901             # Remember the line we are using as a reference
8902             $iline_outer_opening   = $iline_oo;
8903             $weld_count_this_start = 0;
8904
8905             ( my $new_weld_ok, $maximum_text_length, $starting_lentot, my $msg )
8906               = $self->setup_new_weld_measurements( $Kouter_opening,
8907                 $Kinner_opening );
8908
8909             if (
8910                 !$new_weld_ok
8911                 && (   $iline_oo != $iline_io
8912                     || $iline_ic != $iline_oc )
8913               )
8914             {
8915                 if (DEBUG_WELD) { print $msg}
8916                 next;
8917             }
8918
8919             my $rK_range = $rlines->[$iline_oo]->{_rK_range};
8920             my ( $Kfirst, $Klast ) = @{$rK_range};
8921
8922             # An existing one-line weld is a line in which
8923             # (1) the containers are all on one line, and
8924             # (2) the line does not exceed the allowable length
8925             if ( $iline_oo == $iline_oc ) {
8926
8927                 # All the tokens are on one line, now check their length.
8928                 # Start with the full line index range. We will reduce this
8929                 # in the coding below in some cases.
8930                 my $Kstart = $Kfirst;
8931                 my $Kstop  = $Klast;
8932
8933                 # Note that the following minimal choice for measuring will
8934                 # work and will not cause any instabilities because it is
8935                 # invariant:
8936
8937                 ##  my $Kstart = $Kouter_opening;
8938                 ##  my $Kstop  = $Kouter_closing;
8939
8940                 # But that can lead to some undesirable welds.  So a little
8941                 # more complicated method has been developed.
8942
8943                 # We are trying to avoid creating bad two-line welds when we are
8944                 # working on long, previously unwelded input text, such as
8945
8946                 # INPUT (example of a long input line weld candidate):
8947                 ## $mutation->transpos( $self->RNA->position($mutation->label, $atg_label));
8948
8949                 #  GOOD two-line break: (not welded; result marked too long):
8950                 ## $mutation->transpos(
8951                 ##                 $self->RNA->position($mutation->label, $atg_label));
8952
8953                 #  BAD two-line break: (welded; result if we weld):
8954                 ## $mutation->transpos($self->RNA->position(
8955                 ##                                      $mutation->label, $atg_label));
8956
8957                 # We can only get an approximate estimate of the final length,
8958                 # since the line breaks may change, and for -lp mode because
8959                 # even the indentation is not yet known.
8960
8961                 my $level_first = $rLL->[$Kfirst]->[_LEVEL_];
8962                 my $level_last  = $rLL->[$Klast]->[_LEVEL_];
8963                 my $level_oo    = $rLL->[$Kouter_opening]->[_LEVEL_];
8964                 my $level_oc    = $rLL->[$Kouter_closing]->[_LEVEL_];
8965
8966                 # - measure to the end of the original line if balanced
8967                 # - measure to the closing container if unbalanced (fixes b1230)
8968                 #if ( $level_first != $level_last ) { $Kstop = $Kouter_closing }
8969                 if ( $level_oc > $level_last ) { $Kstop = $Kouter_closing }
8970
8971                 # - measure from the start of the original line if balanced
8972                 # - measure from the most previous token with same level
8973                 #   if unbalanced (b1232)
8974                 if ( $Kouter_opening > $Kfirst && $level_oo > $level_first ) {
8975                     $Kstart = $Kouter_opening;
8976                     for (
8977                         my $KK = $Kouter_opening - 1 ;
8978                         $KK > $Kfirst ;
8979                         $KK -= 1
8980                       )
8981                     {
8982                         next if ( $rLL->[$KK]->[_TYPE_] eq 'b' );
8983                         last if ( $rLL->[$KK]->[_LEVEL_] < $level_oo );
8984                         $Kstart = $KK;
8985                     }
8986                 }
8987
8988                 my $excess =
8989                   $self->excess_line_length_for_Krange( $Kstart, $Kstop );
8990
8991                 # Coding simplified here for case b1219.
8992                 # Increased tol from 0 to 1 when pvt>0 to fix b1284.
8993                 $is_one_line_weld = $excess <= $one_line_tol;
8994             }
8995
8996             # DO-NOT-WELD RULE 1:
8997             # Do not weld something that looks like the start of a two-line
8998             # function call, like this: <<snippets/wn6.in>>
8999             #    $trans->add_transformation(
9000             #        PDL::Graphics::TriD::Scale->new( $sx, $sy, $sz ) );
9001             # We will look for a semicolon after the closing paren.
9002
9003             # We want to weld something complex, like this though
9004             # my $compass = uc( opposite_direction( line_to_canvas_direction(
9005             #     @{ $coords[0] }, @{ $coords[1] } ) ) );
9006             # Otherwise we will get a 'blinker'. For example, the following
9007             # would become a blinker without this rule:
9008             #        $Self->_Add( $SortOrderDisplay{ $Field
9009             #              ->GenerateFieldForSelectSQL() } );
9010             # But it is okay to weld a two-line statement if it looks like
9011             # it was already welded, meaning that the two opening containers are
9012             # on a different line that the two closing containers.  This is
9013             # necessary to prevent blinking of something like this with
9014             # perltidy -wn -pbp (starting indentation two levels deep):
9015
9016             # $top_label->set_text( gettext(
9017             #    "Unable to create personal directory - check permissions.") );
9018             if (   $iline_oc == $iline_oo + 1
9019                 && $iline_io == $iline_ic
9020                 && $token_oo eq '(' )
9021             {
9022
9023                 # Look for following semicolon...
9024                 my $Knext_nonblank = $self->K_next_nonblank($Kouter_closing);
9025                 my $next_nonblank_type =
9026                   defined($Knext_nonblank)
9027                   ? $rLL->[$Knext_nonblank]->[_TYPE_]
9028                   : 'b';
9029                 if ( $next_nonblank_type eq ';' ) {
9030
9031                     # Then do not weld if no other containers between inner
9032                     # opening and closing.
9033                     my $Knext_seq_item = $inner_opening->[_KNEXT_SEQ_ITEM_];
9034                     if ( $Knext_seq_item == $Kinner_closing ) {
9035                         $do_not_weld_rule = 1;
9036                     }
9037                 }
9038             }
9039         } ## end starting new weld sequence
9040
9041         else {
9042
9043             # set the 1-line flag if continuing a weld sequence; fixes b1239
9044             $is_one_line_weld = ( $iline_oo == $iline_oc );
9045         }
9046
9047         # DO-NOT-WELD RULE 2:
9048         # Do not weld an opening paren to an inner one line brace block
9049         # We will just use old line numbers for this test and require
9050         # iterations if necessary for convergence
9051
9052         # For example, otherwise we could cause the opening paren
9053         # in the following example to separate from the caller name
9054         # as here:
9055
9056         #    $_[0]->code_handler
9057         #      ( sub { $more .= $_[1] . ":" . $_[0] . "\n" } );
9058
9059         # Here is another example where we do not want to weld:
9060         #  $wrapped->add_around_modifier(
9061         #    sub { push @tracelog => 'around 1'; $_[0]->(); } );
9062
9063         # If the one line sub block gets broken due to length or by the
9064         # user, then we can weld.  The result will then be:
9065         # $wrapped->add_around_modifier( sub {
9066         #    push @tracelog => 'around 1';
9067         #    $_[0]->();
9068         # } );
9069
9070         # Updated to fix cases b1082 b1102 b1106 b1115:
9071         # Also, do not weld to an intact inner block if the outer opening token
9072         # is on a different line. For example, this prevents oscillation
9073         # between these two states in case b1106:
9074
9075         #    return map{
9076         #        ($_,[$self->$_(@_[1..$#_])])
9077         #    }@every;
9078
9079         #    return map { (
9080         #        $_, [ $self->$_( @_[ 1 .. $#_ ] ) ]
9081         #    ) } @every;
9082
9083         # The effect of this change on typical code is very minimal.  Sometimes
9084         # it may take a second iteration to converge, but this gives protection
9085         # against blinking.
9086         if (   !$do_not_weld_rule
9087             && !$is_one_line_weld
9088             && $iline_ic == $iline_io )
9089         {
9090             $do_not_weld_rule = 2
9091               if ( $token_oo eq '(' || $iline_oo != $iline_io );
9092         }
9093
9094         # DO-NOT-WELD RULE 2A:
9095         # Do not weld an opening asub brace in -lp mode if -asbl is set. This
9096         # helps avoid instabilities in one-line block formation, and fixes
9097         # b1241.  Previously, the '$is_one_line_weld' flag was tested here
9098         # instead of -asbl, and this fixed most cases. But it turns out that
9099         # the real problem was the -asbl flag, and switching to this was
9100         # necessary to fixe b1268.  This also fixes b1269, b1277, b1278.
9101         if (
9102             !$do_not_weld_rule
9103             ##&& $is_one_line_weld
9104             && $rOpts_line_up_parentheses
9105             && $rOpts_asbl
9106             && $ris_asub_block->{$outer_seqno}
9107           )
9108         {
9109             $do_not_weld_rule = '2A';
9110         }
9111
9112         # DO-NOT-WELD RULE 3:
9113         # Do not weld if this makes our line too long.
9114         # Use a tolerance which depends on if the old tokens were welded
9115         # (fixes cases b746 b748 b749 b750 b752 b753 b754 b755 b756 b758 b759)
9116         if ( !$do_not_weld_rule ) {
9117
9118             # Measure to a little beyond the inner opening token if it is
9119             # followed by a bare word, which may have unusual line break rules.
9120
9121             # NOTE: Originally this was OLD RULE 6: do not weld to a container
9122             # which is followed on the same line by an unknown bareword token.
9123             # This can cause blinkers (cases b626, b611).  But OK to weld one
9124             # line welds to fix cases b1057 b1064.  For generality, OLD RULE 6
9125             # has been merged into RULE 3 here to also fix cases b1078 b1091.
9126
9127             my $K_for_length = $Kinner_opening;
9128             my $Knext_io     = $self->K_next_nonblank($Kinner_opening);
9129             next unless ( defined($Knext_io) );    # shouldn't happen
9130             my $type_io_next = $rLL->[$Knext_io]->[_TYPE_];
9131
9132             # Note: may need to eventually also include other types here,
9133             # such as 'Z' and 'Y':   if ($type_io_next =~ /^[ZYw]$/) {
9134             if ( $type_io_next eq 'w' ) {
9135                 my $Knext_io2 = $self->K_next_nonblank($Knext_io);
9136                 next unless ( defined($Knext_io2) );
9137                 my $type_io_next2 = $rLL->[$Knext_io2]->[_TYPE_];
9138                 if ( !$type_ok_after_bareword{$type_io_next2} ) {
9139                     $K_for_length = $Knext_io2;
9140                 }
9141             }
9142
9143             # Use a tolerance for welds over multiple lines to avoid blinkers.
9144             # We can use zero tolerance if it looks like we are working on an
9145             # existing weld.
9146             my $tol =
9147                 $is_one_line_weld || $is_multiline_weld
9148               ? $single_line_tol
9149               : $multiline_tol;
9150
9151             # By how many characters does this exceed the text window?
9152             my $excess =
9153               $self->cumulative_length_before_K($K_for_length) -
9154               $starting_lentot + 1 + $tol -
9155               $maximum_text_length;
9156
9157             # Old patch: Use '>=0' instead of '> 0' here to fix cases b995 b998
9158             # b1000 b1001 b1007 b1008 b1009 b1010 b1011 b1012 b1016 b1017 b1018
9159             # Revised patch: New tolerance definition allows going back to '> 0'
9160             # here.  This fixes case b1124.  See also cases b1087 and b1087a.
9161             if ( $excess > 0 ) { $do_not_weld_rule = 3 }
9162
9163             if (DEBUG_WELD) {
9164                 $Msg .=
9165 "RULE 3 test: excess length to K=$Kinner_opening is $excess > 0 with tol= $tol ?) \n";
9166             }
9167         }
9168
9169         # DO-NOT-WELD RULE 4; implemented for git#10:
9170         # Do not weld an opening -ce brace if the next container is on a single
9171         # line, different from the opening brace. (This is very rare).  For
9172         # example, given the following with -ce, we will avoid joining the {
9173         # and [
9174
9175         #  } else {
9176         #      [ $_, length($_) ]
9177         #  }
9178
9179         # because this would produce a terminal one-line block:
9180
9181         #  } else { [ $_, length($_) ]  }
9182
9183         # which may not be what is desired. But given this input:
9184
9185         #  } else { [ $_, length($_) ]  }
9186
9187         # then we will do the weld and retain the one-line block
9188         if ( !$do_not_weld_rule && $rOpts->{'cuddled-else'} ) {
9189             my $block_type = $rblock_type_of_seqno->{$outer_seqno};
9190             if ( $block_type && $rcuddled_block_types->{'*'}->{$block_type} ) {
9191                 my $io_line = $inner_opening->[_LINE_INDEX_];
9192                 my $ic_line = $inner_closing->[_LINE_INDEX_];
9193                 my $oo_line = $outer_opening->[_LINE_INDEX_];
9194                 if ( $oo_line < $io_line && $ic_line == $io_line ) {
9195                     $do_not_weld_rule = 4;
9196                 }
9197             }
9198         }
9199
9200         # DO-NOT-WELD RULE 5: do not include welds excluded by user
9201         if (
9202               !$do_not_weld_rule
9203             && %weld_nested_exclusion_rules
9204             && ( $self->is_excluded_weld( $Kouter_opening, $starting_new_weld )
9205                 || $self->is_excluded_weld( $Kinner_opening, 0 ) )
9206           )
9207         {
9208             $do_not_weld_rule = 5;
9209         }
9210
9211         # DO-NOT-WELD RULE 6: This has been merged into RULE 3 above.
9212
9213         # DO-NOT-WELD RULE 7: Do not weld if this conflicts with -bom
9214         # (case b973)
9215         if (  !$do_not_weld_rule
9216             && $rOpts_break_at_old_method_breakpoints
9217             && $iline_io > $iline_oo )
9218         {
9219
9220             foreach my $iline ( $iline_oo + 1 .. $iline_io ) {
9221                 my $rK_range = $rlines->[$iline]->{_rK_range};
9222                 next unless defined($rK_range);
9223                 my ( $Kfirst, $Klast ) = @{$rK_range};
9224                 next unless defined($Kfirst);
9225                 if ( $rLL->[$Kfirst]->[_TYPE_] eq '->' ) {
9226                     $do_not_weld_rule = 7;
9227                     last;
9228                 }
9229             }
9230         }
9231
9232         if ($do_not_weld_rule) {
9233
9234             # After neglecting a pair, we start measuring from start of point
9235             # io ... but not if previous type does not like to be separated
9236             # from its container (fixes case b1184)
9237             my $Kprev     = $self->K_previous_nonblank($Kinner_opening);
9238             my $type_prev = defined($Kprev) ? $rLL->[$Kprev]->[_TYPE_] : 'w';
9239             if ( !$has_tight_paren{$type_prev} ) {
9240                 my $starting_level    = $inner_opening->[_LEVEL_];
9241                 my $starting_ci_level = $inner_opening->[_CI_LEVEL_];
9242                 $starting_lentot =
9243                   $self->cumulative_length_before_K($Kinner_opening);
9244                 $maximum_text_length =
9245                   $maximum_text_length_at_level[$starting_level] -
9246                   $starting_ci_level * $rOpts_continuation_indentation;
9247             }
9248
9249             if (DEBUG_WELD) {
9250                 $Msg .= "Not welding due to RULE $do_not_weld_rule\n";
9251                 print $Msg;
9252             }
9253
9254             # Normally, a broken pair should not decrease indentation of
9255             # intermediate tokens:
9256             ##      if ( $last_pair_broken ) { next }
9257             # However, for long strings of welded tokens, such as '{{{{{{...'
9258             # we will allow broken pairs to also remove indentation.
9259             # This will keep very long strings of opening and closing
9260             # braces from marching off to the right.  We will do this if the
9261             # number of tokens in a weld before the broken weld is 4 or more.
9262             # This rule will mainly be needed for test scripts, since typical
9263             # welds have fewer than about 4 welded tokens.
9264             if ( !@welds || @{ $welds[-1] } < 4 ) { next }
9265         }
9266
9267         # otherwise start new weld ...
9268         elsif ($starting_new_weld) {
9269             $weld_count_this_start++;
9270             if (DEBUG_WELD) {
9271                 $Msg .= "Starting new weld\n";
9272                 print $Msg;
9273             }
9274             push @welds, $item;
9275
9276             $rK_weld_right->{$Kouter_opening} = $Kinner_opening;
9277             $rK_weld_left->{$Kinner_opening}  = $Kouter_opening;
9278
9279             $rK_weld_right->{$Kinner_closing} = $Kouter_closing;
9280             $rK_weld_left->{$Kouter_closing}  = $Kinner_closing;
9281         }
9282
9283         # ... or extend current weld
9284         else {
9285             $weld_count_this_start++;
9286             if (DEBUG_WELD) {
9287                 $Msg .= "Extending current weld\n";
9288                 print $Msg;
9289             }
9290             unshift @{ $welds[-1] }, $inner_seqno;
9291             $rK_weld_right->{$Kouter_opening} = $Kinner_opening;
9292             $rK_weld_left->{$Kinner_opening}  = $Kouter_opening;
9293
9294             $rK_weld_right->{$Kinner_closing} = $Kouter_closing;
9295             $rK_weld_left->{$Kouter_closing}  = $Kinner_closing;
9296         }
9297
9298         # After welding, reduce the indentation level if all intermediate tokens
9299         my $dlevel = $outer_opening->[_LEVEL_] - $inner_opening->[_LEVEL_];
9300         if ( $dlevel != 0 ) {
9301             my $Kstart = $Kinner_opening;
9302             my $Kstop  = $Kinner_closing;
9303             for ( my $KK = $Kstart ; $KK <= $Kstop ; $KK++ ) {
9304                 $rLL->[$KK]->[_LEVEL_] += $dlevel;
9305             }
9306
9307             # Copy opening ci level to help break at = for -lp mode (case b1124)
9308             $rLL->[$Kinner_opening]->[_CI_LEVEL_] =
9309               $rLL->[$Kouter_opening]->[_CI_LEVEL_];
9310
9311             # But do not copy the closing ci level ... it can give poor results
9312             ## $rLL->[$Kinner_closing]->[_CI_LEVEL_] =
9313             ##  $rLL->[$Kouter_closing]->[_CI_LEVEL_];
9314         }
9315     }
9316
9317     return;
9318 }
9319
9320 sub weld_nested_quotes {
9321
9322     # Called once per file for option '--weld-nested-containers'. This
9323     # does welding on qw quotes.
9324
9325     my $self = shift;
9326
9327     # See if quotes are excluded from welding
9328     my $rflags = $weld_nested_exclusion_rules{'q'};
9329     return if ( defined($rflags) && defined( $rflags->[1] ) );
9330
9331     my $rK_weld_left  = $self->[_rK_weld_left_];
9332     my $rK_weld_right = $self->[_rK_weld_right_];
9333
9334     my $rLL = $self->[_rLL_];
9335     return unless ( defined($rLL) && @{$rLL} );
9336     my $Num = @{$rLL};
9337
9338     my $K_opening_container = $self->[_K_opening_container_];
9339     my $K_closing_container = $self->[_K_closing_container_];
9340     my $rlines              = $self->[_rlines_];
9341
9342     my $starting_lentot;
9343     my $maximum_text_length;
9344
9345     my $is_single_quote = sub {
9346         my ( $Kbeg, $Kend, $quote_type ) = @_;
9347         foreach my $K ( $Kbeg .. $Kend ) {
9348             my $test_type = $rLL->[$K]->[_TYPE_];
9349             next   if ( $test_type eq 'b' );
9350             return if ( $test_type ne $quote_type );
9351         }
9352         return 1;
9353     };
9354
9355     # Length tolerance - same as previously used for sub weld_nested
9356     my $multiline_tol =
9357       1 + max( $rOpts_indent_columns, $rOpts_continuation_indentation );
9358
9359     # look for single qw quotes nested in containers
9360     my $KNEXT = $self->[_K_first_seq_item_];
9361     while ( defined($KNEXT) ) {
9362         my $KK = $KNEXT;
9363         $KNEXT = $rLL->[$KNEXT]->[_KNEXT_SEQ_ITEM_];
9364         my $rtoken_vars = $rLL->[$KK];
9365         my $outer_seqno = $rtoken_vars->[_TYPE_SEQUENCE_];
9366         if ( !$outer_seqno ) {
9367             next if ( $KK == 0 );    # first token in file may not be container
9368
9369             # A fault here implies that an error was made in the little loop at
9370             # the bottom of sub 'respace_tokens' which set the values of
9371             # _KNEXT_SEQ_ITEM_.  Or an error has been introduced in the
9372             # loop control lines above.
9373             Fault("sequence = $outer_seqno not defined at K=$KK")
9374               if (DEVEL_MODE);
9375             next;
9376         }
9377
9378         my $token = $rtoken_vars->[_TOKEN_];
9379         if ( $is_opening_token{$token} ) {
9380
9381             # see if the next token is a quote of some type
9382             my $Kn = $KK + 1;
9383             $Kn += 1
9384               if ( $Kn < $Num && $rLL->[$Kn]->[_TYPE_] eq 'b' );
9385             next unless ( $Kn < $Num );
9386
9387             my $next_token = $rLL->[$Kn]->[_TOKEN_];
9388             my $next_type  = $rLL->[$Kn]->[_TYPE_];
9389             next
9390               unless ( ( $next_type eq 'q' || $next_type eq 'Q' )
9391                 && $next_token =~ /^q/ );
9392
9393             # The token before the closing container must also be a quote
9394             my $Kouter_closing = $K_closing_container->{$outer_seqno};
9395             my $Kinner_closing = $self->K_previous_nonblank($Kouter_closing);
9396             next unless $rLL->[$Kinner_closing]->[_TYPE_] eq $next_type;
9397
9398             # This is an inner opening container
9399             my $Kinner_opening = $Kn;
9400
9401             # Do not weld to single-line quotes. Nothing is gained, and it may
9402             # look bad.
9403             next if ( $Kinner_closing == $Kinner_opening );
9404
9405             # Only weld to quotes delimited with container tokens. This is
9406             # because welding to arbitrary quote delimiters can produce code
9407             # which is less readable than without welding.
9408             my $closing_delimiter =
9409               substr( $rLL->[$Kinner_closing]->[_TOKEN_], -1, 1 );
9410             next
9411               unless ( $is_closing_token{$closing_delimiter}
9412                 || $closing_delimiter eq '>' );
9413
9414             # Now make sure that there is just a single quote in the container
9415             next
9416               unless (
9417                 $is_single_quote->(
9418                     $Kinner_opening + 1,
9419                     $Kinner_closing - 1,
9420                     $next_type
9421                 )
9422               );
9423
9424             # OK: This is a candidate for welding
9425             my $Msg = "";
9426             my $do_not_weld;
9427
9428             my $Kouter_opening = $K_opening_container->{$outer_seqno};
9429             my $iline_oo       = $rLL->[$Kouter_opening]->[_LINE_INDEX_];
9430             my $iline_io       = $rLL->[$Kinner_opening]->[_LINE_INDEX_];
9431             my $iline_oc       = $rLL->[$Kouter_closing]->[_LINE_INDEX_];
9432             my $iline_ic       = $rLL->[$Kinner_closing]->[_LINE_INDEX_];
9433             my $is_old_weld =
9434               ( $iline_oo == $iline_io && $iline_ic == $iline_oc );
9435
9436             # Fix for case b1189. If quote is marked as type 'Q' then only weld
9437             # if the two closing tokens are on the same input line.  Otherwise,
9438             # the closing line will be output earlier in the pipeline than
9439             # other CODE lines and welding will not actually occur. This will
9440             # leave a half-welded structure with potential formatting
9441             # instability.  This might be fixed by adding a check for a weld on
9442             # a closing Q token and sending it down the normal channel, but it
9443             # would complicate the code and is potentially risky.
9444             next
9445               if (!$is_old_weld
9446                 && $next_type eq 'Q'
9447                 && $iline_ic != $iline_oc );
9448
9449             # If welded, the line must not exceed allowed line length
9450             ( my $ok_to_weld, $maximum_text_length, $starting_lentot, my $msg )
9451               = $self->setup_new_weld_measurements( $Kouter_opening,
9452                 $Kinner_opening );
9453             if ( !$ok_to_weld ) {
9454                 if (DEBUG_WELD) { print $msg}
9455                 next;
9456             }
9457
9458             my $length =
9459               $rLL->[$Kinner_opening]->[_CUMULATIVE_LENGTH_] - $starting_lentot;
9460             my $excess = $length + $multiline_tol - $maximum_text_length;
9461
9462             my $excess_max = ( $is_old_weld ? $multiline_tol : 0 );
9463             if ( $excess >= $excess_max ) {
9464                 $do_not_weld = 1;
9465             }
9466
9467             if (DEBUG_WELD) {
9468                 if ( !$is_old_weld ) { $is_old_weld = "" }
9469                 $Msg .=
9470 "excess=$excess>=$excess_max, multiline_tol=$multiline_tol, is_old_weld='$is_old_weld'\n";
9471             }
9472
9473             # Check weld exclusion rules for outer container
9474             if ( !$do_not_weld ) {
9475                 my $is_leading = !defined( $rK_weld_left->{$Kouter_opening} );
9476                 if ( $self->is_excluded_weld( $KK, $is_leading ) ) {
9477                     if (DEBUG_WELD) {
9478                         $Msg .=
9479 "No qw weld due to weld exclusion rules for outer container\n";
9480                     }
9481                     $do_not_weld = 1;
9482                 }
9483             }
9484
9485             # Check the length of the last line (fixes case b1039)
9486             if ( !$do_not_weld ) {
9487                 my $rK_range_ic = $rlines->[$iline_ic]->{_rK_range};
9488                 my ( $Kfirst_ic, $Klast_ic ) = @{$rK_range_ic};
9489                 my $excess_ic =
9490                   $self->excess_line_length_for_Krange( $Kfirst_ic,
9491                     $Kouter_closing );
9492
9493                 # Allow extra space for additional welded closing container(s)
9494                 # and a space and comma or semicolon.
9495                 # NOTE: weld len has not been computed yet. Use 2 spaces
9496                 # for now, correct for a single weld. This estimate could
9497                 # be made more accurate if necessary.
9498                 my $weld_len =
9499                   defined( $rK_weld_right->{$Kouter_closing} ) ? 2 : 0;
9500                 if ( $excess_ic + $weld_len + 2 > 0 ) {
9501                     if (DEBUG_WELD) {
9502                         $Msg .=
9503 "No qw weld due to excess ending line length=$excess_ic + $weld_len + 2 > 0\n";
9504                     }
9505                     $do_not_weld = 1;
9506                 }
9507             }
9508
9509             if ($do_not_weld) {
9510                 if (DEBUG_WELD) {
9511                     $Msg .= "Not Welding QW\n";
9512                     print $Msg;
9513                 }
9514                 next;
9515             }
9516
9517             # OK to weld
9518             if (DEBUG_WELD) {
9519                 $Msg .= "Welding QW\n";
9520                 print $Msg;
9521             }
9522
9523             $rK_weld_right->{$Kouter_opening} = $Kinner_opening;
9524             $rK_weld_left->{$Kinner_opening}  = $Kouter_opening;
9525
9526             $rK_weld_right->{$Kinner_closing} = $Kouter_closing;
9527             $rK_weld_left->{$Kouter_closing}  = $Kinner_closing;
9528
9529             # Undo one indentation level if an extra level was added to this
9530             # multiline quote
9531             my $qw_seqno =
9532               $self->[_rstarting_multiline_qw_seqno_by_K_]->{$Kinner_opening};
9533             if (   $qw_seqno
9534                 && $self->[_rmultiline_qw_has_extra_level_]->{$qw_seqno} )
9535             {
9536                 foreach my $K ( $Kinner_opening + 1 .. $Kinner_closing - 1 ) {
9537                     $rLL->[$K]->[_LEVEL_] -= 1;
9538                 }
9539                 $rLL->[$Kinner_opening]->[_CI_LEVEL_] = 0;
9540                 $rLL->[$Kinner_closing]->[_CI_LEVEL_] = 0;
9541             }
9542
9543             # undo CI for other welded quotes
9544             else {
9545
9546                 foreach my $K ( $Kinner_opening .. $Kinner_closing ) {
9547                     $rLL->[$K]->[_CI_LEVEL_] = 0;
9548                 }
9549             }
9550
9551             # Change the level of a closing qw token to be that of the outer
9552             # containing token. This will allow -lp indentation to function
9553             # correctly in the vertical aligner.
9554             # Patch to fix c002: but not if it contains text
9555             if ( length( $rLL->[$Kinner_closing]->[_TOKEN_] ) == 1 ) {
9556                 $rLL->[$Kinner_closing]->[_LEVEL_] =
9557                   $rLL->[$Kouter_closing]->[_LEVEL_];
9558             }
9559         }
9560     }
9561     return;
9562 }
9563
9564 sub is_welded_at_seqno {
9565
9566     my ( $self, $seqno ) = @_;
9567
9568     # given a sequence number:
9569     #   return true if it is welded either left or right
9570     #   return false otherwise
9571     return unless ( $total_weld_count && defined($seqno) );
9572     my $KK_o = $self->[_K_opening_container_]->{$seqno};
9573     return unless defined($KK_o);
9574     return defined( $self->[_rK_weld_left_]->{$KK_o} )
9575       || defined( $self->[_rK_weld_right_]->{$KK_o} );
9576 }
9577
9578 sub mark_short_nested_blocks {
9579
9580     # This routine looks at the entire file and marks any short nested blocks
9581     # which should not be broken.  The results are stored in the hash
9582     #     $rshort_nested->{$type_sequence}
9583     # which will be true if the container should remain intact.
9584     #
9585     # For example, consider the following line:
9586
9587     #   sub cxt_two { sort { $a <=> $b } test_if_list() }
9588
9589     # The 'sort' block is short and nested within an outer sub block.
9590     # Normally, the existence of the 'sort' block will force the sub block to
9591     # break open, but this is not always desirable. Here we will set a flag for
9592     # the sort block to prevent this.  To give the user control, we will
9593     # follow the input file formatting.  If either of the blocks is broken in
9594     # the input file then we will allow it to remain broken. Otherwise we will
9595     # set a flag to keep it together in later formatting steps.
9596
9597     # The flag which is set here will be checked in two places:
9598     # 'sub process_line_of_CODE' and 'sub starting_one_line_block'
9599
9600     my $self = shift;
9601     return if $rOpts->{'indent-only'};
9602
9603     my $rLL = $self->[_rLL_];
9604     return unless ( defined($rLL) && @{$rLL} );
9605
9606     return unless ( $rOpts->{'one-line-block-nesting'} );
9607
9608     my $K_opening_container  = $self->[_K_opening_container_];
9609     my $K_closing_container  = $self->[_K_closing_container_];
9610     my $rbreak_container     = $self->[_rbreak_container_];
9611     my $rshort_nested        = $self->[_rshort_nested_];
9612     my $rlines               = $self->[_rlines_];
9613     my $rblock_type_of_seqno = $self->[_rblock_type_of_seqno_];
9614
9615     # Variables needed for estimating line lengths
9616     my $maximum_text_length;
9617     my $starting_lentot;
9618     my $length_tol = 1;
9619
9620     my $excess_length_to_K = sub {
9621         my ($K) = @_;
9622
9623         # Estimate the length from the line start to a given token
9624         my $length = $self->cumulative_length_before_K($K) - $starting_lentot;
9625         my $excess_length = $length + $length_tol - $maximum_text_length;
9626         return ($excess_length);
9627     };
9628
9629     my $is_broken_block = sub {
9630
9631         # a block is broken if the input line numbers of the braces differ
9632         my ($seqno) = @_;
9633         my $K_opening = $K_opening_container->{$seqno};
9634         return unless ( defined($K_opening) );
9635         my $K_closing = $K_closing_container->{$seqno};
9636         return unless ( defined($K_closing) );
9637         return $rbreak_container->{$seqno}
9638           || $rLL->[$K_closing]->[_LINE_INDEX_] !=
9639           $rLL->[$K_opening]->[_LINE_INDEX_];
9640     };
9641
9642     # loop over all containers
9643     my @open_block_stack;
9644     my $iline = -1;
9645     my $KNEXT = $self->[_K_first_seq_item_];
9646     while ( defined($KNEXT) ) {
9647         my $KK = $KNEXT;
9648         $KNEXT = $rLL->[$KNEXT]->[_KNEXT_SEQ_ITEM_];
9649         my $rtoken_vars   = $rLL->[$KK];
9650         my $type_sequence = $rtoken_vars->[_TYPE_SEQUENCE_];
9651         if ( !$type_sequence ) {
9652             next if ( $KK == 0 );    # first token in file may not be container
9653
9654             # A fault here implies that an error was made in the little loop at
9655             # the bottom of sub 'respace_tokens' which set the values of
9656             # _KNEXT_SEQ_ITEM_.  Or an error has been introduced in the
9657             # loop control lines above.
9658             Fault("sequence = $type_sequence not defined at K=$KK")
9659               if (DEVEL_MODE);
9660             next;
9661         }
9662
9663         # Patch: do not mark short blocks with welds.
9664         # In some cases blinkers can form (case b690).
9665         if ( $total_weld_count && $self->is_welded_at_seqno($type_sequence) ) {
9666             next;
9667         }
9668
9669         # We are just looking at code blocks
9670         my $token = $rtoken_vars->[_TOKEN_];
9671         my $type  = $rtoken_vars->[_TYPE_];
9672         next unless ( $type eq $token );
9673         next unless ( $rblock_type_of_seqno->{$type_sequence} );
9674
9675         # Keep a stack of all acceptable block braces seen.
9676         # Only consider blocks entirely on one line so dump the stack when line
9677         # changes.
9678         my $iline_last = $iline;
9679         $iline = $rLL->[$KK]->[_LINE_INDEX_];
9680         if ( $iline != $iline_last ) { @open_block_stack = () }
9681
9682         if ( $token eq '}' ) {
9683             if (@open_block_stack) { pop @open_block_stack }
9684         }
9685         next unless ( $token eq '{' );
9686
9687         # block must be balanced (bad scripts may be unbalanced)
9688         my $K_opening = $K_opening_container->{$type_sequence};
9689         my $K_closing = $K_closing_container->{$type_sequence};
9690         next unless ( defined($K_opening) && defined($K_closing) );
9691
9692         # require that this block be entirely on one line
9693         next if ( $is_broken_block->($type_sequence) );
9694
9695         # See if this block fits on one line of allowed length (which may
9696         # be different from the input script)
9697         $starting_lentot =
9698           $KK <= 0 ? 0 : $rLL->[ $KK - 1 ]->[_CUMULATIVE_LENGTH_];
9699         my $level    = $rLL->[$KK]->[_LEVEL_];
9700         my $ci_level = $rLL->[$KK]->[_CI_LEVEL_];
9701         $maximum_text_length =
9702           $maximum_text_length_at_level[$level] -
9703           $ci_level * $rOpts_continuation_indentation;
9704
9705         # Dump the stack if block is too long and skip this block
9706         if ( $excess_length_to_K->($K_closing) > 0 ) {
9707             @open_block_stack = ();
9708             next;
9709         }
9710
9711         # OK, Block passes tests, remember it
9712         push @open_block_stack, $type_sequence;
9713
9714         # We are only marking nested code blocks,
9715         # so check for a previous block on the stack
9716         next unless ( @open_block_stack > 1 );
9717
9718         # Looks OK, mark this as a short nested block
9719         $rshort_nested->{$type_sequence} = 1;
9720
9721     }
9722     return;
9723 }
9724
9725 sub adjust_indentation_levels {
9726
9727     my ($self) = @_;
9728
9729     # Called once per file to do special indentation adjustments.
9730     # These routines adjust levels either by changing _CI_LEVEL_ directly or
9731     # by setting modified levels in the array $self->[_radjusted_levels_].
9732
9733     # Initialize the adjusted levels. These will be the levels actually used
9734     # for computing indentation.
9735
9736     # NOTE: This routine is called after the weld routines, which may have
9737     # already adjusted _LEVEL_, so we are making adjustments on top of those
9738     # levels.  It would be much nicer to have the weld routines also use this
9739     # adjustment, but that gets complicated when we combine -gnu -wn and have
9740     # some welded quotes.
9741     my $Klimit           = $self->[_Klimit_];
9742     my $rLL              = $self->[_rLL_];
9743     my $radjusted_levels = $self->[_radjusted_levels_];
9744
9745     return unless ( defined($Klimit) );
9746
9747     foreach my $KK ( 0 .. $Klimit ) {
9748         $radjusted_levels->[$KK] = $rLL->[$KK]->[_LEVEL_];
9749     }
9750
9751     # First set adjusted levels for any non-indenting braces.
9752     $self->non_indenting_braces();
9753
9754     # Adjust breaks and indentation list containers
9755     $self->break_before_list_opening_containers();
9756
9757     # Set adjusted levels for the whitespace cycle option.
9758     $self->whitespace_cycle_adjustment();
9759
9760     $self->braces_left_setup();
9761
9762     # Adjust continuation indentation if -bli is set
9763     $self->bli_adjustment();
9764
9765     $self->extended_ci()
9766       if ($rOpts_extended_continuation_indentation);
9767
9768     # Now clip any adjusted levels to be non-negative
9769     $self->clip_adjusted_levels();
9770
9771     return;
9772 }
9773
9774 sub clip_adjusted_levels {
9775
9776     # Replace any negative adjusted levels with zero.
9777     # Negative levels can occur in files with brace errors.
9778     my ($self) = @_;
9779     my $radjusted_levels = $self->[_radjusted_levels_];
9780     return unless defined($radjusted_levels) && @{$radjusted_levels};
9781     foreach ( @{$radjusted_levels} ) { $_ = 0 if ( $_ < 0 ) }
9782     return;
9783 }
9784
9785 sub non_indenting_braces {
9786
9787     # Called once per file to handle the --non-indenting-braces parameter.
9788     # Remove indentation within marked braces if requested
9789     my ($self) = @_;
9790     return unless ( $rOpts->{'non-indenting-braces'} );
9791
9792     my $rLL = $self->[_rLL_];
9793     return unless ( defined($rLL) && @{$rLL} );
9794
9795     my $Klimit                     = $self->[_Klimit_];
9796     my $rblock_type_of_seqno       = $self->[_rblock_type_of_seqno_];
9797     my $K_opening_container        = $self->[_K_opening_container_];
9798     my $K_closing_container        = $self->[_K_closing_container_];
9799     my $rspecial_side_comment_type = $self->[_rspecial_side_comment_type_];
9800     my $radjusted_levels           = $self->[_radjusted_levels_];
9801
9802     # First locate all of the marked blocks
9803     my @K_stack;
9804     foreach my $seqno ( keys %{$rblock_type_of_seqno} ) {
9805         my $KK = $K_opening_container->{$seqno};
9806
9807         # followed by a comment
9808         my $K_sc = $KK + 1;
9809         $K_sc += 1
9810           if ( $K_sc <= $Klimit && $rLL->[$K_sc]->[_TYPE_] eq 'b' );
9811         next unless ( $K_sc <= $Klimit );
9812         my $type_sc = $rLL->[$K_sc]->[_TYPE_];
9813         next unless ( $type_sc eq '#' );
9814
9815         # on the same line
9816         my $line_index    = $rLL->[$KK]->[_LINE_INDEX_];
9817         my $line_index_sc = $rLL->[$K_sc]->[_LINE_INDEX_];
9818         next unless ( $line_index_sc == $line_index );
9819
9820         # get the side comment text
9821         my $token_sc = $rLL->[$K_sc]->[_TOKEN_];
9822
9823         # The pattern ends in \s but we have removed the newline, so
9824         # we added it back for the match. That way we require an exact
9825         # match to the special string and also allow additional text.
9826         $token_sc .= "\n";
9827         next unless ( $token_sc =~ /$non_indenting_brace_pattern/ );
9828         $rspecial_side_comment_type->{$K_sc} = 'NIB';
9829         push @K_stack, [ $KK, 1 ];
9830         my $Kc = $K_closing_container->{$seqno};
9831         push @K_stack, [ $Kc, -1 ] if ( defined($Kc) );
9832     }
9833     return unless (@K_stack);
9834     @K_stack = sort { $a->[0] <=> $b->[0] } @K_stack;
9835
9836     # Then loop to remove indentation within marked blocks
9837     my $KK_last = 0;
9838     my $ndeep   = 0;
9839     foreach my $item (@K_stack) {
9840         my ( $KK, $inc ) = @{$item};
9841         if ( $ndeep > 0 ) {
9842
9843             foreach ( $KK_last + 1 .. $KK ) {
9844                 $radjusted_levels->[$_] -= $ndeep;
9845             }
9846
9847             # We just subtracted the old $ndeep value, which only applies to a
9848             # '{'.  The new $ndeep applies to a '}', so we undo the error.
9849             if ( $inc < 0 ) { $radjusted_levels->[$KK] += 1 }
9850         }
9851
9852         $ndeep += $inc;
9853         $KK_last = $KK;
9854     }
9855     return;
9856 }
9857
9858 sub whitespace_cycle_adjustment {
9859
9860     my $self = shift;
9861
9862     # Called once per file to implement the --whitespace-cycle option
9863     my $rLL = $self->[_rLL_];
9864     return unless ( defined($rLL) && @{$rLL} );
9865     my $radjusted_levels = $self->[_radjusted_levels_];
9866     my $maximum_level    = $self->[_maximum_level_];
9867
9868     if (   $rOpts_whitespace_cycle
9869         && $rOpts_whitespace_cycle > 0
9870         && $rOpts_whitespace_cycle < $maximum_level )
9871     {
9872
9873         my $Kmax = @{$rLL} - 1;
9874
9875         my $whitespace_last_level  = -1;
9876         my @whitespace_level_stack = ();
9877         my $last_nonblank_type     = 'b';
9878         my $last_nonblank_token    = '';
9879         foreach my $KK ( 0 .. $Kmax ) {
9880             my $level_abs = $radjusted_levels->[$KK];
9881             my $level     = $level_abs;
9882             if ( $level_abs < $whitespace_last_level ) {
9883                 pop(@whitespace_level_stack);
9884             }
9885             if ( !@whitespace_level_stack ) {
9886                 push @whitespace_level_stack, $level_abs;
9887             }
9888             elsif ( $level_abs > $whitespace_last_level ) {
9889                 $level = $whitespace_level_stack[-1] +
9890                   ( $level_abs - $whitespace_last_level );
9891
9892                 if (
9893                     # 1 Try to break at a block brace
9894                     (
9895                            $level > $rOpts_whitespace_cycle
9896                         && $last_nonblank_type eq '{'
9897                         && $last_nonblank_token eq '{'
9898                     )
9899
9900                     # 2 Then either a brace or bracket
9901                     || (   $level > $rOpts_whitespace_cycle + 1
9902                         && $last_nonblank_token =~ /^[\{\[]$/ )
9903
9904                     # 3 Then a paren too
9905                     || $level > $rOpts_whitespace_cycle + 2
9906                   )
9907                 {
9908                     $level = 1;
9909                 }
9910                 push @whitespace_level_stack, $level;
9911             }
9912             $level = $whitespace_level_stack[-1];
9913             $radjusted_levels->[$KK] = $level;
9914
9915             $whitespace_last_level = $level_abs;
9916             my $type  = $rLL->[$KK]->[_TYPE_];
9917             my $token = $rLL->[$KK]->[_TOKEN_];
9918             if ( $type ne 'b' ) {
9919                 $last_nonblank_type  = $type;
9920                 $last_nonblank_token = $token;
9921             }
9922         }
9923     }
9924     return;
9925 }
9926
9927 use constant DEBUG_BBX => 0;
9928
9929 sub break_before_list_opening_containers {
9930
9931     my ($self) = @_;
9932
9933     # This routine is called once per batch to implement parameters
9934     # --break-before-hash-brace=n and similar -bbx=n flags
9935     #    and their associated indentation flags:
9936     # --break-before-hash-brace-and-indent and similar -bbxi=n
9937
9938     # Nothing to do if none of the -bbx=n parameters has been set
9939     return unless %break_before_container_types;
9940
9941     my $rLL = $self->[_rLL_];
9942     return unless ( defined($rLL) && @{$rLL} );
9943
9944     # Loop over all opening container tokens
9945     my $K_opening_container       = $self->[_K_opening_container_];
9946     my $K_closing_container       = $self->[_K_closing_container_];
9947     my $ris_broken_container      = $self->[_ris_broken_container_];
9948     my $ris_permanently_broken    = $self->[_ris_permanently_broken_];
9949     my $rhas_list                 = $self->[_rhas_list_];
9950     my $rhas_broken_list          = $self->[_rhas_broken_list_];
9951     my $rhas_broken_list_with_lec = $self->[_rhas_broken_list_with_lec_];
9952     my $radjusted_levels          = $self->[_radjusted_levels_];
9953     my $rparent_of_seqno          = $self->[_rparent_of_seqno_];
9954     my $rlines                    = $self->[_rlines_];
9955     my $rtype_count_by_seqno      = $self->[_rtype_count_by_seqno_];
9956     my $rlec_count_by_seqno       = $self->[_rlec_count_by_seqno_];
9957     my $rno_xci_by_seqno          = $self->[_rno_xci_by_seqno_];
9958     my $rK_weld_right             = $self->[_rK_weld_right_];
9959     my $rblock_type_of_seqno      = $self->[_rblock_type_of_seqno_];
9960
9961     my $length_tol =
9962       max( 1, $rOpts_continuation_indentation, $rOpts_indent_columns );
9963     if ($rOpts_ignore_old_breakpoints) {
9964
9965         # Patch suggested by b1231; the old tol was excessive.
9966         ## $length_tol += $rOpts_maximum_line_length;
9967         $length_tol *= 2;
9968     }
9969
9970     my $rbreak_before_container_by_seqno = {};
9971     my $rwant_reduced_ci                 = {};
9972     foreach my $seqno ( keys %{$K_opening_container} ) {
9973
9974         #----------------------------------------------------------------
9975         # Part 1: Examine any -bbx=n flags
9976         #----------------------------------------------------------------
9977
9978         next if ( $rblock_type_of_seqno->{$seqno} );
9979         my $KK = $K_opening_container->{$seqno};
9980
9981         # This must be a list or contain a list.
9982         # Note1: switched from 'has_broken_list' to 'has_list' to fix b1024.
9983         # Note2: 'has_list' holds the depth to the sub-list.  We will require
9984         #  a depth of just 1
9985         my $is_list  = $self->is_list_by_seqno($seqno);
9986         my $has_list = $rhas_list->{$seqno};
9987
9988         # Fix for b1173: if welded opening container, use flag of innermost
9989         # seqno.  Otherwise, the restriction $has_list==1 prevents triple and
9990         # higher welds from following the -BBX parameters.
9991         if ($total_weld_count) {
9992             my $KK_test = $rK_weld_right->{$KK};
9993             if ( defined($KK_test) ) {
9994                 my $seqno_inner = $rLL->[$KK_test]->[_TYPE_SEQUENCE_];
9995                 $is_list ||= $self->is_list_by_seqno($seqno_inner);
9996                 $has_list = $rhas_list->{$seqno_inner};
9997             }
9998         }
9999
10000         next unless ( $is_list || $has_list && $has_list == 1 );
10001
10002         my $has_broken_list   = $rhas_broken_list->{$seqno};
10003         my $has_list_with_lec = $rhas_broken_list_with_lec->{$seqno};
10004
10005         # Only for types of container tokens with a non-default break option
10006         my $token        = $rLL->[$KK]->[_TOKEN_];
10007         my $break_option = $break_before_container_types{$token};
10008         next unless ($break_option);
10009
10010         # Do not use -bbx under stress for stability ... fixes b1300
10011         my $level = $rLL->[$KK]->[_LEVEL_];
10012         if ( $level >= $stress_level_beta ) {
10013             DEBUG_BBX
10014               && print
10015 "BBX: Switching off at $seqno: level=$level exceeds beta stress level=$stress_level_beta\n";
10016             next;
10017         }
10018
10019         # Require previous nonblank to be '=' or '=>'
10020         my $Kprev = $KK - 1;
10021         next if ( $Kprev < 0 );
10022         my $prev_type = $rLL->[$Kprev]->[_TYPE_];
10023         if ( $prev_type eq 'b' ) {
10024             $Kprev--;
10025             next if ( $Kprev < 0 );
10026             $prev_type = $rLL->[$Kprev]->[_TYPE_];
10027         }
10028         next unless ( $is_equal_or_fat_comma{$prev_type} );
10029
10030         my $ci = $rLL->[$KK]->[_CI_LEVEL_];
10031
10032         #--------------------------------------------
10033         # New coding for option 2 (break if complex).
10034         #--------------------------------------------
10035         # This new coding uses clues which are invariant under formatting to
10036         # decide if a list is complex.  For now it is only applied when -lp
10037         # and -vmll are used, but eventually it may become the standard method.
10038         # Fixes b1274, b1275, and others, including b1099.
10039         if ( $break_option == 2 ) {
10040
10041             if (   $rOpts_line_up_parentheses
10042                 || $rOpts_variable_maximum_line_length )
10043             {
10044
10045                 # Start with the basic definition of a complex list...
10046                 my $is_complex = $is_list && $has_list;
10047
10048                 # and it is also complex if the parent is a list
10049                 if ( !$is_complex ) {
10050                     my $parent = $rparent_of_seqno->{$seqno};
10051                     if ( $self->is_list_by_seqno($parent) ) {
10052                         $is_complex = 1;
10053                     }
10054                 }
10055
10056                 # finally, we will call it complex if there are inner opening
10057                 # and closing container tokens, not parens, within the outer
10058                 # container tokens.
10059                 if ( !$is_complex ) {
10060                     my $Kp      = $self->K_next_nonblank($KK);
10061                     my $token_p = defined($Kp) ? $rLL->[$Kp]->[_TOKEN_] : 'b';
10062                     if ( $is_opening_token{$token_p} && $token_p ne '(' ) {
10063
10064                         my $Kc = $K_closing_container->{$seqno};
10065                         my $Km = $self->K_previous_nonblank($Kc);
10066                         my $token_m =
10067                           defined($Km) ? $rLL->[$Km]->[_TOKEN_] : 'b';
10068
10069                         # ignore any optional ending comma
10070                         if ( $token_m eq ',' ) {
10071                             $Km = $self->K_previous_nonblank($Km);
10072                             $token_m =
10073                               defined($Km) ? $rLL->[$Km]->[_TOKEN_] : 'b';
10074                         }
10075
10076                         $is_complex ||=
10077                           $is_closing_token{$token_m} && $token_m ne ')';
10078                     }
10079                 }
10080
10081                 # Convert to option 3 (always break) if complex
10082                 next unless ($is_complex);
10083                 $break_option = 3;
10084             }
10085         }
10086
10087         # Fix for b1231: the has_list_with_lec does not cover all cases.
10088         # A broken container containing a list and with line-ending commas
10089         # will stay broken, so can be treated as if it had a list with lec.
10090         $has_list_with_lec ||=
10091              $has_list
10092           && $ris_broken_container->{$seqno}
10093           && $rlec_count_by_seqno->{$seqno};
10094
10095         DEBUG_BBX
10096           && print STDOUT
10097 "BBX: Looking at seqno=$seqno, token = $token with option=$break_option\n";
10098
10099         # -bbx=1 = stable, try to follow input
10100         if ( $break_option == 1 ) {
10101
10102             my $iline    = $rLL->[$KK]->[_LINE_INDEX_];
10103             my $rK_range = $rlines->[$iline]->{_rK_range};
10104             my ( $Kfirst, $Klast ) = @{$rK_range};
10105             next unless ( $KK == $Kfirst );
10106         }
10107
10108         # -bbx=2 => apply this style only for a 'complex' list
10109         elsif ( $break_option == 2 ) {
10110
10111             #  break if this list contains a broken list with line-ending comma
10112             my $ok_to_break;
10113             my $Msg = "";
10114             if ($has_list_with_lec) {
10115                 $ok_to_break = 1;
10116                 DEBUG_BBX && do { $Msg = "has list with lec;" };
10117             }
10118
10119             if ( !$ok_to_break ) {
10120
10121                 # Turn off -xci if -bbx=2 and this container has a sublist but
10122                 # not a broken sublist. This avoids creating blinkers.  The
10123                 # problem is that -xci can cause one-line lists to break open,
10124                 # and thereby creating formatting instability.
10125                 # This fixes cases b1033 b1036 b1037 b1038 b1042 b1043 b1044
10126                 # b1045 b1046 b1047 b1051 b1052 b1061.
10127                 if ($has_list) { $rno_xci_by_seqno->{$seqno} = 1 }
10128
10129                 my $parent = $rparent_of_seqno->{$seqno};
10130                 if ( $self->is_list_by_seqno($parent) ) {
10131                     DEBUG_BBX && do { $Msg = "parent is list" };
10132                     $ok_to_break = 1;
10133                 }
10134             }
10135
10136             if ( !$ok_to_break ) {
10137                 DEBUG_BBX
10138                   && print STDOUT "Not breaking at seqno=$seqno: $Msg\n";
10139                 next;
10140             }
10141
10142             DEBUG_BBX
10143               && print STDOUT "OK to break at seqno=$seqno: $Msg\n";
10144
10145             # Patch: turn off -xci if -bbx=2 and -lp
10146             # This fixes cases b1090 b1095 b1101 b1116 b1118 b1121 b1122
10147             $rno_xci_by_seqno->{$seqno} = 1 if ($rOpts_line_up_parentheses);
10148         }
10149
10150         # -bbx=3 = always break
10151         elsif ( $break_option == 3 ) {
10152
10153             # ok to break
10154         }
10155
10156         # Shouldn't happen! Bad flag, but make behavior same as 3
10157         else {
10158             # ok to break
10159         }
10160
10161         # Set a flag for actual implementation later in
10162         # sub insert_breaks_before_list_opening_containers
10163         $rbreak_before_container_by_seqno->{$seqno} = 1;
10164         DEBUG_BBX
10165           && print STDOUT "BBX: ok to break at seqno=$seqno\n";
10166
10167         # -bbxi=0: Nothing more to do if the ci value remains unchanged
10168         my $ci_flag = $container_indentation_options{$token};
10169         next unless ($ci_flag);
10170
10171         # -bbxi=1: This option removes ci and is handled in
10172         # later sub final_indentation_adjustment
10173         if ( $ci_flag == 1 ) {
10174             $rwant_reduced_ci->{$seqno} = 1;
10175             next;
10176         }
10177
10178         # -bbxi=2 ...
10179
10180         #----------------------------------------------------------------
10181         # Part 2: Perform tests before committing to changing ci and level
10182         #----------------------------------------------------------------
10183
10184         # Before changing the ci level of the opening container, we need
10185         # to be sure that the container will be broken in the later stages of
10186         # formatting.  We have to do this because we are working early in the
10187         # formatting pipeline.  A problem can occur if we change the ci or
10188         # level of the opening token but do not actually break the container
10189         # open as expected.  In most cases it wouldn't make any difference if
10190         # we changed ci or not, but there are some edge cases where this
10191         # can cause blinking states, so we need to try to only change ci if
10192         # the container will really be broken.
10193
10194         # Only consider containers already broken
10195         next if ( !$ris_broken_container->{$seqno} );
10196
10197         # Patch to fix issue b1305: the combination of -naws and ci>i appears
10198         # to cause an instability.  It should almost never occur in practice.
10199         next
10200           if (!$rOpts_add_whitespace
10201             && $rOpts_continuation_indentation > $rOpts_indent_columns );
10202
10203         # Always ok to change ci for permanently broken containers
10204         if ( $ris_permanently_broken->{$seqno} ) {
10205             goto OK;
10206         }
10207
10208         # Always OK if this list contains a broken sub-container with
10209         # a non-terminal line-ending comma
10210         if ($has_list_with_lec) { goto OK }
10211
10212         # From here on we are considering a single container...
10213
10214         # A single container must have at least 1 line-ending comma:
10215         next unless ( $rlec_count_by_seqno->{$seqno} );
10216
10217         # Since it has a line-ending comma, it will stay broken if the -boc
10218         # flag is set
10219         if ($rOpts_break_at_old_comma_breakpoints) { goto OK }
10220
10221         # OK if the container contains multiple fat commas
10222         # Better: multiple lines with fat commas
10223         if ( !$rOpts_ignore_old_breakpoints ) {
10224             my $rtype_count = $rtype_count_by_seqno->{$seqno};
10225             next unless ($rtype_count);
10226             my $fat_comma_count = $rtype_count->{'=>'};
10227             DEBUG_BBX
10228               && print STDOUT "BBX: fat comma count=$fat_comma_count\n";
10229             if ( $fat_comma_count && $fat_comma_count >= 2 ) { goto OK }
10230         }
10231
10232         # The last check we can make is to see if this container could fit on a
10233         # single line.  Use the least possble indentation in the estmate (ci=0),
10234         # so we are not subtracting $ci * $rOpts_continuation_indentation from
10235         # tablulated $maximum_text_length  value.
10236         my $maximum_text_length = $maximum_text_length_at_level[$level];
10237         my $K_closing           = $K_closing_container->{$seqno};
10238         my $length = $self->cumulative_length_before_K($K_closing) -
10239           $self->cumulative_length_before_K($KK);
10240         my $excess_length = $length - $maximum_text_length;
10241         DEBUG_BBX
10242           && print STDOUT
10243 "BBX: excess=$excess_length: maximum_text_length=$maximum_text_length, length=$length, ci=$ci\n";
10244
10245         # OK if the net container definitely breaks on length
10246         if ( $excess_length > $length_tol ) {
10247             DEBUG_BBX
10248               && print STDOUT "BBX: excess_length=$excess_length\n";
10249             goto OK;
10250         }
10251
10252         # Otherwise skip it
10253         next;
10254
10255         #################################################################
10256         # Part 3: Looks OK: apply -bbx=n and any related -bbxi=n flag
10257         #################################################################
10258
10259       OK:
10260
10261         DEBUG_BBX && print STDOUT "BBX: OK to break\n";
10262
10263         # -bbhbi=n
10264         # -bbsbi=n
10265         # -bbpi=n
10266
10267         # where:
10268
10269         # n=0  default indentation (usually one ci)
10270         # n=1  outdent one ci
10271         # n=2  indent one level (minus one ci)
10272         # n=3  indent one extra ci [This may be dropped]
10273
10274         # NOTE: We are adjusting indentation of the opening container. The
10275         # closing container will normally follow the indentation of the opening
10276         # container automatically, so this is not currently done.
10277         next unless ($ci);
10278
10279         # option 1: outdent
10280         if ( $ci_flag == 1 ) {
10281             $ci -= 1;
10282         }
10283
10284         # option 2: indent one level
10285         elsif ( $ci_flag == 2 ) {
10286             $ci -= 1;
10287             $radjusted_levels->[$KK] += 1;
10288         }
10289
10290         # unknown option
10291         else {
10292             # Shouldn't happen - leave ci unchanged
10293         }
10294
10295         $rLL->[$KK]->[_CI_LEVEL_] = $ci if ( $ci >= 0 );
10296     }
10297
10298     $self->[_rbreak_before_container_by_seqno_] =
10299       $rbreak_before_container_by_seqno;
10300     $self->[_rwant_reduced_ci_] = $rwant_reduced_ci;
10301     return;
10302 }
10303
10304 use constant DEBUG_XCI => 0;
10305
10306 sub extended_ci {
10307
10308     # This routine implements the -xci (--extended-continuation-indentation)
10309     # flag.  We add CI to interior tokens of a container which itself has CI but
10310     # only if a token does not already have CI.
10311
10312     # To do this, we will locate opening tokens which themselves have
10313     # continuation indentation (CI).  We track them with their sequence
10314     # numbers.  These sequence numbers are called 'controlling sequence
10315     # numbers'.  They apply continuation indentation to the tokens that they
10316     # contain.  These inner tokens remember their controlling sequence numbers.
10317     # Later, when these inner tokens are output, they have to see if the output
10318     # lines with their controlling tokens were output with CI or not.  If not,
10319     # then they must remove their CI too.
10320
10321     # The controlling CI concept works hierarchically.  But CI itself is not
10322     # hierarchical; it is either on or off. There are some rare instances where
10323     # it would be best to have hierarchical CI too, but not enough to be worth
10324     # the programming effort.
10325
10326     # The operations to remove unwanted CI are done in sub 'undo_ci'.
10327
10328     my ($self) = @_;
10329
10330     my $rLL = $self->[_rLL_];
10331     return unless ( defined($rLL) && @{$rLL} );
10332
10333     my $ris_list_by_seqno        = $self->[_ris_list_by_seqno_];
10334     my $ris_seqno_controlling_ci = $self->[_ris_seqno_controlling_ci_];
10335     my $rseqno_controlling_my_ci = $self->[_rseqno_controlling_my_ci_];
10336     my $rlines                   = $self->[_rlines_];
10337     my $rno_xci_by_seqno         = $self->[_rno_xci_by_seqno_];
10338     my $ris_bli_container        = $self->[_ris_bli_container_];
10339     my $rblock_type_of_seqno     = $self->[_rblock_type_of_seqno_];
10340
10341     my %available_space;
10342
10343     # Loop over all opening container tokens
10344     my $K_opening_container  = $self->[_K_opening_container_];
10345     my $K_closing_container  = $self->[_K_closing_container_];
10346     my $ris_broken_container = $self->[_ris_broken_container_];
10347     my @seqno_stack;
10348     my $seqno_top;
10349     my $KLAST;
10350     my $KNEXT = $self->[_K_first_seq_item_];
10351
10352     # The following variable can be used to allow a little extra space to
10353     # avoid blinkers.  A value $len_tol = 20 fixed the following
10354     # fixes cases: b1025 b1026 b1027 b1028 b1029 b1030 but NOT b1031.
10355     # It turned out that the real problem was misparsing a list brace as
10356     # a code block in a 'use' statement when the line length was extremely
10357     # small.  A value of 0 works now, but a slightly larger value can
10358     # be used to minimize the chance of a blinker.
10359     my $len_tol = 0;
10360
10361     while ( defined($KNEXT) ) {
10362
10363         # Fix all tokens up to the next sequence item if we are changing CI
10364         if ($seqno_top) {
10365
10366             my $is_list = $ris_list_by_seqno->{$seqno_top};
10367             my $space   = $available_space{$seqno_top};
10368             my $length  = $rLL->[$KLAST]->[_CUMULATIVE_LENGTH_];
10369             my $count   = 0;
10370             for ( my $Kt = $KLAST + 1 ; $Kt < $KNEXT ; $Kt++ ) {
10371
10372                 # But do not include tokens which might exceed the line length
10373                 # and are not in a list.
10374                 # ... This fixes case b1031
10375                 my $length_before = $length;
10376                 $length = $rLL->[$Kt]->[_CUMULATIVE_LENGTH_];
10377                 if (
10378                     !$rLL->[$Kt]->[_CI_LEVEL_]
10379                     && (   $is_list
10380                         || $length - $length_before < $space
10381                         || $rLL->[$Kt]->[_TYPE_] eq '#' )
10382                   )
10383                 {
10384                     $rLL->[$Kt]->[_CI_LEVEL_] = 1;
10385                     $rseqno_controlling_my_ci->{$Kt} = $seqno_top;
10386                     $count++;
10387                 }
10388             }
10389             $ris_seqno_controlling_ci->{$seqno_top} += $count;
10390         }
10391
10392         $KLAST = $KNEXT;
10393         my $KK = $KNEXT;
10394         $KNEXT = $rLL->[$KNEXT]->[_KNEXT_SEQ_ITEM_];
10395
10396         my $seqno     = $rLL->[$KK]->[_TYPE_SEQUENCE_];
10397         my $K_opening = $K_opening_container->{$seqno};
10398
10399         # see if we have reached the end of the current controlling container
10400         if ( $seqno_top && $seqno == $seqno_top ) {
10401             $seqno_top = pop @seqno_stack;
10402         }
10403
10404         # Patch to fix some block types...
10405         # Certain block types arrive from the tokenizer without CI but should
10406         # have it for this option.  These include anonymous subs and
10407         #     do sort map grep eval
10408         my $block_type = $rblock_type_of_seqno->{$seqno};
10409         if ( $block_type && $is_block_with_ci{$block_type} ) {
10410             $rLL->[$KK]->[_CI_LEVEL_] = 1;
10411             if ($seqno_top) {
10412                 $rseqno_controlling_my_ci->{$KK} = $seqno_top;
10413                 $ris_seqno_controlling_ci->{$seqno_top}++;
10414             }
10415         }
10416
10417         # If this does not have ci, update ci if necessary and continue looking
10418         if ( !$rLL->[$KK]->[_CI_LEVEL_] ) {
10419             if ($seqno_top) {
10420                 $rLL->[$KK]->[_CI_LEVEL_] = 1;
10421                 $rseqno_controlling_my_ci->{$KK} = $seqno_top;
10422                 $ris_seqno_controlling_ci->{$seqno_top}++;
10423             }
10424             next;
10425         }
10426
10427         # Skip if requested by -bbx to avoid blinkers
10428         if ( $rno_xci_by_seqno->{$seqno} ) {
10429             next;
10430         }
10431
10432         # Skip if this is a -bli container (this fixes case b1065) Note: case
10433         # b1065 is also fixed by the update for b1055, so this update is not
10434         # essential now.  But there does not seem to be a good reason to add
10435         # xci and bli together, so the update is retained.
10436         if ( $ris_bli_container->{$seqno} ) {
10437             next;
10438         }
10439
10440         # We are looking for opening container tokens with ci
10441         next unless ( defined($K_opening) && $KK == $K_opening );
10442
10443         # Make sure there is a corresponding closing container
10444         # (could be missing if the script has a brace error)
10445         my $K_closing = $K_closing_container->{$seqno};
10446         next unless defined($K_closing);
10447
10448         # Require different input lines. This will filter out a large number
10449         # of small hash braces and array brackets.  If we accidentally filter
10450         # out an important container, it will get fixed on the next pass.
10451         if (
10452             $rLL->[$K_opening]->[_LINE_INDEX_] ==
10453             $rLL->[$K_closing]->[_LINE_INDEX_]
10454             && ( $rLL->[$K_closing]->[_CUMULATIVE_LENGTH_] -
10455                 $rLL->[$K_opening]->[_CUMULATIVE_LENGTH_] >
10456                 $rOpts_maximum_line_length )
10457           )
10458         {
10459             DEBUG_XCI
10460               && print "XCI: Skipping seqno=$seqno, require different lines\n";
10461             next;
10462         }
10463
10464         # Do not apply -xci if adding extra ci will put the container contents
10465         # beyond the line length limit (fixes cases b899 b935)
10466         my $level    = $rLL->[$K_opening]->[_LEVEL_];
10467         my $ci_level = $rLL->[$K_opening]->[_CI_LEVEL_];
10468         my $maximum_text_length =
10469           $maximum_text_length_at_level[$level] -
10470           $ci_level * $rOpts_continuation_indentation;
10471
10472         # Fix for b1197 b1198 b1199 b1200 b1201 b1202
10473         # Do not apply -xci if we are running out of space
10474         if ( $level >= $stress_level_beta ) {
10475             DEBUG_XCI
10476               && print
10477 "XCI: Skipping seqno=$seqno, level=$level exceeds stress level=$stress_level_beta\n";
10478             next;
10479         }
10480
10481         # remember how much space is available for patch b1031 above
10482         my $space =
10483           $maximum_text_length - $len_tol - $rOpts_continuation_indentation;
10484
10485         if ( $space < 0 ) {
10486             DEBUG_XCI && print "XCI: Skipping seqno=$seqno, space=$space\n";
10487             next;
10488         }
10489         DEBUG_XCI && print "XCI: OK seqno=$seqno, space=$space\n";
10490
10491         $available_space{$seqno} = $space;
10492
10493         # This becomes the next controlling container
10494         push @seqno_stack, $seqno_top if ($seqno_top);
10495         $seqno_top = $seqno;
10496     }
10497     return;
10498 }
10499
10500 sub braces_left_setup {
10501
10502     # Called once per file to mark all -bl, -sbl, and -asbl containers
10503     my $self = shift;
10504
10505     my $rOpts_bl   = $rOpts->{'opening-brace-on-new-line'};
10506     my $rOpts_sbl  = $rOpts->{'opening-sub-brace-on-new-line'};
10507     my $rOpts_asbl = $rOpts->{'opening-anonymous-sub-brace-on-new-line'};
10508     return unless ( $rOpts_bl || $rOpts_sbl || $rOpts_asbl );
10509
10510     my $rLL = $self->[_rLL_];
10511     return unless ( defined($rLL) && @{$rLL} );
10512
10513     # We will turn on this hash for braces controlled by these flags:
10514     my $rbrace_left = $self->[_rbrace_left_];
10515
10516     my $rblock_type_of_seqno = $self->[_rblock_type_of_seqno_];
10517     my $ris_asub_block       = $self->[_ris_asub_block_];
10518     my $ris_sub_block        = $self->[_ris_sub_block_];
10519     foreach my $seqno ( keys %{$rblock_type_of_seqno} ) {
10520
10521         my $block_type = $rblock_type_of_seqno->{$seqno};
10522
10523         # use -asbl flag for an anonymous sub block
10524         if ( $ris_asub_block->{$seqno} ) {
10525             if ($rOpts_asbl) {
10526                 $rbrace_left->{$seqno} = 1;
10527             }
10528         }
10529
10530         # use -sbl flag for a named sub
10531         elsif ( $ris_sub_block->{$seqno} ) {
10532             if ($rOpts_sbl) {
10533                 $rbrace_left->{$seqno} = 1;
10534             }
10535         }
10536
10537         # use -bl flag if not a sub block of any type
10538         else {
10539             if (   $rOpts_bl
10540                 && $block_type =~ /$bl_pattern/
10541                 && $block_type !~ /$bl_exclusion_pattern/ )
10542             {
10543                 $rbrace_left->{$seqno} = 1;
10544             }
10545         }
10546     }
10547     return;
10548 }
10549
10550 sub bli_adjustment {
10551
10552     # Called once per file to implement the --brace-left-and-indent option.
10553     # If -bli is set, adds one continuation indentation for certain braces
10554     my $self = shift;
10555     return unless ( $rOpts->{'brace-left-and-indent'} );
10556     my $rLL = $self->[_rLL_];
10557     return unless ( defined($rLL) && @{$rLL} );
10558
10559     my $rblock_type_of_seqno = $self->[_rblock_type_of_seqno_];
10560     my $ris_bli_container    = $self->[_ris_bli_container_];
10561     my $rbrace_left          = $self->[_rbrace_left_];
10562     my $K_opening_container  = $self->[_K_opening_container_];
10563     my $K_closing_container  = $self->[_K_closing_container_];
10564
10565     foreach my $seqno ( keys %{$rblock_type_of_seqno} ) {
10566         my $block_type = $rblock_type_of_seqno->{$seqno};
10567         if (   $block_type
10568             && $block_type =~ /$bli_pattern/
10569             && $block_type !~ /$bli_exclusion_pattern/ )
10570         {
10571             $ris_bli_container->{$seqno} = 1;
10572             $rbrace_left->{$seqno}       = 1;
10573             my $Ko = $K_opening_container->{$seqno};
10574             my $Kc = $K_closing_container->{$seqno};
10575             if ( defined($Ko) && defined($Kc) ) {
10576                 $rLL->[$Kc]->[_CI_LEVEL_] = ++$rLL->[$Ko]->[_CI_LEVEL_];
10577             }
10578         }
10579     }
10580     return;
10581 }
10582
10583 sub find_multiline_qw {
10584
10585     my $self = shift;
10586
10587     # Multiline qw quotes are not sequenced items like containers { [ (
10588     # but behave in some respects in a similar way. So this routine finds them
10589     # and creates a separate sequence number system for later use.
10590
10591     # This is straightforward because they always begin at the end of one line
10592     # and and at the beginning of a later line. This is true no matter how we
10593     # finally make our line breaks, so we can find them before deciding on new
10594     # line breaks.
10595
10596     my $rstarting_multiline_qw_seqno_by_K = {};
10597     my $rending_multiline_qw_seqno_by_K   = {};
10598     my $rKrange_multiline_qw_by_seqno     = {};
10599     my $rmultiline_qw_has_extra_level     = {};
10600
10601     my $ris_excluded_lp_container = $self->[_ris_excluded_lp_container_];
10602
10603     my $rlines = $self->[_rlines_];
10604     my $rLL    = $self->[_rLL_];
10605     my $qw_seqno;
10606     my $num_qw_seqno = 0;
10607     my $K_start_multiline_qw;
10608
10609     foreach my $line_of_tokens ( @{$rlines} ) {
10610
10611         my $line_type = $line_of_tokens->{_line_type};
10612         next unless ( $line_type eq 'CODE' );
10613         my $rK_range = $line_of_tokens->{_rK_range};
10614         my ( $Kfirst, $Klast ) = @{$rK_range};
10615         next unless ( defined($Kfirst) && defined($Klast) );   # skip blank line
10616         if ( defined($K_start_multiline_qw) ) {
10617             my $type = $rLL->[$Kfirst]->[_TYPE_];
10618
10619             # shouldn't happen
10620             if ( $type ne 'q' ) {
10621                 DEVEL_MODE && print STDERR <<EOM;
10622 STRANGE: started multiline qw at K=$K_start_multiline_qw but didn't see q qw at K=$Kfirst\n";
10623 EOM
10624                 $K_start_multiline_qw = undef;
10625                 next;
10626             }
10627             my $Kprev  = $self->K_previous_nonblank($Kfirst);
10628             my $Knext  = $self->K_next_nonblank($Kfirst);
10629             my $type_m = defined($Kprev) ? $rLL->[$Kprev]->[_TYPE_] : 'b';
10630             my $type_p = defined($Knext) ? $rLL->[$Knext]->[_TYPE_] : 'b';
10631             if ( $type_m eq 'q' && $type_p ne 'q' ) {
10632                 $rending_multiline_qw_seqno_by_K->{$Kfirst} = $qw_seqno;
10633                 $rKrange_multiline_qw_by_seqno->{$qw_seqno} =
10634                   [ $K_start_multiline_qw, $Kfirst ];
10635                 $K_start_multiline_qw = undef;
10636                 $qw_seqno             = undef;
10637             }
10638         }
10639         if ( !defined($K_start_multiline_qw)
10640             && $rLL->[$Klast]->[_TYPE_] eq 'q' )
10641         {
10642             my $Kprev  = $self->K_previous_nonblank($Klast);
10643             my $Knext  = $self->K_next_nonblank($Klast);
10644             my $type_m = defined($Kprev) ? $rLL->[$Kprev]->[_TYPE_] : 'b';
10645             my $type_p = defined($Knext) ? $rLL->[$Knext]->[_TYPE_] : 'b';
10646             if ( $type_m ne 'q' && $type_p eq 'q' ) {
10647                 $num_qw_seqno++;
10648                 $qw_seqno             = 'q' . $num_qw_seqno;
10649                 $K_start_multiline_qw = $Klast;
10650                 $rstarting_multiline_qw_seqno_by_K->{$Klast} = $qw_seqno;
10651             }
10652         }
10653     }
10654
10655     # Give multiline qw lists extra indentation instead of CI.  This option
10656     # works well but is currently only activated when the -xci flag is set.
10657     # The reason is to avoid unexpected changes in formatting.
10658     if ($rOpts_extended_continuation_indentation) {
10659         while ( my ( $qw_seqno, $rKrange ) =
10660             each %{$rKrange_multiline_qw_by_seqno} )
10661         {
10662             my ( $Kbeg, $Kend ) = @{$rKrange};
10663
10664             # require isolated closing token
10665             my $token_end = $rLL->[$Kend]->[_TOKEN_];
10666             next
10667               unless ( length($token_end) == 1
10668                 && ( $is_closing_token{$token_end} || $token_end eq '>' ) );
10669
10670             # require isolated opening token
10671             my $token_beg = $rLL->[$Kbeg]->[_TOKEN_];
10672
10673             # allow space(s) after the qw
10674             if ( length($token_beg) > 3 && substr( $token_beg, 2, 1 ) =~ m/\s/ )
10675             {
10676                 $token_beg =~ s/\s+//;
10677             }
10678
10679             next unless ( length($token_beg) == 3 );
10680
10681             foreach my $KK ( $Kbeg + 1 .. $Kend - 1 ) {
10682                 $rLL->[$KK]->[_LEVEL_]++;
10683                 $rLL->[$KK]->[_CI_LEVEL_] = 0;
10684             }
10685
10686             # set flag for -wn option, which will remove the level
10687             $rmultiline_qw_has_extra_level->{$qw_seqno} = 1;
10688         }
10689     }
10690
10691     # For the -lp option we need to mark all parent containers of
10692     # multiline quotes
10693     if ( $rOpts_line_up_parentheses && !$rOpts_extended_line_up_parentheses ) {
10694
10695         while ( my ( $qw_seqno, $rKrange ) =
10696             each %{$rKrange_multiline_qw_by_seqno} )
10697         {
10698             my ( $Kbeg, $Kend ) = @{$rKrange};
10699             my $parent_seqno = $self->parent_seqno_by_K($Kend);
10700             next unless ($parent_seqno);
10701
10702             # If the parent container exactly surrounds this qw, then -lp
10703             # formatting seems to work so we will not mark it.
10704             my $is_tightly_contained;
10705             my $Kn      = $self->K_next_nonblank($Kend);
10706             my $seqno_n = defined($Kn) ? $rLL->[$Kn]->[_TYPE_SEQUENCE_] : undef;
10707             if ( defined($seqno_n) && $seqno_n eq $parent_seqno ) {
10708
10709                 my $Kp = $self->K_previous_nonblank($Kbeg);
10710                 my $seqno_p =
10711                   defined($Kp) ? $rLL->[$Kp]->[_TYPE_SEQUENCE_] : undef;
10712                 if ( defined($seqno_p) && $seqno_p eq $parent_seqno ) {
10713                     $is_tightly_contained = 1;
10714                 }
10715             }
10716
10717             $ris_excluded_lp_container->{$parent_seqno} = 1
10718               unless ($is_tightly_contained);
10719
10720             # continue up the tree marking parent containers
10721             while (1) {
10722                 $parent_seqno = $self->[_rparent_of_seqno_]->{$parent_seqno};
10723                 last
10724                   unless ( defined($parent_seqno)
10725                     && $parent_seqno ne SEQ_ROOT );
10726                 $ris_excluded_lp_container->{$parent_seqno} = 1;
10727             }
10728         }
10729     }
10730
10731     $self->[_rstarting_multiline_qw_seqno_by_K_] =
10732       $rstarting_multiline_qw_seqno_by_K;
10733     $self->[_rending_multiline_qw_seqno_by_K_] =
10734       $rending_multiline_qw_seqno_by_K;
10735     $self->[_rKrange_multiline_qw_by_seqno_] = $rKrange_multiline_qw_by_seqno;
10736     $self->[_rmultiline_qw_has_extra_level_] = $rmultiline_qw_has_extra_level;
10737
10738     return;
10739 }
10740
10741 use constant DEBUG_COLLAPSED_LENGTHS => 0;
10742
10743 # Minimum space reserved for contents of a code block.  A value of 40 has given
10744 # reasonable results.  With a large line length, say -l=120, this will not
10745 # normally be noticable but it will prevent making a mess in some edge cases.
10746 use constant MIN_BLOCK_LEN => 40;
10747
10748 my %is_handle_type;
10749
10750 BEGIN {
10751     my @q = qw( w C U G i k => );
10752     @is_handle_type{@q} = (1) x scalar(@q);
10753
10754     my $i = 0;
10755     use constant {
10756         _max_prong_len_         => $i++,
10757         _handle_len_            => $i++,
10758         _seqno_o_               => $i++,
10759         _iline_o_               => $i++,
10760         _K_o_                   => $i++,
10761         _K_c_                   => $i++,
10762         _interrupted_list_rule_ => $i++,
10763     };
10764 }
10765
10766 sub collapsed_lengths {
10767
10768     my $self = shift;
10769
10770     #----------------------------------------------------------------
10771     # Define the collapsed lengths of containers for -xlp indentation
10772     #----------------------------------------------------------------
10773
10774     # We need an estimate of the minimum required line length starting at any
10775     # opening container for the -xlp style. This is needed to avoid using too
10776     # much indentation space for lower level containers and thereby running
10777     # out of space for outer container tokens due to the maximum line length
10778     # limit.
10779
10780     # The basic idea is that at each node in the tree we imagine that we have a
10781     # fork with a handle and collapsable prongs:
10782     #
10783     #                            |------------
10784     #                            |--------
10785     #                ------------|-------
10786     #                 handle     |------------
10787     #                            |--------
10788     #                              prongs
10789     #
10790     # Each prong has a minimum collapsed length. The collapsed length at a node
10791     # is the maximum of these minimum lengths, plus the handle length.  Each of
10792     # the prongs may itself be a tree node.
10793
10794     # This is just a rough calculation to get an approximate starting point for
10795     # indentation.  Later routines will be more precise.  It is important that
10796     # these estimates be independent of the line breaks of the input stream in
10797     # order to avoid instabilities.
10798
10799     my $rLL                        = $self->[_rLL_];
10800     my $Klimit                     = $self->[_Klimit_];
10801     my $rlines                     = $self->[_rlines_];
10802     my $K_opening_container        = $self->[_K_opening_container_];
10803     my $K_closing_container        = $self->[_K_closing_container_];
10804     my $rblock_type_of_seqno       = $self->[_rblock_type_of_seqno_];
10805     my $rcollapsed_length_by_seqno = $self->[_rcollapsed_length_by_seqno_];
10806     my $ris_excluded_lp_container  = $self->[_ris_excluded_lp_container_];
10807     my $ris_permanently_broken     = $self->[_ris_permanently_broken_];
10808     my $ris_list_by_seqno          = $self->[_ris_list_by_seqno_];
10809     my $rhas_broken_list           = $self->[_rhas_broken_list_];
10810
10811     my $K_start_multiline_qw;
10812     my $level_start_multiline_qw = 0;
10813     my $max_prong_len            = 0;
10814     my $handle_len               = 0;
10815     my @stack;
10816     my $len                = 0;
10817     my $last_nonblank_type = 'b';
10818     push @stack,
10819       [ $max_prong_len, $handle_len, SEQ_ROOT, undef, undef, undef, undef ];
10820
10821     my $iline = -1;
10822     foreach my $line_of_tokens ( @{$rlines} ) {
10823         $iline++;
10824         my $line_type = $line_of_tokens->{_line_type};
10825         next if ( $line_type ne 'CODE' );
10826         my $CODE_type = $line_of_tokens->{_code_type};
10827
10828         # Always skip blank lines
10829         next if ( $CODE_type eq 'BL' );
10830
10831         # Note on other line types:
10832         # 'FS' (Format Skipping) lines may contain opening/closing tokens so
10833         #      we have to process them to keep the stack correctly sequenced.
10834         # 'VB' (Verbatim) lines could be skipped, but testing shows that
10835         #      results look better if we include their lengths.
10836
10837         # Also note that we could exclude -xlp formatting of containers with
10838         # 'FS' and 'VB' lines, but in testing that was not really beneficial.
10839
10840         # So we process tokens in 'FS' and 'VB' lines like all the rest...
10841
10842         my $rK_range = $line_of_tokens->{_rK_range};
10843         my ( $K_first, $K_last ) = @{$rK_range};
10844         next unless ( defined($K_first) && defined($K_last) );
10845
10846         my $has_comment = $rLL->[$K_last]->[_TYPE_] eq '#';
10847
10848         # Always ignore block comments
10849         next if ( $has_comment && $K_first == $K_last );
10850
10851         # Handle an intermediate line of a multiline qw quote. These may
10852         # require including some -ci or -i spaces.  See cases c098/x063.
10853         # Updated to check all lines (not just $K_first==$K_last) to fix b1316
10854         my $K_begin_loop = $K_first;
10855         if ( $rLL->[$K_first]->[_TYPE_] eq 'q' ) {
10856
10857             my $KK       = $K_first;
10858             my $level    = $rLL->[$KK]->[_LEVEL_];
10859             my $ci_level = $rLL->[$KK]->[_CI_LEVEL_];
10860
10861             # remember the level of the start
10862             if ( !defined($K_start_multiline_qw) ) {
10863                 $K_start_multiline_qw     = $K_first;
10864                 $level_start_multiline_qw = $level;
10865                 my $seqno_qw =
10866                   $self->[_rstarting_multiline_qw_seqno_by_K_]
10867                   ->{$K_start_multiline_qw};
10868                 if ( !$seqno_qw ) {
10869                     my $Kp = $self->K_previous_nonblank($K_first);
10870                     if ( defined($Kp) && $rLL->[$Kp]->[_TYPE_] eq 'q' ) {
10871
10872                         $K_start_multiline_qw = $Kp;
10873                         $level_start_multiline_qw =
10874                           $rLL->[$K_start_multiline_qw]->[_LEVEL_];
10875                     }
10876                 }
10877             }
10878
10879             $len = $rLL->[$KK]->[_CUMULATIVE_LENGTH_] -
10880               $rLL->[ $KK - 1 ]->[_CUMULATIVE_LENGTH_];
10881
10882             # We may have to add the spaces of one level or ci level ...  it
10883             # depends depends on the -xci flag, the -wn flag, and if the qw
10884             # uses a container token as the quote delimiter.
10885
10886             # First rule: add ci if there is a $ci_level
10887             if ($ci_level) {
10888                 $len += $rOpts_continuation_indentation;
10889             }
10890
10891             # Second rule: otherwise, look for an extra indentation level
10892             # from the start and add one indentation level if found.
10893             elsif ( $level > $level_start_multiline_qw ) {
10894                 $len += $rOpts_indent_columns;
10895             }
10896
10897             if ( $len > $max_prong_len ) { $max_prong_len = $len }
10898
10899             $last_nonblank_type = 'q';
10900
10901             $K_begin_loop = $K_first + 1;
10902
10903             # We can skip to the next line if more tokens
10904             next if ( $K_begin_loop > $K_last );
10905
10906         }
10907         $K_start_multiline_qw = undef;
10908
10909         # Find the terminal token, before any side comment
10910         my $K_terminal = $K_last;
10911         if ($has_comment) {
10912             $K_terminal -= 1;
10913             $K_terminal -= 1
10914               if ( $rLL->[$K_terminal]->[_TYPE_] eq 'b'
10915                 && $K_terminal > $K_first );
10916         }
10917
10918         # Use length to terminal comma if interrupded list rule applies
10919         if ( @stack && $stack[-1]->[_interrupted_list_rule_] ) {
10920             my $K_c = $stack[-1]->[_K_c_];
10921             if (
10922                 defined($K_c)
10923                 && $rLL->[$K_terminal]->[_TYPE_] eq ','
10924
10925                 # Ignore a terminal comma, causes instability (b1297)
10926                 && (   $K_c - $K_terminal > 2
10927                     || $rLL->[ $K_terminal + 1 ]->[_TYPE_] eq 'b' )
10928               )
10929             {
10930                 my $Kend = $K_terminal;
10931
10932                 # This caused an instability in b1311 by making the result
10933                 # dependent on input.  It is not really necessary because the
10934                 # comment length is added at the end of the loop.
10935                 ##if ( $has_comment
10936                 ##    && !$rOpts_ignore_side_comment_lengths )
10937                 ##{
10938                 ##    $Kend = $K_last;
10939                 ##}
10940
10941                 $len = $rLL->[$Kend]->[_CUMULATIVE_LENGTH_] -
10942                   $rLL->[ $K_first - 1 ]->[_CUMULATIVE_LENGTH_];
10943
10944                 if ( $len > $max_prong_len ) { $max_prong_len = $len }
10945             }
10946         }
10947
10948         # Loop over tokens on this line ...
10949         foreach my $KK ( $K_begin_loop .. $K_terminal ) {
10950
10951             my $type = $rLL->[$KK]->[_TYPE_];
10952             next if ( $type eq 'b' );
10953
10954             #------------------------
10955             # Handle sequenced tokens
10956             #------------------------
10957             my $seqno = $rLL->[$KK]->[_TYPE_SEQUENCE_];
10958             if ($seqno) {
10959
10960                 my $token = $rLL->[$KK]->[_TOKEN_];
10961
10962                 #----------------------------
10963                 # Entering a new container...
10964                 #----------------------------
10965                 if ( $is_opening_token{$token} ) {
10966
10967                     # save current prong length
10968                     $stack[-1]->[_max_prong_len_] = $max_prong_len;
10969                     $max_prong_len = 0;
10970
10971                     # Start new prong one level deeper
10972                     my $handle_len = 0;
10973                     if ( $rblock_type_of_seqno->{$seqno} ) {
10974
10975                         # code blocks do not use -lp indentation, but behave as
10976                         # if they had a handle of one indentation length
10977                         $handle_len = $rOpts_indent_columns;
10978
10979                     }
10980                     elsif ( $is_handle_type{$last_nonblank_type} ) {
10981                         $handle_len = $len;
10982                         $handle_len += 1
10983                           if ( $KK > 0 && $rLL->[ $KK - 1 ]->[_TYPE_] eq 'b' );
10984                     }
10985
10986                     # Set a flag if the 'Interrupted List Rule' will be applied
10987                     # (see sub copy_old_breakpoints).
10988                     # - Added check on has_broken_list to fix issue b1298
10989
10990                     my $interrupted_list_rule =
10991                          $ris_permanently_broken->{$seqno}
10992                       && $ris_list_by_seqno->{$seqno}
10993                       && !$rhas_broken_list->{$seqno}
10994                       && !$rOpts_ignore_old_breakpoints;
10995
10996                     # NOTES: Since we are looking at old line numbers we have
10997                     # to be very careful not to introduce an instability.
10998
10999                     # This following causes instability (b1288-b1296):
11000                     #   $interrupted_list_rule ||=
11001                     #     $rOpts_break_at_old_comma_breakpoints;
11002
11003                     #  - We could turn off the interrupted list rule if there is
11004                     #    a broken sublist, to follow 'Compound List Rule 1'.
11005                     #  - We could use the _rhas_broken_list_ flag for this.
11006                     #  - But it seems safer not to do this, to avoid
11007                     #    instability, since the broken sublist could be
11008                     #    temporary.  It seems better to let the formatting
11009                     #    stabilize by itself after one or two iterations.
11010                     #  - So, not doing this for now
11011
11012                     # Include length to a comma ending this line
11013                     if (   $interrupted_list_rule
11014                         && $rLL->[$K_terminal]->[_TYPE_] eq ',' )
11015                     {
11016                         my $Kend = $K_terminal;
11017                         if ( $Kend < $K_last
11018                             && !$rOpts_ignore_side_comment_lengths )
11019                         {
11020                             $Kend = $K_last;
11021                         }
11022
11023                         # Measure from the next blank if any (fixes b1301)
11024                         my $Kbeg = $KK;
11025                         if (   $rLL->[ $Kbeg + 1 ]->[_TYPE_] eq 'b'
11026                             && $Kbeg < $Kend )
11027                         {
11028                             $Kbeg++;
11029                         }
11030
11031                         my $len = $rLL->[$Kend]->[_CUMULATIVE_LENGTH_] -
11032                           $rLL->[$Kbeg]->[_CUMULATIVE_LENGTH_];
11033                         if ( $len > $max_prong_len ) { $max_prong_len = $len }
11034                     }
11035
11036                     my $K_c = $K_closing_container->{$seqno};
11037
11038                     push @stack,
11039                       [
11040                         $max_prong_len, $handle_len,
11041                         $seqno,         $iline,
11042                         $KK,            $K_c,
11043                         $interrupted_list_rule
11044                       ];
11045                 }
11046
11047                 #--------------------
11048                 # Exiting a container
11049                 #--------------------
11050                 elsif ( $is_closing_token{$token} ) {
11051                     if (@stack) {
11052
11053                         # The current prong ends - get its handle
11054                         my $item          = pop @stack;
11055                         my $handle_len    = $item->[_handle_len_];
11056                         my $seqno_o       = $item->[_seqno_o_];
11057                         my $iline_o       = $item->[_iline_o_];
11058                         my $K_o           = $item->[_K_o_];
11059                         my $K_c_expect    = $item->[_K_c_];
11060                         my $collapsed_len = $max_prong_len;
11061
11062                         if ( $seqno_o ne $seqno ) {
11063
11064                             # Shouldn't happen - must have skipped some lines.
11065                             # Not fatal but -lp formatting could get messed up.
11066                             if (DEVEL_MODE) {
11067                                 Fault(<<EOM);
11068 sequence numbers differ; at CLOSING line $iline, seq=$seqno, Kc=$KK .. at OPENING line $iline_o, seq=$seqno_o, Ko=$K_o, expecting Kc=$K_c_expect
11069 EOM
11070                             }
11071                         }
11072
11073                         #------------------------------------------
11074                         # Rules to avoid scrunching code blocks ...
11075                         #------------------------------------------
11076                         # Some test cases:
11077                         # c098/x107 x108 x110 x112 x114 x115 x117 x118 x119
11078                         if ( $rblock_type_of_seqno->{$seqno} ) {
11079
11080                             my $K_c          = $KK;
11081                             my $block_length = MIN_BLOCK_LEN;
11082                             my $is_one_line_block;
11083                             my $level = $rLL->[$K_o]->[_LEVEL_];
11084                             if ( defined($K_o) && defined($K_c) ) {
11085                                 my $block_length =
11086                                   $rLL->[ $K_c - 1 ]->[_CUMULATIVE_LENGTH_] -
11087                                   $rLL->[$K_o]->[_CUMULATIVE_LENGTH_];
11088                                 $is_one_line_block = $iline == $iline_o;
11089                             }
11090
11091                             # Code block rule 1: Use the total block length if
11092                             # it is less than the minimum.
11093                             if ( $block_length < MIN_BLOCK_LEN ) {
11094                                 $collapsed_len = $block_length;
11095                             }
11096
11097                             # Code block rule 2: Use the full length of a
11098                             # one-line block to avoid breaking it, unless
11099                             # extremely long.  We do not need to do a precise
11100                             # check here, because if it breaks then it will
11101                             # stay broken on later iterations.
11102                             elsif ($is_one_line_block
11103                                 && $block_length <
11104                                 $maximum_line_length_at_level[$level] )
11105                             {
11106                                 $collapsed_len = $block_length;
11107                             }
11108
11109                             # Code block rule 3: Otherwise the length should be
11110                             # at least MIN_BLOCK_LEN to avoid scrunching code
11111                             # blocks.
11112                             elsif ( $collapsed_len < MIN_BLOCK_LEN ) {
11113                                 $collapsed_len = MIN_BLOCK_LEN;
11114                             }
11115                         }
11116
11117                         # Store the result.  Some extra space, '2', allows for
11118                         # length of an opening token, inside space, comma, ...
11119                         # This constant has been tuned to give good overall
11120                         # results.
11121                         $collapsed_len += 2;
11122                         $rcollapsed_length_by_seqno->{$seqno} = $collapsed_len;
11123
11124                         # Restart scanning the lower level prong
11125                         if (@stack) {
11126                             $max_prong_len = $stack[-1]->[_max_prong_len_];
11127                             $collapsed_len += $handle_len;
11128                             if ( $collapsed_len > $max_prong_len ) {
11129                                 $max_prong_len = $collapsed_len;
11130                             }
11131                         }
11132                     }
11133                 }
11134
11135                 # it is a ternary - no special processing for these yet
11136                 else {
11137
11138                 }
11139
11140                 $len                = 0;
11141                 $last_nonblank_type = $type;
11142                 next;
11143             }
11144
11145             #----------------------------
11146             # Handle non-container tokens
11147             #----------------------------
11148             my $token_length = $rLL->[$KK]->[_TOKEN_LENGTH_];
11149
11150             # Count lengths of things like 'xx => yy' as a single item
11151             if ( $type eq '=>' ) {
11152                 $len += $token_length + 1;
11153                 if ( $len > $max_prong_len ) { $max_prong_len = $len }
11154             }
11155             elsif ( $last_nonblank_type eq '=>' ) {
11156                 $len += $token_length;
11157                 if ( $len > $max_prong_len ) { $max_prong_len = $len }
11158
11159                 # but only include one => per item
11160                 if ( $last_nonblank_type eq '=>' ) { $len = $token_length }
11161             }
11162
11163             # include everthing to end of line after a here target
11164             elsif ( $type eq 'h' ) {
11165                 $len = $rLL->[$K_last]->[_CUMULATIVE_LENGTH_] -
11166                   $rLL->[ $KK - 1 ]->[_CUMULATIVE_LENGTH_];
11167                 if ( $len > $max_prong_len ) { $max_prong_len = $len }
11168             }
11169
11170             # for everything else just use the token length
11171             else {
11172                 $len = $token_length;
11173                 if ( $len > $max_prong_len ) { $max_prong_len = $len }
11174             }
11175             $last_nonblank_type = $type;
11176
11177         } ## end loop over tokens on this line
11178
11179         # Now take care of any side comment
11180         if ($has_comment) {
11181             if ($rOpts_ignore_side_comment_lengths) {
11182                 $len = 0;
11183             }
11184             else {
11185
11186                 # For a side comment when -iscl is not set, measure length from
11187                 # the start of the previous nonblank token
11188                 my $len0 =
11189                     $K_terminal > 0
11190                   ? $rLL->[ $K_terminal - 1 ]->[_CUMULATIVE_LENGTH_]
11191                   : 0;
11192                 $len = $rLL->[$K_last]->[_CUMULATIVE_LENGTH_] - $len0;
11193                 if ( $len > $max_prong_len ) { $max_prong_len = $len }
11194             }
11195         }
11196
11197     } ## end loop over lines
11198
11199     if (DEBUG_COLLAPSED_LENGTHS) {
11200         print "\nCollapsed lengths--\n";
11201         foreach
11202           my $key ( sort { $a <=> $b } keys %{$rcollapsed_length_by_seqno} )
11203         {
11204             my $clen = $rcollapsed_length_by_seqno->{$key};
11205             print "$key -> $clen\n";
11206         }
11207     }
11208
11209     return;
11210 }
11211
11212 sub is_excluded_lp {
11213
11214     # Decide if this container is excluded by user request:
11215     #  returns true if this token is excluded (i.e., may not use -lp)
11216     #  returns false otherwise
11217
11218     # The control hash can either describe:
11219     #   what to exclude:  $line_up_parentheses_control_is_lxpl = 1, or
11220     #   what to include:  $line_up_parentheses_control_is_lxpl = 0
11221
11222     my ( $self, $KK ) = @_;
11223     my $rLL         = $self->[_rLL_];
11224     my $rtoken_vars = $rLL->[$KK];
11225     my $token       = $rtoken_vars->[_TOKEN_];
11226     my $rflags      = $line_up_parentheses_control_hash{$token};
11227
11228     #-----------------------------------------------
11229     # TEST #1: check match to listed container types
11230     #-----------------------------------------------
11231     if ( !defined($rflags) ) {
11232
11233         # There is no entry for this container, so we are done
11234         return !$line_up_parentheses_control_is_lxpl;
11235     }
11236
11237     my ( $flag1, $flag2 ) = @{$rflags};
11238
11239     #-----------------------------------------------------------
11240     # TEST #2: check match to flag1, the preceding nonblank word
11241     #-----------------------------------------------------------
11242     my $match_flag1 = !defined($flag1) || $flag1 eq '*';
11243     if ( !$match_flag1 ) {
11244
11245         # Find the previous token
11246         my ( $is_f, $is_k, $is_w );
11247         my $Kp = $self->K_previous_nonblank($KK);
11248         if ( defined($Kp) ) {
11249             my $type_p = $rLL->[$Kp]->[_TYPE_];
11250             my $seqno  = $rtoken_vars->[_TYPE_SEQUENCE_];
11251
11252             # keyword?
11253             $is_k = $type_p eq 'k';
11254
11255             # function call?
11256             $is_f = $self->[_ris_function_call_paren_]->{$seqno};
11257
11258             # either keyword or function call?
11259             $is_w = $is_k || $is_f;
11260         }
11261
11262         # Check for match based on flag1 and the previous token:
11263         if    ( $flag1 eq 'k' ) { $match_flag1 = $is_k }
11264         elsif ( $flag1 eq 'K' ) { $match_flag1 = !$is_k }
11265         elsif ( $flag1 eq 'f' ) { $match_flag1 = $is_f }
11266         elsif ( $flag1 eq 'F' ) { $match_flag1 = !$is_f }
11267         elsif ( $flag1 eq 'w' ) { $match_flag1 = $is_w }
11268         elsif ( $flag1 eq 'W' ) { $match_flag1 = !$is_w }
11269     }
11270
11271     # See if we can exclude this based on the flag1 test...
11272     if ($line_up_parentheses_control_is_lxpl) {
11273         return 1 if ($match_flag1);
11274     }
11275     else {
11276         return 1 if ( !$match_flag1 );
11277     }
11278
11279     #-------------------------------------------------------------
11280     # TEST #3: exclusion based on flag2 and the container contents
11281     #-------------------------------------------------------------
11282
11283     # Note that this is an exclusion test for both -lpxl or -lpil input methods
11284     # The options are:
11285     #  0 or blank: ignore container contents
11286     #  1 exclude non-lists or lists with sublists
11287     #  2 same as 1 but also exclude lists with code blocks
11288
11289     my $match_flag2;
11290     if ($flag2) {
11291
11292         my $seqno = $rtoken_vars->[_TYPE_SEQUENCE_];
11293
11294         my $is_list        = $self->[_ris_list_by_seqno_]->{$seqno};
11295         my $has_list       = $self->[_rhas_list_]->{$seqno};
11296         my $has_code_block = $self->[_rhas_code_block_]->{$seqno};
11297         my $has_ternary    = $self->[_rhas_ternary_]->{$seqno};
11298
11299         if (  !$is_list
11300             || $has_list
11301             || $flag2 eq '2' && ( $has_code_block || $has_ternary ) )
11302         {
11303             $match_flag2 = 1;
11304         }
11305     }
11306     return $match_flag2;
11307 }
11308
11309 sub set_excluded_lp_containers {
11310
11311     my ($self) = @_;
11312     return unless ($rOpts_line_up_parentheses);
11313     my $rLL = $self->[_rLL_];
11314     return unless ( defined($rLL) && @{$rLL} );
11315
11316     my $K_opening_container       = $self->[_K_opening_container_];
11317     my $ris_excluded_lp_container = $self->[_ris_excluded_lp_container_];
11318     my $rblock_type_of_seqno      = $self->[_rblock_type_of_seqno_];
11319
11320     foreach my $seqno ( keys %{$K_opening_container} ) {
11321
11322         # code blocks are always excluded by the -lp coding so we can skip them
11323         next if ( $rblock_type_of_seqno->{$seqno} );
11324
11325         my $KK = $K_opening_container->{$seqno};
11326         next unless defined($KK);
11327
11328         # see if a user exclusion rule turns off -lp for this container
11329         if ( $self->is_excluded_lp($KK) ) {
11330             $ris_excluded_lp_container->{$seqno} = 1;
11331         }
11332     }
11333     return;
11334 }
11335
11336 ######################################
11337 # CODE SECTION 6: Process line-by-line
11338 ######################################
11339
11340 sub process_all_lines {
11341
11342     #----------------------------------------------------------
11343     # Main loop to format all lines of a file according to type
11344     #----------------------------------------------------------
11345
11346     my $self                       = shift;
11347     my $rlines                     = $self->[_rlines_];
11348     my $sink_object                = $self->[_sink_object_];
11349     my $fh_tee                     = $self->[_fh_tee_];
11350     my $rOpts_keep_old_blank_lines = $rOpts->{'keep-old-blank-lines'};
11351     my $file_writer_object         = $self->[_file_writer_object_];
11352     my $logger_object              = $self->[_logger_object_];
11353     my $vertical_aligner_object    = $self->[_vertical_aligner_object_];
11354     my $save_logfile               = $self->[_save_logfile_];
11355
11356     # Note for RT#118553, leave only one newline at the end of a file.
11357     # Example code to do this is in comments below:
11358     # my $Opt_trim_ending_blank_lines = 0;
11359     # if ($Opt_trim_ending_blank_lines) {
11360     #     while ( my $line_of_tokens = pop @{$rlines} ) {
11361     #         my $line_type = $line_of_tokens->{_line_type};
11362     #         if ( $line_type eq 'CODE' ) {
11363     #             my $CODE_type = $line_of_tokens->{_code_type};
11364     #             next if ( $CODE_type eq 'BL' );
11365     #         }
11366     #         push @{$rlines}, $line_of_tokens;
11367     #         last;
11368     #     }
11369     # }
11370
11371    # But while this would be a trivial update, it would have very undesirable
11372    # side effects when perltidy is run from within an editor on a small snippet.
11373    # So this is best done with a separate filter, such
11374    # as 'delete_ending_blank_lines.pl' in the examples folder.
11375
11376     # Flag to prevent blank lines when POD occurs in a format skipping sect.
11377     my $in_format_skipping_section;
11378
11379     # set locations for blanks around long runs of keywords
11380     my $rwant_blank_line_after = $self->keyword_group_scan();
11381
11382     my $line_type      = "";
11383     my $i_last_POD_END = -10;
11384     my $i              = -1;
11385     foreach my $line_of_tokens ( @{$rlines} ) {
11386         $i++;
11387
11388         # insert blank lines requested for keyword sequences
11389         if (   $i > 0
11390             && defined( $rwant_blank_line_after->{ $i - 1 } )
11391             && $rwant_blank_line_after->{ $i - 1 } == 1 )
11392         {
11393             $self->want_blank_line();
11394         }
11395
11396         my $last_line_type = $line_type;
11397         $line_type = $line_of_tokens->{_line_type};
11398         my $input_line = $line_of_tokens->{_line_text};
11399
11400         # _line_type codes are:
11401         #   SYSTEM         - system-specific code before hash-bang line
11402         #   CODE           - line of perl code (including comments)
11403         #   POD_START      - line starting pod, such as '=head'
11404         #   POD            - pod documentation text
11405         #   POD_END        - last line of pod section, '=cut'
11406         #   HERE           - text of here-document
11407         #   HERE_END       - last line of here-doc (target word)
11408         #   FORMAT         - format section
11409         #   FORMAT_END     - last line of format section, '.'
11410         #   SKIP           - code skipping section
11411         #   SKIP_END       - last line of code skipping section, '#>>V'
11412         #   DATA_START     - __DATA__ line
11413         #   DATA           - unidentified text following __DATA__
11414         #   END_START      - __END__ line
11415         #   END            - unidentified text following __END__
11416         #   ERROR          - we are in big trouble, probably not a perl script
11417
11418         # put a blank line after an =cut which comes before __END__ and __DATA__
11419         # (required by podchecker)
11420         if ( $last_line_type eq 'POD_END' && !$self->[_saw_END_or_DATA_] ) {
11421             $i_last_POD_END = $i;
11422             $file_writer_object->reset_consecutive_blank_lines();
11423             if ( !$in_format_skipping_section && $input_line !~ /^\s*$/ ) {
11424                 $self->want_blank_line();
11425             }
11426         }
11427
11428         # handle line of code..
11429         if ( $line_type eq 'CODE' ) {
11430
11431             my $CODE_type = $line_of_tokens->{_code_type};
11432             $in_format_skipping_section = $CODE_type eq 'FS';
11433
11434             # Handle blank lines
11435             if ( $CODE_type eq 'BL' ) {
11436
11437                 # Keep this blank? Start with the flag -kbl=n, where
11438                 #   n=0 ignore all old blank lines
11439                 #   n=1 stable: keep old blanks, but limited by -mbl=n
11440                 #   n=2 keep all old blank lines, regardless of -mbl=n
11441                 # If n=0 we delete all old blank lines and let blank line
11442                 # rules generate any needed blank lines.
11443                 my $kgb_keep = $rOpts_keep_old_blank_lines;
11444
11445                 # Then delete lines requested by the keyword-group logic if
11446                 # allowed
11447                 if (   $kgb_keep == 1
11448                     && defined( $rwant_blank_line_after->{$i} )
11449                     && $rwant_blank_line_after->{$i} == 2 )
11450                 {
11451                     $kgb_keep = 0;
11452                 }
11453
11454                 # But always keep a blank line following an =cut
11455                 if ( $i - $i_last_POD_END < 3 && !$kgb_keep ) {
11456                     $kgb_keep = 1;
11457                 }
11458
11459                 if ($kgb_keep) {
11460                     $self->flush($CODE_type);
11461                     $file_writer_object->write_blank_code_line(
11462                         $rOpts_keep_old_blank_lines == 2 );
11463                     $self->[_last_line_leading_type_] = 'b';
11464                 }
11465                 next;
11466             }
11467             else {
11468
11469                 # Let logger see all non-blank lines of code. This is a slow
11470                 # operation so we avoid it if it is not going to be saved.
11471                 if ( $save_logfile && $logger_object ) {
11472                     $logger_object->black_box( $line_of_tokens,
11473                         $vertical_aligner_object->get_output_line_number );
11474                 }
11475             }
11476
11477             # Handle Format Skipping (FS) and Verbatim (VB) Lines
11478             if ( $CODE_type eq 'VB' || $CODE_type eq 'FS' ) {
11479                 $self->write_unindented_line("$input_line");
11480                 $file_writer_object->reset_consecutive_blank_lines();
11481                 next;
11482             }
11483
11484             # Handle all other lines of code
11485             $self->process_line_of_CODE($line_of_tokens);
11486         }
11487
11488         # handle line of non-code..
11489         else {
11490
11491             # set special flags
11492             my $skip_line = 0;
11493             if ( substr( $line_type, 0, 3 ) eq 'POD' ) {
11494
11495                 # Pod docs should have a preceding blank line.  But stay
11496                 # out of __END__ and __DATA__ sections, because
11497                 # the user may be using this section for any purpose whatsoever
11498                 if ( $rOpts->{'delete-pod'} ) { $skip_line = 1; }
11499                 if ( $rOpts->{'trim-pod'} )   { $input_line =~ s/\s+$// }
11500                 if (   !$skip_line
11501                     && !$in_format_skipping_section
11502                     && $line_type eq 'POD_START'
11503                     && !$self->[_saw_END_or_DATA_] )
11504                 {
11505                     $self->want_blank_line();
11506                 }
11507             }
11508
11509             # leave the blank counters in a predictable state
11510             # after __END__ or __DATA__
11511             elsif ( $line_type eq 'END_START' || $line_type eq 'DATA_START' ) {
11512                 $file_writer_object->reset_consecutive_blank_lines();
11513                 $self->[_saw_END_or_DATA_] = 1;
11514             }
11515
11516             # Patch to avoid losing blank lines after a code-skipping block;
11517             # fixes case c047.
11518             elsif ( $line_type eq 'SKIP_END' ) {
11519                 $file_writer_object->reset_consecutive_blank_lines();
11520             }
11521
11522             # write unindented non-code line
11523             if ( !$skip_line ) {
11524                 $self->write_unindented_line($input_line);
11525             }
11526         }
11527     }
11528     return;
11529
11530 } ## end sub process_all_lines
11531
11532 sub keyword_group_scan {
11533     my $self = shift;
11534
11535     #-------------------------------------------------------------------------
11536     # Called once per file to process any --keyword-group-blanks-* parameters.
11537     #-------------------------------------------------------------------------
11538
11539     # Manipulate blank lines around keyword groups (kgb* flags)
11540     # Scan all lines looking for runs of consecutive lines beginning with
11541     # selected keywords.  Example keywords are 'my', 'our', 'local', ... but
11542     # they may be anything.  We will set flags requesting that blanks be
11543     # inserted around and within them according to input parameters.  Note
11544     # that we are scanning the lines as they came in in the input stream, so
11545     # they are not necessarily well formatted.
11546
11547     # The output of this sub is a return hash ref whose keys are the indexes of
11548     # lines after which we desire a blank line.  For line index i:
11549     #     $rhash_of_desires->{$i} = 1 means we want a blank line AFTER line $i
11550     #     $rhash_of_desires->{$i} = 2 means we want blank line $i removed
11551     my $rhash_of_desires = {};
11552
11553     # Nothing to do if no blanks can be output. This test added to fix
11554     # case b760.
11555     if ( !$rOpts_maximum_consecutive_blank_lines ) {
11556         return $rhash_of_desires;
11557     }
11558
11559     my $Opt_blanks_before = $rOpts->{'keyword-group-blanks-before'};   # '-kgbb'
11560     my $Opt_blanks_after  = $rOpts->{'keyword-group-blanks-after'};    # '-kgba'
11561     my $Opt_blanks_inside = $rOpts->{'keyword-group-blanks-inside'};   # '-kgbi'
11562     my $Opt_blanks_delete = $rOpts->{'keyword-group-blanks-delete'};   # '-kgbd'
11563     my $Opt_size          = $rOpts->{'keyword-group-blanks-size'};     # '-kgbs'
11564
11565     # A range of sizes can be input with decimal notation like 'min.max' with
11566     # any number of dots between the two numbers. Examples:
11567     #    string    =>    min    max  matches
11568     #    1.1             1      1    exactly 1
11569     #    1.3             1      3    1,2, or 3
11570     #    1..3            1      3    1,2, or 3
11571     #    5               5      -    5 or more
11572     #    6.              6      -    6 or more
11573     #    .2              -      2    up to 2
11574     #    1.0             1      0    nothing
11575     my ( $Opt_size_min, $Opt_size_max ) = split /\.+/, $Opt_size;
11576     if (   $Opt_size_min && $Opt_size_min !~ /^\d+$/
11577         || $Opt_size_max && $Opt_size_max !~ /^\d+$/ )
11578     {
11579         Warn(<<EOM);
11580 Unexpected value for -kgbs: '$Opt_size'; expecting 'min' or 'min.max';
11581 ignoring all -kgb flags
11582 EOM
11583
11584         # Turn this option off so that this message does not keep repeating
11585         # during iterations and other files.
11586         $rOpts->{'keyword-group-blanks-size'} = "";
11587         return $rhash_of_desires;
11588     }
11589     $Opt_size_min = 1 unless ($Opt_size_min);
11590
11591     if ( $Opt_size_max && $Opt_size_max < $Opt_size_min ) {
11592         return $rhash_of_desires;
11593     }
11594
11595     # codes for $Opt_blanks_before and $Opt_blanks_after:
11596     # 0 = never (delete if exist)
11597     # 1 = stable (keep unchanged)
11598     # 2 = always (insert if missing)
11599
11600     return $rhash_of_desires
11601       unless $Opt_size_min > 0
11602       && ( $Opt_blanks_before != 1
11603         || $Opt_blanks_after != 1
11604         || $Opt_blanks_inside
11605         || $Opt_blanks_delete );
11606
11607     my $Opt_pattern         = $keyword_group_list_pattern;
11608     my $Opt_comment_pattern = $keyword_group_list_comment_pattern;
11609     my $Opt_repeat_count =
11610       $rOpts->{'keyword-group-blanks-repeat-count'};    # '-kgbr'
11611
11612     my $rlines              = $self->[_rlines_];
11613     my $rLL                 = $self->[_rLL_];
11614     my $K_closing_container = $self->[_K_closing_container_];
11615     my $K_opening_container = $self->[_K_opening_container_];
11616     my $rK_weld_right       = $self->[_rK_weld_right_];
11617
11618     # variables for the current group and subgroups:
11619     my ( $ibeg, $iend, $count, $level_beg, $K_closing, @iblanks, @group,
11620         @subgroup );
11621
11622     # Definitions:
11623     # ($ibeg, $iend) = starting and ending line indexes of this entire group
11624     #         $count = total number of keywords seen in this entire group
11625     #     $level_beg = indententation level of this group
11626     #         @group = [ $i, $token, $count ] =list of all keywords & blanks
11627     #      @subgroup =  $j, index of group where token changes
11628     #       @iblanks = line indexes of blank lines in input stream in this group
11629     #  where i=starting line index
11630     #        token (the keyword)
11631     #        count = number of this token in this subgroup
11632     #            j = index in group where token changes
11633     #
11634     # These vars will contain values for the most recently seen line:
11635     my ( $line_type, $CODE_type, $K_first, $K_last );
11636
11637     my $number_of_groups_seen = 0;
11638
11639     #-------------------
11640     # helper subroutines
11641     #-------------------
11642
11643     my $insert_blank_after = sub {
11644         my ($i) = @_;
11645         $rhash_of_desires->{$i} = 1;
11646         my $ip = $i + 1;
11647         if ( defined( $rhash_of_desires->{$ip} )
11648             && $rhash_of_desires->{$ip} == 2 )
11649         {
11650             $rhash_of_desires->{$ip} = 0;
11651         }
11652         return;
11653     };
11654
11655     my $split_into_sub_groups = sub {
11656
11657         # place blanks around long sub-groups of keywords
11658         # ...if requested
11659         return unless ($Opt_blanks_inside);
11660
11661         # loop over sub-groups, index k
11662         push @subgroup, scalar @group;
11663         my $kbeg = 1;
11664         my $kend = @subgroup - 1;
11665         for ( my $k = $kbeg ; $k <= $kend ; $k++ ) {
11666
11667             # index j runs through all keywords found
11668             my $j_b = $subgroup[ $k - 1 ];
11669             my $j_e = $subgroup[$k] - 1;
11670
11671             # index i is the actual line number of a keyword
11672             my ( $i_b, $tok_b, $count_b ) = @{ $group[$j_b] };
11673             my ( $i_e, $tok_e, $count_e ) = @{ $group[$j_e] };
11674             my $num = $count_e - $count_b + 1;
11675
11676             # This subgroup runs from line $ib to line $ie-1, but may contain
11677             # blank lines
11678             if ( $num >= $Opt_size_min ) {
11679
11680                 # if there are blank lines, we require that at least $num lines
11681                 # be non-blank up to the boundary with the next subgroup.
11682                 my $nog_b = my $nog_e = 1;
11683                 if ( @iblanks && !$Opt_blanks_delete ) {
11684                     my $j_bb = $j_b + $num - 1;
11685                     my ( $i_bb, $tok_bb, $count_bb ) = @{ $group[$j_bb] };
11686                     $nog_b = $count_bb - $count_b + 1 == $num;
11687
11688                     my $j_ee = $j_e - ( $num - 1 );
11689                     my ( $i_ee, $tok_ee, $count_ee ) = @{ $group[$j_ee] };
11690                     $nog_e = $count_e - $count_ee + 1 == $num;
11691                 }
11692                 if ( $nog_b && $k > $kbeg ) {
11693                     $insert_blank_after->( $i_b - 1 );
11694                 }
11695                 if ( $nog_e && $k < $kend ) {
11696                     my ( $i_ep, $tok_ep, $count_ep ) = @{ $group[ $j_e + 1 ] };
11697                     $insert_blank_after->( $i_ep - 1 );
11698                 }
11699             }
11700         }
11701         return;
11702     };
11703
11704     my $delete_if_blank = sub {
11705         my ($i) = @_;
11706
11707         # delete line $i if it is blank
11708         return unless ( $i >= 0 && $i < @{$rlines} );
11709         my $line_type = $rlines->[$i]->{_line_type};
11710         return if ( $line_type ne 'CODE' );
11711         my $code_type = $rlines->[$i]->{_code_type};
11712         if ( $code_type eq 'BL' ) { $rhash_of_desires->{$i} = 2; }
11713         return;
11714     };
11715
11716     my $delete_inner_blank_lines = sub {
11717
11718         # always remove unwanted trailing blank lines from our list
11719         return unless (@iblanks);
11720         while ( my $ibl = pop(@iblanks) ) {
11721             if ( $ibl < $iend ) { push @iblanks, $ibl; last }
11722             $iend = $ibl;
11723         }
11724
11725         # now mark mark interior blank lines for deletion if requested
11726         return unless ($Opt_blanks_delete);
11727
11728         while ( my $ibl = pop(@iblanks) ) { $rhash_of_desires->{$ibl} = 2 }
11729
11730         return;
11731     };
11732
11733     my $end_group = sub {
11734
11735         # end a group of keywords
11736         my ($bad_ending) = @_;
11737         if ( defined($ibeg) && $ibeg >= 0 ) {
11738
11739             # then handle sufficiently large groups
11740             if ( $count >= $Opt_size_min ) {
11741
11742                 $number_of_groups_seen++;
11743
11744                 # do any blank deletions regardless of the count
11745                 $delete_inner_blank_lines->();
11746
11747                 if ( $ibeg > 0 ) {
11748                     my $code_type = $rlines->[ $ibeg - 1 ]->{_code_type};
11749
11750                     # patch for hash bang line which is not currently marked as
11751                     # a comment; mark it as a comment
11752                     if ( $ibeg == 1 && !$code_type ) {
11753                         my $line_text = $rlines->[ $ibeg - 1 ]->{_line_text};
11754                         $code_type = 'BC'
11755                           if ( $line_text && $line_text =~ /^#/ );
11756                     }
11757
11758                     # Do not insert a blank after a comment
11759                     # (this could be subject to a flag in the future)
11760                     if ( $code_type !~ /(BC|SBC|SBCX)/ ) {
11761                         if ( $Opt_blanks_before == INSERT ) {
11762                             $insert_blank_after->( $ibeg - 1 );
11763
11764                         }
11765                         elsif ( $Opt_blanks_before == DELETE ) {
11766                             $delete_if_blank->( $ibeg - 1 );
11767                         }
11768                     }
11769                 }
11770
11771                 # We will only put blanks before code lines. We could loosen
11772                 # this rule a little, but we have to be very careful because
11773                 # for example we certainly don't want to drop a blank line
11774                 # after a line like this:
11775                 #   my $var = <<EOM;
11776                 if ( $line_type eq 'CODE' && defined($K_first) ) {
11777
11778                     # - Do not put a blank before a line of different level
11779                     # - Do not put a blank line if we ended the search badly
11780                     # - Do not put a blank at the end of the file
11781                     # - Do not put a blank line before a hanging side comment
11782                     my $level    = $rLL->[$K_first]->[_LEVEL_];
11783                     my $ci_level = $rLL->[$K_first]->[_CI_LEVEL_];
11784
11785                     if (   $level == $level_beg
11786                         && $ci_level == 0
11787                         && !$bad_ending
11788                         && $iend < @{$rlines}
11789                         && $CODE_type ne 'HSC' )
11790                     {
11791                         if ( $Opt_blanks_after == INSERT ) {
11792                             $insert_blank_after->($iend);
11793                         }
11794                         elsif ( $Opt_blanks_after == DELETE ) {
11795                             $delete_if_blank->( $iend + 1 );
11796                         }
11797                     }
11798                 }
11799             }
11800             $split_into_sub_groups->();
11801         }
11802
11803         # reset for another group
11804         $ibeg      = -1;
11805         $iend      = undef;
11806         $level_beg = -1;
11807         $K_closing = undef;
11808         @group     = ();
11809         @subgroup  = ();
11810         @iblanks   = ();
11811
11812         return;
11813     };
11814
11815     my $find_container_end = sub {
11816
11817         # If the keyword line is continued onto subsequent lines, find the
11818         # closing token '$K_closing' so that we can easily skip past the
11819         # contents of the container.
11820
11821         # We only set this value if we find a simple list, meaning
11822         # -contents only one level deep
11823         # -not welded
11824
11825         # First check: skip if next line is not one deeper
11826         my $Knext_nonblank = $self->K_next_nonblank($K_last);
11827         goto RETURN if ( !defined($Knext_nonblank) );
11828         my $level_next = $rLL->[$Knext_nonblank]->[_LEVEL_];
11829         goto RETURN if ( $level_next != $level_beg + 1 );
11830
11831         # Find the parent container of the first token on the next line
11832         my $parent_seqno = $self->parent_seqno_by_K($Knext_nonblank);
11833         goto RETURN unless ( defined($parent_seqno) );
11834
11835         # Must not be a weld (can be unstable)
11836         goto RETURN
11837           if ( $total_weld_count && $self->is_welded_at_seqno($parent_seqno) );
11838
11839         # Opening container must exist and be on this line
11840         my $Ko = $K_opening_container->{$parent_seqno};
11841         goto RETURN unless ( defined($Ko) && $Ko > $K_first && $Ko <= $K_last );
11842
11843         # Verify that the closing container exists and is on a later line
11844         my $Kc = $K_closing_container->{$parent_seqno};
11845         goto RETURN unless ( defined($Kc) && $Kc > $K_last );
11846
11847         # That's it
11848         $K_closing = $Kc;
11849         goto RETURN;
11850
11851       RETURN:
11852         return;
11853     };
11854
11855     my $add_to_group = sub {
11856         my ( $i, $token, $level ) = @_;
11857
11858         # End the previous group if we have reached the maximum
11859         # group size
11860         if ( $Opt_size_max && @group >= $Opt_size_max ) {
11861             $end_group->();
11862         }
11863
11864         if ( @group == 0 ) {
11865             $ibeg      = $i;
11866             $level_beg = $level;
11867             $count     = 0;
11868         }
11869
11870         $count++;
11871         $iend = $i;
11872
11873         # New sub-group?
11874         if ( !@group || $token ne $group[-1]->[1] ) {
11875             push @subgroup, scalar(@group);
11876         }
11877         push @group, [ $i, $token, $count ];
11878
11879         # remember if this line ends in an open container
11880         $find_container_end->();
11881
11882         return;
11883     };
11884
11885     #----------------------------------
11886     # loop over all lines of the source
11887     #----------------------------------
11888     $end_group->();
11889     my $i = -1;
11890     foreach my $line_of_tokens ( @{$rlines} ) {
11891
11892         $i++;
11893         last
11894           if ( $Opt_repeat_count > 0
11895             && $number_of_groups_seen >= $Opt_repeat_count );
11896
11897         $CODE_type = "";
11898         $K_first   = undef;
11899         $K_last    = undef;
11900         $line_type = $line_of_tokens->{_line_type};
11901
11902         # always end a group at non-CODE
11903         if ( $line_type ne 'CODE' ) { $end_group->(); next }
11904
11905         $CODE_type = $line_of_tokens->{_code_type};
11906
11907         # end any group at a format skipping line
11908         if ( $CODE_type && $CODE_type eq 'FS' ) {
11909             $end_group->();
11910             next;
11911         }
11912
11913         # continue in a verbatim (VB) type; it may be quoted text
11914         if ( $CODE_type eq 'VB' ) {
11915             if ( $ibeg >= 0 ) { $iend = $i; }
11916             next;
11917         }
11918
11919         # and continue in blank (BL) types
11920         if ( $CODE_type eq 'BL' ) {
11921             if ( $ibeg >= 0 ) {
11922                 $iend = $i;
11923                 push @{iblanks}, $i;
11924
11925                 # propagate current subgroup token
11926                 my $tok = $group[-1]->[1];
11927                 push @group, [ $i, $tok, $count ];
11928             }
11929             next;
11930         }
11931
11932         # examine the first token of this line
11933         my $rK_range = $line_of_tokens->{_rK_range};
11934         ( $K_first, $K_last ) = @{$rK_range};
11935         if ( !defined($K_first) ) {
11936
11937             # Somewhat unexpected blank line..
11938             # $rK_range is normally defined for line type CODE, but this can
11939             # happen for example if the input line was a single semicolon which
11940             # is being deleted.  In that case there was code in the input
11941             # file but it is not being retained. So we can silently return.
11942             return $rhash_of_desires;
11943         }
11944
11945         my $level    = $rLL->[$K_first]->[_LEVEL_];
11946         my $type     = $rLL->[$K_first]->[_TYPE_];
11947         my $token    = $rLL->[$K_first]->[_TOKEN_];
11948         my $ci_level = $rLL->[$K_first]->[_CI_LEVEL_];
11949
11950         # End a group 'badly' at an unexpected level.  This will prevent
11951         # blank lines being incorrectly placed after the end of the group.
11952         # We are looking for any deviation from two acceptable patterns:
11953         #   PATTERN 1: a simple list; secondary lines are at level+1
11954         #   PATTERN 2: a long statement; all secondary lines same level
11955         # This was added as a fix for case b1177, in which a complex structure
11956         # got incorrectly inserted blank lines.
11957         if ( $ibeg >= 0 ) {
11958
11959             # Check for deviation from PATTERN 1, simple list:
11960             if ( defined($K_closing) && $K_first < $K_closing ) {
11961                 $end_group->(1) if ( $level != $level_beg + 1 );
11962             }
11963
11964             # Check for deviation from PATTERN 2, single statement:
11965             elsif ( $level != $level_beg ) { $end_group->(1) }
11966         }
11967
11968         # Do not look for keywords in lists ( keyword 'my' can occur in lists,
11969         # see case b760); fixed for c048.
11970         if ( $self->is_list_by_K($K_first) ) {
11971             if ( $ibeg >= 0 ) { $iend = $i }
11972             next;
11973         }
11974
11975         # see if this is a code type we seek (i.e. comment)
11976         if (   $CODE_type
11977             && $Opt_comment_pattern
11978             && $CODE_type =~ /$Opt_comment_pattern/ )
11979         {
11980
11981             my $tok = $CODE_type;
11982
11983             # Continuing a group
11984             if ( $ibeg >= 0 && $level == $level_beg ) {
11985                 $add_to_group->( $i, $tok, $level );
11986             }
11987
11988             # Start new group
11989             else {
11990
11991                 # first end old group if any; we might be starting new
11992                 # keywords at different level
11993                 if ( $ibeg >= 0 ) { $end_group->(); }
11994                 $add_to_group->( $i, $tok, $level );
11995             }
11996             next;
11997         }
11998
11999         # See if it is a keyword we seek, but never start a group in a
12000         # continuation line; the code may be badly formatted.
12001         if (   $ci_level == 0
12002             && $type eq 'k'
12003             && $token =~ /$Opt_pattern/ )
12004         {
12005
12006             # Continuing a keyword group
12007             if ( $ibeg >= 0 && $level == $level_beg ) {
12008                 $add_to_group->( $i, $token, $level );
12009             }
12010
12011             # Start new keyword group
12012             else {
12013
12014                 # first end old group if any; we might be starting new
12015                 # keywords at different level
12016                 if ( $ibeg >= 0 ) { $end_group->(); }
12017                 $add_to_group->( $i, $token, $level );
12018             }
12019             next;
12020         }
12021
12022         # This is not one of our keywords, but we are in a keyword group
12023         # so see if we should continue or quit
12024         elsif ( $ibeg >= 0 ) {
12025
12026             # - bail out on a large level change; we may have walked into a
12027             #   data structure or anoymous sub code.
12028             if ( $level > $level_beg + 1 || $level < $level_beg ) {
12029                 $end_group->(1);
12030                 next;
12031             }
12032
12033             # - keep going on a continuation line of the same level, since
12034             #   it is probably a continuation of our previous keyword,
12035             # - and keep going past hanging side comments because we never
12036             #   want to interrupt them.
12037             if ( ( ( $level == $level_beg ) && $ci_level > 0 )
12038                 || $CODE_type eq 'HSC' )
12039             {
12040                 $iend = $i;
12041                 next;
12042             }
12043
12044             # - continue if if we are within in a container which started with
12045             # the line of the previous keyword.
12046             if ( defined($K_closing) && $K_first <= $K_closing ) {
12047
12048                 # continue if entire line is within container
12049                 if ( $K_last <= $K_closing ) { $iend = $i; next }
12050
12051                 # continue at ); or }; or ];
12052                 my $KK = $K_closing + 1;
12053                 if ( $rLL->[$KK]->[_TYPE_] eq ';' ) {
12054                     if ( $KK < $K_last ) {
12055                         if ( $rLL->[ ++$KK ]->[_TYPE_] eq 'b' ) { ++$KK }
12056                         if ( $KK > $K_last || $rLL->[$KK]->[_TYPE_] ne '#' ) {
12057                             $end_group->(1);
12058                             next;
12059                         }
12060                     }
12061                     $iend = $i;
12062                     next;
12063                 }
12064
12065                 $end_group->(1);
12066                 next;
12067             }
12068
12069             # - end the group if none of the above
12070             $end_group->();
12071             next;
12072         }
12073
12074         # not in a keyword group; continue
12075         else { next }
12076     }
12077
12078     # end of loop over all lines
12079     $end_group->();
12080     return $rhash_of_desires;
12081
12082 } ## end sub keyword_group_scan
12083
12084 #######################################
12085 # CODE SECTION 7: Process lines of code
12086 #######################################
12087
12088 {    ## begin closure process_line_of_CODE
12089
12090     # The routines in this closure receive lines of code and combine them into
12091     # 'batches' and send them along. A 'batch' is the unit of code which can be
12092     # processed further as a unit. It has the property that it is the largest
12093     # amount of code into which which perltidy is free to place one or more
12094     # line breaks within it without violating any constraints.
12095
12096     # When a new batch is formed it is sent to sub 'grind_batch_of_code'.
12097
12098     # flags needed by the store routine
12099     my $line_of_tokens;
12100     my $no_internal_newlines;
12101     my $CODE_type;
12102
12103     # range of K of tokens for the current line
12104     my ( $K_first, $K_last );
12105
12106     my ( $rLL, $radjusted_levels, $rparent_of_seqno, $rdepth_of_opening_seqno,
12107         $rblock_type_of_seqno, $ri_starting_one_line_block );
12108
12109     # past stored nonblank tokens and flags
12110     my (
12111         $K_last_nonblank_code, $K_last_last_nonblank_code,
12112         $looking_for_else,     $is_static_block_comment,
12113         $batch_CODE_type,      $last_line_had_side_comment,
12114         $next_parent_seqno,    $next_slevel,
12115     );
12116
12117     # Called once at the start of a new file
12118     sub initialize_process_line_of_CODE {
12119         $K_last_nonblank_code       = undef;
12120         $K_last_last_nonblank_code  = undef;
12121         $looking_for_else           = 0;
12122         $is_static_block_comment    = 0;
12123         $batch_CODE_type            = "";
12124         $last_line_had_side_comment = 0;
12125         $next_parent_seqno          = SEQ_ROOT;
12126         $next_slevel                = undef;
12127         return;
12128     }
12129
12130     # Batch variables: these describe the current batch of code being formed
12131     # and sent down the pipeline.  They are initialized in the next
12132     # sub.
12133     my ( $rbrace_follower, $index_start_one_line_block,
12134         $semicolons_before_block_self_destruct,
12135         $starting_in_quote, $ending_in_quote, );
12136
12137     # Called before the start of each new batch
12138     sub initialize_batch_variables {
12139
12140         $max_index_to_go            = UNDEFINED_INDEX;
12141         @summed_lengths_to_go       = @nesting_depth_to_go = (0);
12142         $ri_starting_one_line_block = [];
12143
12144         # The initialization code for the remaining batch arrays is as follows
12145         # and can be activated for testing.  But profiling shows that it is
12146         # time-consuming to re-initialize the batch arrays and is not necessary
12147         # because the maximum valid token, $max_index_to_go, is carefully
12148         # controlled.  This means however that it is not possible to do any
12149         # type of filter or map operation directly on these arrays.  And it is
12150         # not possible to use negative indexes. As a precaution against program
12151         # changes which might do this, sub pad_array_to_go adds some undefs at
12152         # the end of the current batch of data.
12153
12154         # So 'long story short': this is a waste of time
12155         0 && do { #<<<
12156         @block_type_to_go        = ();
12157         @type_sequence_to_go     = ();
12158         @bond_strength_to_go     = ();
12159         @forced_breakpoint_to_go = ();
12160         @token_lengths_to_go     = ();
12161         @levels_to_go            = ();
12162         @mate_index_to_go        = ();
12163         @ci_levels_to_go         = ();
12164         @nobreak_to_go           = ();
12165         @old_breakpoint_to_go    = ();
12166         @tokens_to_go            = ();
12167         @K_to_go                 = ();
12168         @types_to_go             = ();
12169         @leading_spaces_to_go    = ();
12170         @reduced_spaces_to_go    = ();
12171         @inext_to_go             = ();
12172         @iprev_to_go             = ();
12173         @parent_seqno_to_go      = ();
12174         };
12175
12176         $rbrace_follower = undef;
12177         $ending_in_quote = 0;
12178         destroy_one_line_block();
12179         return;
12180     }
12181
12182     sub leading_spaces_to_go {
12183
12184         # return the number of indentation spaces for a token in the output
12185         # stream
12186
12187         my ($ii) = @_;
12188         return 0 if ( $ii < 0 );
12189         my $indentation = $leading_spaces_to_go[$ii];
12190         return ref($indentation) ? $indentation->get_spaces() : $indentation;
12191     }
12192
12193     sub create_one_line_block {
12194         ( $index_start_one_line_block, $semicolons_before_block_self_destruct )
12195           = @_;
12196         return;
12197     }
12198
12199     sub destroy_one_line_block {
12200         $index_start_one_line_block            = UNDEFINED_INDEX;
12201         $semicolons_before_block_self_destruct = 0;
12202         return;
12203     }
12204
12205     # Routine to place the current token into the output stream.
12206     # Called once per output token.
12207
12208     use constant DEBUG_STORE => 0;
12209
12210     sub store_token_to_go {
12211
12212         my ( $self, $Ktoken_vars, $rtoken_vars ) = @_;
12213
12214         # Add one token to the next batch.
12215         #   $Ktoken_vars = the index K in the global token array
12216         #   $rtoken_vars = $rLL->[$Ktoken_vars] = the corresponding token values
12217         #                  unless they are temporarily being overridden
12218
12219         my $type = $rtoken_vars->[_TYPE_];
12220
12221         # Check for emergency flush...
12222         # The K indexes in the batch must always be a continuous sequence of
12223         # the global token array.  The batch process programming assumes this.
12224         # If storing this token would cause this relation to fail we must dump
12225         # the current batch before storing the new token.  It is extremely rare
12226         # for this to happen. One known example is the following two-line
12227         # snippet when run with parameters
12228         # --noadd-newlines  --space-terminal-semicolon:
12229         #    if ( $_ =~ /PENCIL/ ) { $pencil_flag= 1 } ; ;
12230         #    $yy=1;
12231         if ( $max_index_to_go >= 0 ) {
12232             my $Klast = $K_to_go[$max_index_to_go];
12233             if ( $Ktoken_vars != $Klast + 1 ) {
12234                 $self->flush_batch_of_CODE();
12235             }
12236
12237             # Do not output consecutive blank tokens ... this should not
12238             # happen, but it is worth checking.  Later code can then make the
12239             # simplifying assumption that blank tokens are not consecutive.
12240             elsif ( $type eq 'b' && $types_to_go[$max_index_to_go] eq 'b' ) {
12241
12242                 if (DEVEL_MODE) {
12243
12244                     # if this happens, it is may be that consecutive blanks
12245                     # were inserted into the token stream in 'respace_tokens'
12246                     my $lno = $rLL->[$Ktoken_vars]->[_LINE_INDEX_] + 1;
12247                     Fault("consecutive blanks near line $lno; please fix");
12248                 }
12249                 return;
12250             }
12251         }
12252
12253         # Do not start a batch with a blank token.
12254         # Fixes cases b149 b888 b984 b985 b986 b987
12255         else {
12256             if ( $type eq 'b' ) { return }
12257         }
12258
12259         ++$max_index_to_go;
12260         $batch_CODE_type               = $CODE_type;
12261         $K_to_go[$max_index_to_go]     = $Ktoken_vars;
12262         $types_to_go[$max_index_to_go] = $type;
12263
12264         $old_breakpoint_to_go[$max_index_to_go]    = 0;
12265         $forced_breakpoint_to_go[$max_index_to_go] = 0;
12266         $mate_index_to_go[$max_index_to_go]        = -1;
12267
12268         my $token = $tokens_to_go[$max_index_to_go] = $rtoken_vars->[_TOKEN_];
12269         my $ci_level = $ci_levels_to_go[$max_index_to_go] =
12270           $rtoken_vars->[_CI_LEVEL_];
12271
12272         # Clip levels to zero if there are level errors in the file.
12273         # We had to wait until now for reasons explained in sub 'write_line'.
12274         my $level = $rtoken_vars->[_LEVEL_];
12275         if ( $level < 0 ) { $level = 0 }
12276         $levels_to_go[$max_index_to_go] = $level;
12277
12278         my $seqno = $type_sequence_to_go[$max_index_to_go] =
12279           $rtoken_vars->[_TYPE_SEQUENCE_];
12280
12281         if ( $max_index_to_go == 0 ) {
12282
12283             # Update the next parent sequence number for each new batch.
12284
12285             #------------------------------------------
12286             # Begin coding from sub parent_seqno_from_K
12287             #------------------------------------------
12288
12289             ## $next_parent_seqno = $self->parent_seqno_by_K($Ktoken_vars);
12290             $next_parent_seqno = SEQ_ROOT;
12291             if ($seqno) {
12292                 $next_parent_seqno = $rparent_of_seqno->{$seqno};
12293             }
12294             else {
12295                 my $Kt = $rLL->[$Ktoken_vars]->[_KNEXT_SEQ_ITEM_];
12296                 if ( defined($Kt) ) {
12297                     my $type_sequence = $rLL->[$Kt]->[_TYPE_SEQUENCE_];
12298                     my $type          = $rLL->[$Kt]->[_TYPE_];
12299
12300                     # if next container token is closing, it is the parent seqno
12301                     if ( $is_closing_type{$type} ) {
12302                         $next_parent_seqno = $type_sequence;
12303                     }
12304
12305                     # otherwise we want its parent container
12306                     else {
12307                         $next_parent_seqno =
12308                           $rparent_of_seqno->{$type_sequence};
12309                     }
12310                 }
12311             }
12312             $next_parent_seqno = SEQ_ROOT
12313               unless ( defined($next_parent_seqno) );
12314
12315             #----------------------------------------
12316             # End coding from sub parent_seqno_from_K
12317             #----------------------------------------
12318
12319             $next_slevel = $rdepth_of_opening_seqno->[$next_parent_seqno] + 1;
12320         }
12321
12322         # Initialize some sequence-dependent variables to their normal values
12323         my $parent_seqno = $next_parent_seqno;
12324         my $slevel       = $next_slevel;
12325         my $block_type   = "";
12326
12327         # Then fix them at container tokens:
12328         if ($seqno) {
12329             if ( $is_opening_token{$token} ) {
12330                 $next_parent_seqno = $seqno;
12331                 $slevel            = $rdepth_of_opening_seqno->[$seqno];
12332                 $next_slevel       = $slevel + 1;
12333                 $block_type        = $rblock_type_of_seqno->{$seqno};
12334             }
12335             elsif ( $is_closing_token{$token} ) {
12336                 $next_slevel       = $rdepth_of_opening_seqno->[$seqno];
12337                 $slevel            = $next_slevel + 1;
12338                 $block_type        = $rblock_type_of_seqno->{$seqno};
12339                 $parent_seqno      = $rparent_of_seqno->{$seqno};
12340                 $parent_seqno      = SEQ_ROOT unless defined($parent_seqno);
12341                 $next_parent_seqno = $parent_seqno;
12342             }
12343             else {
12344                 # ternary token: nothing to do
12345             }
12346             $block_type = "" unless ( defined($block_type) );
12347         }
12348
12349         $parent_seqno_to_go[$max_index_to_go]  = $parent_seqno;
12350         $nesting_depth_to_go[$max_index_to_go] = $slevel;
12351         $block_type_to_go[$max_index_to_go]    = $block_type;
12352         $nobreak_to_go[$max_index_to_go]       = $no_internal_newlines;
12353
12354         my $length = $rtoken_vars->[_TOKEN_LENGTH_];
12355
12356         # Safety check that length is defined. Should not be needed now.
12357         # Former patch for indent-only, in which the entire set of tokens is
12358         # turned into type 'q'. Lengths may have not been defined because sub
12359         # 'respace_tokens' is bypassed. We do not need lengths in this case,
12360         # but we will use the character count to have a defined value.  In the
12361         # future, it would be nicer to have 'respace_tokens' convert the lines
12362         # to quotes and get correct lengths.
12363         if ( !defined($length) ) { $length = length($token) }
12364
12365         $token_lengths_to_go[$max_index_to_go] = $length;
12366
12367         # We keep a running sum of token lengths from the start of this batch:
12368         #   summed_lengths_to_go[$i]   = total length to just before token $i
12369         #   summed_lengths_to_go[$i+1] = total length to just after token $i
12370         $summed_lengths_to_go[ $max_index_to_go + 1 ] =
12371           $summed_lengths_to_go[$max_index_to_go] + $length;
12372
12373         my $in_continued_quote =
12374           ( $Ktoken_vars == $K_first ) && $line_of_tokens->{_starting_in_quote};
12375         if ( $max_index_to_go == 0 ) {
12376             $starting_in_quote = $in_continued_quote;
12377         }
12378
12379         # Define the indentation that this token will have in two cases:
12380         # Without CI = reduced_spaces_to_go
12381         # With CI    = leading_spaces_to_go
12382         if ($in_continued_quote) {
12383             $leading_spaces_to_go[$max_index_to_go] = 0;
12384             $reduced_spaces_to_go[$max_index_to_go] = 0;
12385         }
12386         else {
12387             $reduced_spaces_to_go[$max_index_to_go] = my $reduced_spaces =
12388               $rOpts_indent_columns * $radjusted_levels->[$Ktoken_vars];
12389             $leading_spaces_to_go[$max_index_to_go] =
12390               $reduced_spaces + $rOpts_continuation_indentation * $ci_level;
12391         }
12392         $standard_spaces_to_go[$max_index_to_go] =
12393           $leading_spaces_to_go[$max_index_to_go];
12394
12395         DEBUG_STORE && do {
12396             my ( $a, $b, $c ) = caller();
12397             print STDOUT
12398 "STORE: from $a $c: storing token $token type $type lev=$level at $max_index_to_go\n";
12399         };
12400         return;
12401     }
12402
12403     sub flush_batch_of_CODE {
12404
12405         # Finish any batch packaging and call the process routine.
12406         # This must be the only call to grind_batch_of_CODE()
12407         my ($self) = @_;
12408
12409         return unless ( $max_index_to_go >= 0 );
12410
12411         # Create an array to hold variables for this batch
12412         my $this_batch = [];
12413         $this_batch->[_starting_in_quote_] = $starting_in_quote;
12414         $this_batch->[_ending_in_quote_]   = $ending_in_quote;
12415         $this_batch->[_max_index_to_go_]   = $max_index_to_go;
12416         $this_batch->[_batch_CODE_type_]   = $batch_CODE_type;
12417
12418         # The flag $is_static_block_comment applies to the line which just
12419         # arrived. So it only applies if we are outputting that line.
12420         $this_batch->[_is_static_block_comment_] =
12421              defined($K_first)
12422           && $max_index_to_go == 0
12423           && $K_to_go[0] == $K_first ? $is_static_block_comment : 0;
12424
12425         $this_batch->[_ri_starting_one_line_block_] =
12426           $ri_starting_one_line_block;
12427
12428         $self->[_this_batch_] = $this_batch;
12429
12430         $last_line_had_side_comment =
12431           $max_index_to_go > 0 && $types_to_go[$max_index_to_go] eq '#';
12432
12433         $self->grind_batch_of_CODE();
12434
12435         # Done .. this batch is history
12436         $self->[_this_batch_] = [];
12437
12438         initialize_batch_variables();
12439         initialize_forced_breakpoint_vars();
12440
12441         return;
12442     }
12443
12444     sub end_batch {
12445
12446         # end the current batch, EXCEPT for a few special cases
12447         my ($self) = @_;
12448
12449         if ( $max_index_to_go < 0 ) {
12450
12451             # This is harmless but should be elimintated in development
12452             if (DEVEL_MODE) {
12453                 Fault("End batch called with nothing to do; please fix\n");
12454             }
12455             return;
12456         }
12457
12458         # Exceptions when a line does not end with a comment... (fixes c058)
12459         if ( $types_to_go[$max_index_to_go] ne '#' ) {
12460
12461             # Exception 1: Do not end line in a weld
12462             return
12463               if ( $total_weld_count
12464                 && $self->[_rK_weld_right_]->{ $K_to_go[$max_index_to_go] } );
12465
12466             # Exception 2: just set a tentative breakpoint if we might be in a
12467             # one-line block
12468             if ( $index_start_one_line_block != UNDEFINED_INDEX ) {
12469                 $self->set_forced_breakpoint($max_index_to_go);
12470                 return;
12471             }
12472         }
12473
12474         $self->flush_batch_of_CODE();
12475         return;
12476     }
12477
12478     sub flush_vertical_aligner {
12479         my ($self) = @_;
12480         my $vao = $self->[_vertical_aligner_object_];
12481         $vao->flush();
12482         return;
12483     }
12484
12485     # flush is called to output any tokens in the pipeline, so that
12486     # an alternate source of lines can be written in the correct order
12487     sub flush {
12488         my ( $self, $CODE_type ) = @_;
12489
12490         # end the current batch with 1 exception
12491
12492         destroy_one_line_block();
12493
12494         # Exception: if we are flushing within the code stream only to insert
12495         # blank line(s), then we can keep the batch intact at a weld. This
12496         # improves formatting of -ce.  See test 'ce1.ce'
12497         if ( $CODE_type && $CODE_type eq 'BL' ) {
12498             $self->end_batch() if ( $max_index_to_go >= 0 );
12499         }
12500
12501         # otherwise, we have to shut things down completely.
12502         else { $self->flush_batch_of_CODE() }
12503
12504         $self->flush_vertical_aligner();
12505         return;
12506     }
12507
12508     sub process_line_of_CODE {
12509
12510         my ( $self, $my_line_of_tokens ) = @_;
12511
12512         #----------------------------------------------------------------
12513         # This routine is called once per INPUT line to format all of the
12514         # tokens on that line.
12515         #----------------------------------------------------------------
12516
12517         # It outputs full-line comments and blank lines immediately.
12518
12519         # The tokens are copied one-by-one from the global token array $rLL to
12520         # a set of '_to_go' arrays which collect batches of tokens for a
12521         # further processing via calls to 'sub store_token_to_go', until a well
12522         # defined 'structural' break point* or 'forced' breakpoint* is reached.
12523         # Then, the batch of collected '_to_go' tokens is passed along to 'sub
12524         # grind_batch_of_CODE' for further processing.
12525
12526         # * 'structural' break points are basically line breaks corresponding
12527         # to code blocks.  An example is a chain of if-elsif-else statements,
12528         # which should typically be broken at the opening and closing braces.
12529
12530         # * 'forced' break points are breaks required by side comments or by
12531         # special user controls.
12532
12533         # So this routine is just making an initial set of required line
12534         # breaks, basically regardless of the maximum requested line length.
12535         # The subsequent stage of formating make additional line breaks
12536         # appropriate for lists and logical structures, and to keep line
12537         # lengths below the requested maximum line length.
12538
12539         #-----------------------------------
12540         # begin initialize closure variables
12541         #-----------------------------------
12542         $line_of_tokens = $my_line_of_tokens;
12543         $CODE_type      = $line_of_tokens->{_code_type};
12544         my $rK_range = $line_of_tokens->{_rK_range};
12545         ( $K_first, $K_last ) = @{$rK_range};
12546         if ( !defined($K_first) ) {
12547
12548             # Empty line: This can happen if tokens are deleted, for example
12549             # with the -mangle parameter
12550             return;
12551         }
12552         $rLL                     = $self->[_rLL_];
12553         $radjusted_levels        = $self->[_radjusted_levels_];
12554         $rparent_of_seqno        = $self->[_rparent_of_seqno_];
12555         $rdepth_of_opening_seqno = $self->[_rdepth_of_opening_seqno_];
12556         $rblock_type_of_seqno    = $self->[_rblock_type_of_seqno_];
12557
12558         #---------------------------------
12559         # end initialize closure variables
12560         #---------------------------------
12561
12562         # This flag will become nobreak_to_go and should be set to 2 to prevent
12563         # a line break AFTER the current token.
12564         $no_internal_newlines = 0;
12565         if ( !$rOpts_add_newlines || $CODE_type eq 'NIN' ) {
12566             $no_internal_newlines = 2;
12567         }
12568
12569         my $input_line = $line_of_tokens->{_line_text};
12570
12571         my $is_comment =
12572           ( $K_first == $K_last && $rLL->[$K_first]->[_TYPE_] eq '#' );
12573         my $is_static_block_comment_without_leading_space =
12574           $CODE_type eq 'SBCX';
12575         $is_static_block_comment =
12576           $CODE_type eq 'SBC' || $is_static_block_comment_without_leading_space;
12577         my $is_hanging_side_comment = $CODE_type eq 'HSC';
12578         my $is_VERSION_statement    = $CODE_type eq 'VER';
12579
12580         if ($is_VERSION_statement) {
12581             $self->[_saw_VERSION_in_this_file_] = 1;
12582             $no_internal_newlines = 2;
12583         }
12584
12585         # Add interline blank if any
12586         my $last_old_nonblank_type   = "b";
12587         my $first_new_nonblank_token = "";
12588         my $K_first_true             = $K_first;
12589         if ( $max_index_to_go >= 0 ) {
12590             $last_old_nonblank_type   = $types_to_go[$max_index_to_go];
12591             $first_new_nonblank_token = $rLL->[$K_first]->[_TOKEN_];
12592             if (  !$is_comment
12593                 && $types_to_go[$max_index_to_go] ne 'b'
12594                 && $K_first > 0
12595                 && $rLL->[ $K_first - 1 ]->[_TYPE_] eq 'b' )
12596             {
12597                 $K_first -= 1;
12598             }
12599         }
12600
12601         my $rtok_first = $rLL->[$K_first];
12602
12603         my $in_quote = $line_of_tokens->{_ending_in_quote};
12604         $ending_in_quote = $in_quote;
12605
12606         #------------------------------------
12607         # Handle a block (full-line) comment.
12608         #------------------------------------
12609         if ($is_comment) {
12610
12611             if ( $rOpts->{'delete-block-comments'} ) {
12612                 $self->flush();
12613                 return;
12614             }
12615
12616             destroy_one_line_block();
12617             $self->end_batch() if ( $max_index_to_go >= 0 );
12618
12619             # output a blank line before block comments
12620             if (
12621                 # unless we follow a blank or comment line
12622                 $self->[_last_line_leading_type_] ne '#'
12623                 && $self->[_last_line_leading_type_] ne 'b'
12624
12625                 # only if allowed
12626                 && $rOpts->{'blanks-before-comments'}
12627
12628                 # if this is NOT an empty comment, unless it follows a side
12629                 # comment and could become a hanging side comment.
12630                 && (
12631                     $rtok_first->[_TOKEN_] ne '#'
12632                     || (   $last_line_had_side_comment
12633                         && $rLL->[$K_first]->[_LEVEL_] > 0 )
12634                 )
12635
12636                 # not after a short line ending in an opening token
12637                 # because we already have space above this comment.
12638                 # Note that the first comment in this if block, after
12639                 # the 'if (', does not get a blank line because of this.
12640                 && !$self->[_last_output_short_opening_token_]
12641
12642                 # never before static block comments
12643                 && !$is_static_block_comment
12644               )
12645             {
12646                 $self->flush();    # switching to new output stream
12647                 my $file_writer_object = $self->[_file_writer_object_];
12648                 $file_writer_object->write_blank_code_line();
12649                 $self->[_last_line_leading_type_] = 'b';
12650             }
12651
12652             if (
12653                 $rOpts->{'indent-block-comments'}
12654                 && (  !$rOpts->{'indent-spaced-block-comments'}
12655                     || $input_line =~ /^\s+/ )
12656                 && !$is_static_block_comment_without_leading_space
12657               )
12658             {
12659                 my $Ktoken_vars = $K_first;
12660                 my $rtoken_vars = $rLL->[$Ktoken_vars];
12661                 $self->store_token_to_go( $Ktoken_vars, $rtoken_vars );
12662                 $self->end_batch();
12663             }
12664             else {
12665
12666                 # switching to new output stream
12667                 $self->flush();
12668
12669                 # Note that last arg in call here is 'undef' for comments
12670                 my $file_writer_object = $self->[_file_writer_object_];
12671                 $file_writer_object->write_code_line(
12672                     $rtok_first->[_TOKEN_] . "\n", undef );
12673                 $self->[_last_line_leading_type_] = '#';
12674             }
12675             return;
12676         }
12677
12678         # compare input/output indentation except for continuation lines
12679         # (because they have an unknown amount of initial blank space)
12680         # and lines which are quotes (because they may have been outdented)
12681         my $guessed_indentation_level =
12682           $line_of_tokens->{_guessed_indentation_level};
12683         unless ( $is_hanging_side_comment
12684             || $rtok_first->[_CI_LEVEL_] > 0
12685             || $guessed_indentation_level == 0 && $rtok_first->[_TYPE_] eq 'Q' )
12686         {
12687             my $input_line_number = $line_of_tokens->{_line_number};
12688             $self->compare_indentation_levels( $K_first,
12689                 $guessed_indentation_level, $input_line_number );
12690         }
12691
12692         #------------------------
12693         # Handle indentation-only
12694         #------------------------
12695
12696         # NOTE: In previous versions we sent all qw lines out immediately here.
12697         # No longer doing this: also write a line which is entirely a 'qw' list
12698         # to allow stacking of opening and closing tokens.  Note that interior
12699         # qw lines will still go out at the end of this routine.
12700         if ( $CODE_type eq 'IO' ) {
12701             $self->flush();
12702             my $line = $input_line;
12703
12704             # Fix for rt #125506 Unexpected string formating
12705             # in which leading space of a terminal quote was removed
12706             $line =~ s/\s+$//;
12707             $line =~ s/^\s+// unless ( $line_of_tokens->{_starting_in_quote} );
12708
12709             my $Ktoken_vars = $K_first;
12710
12711             # We work with a copy of the token variables and change the
12712             # first token to be the entire line as a quote variable
12713             my $rtoken_vars = $rLL->[$Ktoken_vars];
12714             $rtoken_vars = copy_token_as_type( $rtoken_vars, 'q', $line );
12715
12716             # Patch: length is not really important here
12717             $rtoken_vars->[_TOKEN_LENGTH_] = length($line);
12718
12719             $self->store_token_to_go( $Ktoken_vars, $rtoken_vars );
12720             $self->end_batch();
12721             return;
12722         }
12723
12724         #---------------------------
12725         # Handle all other lines ...
12726         #---------------------------
12727
12728         # If we just saw the end of an elsif block, write nag message
12729         # if we do not see another elseif or an else.
12730         if ($looking_for_else) {
12731
12732             unless ( $rLL->[$K_first]->[_TOKEN_] =~ /^(elsif|else)$/ ) {
12733                 write_logfile_entry("(No else block)\n");
12734             }
12735             $looking_for_else = 0;
12736         }
12737
12738         # This is a good place to kill incomplete one-line blocks
12739         if ( $max_index_to_go >= 0 ) {
12740             if (
12741                 (
12742                        ( $semicolons_before_block_self_destruct == 0 )
12743                     && ( $last_old_nonblank_type eq ';' )
12744                     && ( $first_new_nonblank_token ne '}' )
12745                 )
12746
12747                 # Patch for RT #98902. Honor request to break at old commas.
12748                 || (   $rOpts_break_at_old_comma_breakpoints
12749                     && $last_old_nonblank_type eq ',' )
12750               )
12751             {
12752                 $forced_breakpoint_to_go[$max_index_to_go] = 1
12753                   if ($rOpts_break_at_old_comma_breakpoints);
12754                 destroy_one_line_block();
12755                 $self->end_batch();
12756             }
12757
12758             # Keep any requested breaks before this line.  Note that we have to
12759             # use the original K_first because it may have been reduced above
12760             # to add a blank.  The value of the flag is as follows:
12761             #   1 => hard break, flush the batch
12762             #   2 => soft break, set breakpoint and continue building the batch
12763             if ( $self->[_rbreak_before_Kfirst_]->{$K_first_true} ) {
12764                 destroy_one_line_block();
12765                 if ( $self->[_rbreak_before_Kfirst_]->{$K_first_true} == 2 ) {
12766                     $self->set_forced_breakpoint($max_index_to_go);
12767                 }
12768                 else {
12769                     $self->end_batch() if ( $max_index_to_go >= 0 );
12770                 }
12771             }
12772         }
12773
12774         #--------------------------------------
12775         # loop to process the tokens one-by-one
12776         #--------------------------------------
12777
12778         # We do not want a leading blank if the previous batch just got output
12779         if ( $max_index_to_go < 0 && $rLL->[$K_first]->[_TYPE_] eq 'b' ) {
12780             $K_first++;
12781         }
12782
12783         foreach my $Ktoken_vars ( $K_first .. $K_last ) {
12784
12785             my $rtoken_vars = $rLL->[$Ktoken_vars];
12786             my $type        = $rtoken_vars->[_TYPE_];
12787
12788             # If we are continuing after seeing a right curly brace, flush
12789             # buffer unless we see what we are looking for, as in
12790             #   } else ...
12791             if ( $rbrace_follower && $type ne 'b' ) {
12792                 my $token = $rtoken_vars->[_TOKEN_];
12793                 unless ( $rbrace_follower->{$token} ) {
12794                     $self->end_batch() if ( $max_index_to_go >= 0 );
12795                 }
12796                 $rbrace_follower = undef;
12797             }
12798
12799             my (
12800                 $block_type,       $type_sequence,
12801                 $is_opening_BLOCK, $is_closing_BLOCK,
12802                 $nobreak_BEFORE_BLOCK
12803             );
12804             if ( $rtoken_vars->[_TYPE_SEQUENCE_] ) {
12805
12806                 my $token = $rtoken_vars->[_TOKEN_];
12807                 $type_sequence = $rtoken_vars->[_TYPE_SEQUENCE_];
12808                 $block_type    = $rblock_type_of_seqno->{$type_sequence};
12809
12810                 if (   $block_type
12811                     && $token eq $type
12812                     && $block_type ne 't'
12813                     && !$self->[_rshort_nested_]->{$type_sequence} )
12814                 {
12815
12816                     if ( $type eq '{' ) {
12817                         $is_opening_BLOCK     = 1;
12818                         $nobreak_BEFORE_BLOCK = $no_internal_newlines;
12819                     }
12820                     elsif ( $type eq '}' ) {
12821                         $is_closing_BLOCK     = 1;
12822                         $nobreak_BEFORE_BLOCK = $no_internal_newlines;
12823                     }
12824                 }
12825             }
12826
12827             # Find next nonblank token on this line and look for a side comment
12828             my ( $Knnb, $side_comment_follows );
12829
12830             # if before last token ...
12831             if ( $Ktoken_vars < $K_last ) {
12832                 $Knnb = $Ktoken_vars + 1;
12833                 if (   $Knnb < $K_last
12834                     && $rLL->[$Knnb]->[_TYPE_] eq 'b' )
12835                 {
12836                     $Knnb++;
12837                 }
12838
12839                 if ( $rLL->[$Knnb]->[_TYPE_] eq '#' ) {
12840                     $side_comment_follows = 1;
12841
12842                     # Do not allow breaks which would promote a side comment to
12843                     # a block comment.
12844                     $no_internal_newlines = 2;
12845                 }
12846             }
12847
12848             # if at last token ...
12849             else {
12850
12851                 #---------------------
12852                 # handle side comments
12853                 #---------------------
12854                 if ( $type eq '#' ) {
12855                     $self->store_token_to_go( $Ktoken_vars, $rtoken_vars );
12856                     next;
12857                 }
12858             }
12859
12860             #--------------
12861             # handle blanks
12862             #--------------
12863             if ( $type eq 'b' ) {
12864                 $self->store_token_to_go( $Ktoken_vars, $rtoken_vars );
12865                 next;
12866             }
12867
12868             # Process non-blank and non-comment tokens ...
12869
12870             #-----------------
12871             # handle semicolon
12872             #-----------------
12873             if ( $type eq ';' ) {
12874
12875                 my $next_nonblank_token_type = 'b';
12876                 my $next_nonblank_token      = '';
12877                 if ( defined($Knnb) ) {
12878                     $next_nonblank_token      = $rLL->[$Knnb]->[_TOKEN_];
12879                     $next_nonblank_token_type = $rLL->[$Knnb]->[_TYPE_];
12880                 }
12881
12882                 my $break_before_semicolon = ( $Ktoken_vars == $K_first )
12883                   && $rOpts_break_at_old_semicolon_breakpoints;
12884
12885                 # kill one-line blocks with too many semicolons
12886                 $semicolons_before_block_self_destruct--;
12887                 if (
12888                        $break_before_semicolon
12889                     || ( $semicolons_before_block_self_destruct < 0 )
12890                     || (   $semicolons_before_block_self_destruct == 0
12891                         && $next_nonblank_token_type !~ /^[b\}]$/ )
12892                   )
12893                 {
12894                     destroy_one_line_block();
12895                     $self->end_batch()
12896                       if ( $break_before_semicolon
12897                         && $max_index_to_go >= 0 );
12898                 }
12899
12900                 $self->store_token_to_go( $Ktoken_vars, $rtoken_vars );
12901
12902                 $self->end_batch()
12903                   unless (
12904                     $no_internal_newlines
12905                     || (   $rOpts_keep_interior_semicolons
12906                         && $Ktoken_vars < $K_last )
12907                     || ( $next_nonblank_token eq '}' )
12908                   );
12909
12910             }
12911
12912             #-----------
12913             # handle '{'
12914             #-----------
12915             elsif ($is_opening_BLOCK) {
12916
12917                 # Tentatively output this token.  This is required before
12918                 # calling starting_one_line_block.  We may have to unstore
12919                 # it, though, if we have to break before it.
12920                 $self->store_token_to_go( $Ktoken_vars, $rtoken_vars );
12921
12922                 # Look ahead to see if we might form a one-line block..
12923                 my $too_long =
12924                   $self->starting_one_line_block( $Ktoken_vars,
12925                     $K_last_nonblank_code, $K_last );
12926                 $self->clear_breakpoint_undo_stack();
12927
12928                 # to simplify the logic below, set a flag to indicate if
12929                 # this opening brace is far from the keyword which introduces it
12930                 my $keyword_on_same_line = 1;
12931                 if (
12932                        $max_index_to_go >= 0
12933                     && defined($K_last_nonblank_code)
12934                     && $rLL->[$K_last_nonblank_code]->[_TYPE_] eq ')'
12935                     && ( ( $rtoken_vars->[_LEVEL_] < $levels_to_go[0] )
12936                         || $too_long )
12937                   )
12938                 {
12939                     $keyword_on_same_line = 0;
12940                 }
12941
12942                 # Break before '{' if requested with -bl or -bli flag
12943                 my $want_break = $self->[_rbrace_left_]->{$type_sequence};
12944
12945                 # But do not break if this token is welded to the left
12946                 if ( $total_weld_count
12947                     && defined( $self->[_rK_weld_left_]->{$Ktoken_vars} ) )
12948                 {
12949                     $want_break = 0;
12950                 }
12951
12952                 # Break BEFORE an opening '{' ...
12953                 if (
12954
12955                     # if requested
12956                     $want_break
12957
12958                     # and we were unable to start looking for a block,
12959                     && $index_start_one_line_block == UNDEFINED_INDEX
12960
12961                     # or if it will not be on same line as its keyword, so that
12962                     # it will be outdented (eval.t, overload.t), and the user
12963                     # has not insisted on keeping it on the right
12964                     || (   !$keyword_on_same_line
12965                         && !$rOpts_opening_brace_always_on_right )
12966                   )
12967                 {
12968
12969                     # but only if allowed
12970                     unless ($nobreak_BEFORE_BLOCK) {
12971
12972                         # since we already stored this token, we must unstore it
12973                         $self->unstore_token_to_go();
12974
12975                         # then output the line
12976                         $self->end_batch() if ( $max_index_to_go >= 0 );
12977
12978                         # and now store this token at the start of a new line
12979                         $self->store_token_to_go( $Ktoken_vars, $rtoken_vars );
12980                     }
12981                 }
12982
12983                 # now output this line
12984                 $self->end_batch()
12985                   if ( $max_index_to_go >= 0 && !$no_internal_newlines );
12986             }
12987
12988             #-----------
12989             # handle '}'
12990             #-----------
12991             elsif ($is_closing_BLOCK) {
12992
12993                 my $next_nonblank_token_type = 'b';
12994                 my $next_nonblank_token      = '';
12995                 if ( defined($Knnb) ) {
12996                     $next_nonblank_token      = $rLL->[$Knnb]->[_TOKEN_];
12997                     $next_nonblank_token_type = $rLL->[$Knnb]->[_TYPE_];
12998                 }
12999
13000                 # If there is a pending one-line block ..
13001                 if ( $index_start_one_line_block != UNDEFINED_INDEX ) {
13002
13003                     # Fix for b1208: if a side comment follows this closing
13004                     # brace then we must include its length in the length test
13005                     # ... unless the -issl flag is set (fixes b1307-1309).
13006                     # Assume a minimum of 1 blank space to the comment.
13007                     my $added_length =
13008                       $side_comment_follows
13009                       && !$rOpts_ignore_side_comment_lengths
13010                       ? 1 + $rLL->[$Knnb]->[_TOKEN_LENGTH_]
13011                       : 0;
13012
13013                     # we have to terminate it if..
13014                     if (
13015
13016                         # it is too long (final length may be different from
13017                         # initial estimate). note: must allow 1 space for this
13018                         # token
13019                         $self->excess_line_length( $index_start_one_line_block,
13020                             $max_index_to_go ) + $added_length >= 0
13021
13022                         # or if it has too many semicolons
13023                         || (   $semicolons_before_block_self_destruct == 0
13024                             && defined($K_last_nonblank_code)
13025                             && $rLL->[$K_last_nonblank_code]->[_TYPE_] ne ';' )
13026                       )
13027                     {
13028                         destroy_one_line_block();
13029                     }
13030                 }
13031
13032                 # put a break before this closing curly brace if appropriate
13033                 $self->end_batch()
13034                   if ( $max_index_to_go >= 0
13035                     && !$nobreak_BEFORE_BLOCK
13036                     && $index_start_one_line_block == UNDEFINED_INDEX );
13037
13038                 # store the closing curly brace
13039                 $self->store_token_to_go( $Ktoken_vars, $rtoken_vars );
13040
13041                 # ok, we just stored a closing curly brace.  Often, but
13042                 # not always, we want to end the line immediately.
13043                 # So now we have to check for special cases.
13044
13045                 # if this '}' successfully ends a one-line block..
13046                 my $is_one_line_block = 0;
13047                 my $keep_going        = 0;
13048                 if ( $index_start_one_line_block != UNDEFINED_INDEX ) {
13049
13050                     # Remember the type of token just before the
13051                     # opening brace.  It would be more general to use
13052                     # a stack, but this will work for one-line blocks.
13053                     $is_one_line_block =
13054                       $types_to_go[$index_start_one_line_block];
13055
13056                     # we have to actually make it by removing tentative
13057                     # breaks that were set within it
13058                     $self->undo_forced_breakpoint_stack(0);
13059
13060                     # For -lp, extend the nobreak to include a trailing
13061                     # terminal ','.  This is because the -lp indentation was
13062                     # not known when making one-line blocks, so we may be able
13063                     # to move the line back to fit.  Otherwise we may create a
13064                     # needlessly stranded comma on the next line.
13065                     my $iend_nobreak = $max_index_to_go - 1;
13066                     if (   $rOpts_line_up_parentheses
13067                         && $next_nonblank_token_type eq ','
13068                         && $Knnb eq $K_last )
13069                     {
13070                         my $p_seqno = $parent_seqno_to_go[$max_index_to_go];
13071                         my $is_excluded =
13072                           $self->[_ris_excluded_lp_container_]->{$p_seqno};
13073                         $iend_nobreak = $max_index_to_go if ( !$is_excluded );
13074                     }
13075
13076                     $self->set_nobreaks( $index_start_one_line_block,
13077                         $iend_nobreak );
13078
13079                     # save starting block indexes so that sub correct_lp can
13080                     # check and adjust -lp indentation (c098)
13081                     push @{$ri_starting_one_line_block},
13082                       $index_start_one_line_block;
13083
13084                     # then re-initialize for the next one-line block
13085                     destroy_one_line_block();
13086
13087                     # then decide if we want to break after the '}' ..
13088                     # We will keep going to allow certain brace followers as in:
13089                     #   do { $ifclosed = 1; last } unless $losing;
13090                     #
13091                     # But make a line break if the curly ends a
13092                     # significant block:
13093                     if (
13094                         (
13095                             $is_block_without_semicolon{$block_type}
13096
13097                             # Follow users break point for
13098                             # one line block types U & G, such as a 'try' block
13099                             || $is_one_line_block =~ /^[UG]$/
13100                             && $Ktoken_vars == $K_last
13101                         )
13102
13103                         # if needless semicolon follows we handle it later
13104                         && $next_nonblank_token ne ';'
13105                       )
13106                     {
13107                         $self->end_batch()
13108                           unless ($no_internal_newlines);
13109                     }
13110                 }
13111
13112                 # set string indicating what we need to look for brace follower
13113                 # tokens
13114                 if ( $block_type eq 'do' ) {
13115                     $rbrace_follower = \%is_do_follower;
13116                     if (
13117                         $self->tight_paren_follows( $K_to_go[0], $Ktoken_vars )
13118                       )
13119                     {
13120                         $rbrace_follower = { ')' => 1 };
13121                     }
13122                 }
13123                 elsif ( $block_type =~ /^(if|elsif|unless)$/ ) {
13124                     $rbrace_follower = \%is_if_brace_follower;
13125                 }
13126                 elsif ( $block_type eq 'else' ) {
13127                     $rbrace_follower = \%is_else_brace_follower;
13128                 }
13129
13130                 # added eval for borris.t
13131                 elsif ($is_sort_map_grep_eval{$block_type}
13132                     || $is_one_line_block eq 'G' )
13133                 {
13134                     $rbrace_follower = undef;
13135                     $keep_going      = 1;
13136                 }
13137
13138                 # anonymous sub
13139                 elsif ( $self->[_ris_asub_block_]->{$type_sequence} ) {
13140                     if ($is_one_line_block) {
13141
13142                         $rbrace_follower = \%is_anon_sub_1_brace_follower;
13143
13144                         # Exceptions to help keep -lp intact, see git #74 ...
13145                         # Exception 1: followed by '}' on this line
13146                         if (   $Ktoken_vars < $K_last
13147                             && $next_nonblank_token eq '}' )
13148                         {
13149                             $rbrace_follower = undef;
13150                             $keep_going      = 1;
13151                         }
13152
13153                         # Exception 2: followed by '}' on next line if -lp set.
13154                         # The -lp requirement allows the formatting to follow
13155                         # old breaks when -lp is not used, minimizing changes.
13156                         # Fixes issue c087.
13157                         elsif ($Ktoken_vars == $K_last
13158                             && $rOpts_line_up_parentheses )
13159                         {
13160                             my $K_closing_container =
13161                               $self->[_K_closing_container_];
13162                             my $K_opening_container =
13163                               $self->[_K_opening_container_];
13164                             my $p_seqno = $parent_seqno_to_go[$max_index_to_go];
13165                             my $Kc      = $K_closing_container->{$p_seqno};
13166                             my $is_excluded =
13167                               $self->[_ris_excluded_lp_container_]->{$p_seqno};
13168                             if (   defined($Kc)
13169                                 && $rLL->[$Kc]->[_TOKEN_] eq '}'
13170                                 && !$is_excluded
13171                                 && $Kc - $Ktoken_vars <= 2 )
13172                             {
13173                                 $rbrace_follower = undef;
13174                                 $keep_going      = 1;
13175                             }
13176                         }
13177                     }
13178                     else {
13179                         $rbrace_follower = \%is_anon_sub_brace_follower;
13180                     }
13181                 }
13182
13183                 # None of the above: specify what can follow a closing
13184                 # brace of a block which is not an
13185                 # if/elsif/else/do/sort/map/grep/eval
13186                 # Testfiles:
13187                 # 'Toolbar.pm', 'Menubar.pm', bless.t, '3rules.pl', 'break1.t
13188                 else {
13189                     $rbrace_follower = \%is_other_brace_follower;
13190                 }
13191
13192                 # See if an elsif block is followed by another elsif or else;
13193                 # complain if not.
13194                 if ( $block_type eq 'elsif' ) {
13195
13196                     if ( $next_nonblank_token_type eq 'b' ) {    # end of line?
13197                         $looking_for_else = 1;    # ok, check on next line
13198                     }
13199                     else {
13200
13201                         unless ( $next_nonblank_token =~ /^(elsif|else)$/ ) {
13202                             write_logfile_entry("No else block :(\n");
13203                         }
13204                     }
13205                 }
13206
13207                 # keep going after certain block types (map,sort,grep,eval)
13208                 # added eval for borris.t
13209                 if ($keep_going) {
13210
13211                     # keep going
13212                 }
13213
13214                 # if no more tokens, postpone decision until re-entring
13215                 elsif ( ( $next_nonblank_token_type eq 'b' )
13216                     && $rOpts_add_newlines )
13217                 {
13218                     unless ($rbrace_follower) {
13219                         $self->end_batch()
13220                           unless ($no_internal_newlines);
13221                     }
13222                 }
13223
13224                 elsif ($rbrace_follower) {
13225
13226                     unless ( $rbrace_follower->{$next_nonblank_token} ) {
13227                         $self->end_batch()
13228                           unless ( $no_internal_newlines
13229                             || $max_index_to_go < 0 );
13230                     }
13231                     $rbrace_follower = undef;
13232                 }
13233
13234                 else {
13235                     $self->end_batch()
13236                       unless ( $no_internal_newlines
13237                         || $max_index_to_go < 0 );
13238                 }
13239
13240             } ## end treatment of closing block token
13241
13242             #------------------------------
13243             # handle here_doc target string
13244             #------------------------------
13245             elsif ( $type eq 'h' ) {
13246
13247                 # no newlines after seeing here-target
13248                 $no_internal_newlines = 2;
13249                 ## destroy_one_line_block();  # deleted to fix case b529
13250                 $self->store_token_to_go( $Ktoken_vars, $rtoken_vars );
13251             }
13252
13253             #-----------------------------
13254             # handle all other token types
13255             #-----------------------------
13256             else {
13257
13258                 $self->store_token_to_go( $Ktoken_vars, $rtoken_vars );
13259
13260                 # break after a label if requested
13261                 if ( $type eq 'J' && $rOpts_break_after_labels == 1 ) {
13262                     $self->end_batch()
13263                       unless ($no_internal_newlines);
13264                 }
13265             }
13266
13267             # remember two previous nonblank, non-comment OUTPUT tokens
13268             $K_last_last_nonblank_code = $K_last_nonblank_code;
13269             $K_last_nonblank_code      = $Ktoken_vars;
13270
13271         } ## end of loop over all tokens in this line
13272
13273         my $type       = $rLL->[$K_last]->[_TYPE_];
13274         my $break_flag = $self->[_rbreak_after_Klast_]->{$K_last};
13275
13276         # we have to flush ..
13277         if (
13278
13279             # if there is a side comment...
13280             $type eq '#'
13281
13282             # if this line ends in a quote
13283             # NOTE: This is critically important for insuring that quoted lines
13284             # do not get processed by things like -sot and -sct
13285             || $in_quote
13286
13287             # if this is a VERSION statement
13288             || $is_VERSION_statement
13289
13290             # to keep a label at the end of a line
13291             || ( $type eq 'J' && $rOpts_break_after_labels != 2 )
13292
13293             # if we have a hard break request
13294             || $break_flag && $break_flag != 2
13295
13296             # if we are instructed to keep all old line breaks
13297             || !$rOpts->{'delete-old-newlines'}
13298
13299             # if this is a line of the form 'use overload'. A break here
13300             # in the input file is a good break because it will allow
13301             # the operators which follow to be formatted well. Without
13302             # this break the formatting with -ci=4 -xci is poor, for example.
13303
13304             #   use overload
13305             #     '+' => sub {
13306             #       print length $_[2], "\n";
13307             #       my ( $x, $y ) = _order(@_);
13308             #       Number::Roman->new( int $x + $y );
13309             #     },
13310             #     '-' => sub {
13311             #       my ( $x, $y ) = _order(@_);
13312             #       Number::Roman->new( int $x - $y );
13313             #     };
13314             || (   $max_index_to_go == 2
13315                 && $types_to_go[0] eq 'k'
13316                 && $tokens_to_go[0] eq 'use'
13317                 && $tokens_to_go[$max_index_to_go] eq 'overload' )
13318           )
13319         {
13320             destroy_one_line_block();
13321             $self->end_batch() if ( $max_index_to_go >= 0 );
13322         }
13323
13324         # Check for a soft break request
13325         if ( $max_index_to_go >= 0 && $break_flag && $break_flag == 2 ) {
13326             $self->set_forced_breakpoint($max_index_to_go);
13327         }
13328
13329         # mark old line breakpoints in current output stream
13330         if (
13331             $max_index_to_go >= 0
13332             && (  !$rOpts_ignore_old_breakpoints
13333                 || $self->[_ris_essential_old_breakpoint_]->{$K_last} )
13334           )
13335         {
13336             my $jobp = $max_index_to_go;
13337             if ( $types_to_go[$max_index_to_go] eq 'b' && $max_index_to_go > 0 )
13338             {
13339                 $jobp--;
13340             }
13341             $old_breakpoint_to_go[$jobp] = 1;
13342         }
13343         return;
13344     } ## end sub process_line_of_CODE
13345 } ## end closure process_line_of_CODE
13346
13347 sub tight_paren_follows {
13348
13349     my ( $self, $K_to_go_0, $K_ic ) = @_;
13350
13351     # Input parameters:
13352     #   $K_to_go_0 = first token index K of this output batch (=K_to_go[0])
13353     #   $K_ic = index of the closing do brace (=K_to_go[$max_index_to_go])
13354     # Return parameter:
13355     #   false if we want a break after the closing do brace
13356     #   true if we do not want a break after the closing do brace
13357
13358     # We are at the closing brace of a 'do' block.  See if this brace is
13359     # followed by a closing paren, and if so, set a flag which indicates
13360     # that we do not want a line break between the '}' and ')'.
13361
13362     # xxxxx ( ...... do {  ... } ) {
13363     #                          ^-------looking at this brace, K_ic
13364
13365     # Subscript notation:
13366     # _i = inner container (braces in this case)
13367     # _o = outer container (parens in this case)
13368     # _io = inner opening = '{'
13369     # _ic = inner closing = '}'
13370     # _oo = outer opening = '('
13371     # _oc = outer closing = ')'
13372
13373     #       |--K_oo                 |--K_oc  = outer container
13374     # xxxxx ( ...... do {  ...... } ) {
13375     #                   |--K_io   |--K_ic    = inner container
13376
13377     # In general, the safe thing to do is return a 'false' value
13378     # if the statement appears to be complex.  This will have
13379     # the downstream side-effect of opening up outer containers
13380     # to help make complex code readable.  But for simpler
13381     # do blocks it can be preferable to keep the code compact
13382     # by returning a 'true' value.
13383
13384     return unless defined($K_ic);
13385     my $rLL = $self->[_rLL_];
13386
13387     # we should only be called at a closing block
13388     my $seqno_i = $rLL->[$K_ic]->[_TYPE_SEQUENCE_];
13389     return unless ($seqno_i);    # shouldn't happen;
13390
13391     # This only applies if the next nonblank is a ')'
13392     my $K_oc = $self->K_next_nonblank($K_ic);
13393     return unless defined($K_oc);
13394     my $token_next = $rLL->[$K_oc]->[_TOKEN_];
13395     return unless ( $token_next eq ')' );
13396
13397     my $seqno_o = $rLL->[$K_oc]->[_TYPE_SEQUENCE_];
13398     my $K_io    = $self->[_K_opening_container_]->{$seqno_i};
13399     my $K_oo    = $self->[_K_opening_container_]->{$seqno_o};
13400     return unless ( defined($K_io) && defined($K_oo) );
13401
13402     # RULE 1: Do not break before a closing signature paren
13403     # (regardless of complexity).  This is a fix for issue git#22.
13404     # Looking for something like:
13405     #   sub xxx ( ... do {  ... } ) {
13406     #                               ^----- next block_type
13407     my $K_test = $self->K_next_nonblank($K_oc);
13408     if ( defined($K_test) && $rLL->[$K_test]->[_TYPE_] eq '{' ) {
13409         my $seqno_test = $rLL->[$K_test]->[_TYPE_SEQUENCE_];
13410         if ($seqno_test) {
13411             if (   $self->[_ris_asub_block_]->{$seqno_test}
13412                 || $self->[_ris_sub_block_]->{$seqno_test} )
13413             {
13414                 return 1;
13415             }
13416         }
13417     }
13418
13419     # RULE 2: Break if the contents within braces appears to be 'complex'.  We
13420     # base this decision on the number of tokens between braces.
13421
13422     # xxxxx ( ... do {  ... } ) {
13423     #                 ^^^^^^
13424
13425     # Although very simple, it has the advantages of (1) being insensitive to
13426     # changes in lengths of identifier names, (2) easy to understand, implement
13427     # and test.  A test case for this is 't/snippets/long_line.in'.
13428
13429     # Example: $K_ic - $K_oo = 9       [Pass Rule 2]
13430     # if ( do { $2 !~ /&/ } ) { ... }
13431
13432     # Example: $K_ic - $K_oo = 10      [Pass Rule 2]
13433     # for ( split /\s*={70,}\s*/, do { local $/; <DATA> }) { ... }
13434
13435     # Example: $K_ic - $K_oo = 20      [Fail Rule 2]
13436     # test_zero_args( "do-returned list slice", do { ( 10, 11 )[ 2, 3 ]; });
13437
13438     return if ( $K_ic - $K_io > 16 );
13439
13440     # RULE 3: break if the code between the opening '(' and the '{' is 'complex'
13441     # As with the previous rule, we decide based on the token count
13442
13443     # xxxxx ( ... do {  ... } ) {
13444     #        ^^^^^^^^
13445
13446     # Example: $K_ic - $K_oo = 9       [Pass Rule 2]
13447     #          $K_io - $K_oo = 4       [Pass Rule 3]
13448     # if ( do { $2 !~ /&/ } ) { ... }
13449
13450     # Example: $K_ic - $K_oo = 10    [Pass rule 2]
13451     #          $K_io - $K_oo = 9     [Pass rule 3]
13452     # for ( split /\s*={70,}\s*/, do { local $/; <DATA> }) { ... }
13453
13454     return if ( $K_io - $K_oo > 9 );
13455
13456     # RULE 4: Break if we have already broken this batch of output tokens
13457     return if ( $K_oo < $K_to_go_0 );
13458
13459     # RULE 5: Break if input is not on one line
13460     # For example, we will set the flag for the following expression
13461     # written in one line:
13462
13463     # This has: $K_ic - $K_oo = 10    [Pass rule 2]
13464     #           $K_io - $K_oo = 8     [Pass rule 3]
13465     #   $self->debug( 'Error: ' . do { local $/; <$err> } );
13466
13467     # but we break after the brace if it is on multiple lines on input, since
13468     # the user may prefer it on multiple lines:
13469
13470     # [Fail rule 5]
13471     #   $self->debug(
13472     #       'Error: ' . do { local $/; <$err> }
13473     #   );
13474
13475     if ( !$rOpts_ignore_old_breakpoints ) {
13476         my $iline_oo = $rLL->[$K_oo]->[_LINE_INDEX_];
13477         my $iline_oc = $rLL->[$K_oc]->[_LINE_INDEX_];
13478         return if ( $iline_oo != $iline_oc );
13479     }
13480
13481     # OK to keep the paren tight
13482     return 1;
13483 }
13484
13485 my %is_brace_semicolon_colon;
13486
13487 BEGIN {
13488     my @q = qw( { } ; : );
13489     @is_brace_semicolon_colon{@q} = (1) x scalar(@q);
13490 }
13491
13492 sub starting_one_line_block {
13493
13494     # after seeing an opening curly brace, look for the closing brace and see
13495     # if the entire block will fit on a line.  This routine is not always right
13496     # so a check is made later (at the closing brace) to make sure we really
13497     # have a one-line block.  We have to do this preliminary check, though,
13498     # because otherwise we would always break at a semicolon within a one-line
13499     # block if the block contains multiple statements.
13500
13501     my ( $self, $Kj, $K_last_nonblank, $K_last ) = @_;
13502
13503     my $rbreak_container     = $self->[_rbreak_container_];
13504     my $rshort_nested        = $self->[_rshort_nested_];
13505     my $rLL                  = $self->[_rLL_];
13506     my $K_opening_container  = $self->[_K_opening_container_];
13507     my $rblock_type_of_seqno = $self->[_rblock_type_of_seqno_];
13508
13509     # kill any current block - we can only go 1 deep
13510     destroy_one_line_block();
13511
13512     # return value:
13513     #  1=distance from start of block to opening brace exceeds line length
13514     #  0=otherwise
13515
13516     my $i_start = 0;
13517
13518     # This routine should not have been called if there are no tokens in the
13519     # 'to_go' arrays of previously stored tokens.  A previous call to
13520     # 'store_token_to_go' should have stored an opening brace. An error here
13521     # indicates that a programming change may have caused a flush operation to
13522     # clean out the previously stored tokens.
13523     if ( !defined($max_index_to_go) || $max_index_to_go < 0 ) {
13524         Fault("program bug: store_token_to_go called incorrectly\n")
13525           if (DEVEL_MODE);
13526         return 0;
13527     }
13528
13529     # Return if block should be broken
13530     my $type_sequence = $rLL->[$Kj]->[_TYPE_SEQUENCE_];
13531     if ( $rbreak_container->{$type_sequence} ) {
13532         return 0;
13533     }
13534
13535     my $ris_bli_container = $self->[_ris_bli_container_];
13536     my $is_bli            = $ris_bli_container->{$type_sequence};
13537
13538     my $block_type = $rblock_type_of_seqno->{$type_sequence};
13539     $block_type = "" unless ( defined($block_type) );
13540     my $index_max_forced_break = get_index_max_forced_break();
13541
13542     my $previous_nonblank_token = '';
13543     my $i_last_nonblank         = -1;
13544     if ( defined($K_last_nonblank) ) {
13545         $i_last_nonblank = $K_last_nonblank - $K_to_go[0];
13546         if ( $i_last_nonblank >= 0 ) {
13547             $previous_nonblank_token = $rLL->[$K_last_nonblank]->[_TOKEN_];
13548         }
13549     }
13550
13551     # find the starting keyword for this block (such as 'if', 'else', ...)
13552     if (
13553         $max_index_to_go == 0
13554         ##|| $block_type =~ /^[\{\}\;\:]$/
13555         || $is_brace_semicolon_colon{$block_type}
13556         || substr( $block_type, 0, 7 ) eq 'package'
13557       )
13558     {
13559         $i_start = $max_index_to_go;
13560     }
13561
13562     # the previous nonblank token should start these block types
13563     elsif (
13564         $i_last_nonblank >= 0
13565         && (   $previous_nonblank_token eq $block_type
13566             || $self->[_ris_asub_block_]->{$type_sequence}
13567             || $self->[_ris_sub_block_]->{$type_sequence}
13568             || substr( $block_type, -2, 2 ) eq '()' )
13569       )
13570     {
13571         $i_start = $i_last_nonblank;
13572
13573         # For signatures and extended syntax ...
13574         # If this brace follows a parenthesized list, we should look back to
13575         # find the keyword before the opening paren because otherwise we might
13576         # form a one line block which stays intack, and cause the parenthesized
13577         # expression to break open. That looks bad.
13578         if ( $tokens_to_go[$i_start] eq ')' ) {
13579
13580             # Find the opening paren
13581             my $K_start = $K_to_go[$i_start];
13582             return 0 unless defined($K_start);
13583             my $seqno = $type_sequence_to_go[$i_start];
13584             return 0 unless ($seqno);
13585             my $K_opening = $K_opening_container->{$seqno};
13586             return 0 unless defined($K_opening);
13587             my $i_opening = $i_start + ( $K_opening - $K_start );
13588
13589             # give up if not on this line
13590             return 0 unless ( $i_opening >= 0 );
13591             $i_start = $i_opening;    ##$index_max_forced_break + 1;
13592
13593             # go back one token before the opening paren
13594             if ( $i_start > 0 )                                  { $i_start-- }
13595             if ( $types_to_go[$i_start] eq 'b' && $i_start > 0 ) { $i_start--; }
13596             my $lev = $levels_to_go[$i_start];
13597             if ( $lev > $rLL->[$Kj]->[_LEVEL_] ) { return 0 }
13598         }
13599     }
13600
13601     elsif ( $previous_nonblank_token eq ')' ) {
13602
13603         # For something like "if (xxx) {", the keyword "if" will be
13604         # just after the most recent break. This will be 0 unless
13605         # we have just killed a one-line block and are starting another.
13606         # (doif.t)
13607         # Note: cannot use inext_index_to_go[] here because that array
13608         # is still being constructed.
13609         $i_start = $index_max_forced_break + 1;
13610         if ( $types_to_go[$i_start] eq 'b' ) {
13611             $i_start++;
13612         }
13613
13614         # Patch to avoid breaking short blocks defined with extended_syntax:
13615         # Strip off any trailing () which was added in the parser to mark
13616         # the opening keyword.  For example, in the following
13617         #    create( TypeFoo $e) {$bubba}
13618         # the blocktype would be marked as create()
13619         my $stripped_block_type = $block_type;
13620         if ( substr( $block_type, -2, 2 ) eq '()' ) {
13621             $stripped_block_type = substr( $block_type, 0, -2 );
13622         }
13623         unless ( $tokens_to_go[$i_start] eq $stripped_block_type ) {
13624             return 0;
13625         }
13626     }
13627
13628     # patch for SWITCH/CASE to retain one-line case/when blocks
13629     elsif ( $block_type eq 'case' || $block_type eq 'when' ) {
13630
13631         # Note: cannot use inext_index_to_go[] here because that array
13632         # is still being constructed.
13633         $i_start = $index_max_forced_break + 1;
13634         if ( $types_to_go[$i_start] eq 'b' ) {
13635             $i_start++;
13636         }
13637         unless ( $tokens_to_go[$i_start] eq $block_type ) {
13638             return 0;
13639         }
13640     }
13641
13642     else {
13643         return 1;
13644     }
13645
13646     my $pos = total_line_length( $i_start, $max_index_to_go ) - 1;
13647
13648     my $maximum_line_length =
13649       $maximum_line_length_at_level[ $levels_to_go[$i_start] ];
13650
13651     # see if block starting location is too great to even start
13652     if ( $pos > $maximum_line_length ) {
13653         return 1;
13654     }
13655
13656     # See if everything to the closing token will fit on one line
13657     # This is part of an update to fix cases b562 .. b983
13658     my $K_closing = $self->[_K_closing_container_]->{$type_sequence};
13659     return 0 unless ( defined($K_closing) );
13660     my $container_length = $rLL->[$K_closing]->[_CUMULATIVE_LENGTH_] -
13661       $rLL->[$Kj]->[_CUMULATIVE_LENGTH_];
13662
13663     my $excess = $pos + 1 + $container_length - $maximum_line_length;
13664
13665     # Add a small tolerance for welded tokens (case b901)
13666     if ( $total_weld_count && $self->is_welded_at_seqno($type_sequence) ) {
13667         $excess += 2;
13668     }
13669
13670     if ( $excess > 0 ) {
13671
13672         # line is too long...  there is no chance of forming a one line block
13673         # if the excess is more than 1 char
13674         return 0 if ( $excess > 1 );
13675
13676         # ... and give up if it is not a one-line block on input.
13677         # note: for a one-line block on input, it may be possible to keep
13678         # it as a one-line block (by removing a needless semicolon ).
13679         my $K_start = $K_to_go[$i_start];
13680         my $ldiff =
13681           $rLL->[$K_closing]->[_LINE_INDEX_] - $rLL->[$K_start]->[_LINE_INDEX_];
13682         return 0 if ($ldiff);
13683     }
13684
13685     foreach my $Ki ( $Kj + 1 .. $K_last ) {
13686
13687         # old whitespace could be arbitrarily large, so don't use it
13688         if ( $rLL->[$Ki]->[_TYPE_] eq 'b' ) { $pos += 1 }
13689         else { $pos += $rLL->[$Ki]->[_TOKEN_LENGTH_] }
13690
13691         # ignore some small blocks
13692         my $type_sequence = $rLL->[$Ki]->[_TYPE_SEQUENCE_];
13693         my $nobreak       = $rshort_nested->{$type_sequence};
13694
13695         # Return false result if we exceed the maximum line length,
13696         if ( $pos > $maximum_line_length ) {
13697             return 0;
13698         }
13699
13700         # keep going for non-containers
13701         elsif ( !$type_sequence ) {
13702
13703         }
13704
13705         # return if we encounter another opening brace before finding the
13706         # closing brace.
13707         elsif ($rLL->[$Ki]->[_TOKEN_] eq '{'
13708             && $rLL->[$Ki]->[_TYPE_] eq '{'
13709             && $rblock_type_of_seqno->{$type_sequence}
13710             && !$nobreak )
13711         {
13712             return 0;
13713         }
13714
13715         # if we find our closing brace..
13716         elsif ($rLL->[$Ki]->[_TOKEN_] eq '}'
13717             && $rLL->[$Ki]->[_TYPE_] eq '}'
13718             && $rblock_type_of_seqno->{$type_sequence}
13719             && !$nobreak )
13720         {
13721
13722             # be sure any trailing comment also fits on the line
13723             my $Ki_nonblank = $Ki;
13724             if ( $Ki_nonblank < $K_last ) {
13725                 $Ki_nonblank++;
13726                 if (   $rLL->[$Ki_nonblank]->[_TYPE_] eq 'b'
13727                     && $Ki_nonblank < $K_last )
13728                 {
13729                     $Ki_nonblank++;
13730                 }
13731             }
13732
13733             # Patch for one-line sort/map/grep/eval blocks with side comments:
13734             # We will ignore the side comment length for sort/map/grep/eval
13735             # because this can lead to statements which change every time
13736             # perltidy is run.  Here is an example from Denis Moskowitz which
13737             # oscillates between these two states without this patch:
13738
13739 ## --------
13740 ## grep { $_->foo ne 'bar' } # asdfa asdf asdf asdf asdf asdf asdf asdf asdf asdf asdf
13741 ##  @baz;
13742 ##
13743 ## grep {
13744 ##     $_->foo ne 'bar'
13745 ##   }    # asdfa asdf asdf asdf asdf asdf asdf asdf asdf asdf asdf
13746 ##   @baz;
13747 ## --------
13748
13749             # When the first line is input it gets broken apart by the main
13750             # line break logic in sub process_line_of_CODE.
13751             # When the second line is input it gets recombined by
13752             # process_line_of_CODE and passed to the output routines.  The
13753             # output routines (break_long_lines) do not break it apart
13754             # because the bond strengths are set to the highest possible value
13755             # for grep/map/eval/sort blocks, so the first version gets output.
13756             # It would be possible to fix this by changing bond strengths,
13757             # but they are high to prevent errors in older versions of perl.
13758             # See c100 for eval test.
13759             if (   $Ki < $K_last
13760                 && $rLL->[$K_last]->[_TYPE_] eq '#'
13761                 && $rLL->[$K_last]->[_LEVEL_] == $rLL->[$Ki]->[_LEVEL_]
13762                 && !$rOpts_ignore_side_comment_lengths
13763                 && !$is_sort_map_grep_eval{$block_type}
13764                 && $K_last - $Ki_nonblank <= 2 )
13765             {
13766                 # Only include the side comment for if/else/elsif/unless if it
13767                 # immediately follows (because the current '$rbrace_follower'
13768                 # logic for these will give an immediate brake after these
13769                 # closing braces).  So for example a line like this
13770                 #     if (...) { ... } ; # very long comment......
13771                 # will already break like this:
13772                 #     if (...) { ... }
13773                 #     ; # very long comment......
13774                 # so we do not need to include the length of the comment, which
13775                 # would break the block. Project 'bioperl' has coding like this.
13776                 if (   $block_type !~ /^(if|else|elsif|unless)$/
13777                     || $K_last == $Ki_nonblank )
13778                 {
13779                     $Ki_nonblank = $K_last;
13780                     $pos += $rLL->[$Ki_nonblank]->[_TOKEN_LENGTH_];
13781
13782                     if ( $Ki_nonblank > $Ki + 1 ) {
13783
13784                         # source whitespace could be anything, assume
13785                         # at least one space before the hash on output
13786                         if ( $rLL->[ $Ki + 1 ]->[_TYPE_] eq 'b' ) {
13787                             $pos += 1;
13788                         }
13789                         else { $pos += $rLL->[ $Ki + 1 ]->[_TOKEN_LENGTH_] }
13790                     }
13791
13792                     if ( $pos >= $maximum_line_length ) {
13793                         return 0;
13794                     }
13795                 }
13796             }
13797
13798             # ok, it's a one-line block
13799             create_one_line_block( $i_start, 20 );
13800             return 0;
13801         }
13802
13803         # just keep going for other characters
13804         else {
13805         }
13806     }
13807
13808     # We haven't hit the closing brace, but there is still space. So the
13809     # question here is, should we keep going to look at more lines in hopes of
13810     # forming a new one-line block, or should we stop right now. The problem
13811     # with continuing is that we will not be able to honor breaks before the
13812     # opening brace if we continue.
13813
13814     # Typically we will want to keep trying to make one-line blocks for things
13815     # like sort/map/grep/eval.  But it is not always a good idea to make as
13816     # many one-line blocks as possible, so other types are not done.  The user
13817     # can always use -mangle.
13818
13819     # If we want to keep going, we will create a new one-line block.
13820     # The blocks which we can keep going are in a hash, but we never want
13821     # to continue if we are at a '-bli' block.
13822     if ( $want_one_line_block{$block_type} && !$is_bli ) {
13823         create_one_line_block( $i_start, 1 );
13824     }
13825     return 0;
13826 }
13827
13828 sub unstore_token_to_go {
13829
13830     # remove most recent token from output stream
13831     my $self = shift;
13832     if ( $max_index_to_go > 0 ) {
13833         $max_index_to_go--;
13834     }
13835     else {
13836         $max_index_to_go = UNDEFINED_INDEX;
13837     }
13838     return;
13839 }
13840
13841 sub compare_indentation_levels {
13842
13843     # Check to see if output line tabbing agrees with input line
13844     # this can be very useful for debugging a script which has an extra
13845     # or missing brace.
13846
13847     my ( $self, $K_first, $guessed_indentation_level, $line_number ) = @_;
13848     return unless ( defined($K_first) );
13849
13850     my $rLL = $self->[_rLL_];
13851
13852     my $structural_indentation_level = $rLL->[$K_first]->[_LEVEL_];
13853     my $radjusted_levels             = $self->[_radjusted_levels_];
13854     if ( defined($radjusted_levels) && @{$radjusted_levels} == @{$rLL} ) {
13855         $structural_indentation_level = $radjusted_levels->[$K_first];
13856     }
13857
13858     # record max structural depth for log file
13859     if ( $structural_indentation_level > $self->[_maximum_BLOCK_level_] ) {
13860         $self->[_maximum_BLOCK_level_]         = $structural_indentation_level;
13861         $self->[_maximum_BLOCK_level_at_line_] = $line_number;
13862     }
13863
13864     my $type_sequence = $rLL->[$K_first]->[_TYPE_SEQUENCE_];
13865     my $is_closing_block =
13866          $type_sequence
13867       && $self->[_rblock_type_of_seqno_]->{$type_sequence}
13868       && $rLL->[$K_first]->[_TYPE_] eq '}';
13869
13870     if ( $guessed_indentation_level ne $structural_indentation_level ) {
13871         $self->[_last_tabbing_disagreement_] = $line_number;
13872
13873         if ($is_closing_block) {
13874
13875             if ( !$self->[_in_brace_tabbing_disagreement_] ) {
13876                 $self->[_in_brace_tabbing_disagreement_] = $line_number;
13877             }
13878             if ( !$self->[_first_brace_tabbing_disagreement_] ) {
13879                 $self->[_first_brace_tabbing_disagreement_] = $line_number;
13880             }
13881         }
13882
13883         if ( !$self->[_in_tabbing_disagreement_] ) {
13884             $self->[_tabbing_disagreement_count_]++;
13885
13886             if ( $self->[_tabbing_disagreement_count_] <= MAX_NAG_MESSAGES ) {
13887                 write_logfile_entry(
13888 "Start indentation disagreement: input=$guessed_indentation_level; output=$structural_indentation_level\n"
13889                 );
13890             }
13891             $self->[_in_tabbing_disagreement_]    = $line_number;
13892             $self->[_first_tabbing_disagreement_] = $line_number
13893               unless ( $self->[_first_tabbing_disagreement_] );
13894         }
13895     }
13896     else {
13897
13898         $self->[_in_brace_tabbing_disagreement_] = 0 if ($is_closing_block);
13899
13900         my $in_tabbing_disagreement = $self->[_in_tabbing_disagreement_];
13901         if ($in_tabbing_disagreement) {
13902
13903             if ( $self->[_tabbing_disagreement_count_] <= MAX_NAG_MESSAGES ) {
13904                 write_logfile_entry(
13905 "End indentation disagreement from input line $in_tabbing_disagreement\n"
13906                 );
13907
13908                 if ( $self->[_tabbing_disagreement_count_] == MAX_NAG_MESSAGES )
13909                 {
13910                     write_logfile_entry(
13911                         "No further tabbing disagreements will be noted\n");
13912                 }
13913             }
13914             $self->[_in_tabbing_disagreement_] = 0;
13915
13916         }
13917     }
13918     return;
13919 }
13920
13921 ###################################################
13922 # CODE SECTION 8: Utilities for setting breakpoints
13923 ###################################################
13924
13925 {    ## begin closure set_forced_breakpoint
13926
13927     my $forced_breakpoint_count;
13928     my $forced_breakpoint_undo_count;
13929     my @forced_breakpoint_undo_stack;
13930     my $index_max_forced_break;
13931
13932     # Break before or after certain tokens based on user settings
13933     my %break_before_or_after_token;
13934
13935     BEGIN {
13936
13937         # Updated to use all operators. This fixes case b1054
13938         # Here is the previous simplified version:
13939         ## my @q = qw( . : ? and or xor && || );
13940         my @q = @all_operators;
13941
13942         push @q, ',';
13943         @break_before_or_after_token{@q} = (1) x scalar(@q);
13944     }
13945
13946     sub initialize_forced_breakpoint_vars {
13947         $forced_breakpoint_count      = 0;
13948         $index_max_forced_break       = UNDEFINED_INDEX;
13949         $forced_breakpoint_undo_count = 0;
13950         @forced_breakpoint_undo_stack = ();
13951         return;
13952     }
13953
13954     sub get_forced_breakpoint_count {
13955         return $forced_breakpoint_count;
13956     }
13957
13958     sub get_forced_breakpoint_undo_count {
13959         return $forced_breakpoint_undo_count;
13960     }
13961
13962     sub get_index_max_forced_break {
13963         return $index_max_forced_break;
13964     }
13965
13966     sub set_fake_breakpoint {
13967
13968         # Just bump up the breakpoint count as a signal that there are breaks.
13969         # This is useful if we have breaks but may want to postpone deciding
13970         # where to make them.
13971         $forced_breakpoint_count++;
13972         return;
13973     }
13974
13975     use constant DEBUG_FORCE => 0;
13976
13977     sub set_forced_breakpoint {
13978         my ( $self, $i ) = @_;
13979
13980         # Set a breakpoint AFTER the token at index $i in the _to_go arrays.
13981
13982         # Exceptions:
13983         # - If the token at index $i is a blank, backup to $i-1 to
13984         #   get to the previous nonblank token.
13985         # - For certain tokens, the break may be placed BEFORE the token
13986         #   at index $i, depending on user break preference settings.
13987         # - If a break is made after an opening token, then a break will
13988         #   also be made before the corresponding closing token.
13989
13990         # Returns '$i_nonblank':
13991         #   = index of the token after which the breakpoint was actually placed
13992         #   = undef if breakpoint was not set.
13993         my $i_nonblank;
13994
13995         if ( !defined($i) || $i < 0 ) {
13996
13997             # Calls with bad index $i are harmless but waste time and should
13998             # be caught and eliminated during code development.
13999             if (DEVEL_MODE) {
14000                 my ( $a, $b, $c ) = caller();
14001                 Fault(
14002 "Bad call to forced breakpoint from $a $b $c ; called with i=$i; please fix\n"
14003                 );
14004             }
14005             return;
14006         }
14007
14008         # Break after token $i
14009         $i_nonblank = $self->set_forced_breakpoint_AFTER($i);
14010
14011         # If we break at an opening container..break at the closing
14012         my $set_closing;
14013         if ( defined($i_nonblank)
14014             && $is_opening_sequence_token{ $tokens_to_go[$i_nonblank] } )
14015         {
14016             $set_closing = 1;
14017             $self->set_closing_breakpoint($i_nonblank);
14018         }
14019
14020         DEBUG_FORCE && do {
14021             my ( $a, $b, $c ) = caller();
14022             my $msg =
14023 "FORCE $forced_breakpoint_count after call from $a $c with i=$i max=$max_index_to_go";
14024             if ( !defined($i_nonblank) ) {
14025                 $i = "" unless defined($i);
14026                 $msg .= " but could not set break after i='$i'\n";
14027             }
14028             else {
14029                 $msg .= <<EOM;
14030 set break after $i_nonblank: tok=$tokens_to_go[$i_nonblank] type=$types_to_go[$i_nonblank] nobr=$nobreak_to_go[$i_nonblank]
14031 EOM
14032                 if ( defined($set_closing) ) {
14033                     $msg .=
14034 " Also set closing breakpoint corresponding to this token\n";
14035                 }
14036             }
14037             print STDOUT $msg;
14038         };
14039
14040         return $i_nonblank;
14041     }
14042
14043     sub set_forced_breakpoint_AFTER {
14044         my ( $self, $i ) = @_;
14045
14046         # This routine is only called by sub set_forced_breakpoint and
14047         # sub set_closing_breakpoint.
14048
14049         # Set a breakpoint AFTER the token at index $i in the _to_go arrays.
14050
14051         # Exceptions:
14052         # - If the token at index $i is a blank, backup to $i-1 to
14053         #   get to the previous nonblank token.
14054         # - For certain tokens, the break may be placed BEFORE the token
14055         #   at index $i, depending on user break preference settings.
14056
14057         # Returns:
14058         #   - the index of the token after which the break was set, or
14059         #   - undef if no break was set
14060
14061         return unless ( defined($i) && $i >= 0 );
14062
14063         # Back up at a blank so we have a token to examine.
14064         # This was added to fix for cases like b932 involving an '=' break.
14065         if ( $i > 0 && $types_to_go[$i] eq 'b' ) { $i-- }
14066
14067         # Never break between welded tokens
14068         return
14069           if ( $total_weld_count
14070             && $self->[_rK_weld_right_]->{ $K_to_go[$i] } );
14071
14072         my $token = $tokens_to_go[$i];
14073         my $type  = $types_to_go[$i];
14074
14075         # For certain tokens, use user settings to decide if we break before or
14076         # after it
14077         if ( $break_before_or_after_token{$token}
14078             && ( $type eq $token || $type eq 'k' ) )
14079         {
14080             if ( $want_break_before{$token} && $i >= 0 ) { $i-- }
14081         }
14082
14083         # breaks are forced before 'if' and 'unless'
14084         elsif ( $is_if_unless{$token} && $type eq 'k' ) { $i-- }
14085
14086         if ( $i >= 0 && $i <= $max_index_to_go ) {
14087             my $i_nonblank = ( $types_to_go[$i] ne 'b' ) ? $i : $i - 1;
14088
14089             if (   $i_nonblank >= 0
14090                 && $nobreak_to_go[$i_nonblank] == 0
14091                 && !$forced_breakpoint_to_go[$i_nonblank] )
14092             {
14093                 $forced_breakpoint_to_go[$i_nonblank] = 1;
14094
14095                 if ( $i_nonblank > $index_max_forced_break ) {
14096                     $index_max_forced_break = $i_nonblank;
14097                 }
14098                 $forced_breakpoint_count++;
14099                 $forced_breakpoint_undo_stack[ $forced_breakpoint_undo_count++ ]
14100                   = $i_nonblank;
14101
14102                 # success
14103                 return $i_nonblank;
14104             }
14105         }
14106         return;
14107     }
14108
14109     sub clear_breakpoint_undo_stack {
14110         my ($self) = @_;
14111         $forced_breakpoint_undo_count = 0;
14112         return;
14113     }
14114
14115     use constant DEBUG_UNDOBP => 0;
14116
14117     sub undo_forced_breakpoint_stack {
14118
14119         my ( $self, $i_start ) = @_;
14120
14121         # Given $i_start, a non-negative index the 'undo stack' of breakpoints,
14122         # remove all breakpoints from the top of the 'undo stack' down to and
14123         # including index $i_start.
14124
14125         # The 'undo stack' is a stack of all breakpoints made for a batch of
14126         # code.
14127
14128         if ( $i_start < 0 ) {
14129             $i_start = 0;
14130             my ( $a, $b, $c ) = caller();
14131
14132             # Bad call, can only be due to a recent programming change.
14133             Fault(
14134 "Program Bug: undo_forced_breakpoint_stack from $a $c has bad i=$i_start "
14135             ) if (DEVEL_MODE);
14136             return;
14137         }
14138
14139         while ( $forced_breakpoint_undo_count > $i_start ) {
14140             my $i =
14141               $forced_breakpoint_undo_stack[ --$forced_breakpoint_undo_count ];
14142             if ( $i >= 0 && $i <= $max_index_to_go ) {
14143                 $forced_breakpoint_to_go[$i] = 0;
14144                 $forced_breakpoint_count--;
14145
14146                 DEBUG_UNDOBP && do {
14147                     my ( $a, $b, $c ) = caller();
14148                     print STDOUT
14149 "UNDOBP: undo forced_breakpoint i=$i $forced_breakpoint_undo_count from $a $c max=$max_index_to_go\n";
14150                 };
14151             }
14152
14153             # shouldn't happen, but not a critical error
14154             else {
14155                 DEBUG_UNDOBP && do {
14156                     my ( $a, $b, $c ) = caller();
14157                     print STDOUT
14158 "Program Bug: undo_forced_breakpoint from $a $c has i=$i but max=$max_index_to_go";
14159                 };
14160             }
14161         }
14162         return;
14163     }
14164 } ## end closure set_forced_breakpoint
14165
14166 {    ## begin closure set_closing_breakpoint
14167
14168     my %postponed_breakpoint;
14169
14170     sub initialize_postponed_breakpoint {
14171         %postponed_breakpoint = ();
14172         return;
14173     }
14174
14175     sub has_postponed_breakpoint {
14176         my ($seqno) = @_;
14177         return $postponed_breakpoint{$seqno};
14178     }
14179
14180     sub set_closing_breakpoint {
14181
14182         # set a breakpoint at a matching closing token
14183         my ( $self, $i_break ) = @_;
14184
14185         if ( $mate_index_to_go[$i_break] >= 0 ) {
14186
14187             # Don't reduce the '2' in the statement below.
14188             # Test files: attrib.t, BasicLyx.pm.html
14189             if ( $mate_index_to_go[$i_break] > $i_break + 2 ) {
14190
14191              # break before } ] and ), but sub set_forced_breakpoint will decide
14192              # to break before or after a ? and :
14193                 my $inc = ( $tokens_to_go[$i_break] eq '?' ) ? 0 : 1;
14194                 $self->set_forced_breakpoint_AFTER(
14195                     $mate_index_to_go[$i_break] - $inc );
14196             }
14197         }
14198         else {
14199             my $type_sequence = $type_sequence_to_go[$i_break];
14200             if ($type_sequence) {
14201                 my $closing_token = $matching_token{ $tokens_to_go[$i_break] };
14202                 $postponed_breakpoint{$type_sequence} = 1;
14203             }
14204         }
14205         return;
14206     }
14207 } ## end closure set_closing_breakpoint
14208
14209 #########################################
14210 # CODE SECTION 9: Process batches of code
14211 #########################################
14212
14213 {    ## begin closure grind_batch_of_CODE
14214
14215     # The routines in this closure begin the processing of a 'batch' of code.
14216
14217     # A variable to keep track of consecutive nonblank lines so that we can
14218     # insert occasional blanks
14219     my @nonblank_lines_at_depth;
14220
14221     # A variable to remember maximum size of previous batches; this is needed
14222     # by the logical padding routine
14223     my $peak_batch_size;
14224     my $batch_count;
14225
14226     # variables to keep track of unbalanced containers.
14227     my %saved_opening_indentation;
14228     my @unmatched_opening_indexes_in_this_batch;
14229
14230     sub initialize_grind_batch_of_CODE {
14231         @nonblank_lines_at_depth   = ();
14232         $peak_batch_size           = 0;
14233         $batch_count               = 0;
14234         %saved_opening_indentation = ();
14235         return;
14236     }
14237
14238     # sub grind_batch_of_CODE receives sections of code which are the longest
14239     # possible lines without a break.  In other words, it receives what is left
14240     # after applying all breaks forced by blank lines, block comments, side
14241     # comments, pod text, and structural braces.  Its job is to break this code
14242     # down into smaller pieces, if necessary, which fit within the maximum
14243     # allowed line length.  Then it sends the resulting lines of code on down
14244     # the pipeline to the VerticalAligner package, breaking the code into
14245     # continuation lines as necessary.  The batch of tokens are in the "to_go"
14246     # arrays.  The name 'grind' is slightly suggestive of a machine continually
14247     # breaking down long lines of code, but mainly it is unique and easy to
14248     # remember and find with an editor search.
14249
14250     # The two routines 'process_line_of_CODE' and 'grind_batch_of_CODE' work
14251     # together in the following way:
14252
14253     # - 'process_line_of_CODE' receives the original INPUT lines one-by-one and
14254     # combines them into the largest sequences of tokens which might form a new
14255     # line.
14256     # - 'grind_batch_of_CODE' determines which tokens will form the OUTPUT
14257     # lines.
14258
14259     # So sub 'process_line_of_CODE' builds up the longest possible continouus
14260     # sequences of tokens, regardless of line length, and then
14261     # grind_batch_of_CODE breaks these sequences back down into the new output
14262     # lines.
14263
14264     # Sub 'grind_batch_of_CODE' ships its output lines to the vertical aligner.
14265
14266     use constant DEBUG_GRIND => 0;
14267
14268     sub check_grind_input {
14269
14270         # Check for valid input to sub grind_batch_of_CODE.  An error here
14271         # would most likely be due to an error in 'sub store_token_to_go'.
14272         my ($self) = @_;
14273
14274         # Be sure there are tokens in the batch
14275         if ( $max_index_to_go < 0 ) {
14276             Fault(<<EOM);
14277 sub grind incorrectly called with max_index_to_go=$max_index_to_go
14278 EOM
14279         }
14280         my $Klimit = $self->[_Klimit_];
14281
14282         # The local batch tokens must be a continous part of the global token
14283         # array.
14284         my $KK;
14285         foreach my $ii ( 0 .. $max_index_to_go ) {
14286
14287             my $Km = $KK;
14288
14289             $KK = $K_to_go[$ii];
14290             if ( !defined($KK) || $KK < 0 || $KK > $Klimit ) {
14291                 $KK = '(undef)' unless defined($KK);
14292                 Fault(<<EOM);
14293 at batch index at i=$ii, the value of K_to_go[$ii] = '$KK' is out of the valid range (0 - $Klimit)
14294 EOM
14295             }
14296
14297             if ( $ii > 0 && $KK != $Km + 1 ) {
14298                 my $im = $ii - 1;
14299                 Fault(<<EOM);
14300 Non-sequential K indexes: i=$im has Km=$Km; but i=$ii has K=$KK;  expecting K = Km+1
14301 EOM
14302             }
14303         }
14304         return;
14305     }
14306
14307     sub grind_batch_of_CODE {
14308
14309         my ($self) = @_;
14310
14311         my $this_batch = $self->[_this_batch_];
14312         $batch_count++;
14313
14314         $self->check_grind_input() if (DEVEL_MODE);
14315
14316         # This routine is only called from sub flush_batch_of_code, so that
14317         # routine is a better spot for debugging.
14318         DEBUG_GRIND && do {
14319             my $token = my $type = "";
14320             if ( $max_index_to_go >= 0 ) {
14321                 $token = $tokens_to_go[$max_index_to_go];
14322                 $type  = $types_to_go[$max_index_to_go];
14323             }
14324             my $output_str = "";
14325             if ( $max_index_to_go > 20 ) {
14326                 my $mm = $max_index_to_go - 10;
14327                 $output_str = join( "", @tokens_to_go[ 0 .. 10 ] ) . " ... "
14328                   . join( "", @tokens_to_go[ $mm .. $max_index_to_go ] );
14329             }
14330             else {
14331                 $output_str = join "", @tokens_to_go[ 0 .. $max_index_to_go ];
14332             }
14333             print STDERR <<EOM;
14334 grind got batch number $batch_count with $max_index_to_go tokens, last type '$type' tok='$token', text:
14335 $output_str
14336 EOM
14337         };
14338
14339         return if ( $max_index_to_go < 0 );
14340
14341         $self->set_lp_indentation()
14342           if ($rOpts_line_up_parentheses);
14343
14344         #----------------------------
14345         # Shortcut for block comments
14346         #----------------------------
14347         if (
14348                $max_index_to_go == 0
14349             && $types_to_go[0] eq '#'
14350
14351             # this shortcut does not work for -lp yet
14352             && !$rOpts_line_up_parentheses
14353           )
14354         {
14355             my $ibeg = 0;
14356             $this_batch->[_ri_first_]                 = [$ibeg];
14357             $this_batch->[_ri_last_]                  = [$ibeg];
14358             $this_batch->[_peak_batch_size_]          = $peak_batch_size;
14359             $this_batch->[_do_not_pad_]               = 0;
14360             $this_batch->[_batch_count_]              = $batch_count;
14361             $this_batch->[_rix_seqno_controlling_ci_] = [];
14362
14363             $self->convey_batch_to_vertical_aligner();
14364
14365             my $level = $levels_to_go[$ibeg];
14366             $self->[_last_last_line_leading_level_] =
14367               $self->[_last_line_leading_level_];
14368             $self->[_last_line_leading_type_]  = $types_to_go[$ibeg];
14369             $self->[_last_line_leading_level_] = $level;
14370             $nonblank_lines_at_depth[$level]   = 1;
14371             return;
14372         }
14373
14374         #-------------
14375         # Normal route
14376         #-------------
14377
14378         my $rLL                      = $self->[_rLL_];
14379         my $ris_seqno_controlling_ci = $self->[_ris_seqno_controlling_ci_];
14380         my $rwant_container_open     = $self->[_rwant_container_open_];
14381
14382         my $starting_in_quote       = $this_batch->[_starting_in_quote_];
14383         my $ending_in_quote         = $this_batch->[_ending_in_quote_];
14384         my $is_static_block_comment = $this_batch->[_is_static_block_comment_];
14385
14386         #-------------------------------------------------------
14387         # Loop over the batch to initialize some batch variables
14388         #-------------------------------------------------------
14389         my $comma_count_in_batch = 0;
14390         my $ilast_nonblank       = -1;
14391         my @colon_list;
14392         my @ix_seqno_controlling_ci;
14393         my %comma_arrow_count           = ();
14394         my $comma_arrow_count_contained = 0;
14395         my @unmatched_closing_indexes_in_this_batch;
14396
14397         @unmatched_opening_indexes_in_this_batch = ();
14398
14399         for ( my $i = 0 ; $i <= $max_index_to_go ; $i++ ) {
14400             $bond_strength_to_go[$i] = 0;
14401             $iprev_to_go[$i]         = $ilast_nonblank;
14402             $inext_to_go[$i]         = $i + 1;
14403
14404             my $type = $types_to_go[$i];
14405             if ( $type ne 'b' ) {
14406                 if ( $ilast_nonblank >= 0 ) {
14407                     $inext_to_go[$ilast_nonblank] = $i;
14408
14409                     # just in case there are two blanks in a row (shouldn't
14410                     # happen)
14411                     if ( ++$ilast_nonblank < $i ) {
14412                         $inext_to_go[$ilast_nonblank] = $i;
14413                     }
14414                 }
14415                 $ilast_nonblank = $i;
14416
14417                 # This is a good spot to efficiently collect information needed
14418                 # for breaking lines...
14419
14420                 # gather info needed by sub break_long_lines
14421                 if ( $type_sequence_to_go[$i] ) {
14422                     my $seqno = $type_sequence_to_go[$i];
14423                     my $token = $tokens_to_go[$i];
14424
14425                     # remember indexes of any tokens controlling xci
14426                     # in this batch. This list is needed by sub undo_ci.
14427                     if ( $ris_seqno_controlling_ci->{$seqno} ) {
14428                         push @ix_seqno_controlling_ci, $i;
14429                     }
14430
14431                     if ( $is_opening_sequence_token{$token} ) {
14432                         if ( $rwant_container_open->{$seqno} ) {
14433                             $self->set_forced_breakpoint($i);
14434                         }
14435                         push @unmatched_opening_indexes_in_this_batch, $i;
14436                         if ( $type eq '?' ) {
14437                             push @colon_list, $type;
14438                         }
14439                     }
14440                     elsif ( $is_closing_sequence_token{$token} ) {
14441
14442                         if ( $i > 0 && $rwant_container_open->{$seqno} ) {
14443                             $self->set_forced_breakpoint( $i - 1 );
14444                         }
14445
14446                         my $i_mate =
14447                           pop @unmatched_opening_indexes_in_this_batch;
14448                         if ( defined($i_mate) && $i_mate >= 0 ) {
14449                             if ( $type_sequence_to_go[$i_mate] ==
14450                                 $type_sequence_to_go[$i] )
14451                             {
14452                                 $mate_index_to_go[$i]      = $i_mate;
14453                                 $mate_index_to_go[$i_mate] = $i;
14454                                 my $seqno = $type_sequence_to_go[$i];
14455                                 if ( $comma_arrow_count{$seqno} ) {
14456                                     $comma_arrow_count_contained +=
14457                                       $comma_arrow_count{$seqno};
14458                                 }
14459                             }
14460                             else {
14461                                 push @unmatched_opening_indexes_in_this_batch,
14462                                   $i_mate;
14463                                 push @unmatched_closing_indexes_in_this_batch,
14464                                   $i;
14465                             }
14466                         }
14467                         else {
14468                             push @unmatched_closing_indexes_in_this_batch, $i;
14469                         }
14470                         if ( $type eq ':' ) {
14471                             push @colon_list, $type;
14472                         }
14473                     } ## end elsif ( $is_closing_sequence_token...)
14474
14475                 } ## end if ($seqno)
14476
14477                 elsif ( $type eq ',' ) { $comma_count_in_batch++; }
14478                 elsif ( $tokens_to_go[$i] eq '=>' ) {
14479                     if (@unmatched_opening_indexes_in_this_batch) {
14480                         my $j = $unmatched_opening_indexes_in_this_batch[-1];
14481                         my $seqno = $type_sequence_to_go[$j];
14482                         $comma_arrow_count{$seqno}++;
14483                     }
14484                 }
14485             } ## end if ( $type ne 'b' )
14486         } ## end for ( my $i = 0 ; $i <=...)
14487
14488         my $is_unbalanced_batch = @unmatched_opening_indexes_in_this_batch +
14489           @unmatched_closing_indexes_in_this_batch;
14490
14491         #------------------------
14492         # Set special breakpoints
14493         #------------------------
14494         # If this line ends in a code block brace, set breaks at any
14495         # previous closing code block braces to breakup a chain of code
14496         # blocks on one line.  This is very rare but can happen for
14497         # user-defined subs.  For example we might be looking at this:
14498         #  BOOL { $server_data{uptime} > 0; } NUM { $server_data{load}; } STR {
14499         my $saw_good_break = 0;    # flag to force breaks even if short line
14500         if (
14501
14502             # looking for opening or closing block brace
14503             $block_type_to_go[$max_index_to_go]
14504
14505             # never any good breaks if just one token
14506             && $max_index_to_go > 0
14507
14508             # but not one of these which are never duplicated on a line:
14509             # until|while|for|if|elsif|else
14510             && !$is_block_without_semicolon{ $block_type_to_go[$max_index_to_go]
14511             }
14512           )
14513         {
14514             my $lev = $nesting_depth_to_go[$max_index_to_go];
14515
14516             # Walk backwards from the end and
14517             # set break at any closing block braces at the same level.
14518             # But quit if we are not in a chain of blocks.
14519             for ( my $i = $max_index_to_go - 1 ; $i >= 0 ; $i-- ) {
14520                 last if ( $levels_to_go[$i] < $lev );   # stop at a lower level
14521                 next if ( $levels_to_go[$i] > $lev );   # skip past higher level
14522
14523                 if ( $block_type_to_go[$i] ) {
14524                     if ( $tokens_to_go[$i] eq '}' ) {
14525                         $self->set_forced_breakpoint($i);
14526                         $saw_good_break = 1;
14527                     }
14528                 }
14529
14530                 # quit if we see anything besides words, function, blanks
14531                 # at this level
14532                 elsif ( $types_to_go[$i] !~ /^[\(\)Gwib]$/ ) { last }
14533             }
14534         }
14535
14536         #-----------------------------------------------
14537         # insertion of any blank lines before this batch
14538         #-----------------------------------------------
14539
14540         my $imin = 0;
14541         my $imax = $max_index_to_go;
14542
14543         # trim any blank tokens
14544         if ( $types_to_go[$imin] eq 'b' ) { $imin++ }
14545         if ( $types_to_go[$imax] eq 'b' ) { $imax-- }
14546
14547         if ( $imin > $imax ) {
14548             if (DEVEL_MODE) {
14549                 my $K0  = $K_to_go[0];
14550                 my $lno = "";
14551                 if ( defined($K0) ) { $lno = $rLL->[$K0]->[_LINE_INDEX_] + 1 }
14552                 Fault(<<EOM);
14553 Strange: received batch containing only blanks near input line $lno: after trimming imin=$imin, imax=$imax
14554 EOM
14555             }
14556             return;
14557         }
14558
14559         my $last_line_leading_type  = $self->[_last_line_leading_type_];
14560         my $last_line_leading_level = $self->[_last_line_leading_level_];
14561         my $last_last_line_leading_level =
14562           $self->[_last_last_line_leading_level_];
14563
14564         # add a blank line before certain key types but not after a comment
14565         if ( $last_line_leading_type ne '#' ) {
14566             my $want_blank    = 0;
14567             my $leading_token = $tokens_to_go[$imin];
14568             my $leading_type  = $types_to_go[$imin];
14569
14570             # blank lines before subs except declarations and one-liners
14571             if ( $leading_type eq 'i' ) {
14572                 if (
14573
14574                     # quick check
14575                     (
14576                         substr( $leading_token, 0, 3 ) eq 'sub'
14577                         || $rOpts_sub_alias_list
14578                     )
14579
14580                     # slow check
14581                     && $leading_token =~ /$SUB_PATTERN/
14582                   )
14583                 {
14584                     $want_blank = $rOpts->{'blank-lines-before-subs'}
14585                       if ( terminal_type_i( $imin, $imax ) !~ /^[\;\}\,]$/ );
14586                 }
14587
14588                 # break before all package declarations
14589                 elsif ( substr( $leading_token, 0, 8 ) eq 'package ' ) {
14590                     $want_blank = $rOpts->{'blank-lines-before-packages'};
14591                 }
14592             }
14593
14594             # break before certain key blocks except one-liners
14595             if ( $leading_type eq 'k' ) {
14596                 if ( $leading_token eq 'BEGIN' || $leading_token eq 'END' ) {
14597                     $want_blank = $rOpts->{'blank-lines-before-subs'}
14598                       if ( terminal_type_i( $imin, $imax ) ne '}' );
14599                 }
14600
14601                 # Break before certain block types if we haven't had a
14602                 # break at this level for a while.  This is the
14603                 # difficult decision..
14604                 elsif ($last_line_leading_type ne 'b'
14605                     && $is_if_unless_while_until_for_foreach{$leading_token} )
14606                 {
14607                     my $lc = $nonblank_lines_at_depth[$last_line_leading_level];
14608                     if ( !defined($lc) ) { $lc = 0 }
14609
14610                     # patch for RT #128216: no blank line inserted at a level
14611                     # change
14612                     if ( $levels_to_go[$imin] != $last_line_leading_level ) {
14613                         $lc = 0;
14614                     }
14615
14616                     $want_blank =
14617                          $rOpts->{'blanks-before-blocks'}
14618                       && $lc >= $rOpts->{'long-block-line-count'}
14619                       && $self->consecutive_nonblank_lines() >=
14620                       $rOpts->{'long-block-line-count'}
14621                       && terminal_type_i( $imin, $imax ) ne '}';
14622                 }
14623             }
14624
14625             # Check for blank lines wanted before a closing brace
14626             if ( $leading_token eq '}' ) {
14627                 if (   $rOpts->{'blank-lines-before-closing-block'}
14628                     && $block_type_to_go[$imin]
14629                     && $block_type_to_go[$imin] =~
14630                     /$blank_lines_before_closing_block_pattern/ )
14631                 {
14632                     my $nblanks = $rOpts->{'blank-lines-before-closing-block'};
14633                     if ( $nblanks > $want_blank ) {
14634                         $want_blank = $nblanks;
14635                     }
14636                 }
14637             }
14638
14639             if ($want_blank) {
14640
14641                 # future: send blank line down normal path to VerticalAligner
14642                 $self->flush_vertical_aligner();
14643                 my $file_writer_object = $self->[_file_writer_object_];
14644                 $file_writer_object->require_blank_code_lines($want_blank);
14645             }
14646         }
14647
14648         # update blank line variables and count number of consecutive
14649         # non-blank, non-comment lines at this level
14650         $last_last_line_leading_level = $last_line_leading_level;
14651         $last_line_leading_level      = $levels_to_go[$imin];
14652         if ( $last_line_leading_level < 0 ) { $last_line_leading_level = 0 }
14653         $last_line_leading_type = $types_to_go[$imin];
14654         if (   $last_line_leading_level == $last_last_line_leading_level
14655             && $last_line_leading_type ne 'b'
14656             && $last_line_leading_type ne '#'
14657             && defined( $nonblank_lines_at_depth[$last_line_leading_level] ) )
14658         {
14659             $nonblank_lines_at_depth[$last_line_leading_level]++;
14660         }
14661         else {
14662             $nonblank_lines_at_depth[$last_line_leading_level] = 1;
14663         }
14664
14665         $self->[_last_line_leading_type_]       = $last_line_leading_type;
14666         $self->[_last_line_leading_level_]      = $last_line_leading_level;
14667         $self->[_last_last_line_leading_level_] = $last_last_line_leading_level;
14668
14669         #--------------------------
14670         # scan lists and long lines
14671         #--------------------------
14672
14673         # Flag to remember if we called sub 'pad_array_to_go'.
14674         # Some routines (break_lists(), break_long_lines() ) need some
14675         # extra tokens added at the end of the batch.  Most batches do not
14676         # use these routines, so we will avoid calling 'pad_array_to_go'
14677         # unless it is needed.
14678         my $called_pad_array_to_go;
14679
14680         # set all forced breakpoints for good list formatting
14681         my $is_long_line = $max_index_to_go > 0
14682           && $self->excess_line_length( $imin, $max_index_to_go ) > 0;
14683
14684         my $old_line_count_in_batch = 1;
14685         if ( $max_index_to_go > 0 ) {
14686             my $Kbeg = $K_to_go[0];
14687             my $Kend = $K_to_go[$max_index_to_go];
14688             $old_line_count_in_batch +=
14689               $rLL->[$Kend]->[_LINE_INDEX_] - $rLL->[$Kbeg]->[_LINE_INDEX_];
14690         }
14691
14692         if (
14693                $is_long_line
14694             || $old_line_count_in_batch > 1
14695
14696             # must always call break_lists() with unbalanced batches because
14697             # it is maintaining some stacks
14698             || $is_unbalanced_batch
14699
14700             # call break_lists if we might want to break at commas
14701             || (
14702                 $comma_count_in_batch
14703                 && (   $rOpts_maximum_fields_per_table > 0
14704                     && $rOpts_maximum_fields_per_table <= $comma_count_in_batch
14705                     || $rOpts_comma_arrow_breakpoints == 0 )
14706             )
14707
14708             # call break_lists if user may want to break open some one-line
14709             # hash references
14710             || (   $comma_arrow_count_contained
14711                 && $rOpts_comma_arrow_breakpoints != 3 )
14712           )
14713         {
14714             # add a couple of extra terminal blank tokens
14715             $self->pad_array_to_go();
14716             $called_pad_array_to_go = 1;
14717
14718             my $sgb = $self->break_lists($is_long_line);
14719             $saw_good_break ||= $sgb;
14720         }
14721
14722         # let $ri_first and $ri_last be references to lists of
14723         # first and last tokens of line fragments to output..
14724         my ( $ri_first, $ri_last );
14725
14726         #-------------------------
14727         # write a single line if..
14728         #-------------------------
14729         if (
14730
14731             # we aren't allowed to add any newlines
14732             !$rOpts_add_newlines
14733
14734             # or,
14735             || (
14736
14737                 # this line is 'short'
14738                 !$is_long_line
14739
14740                 # and we didn't see a good breakpoint
14741                 && !$saw_good_break
14742
14743                 # and we don't already have an interior breakpoint
14744                 && !get_forced_breakpoint_count()
14745             )
14746           )
14747         {
14748             @{$ri_first} = ($imin);
14749             @{$ri_last}  = ($imax);
14750         }
14751
14752         #-----------------------------
14753         # otherwise use multiple lines
14754         #-----------------------------
14755         else {
14756
14757             # add a couple of extra terminal blank tokens if we haven't
14758             # already done so
14759             $self->pad_array_to_go() unless ($called_pad_array_to_go);
14760
14761             ( $ri_first, $ri_last ) =
14762               $self->break_long_lines( $saw_good_break, \@colon_list );
14763
14764             $self->break_all_chain_tokens( $ri_first, $ri_last );
14765
14766             $self->break_equals( $ri_first, $ri_last );
14767
14768             # now we do a correction step to clean this up a bit
14769             # (The only time we would not do this is for debugging)
14770             $self->recombine_breakpoints( $ri_first, $ri_last )
14771               if ( $rOpts_recombine && @{$ri_first} > 1 );
14772
14773             $self->insert_final_ternary_breaks( $ri_first, $ri_last )
14774               if (@colon_list);
14775         }
14776
14777         $self->insert_breaks_before_list_opening_containers( $ri_first,
14778             $ri_last )
14779           if ( %break_before_container_types && $max_index_to_go > 0 );
14780
14781         #-------------------
14782         # -lp corrector step
14783         #-------------------
14784         my $do_not_pad = 0;
14785         if ($rOpts_line_up_parentheses) {
14786             $do_not_pad = $self->correct_lp_indentation( $ri_first, $ri_last );
14787         }
14788
14789         #--------------------------
14790         # unmask phantom semicolons
14791         #--------------------------
14792         if ( !$tokens_to_go[$imax] && $types_to_go[$imax] eq ';' ) {
14793             my $i       = $imax;
14794             my $tok     = ';';
14795             my $tok_len = 1;
14796             if ( $want_left_space{';'} != WS_NO ) {
14797                 $tok     = ' ;';
14798                 $tok_len = 2;
14799             }
14800             $tokens_to_go[$i]        = $tok;
14801             $token_lengths_to_go[$i] = $tok_len;
14802             my $KK = $K_to_go[$i];
14803             $rLL->[$KK]->[_TOKEN_]        = $tok;
14804             $rLL->[$KK]->[_TOKEN_LENGTH_] = $tok_len;
14805             my $line_number = 1 + $rLL->[$KK]->[_LINE_INDEX_];
14806             $self->note_added_semicolon($line_number);
14807
14808             foreach ( $imax .. $max_index_to_go ) {
14809                 $summed_lengths_to_go[ $_ + 1 ] += $tok_len;
14810             }
14811         }
14812
14813         if ( $rOpts_one_line_block_semicolons == 0 ) {
14814             $self->delete_one_line_semicolons( $ri_first, $ri_last );
14815         }
14816
14817         #--------------------
14818         # ship this batch out
14819         #--------------------
14820         $this_batch->[_ri_first_]                 = $ri_first;
14821         $this_batch->[_ri_last_]                  = $ri_last;
14822         $this_batch->[_peak_batch_size_]          = $peak_batch_size;
14823         $this_batch->[_do_not_pad_]               = $do_not_pad;
14824         $this_batch->[_batch_count_]              = $batch_count;
14825         $this_batch->[_rix_seqno_controlling_ci_] = \@ix_seqno_controlling_ci;
14826
14827         $self->convey_batch_to_vertical_aligner();
14828
14829         #-------------------------------------------------------------------
14830         # Write requested number of blank lines after an opening block brace
14831         #-------------------------------------------------------------------
14832         if ($rOpts_blank_lines_after_opening_block) {
14833             my $iterm = $imax;
14834             if ( $types_to_go[$iterm] eq '#' && $iterm > $imin ) {
14835                 $iterm -= 1;
14836                 if ( $types_to_go[$iterm] eq 'b' && $iterm > $imin ) {
14837                     $iterm -= 1;
14838                 }
14839             }
14840
14841             if (   $types_to_go[$iterm] eq '{'
14842                 && $block_type_to_go[$iterm]
14843                 && $block_type_to_go[$iterm] =~
14844                 /$blank_lines_after_opening_block_pattern/ )
14845             {
14846                 my $nblanks = $rOpts_blank_lines_after_opening_block;
14847                 $self->flush_vertical_aligner();
14848                 my $file_writer_object = $self->[_file_writer_object_];
14849                 $file_writer_object->require_blank_code_lines($nblanks);
14850             }
14851         }
14852
14853         # Remember the largest batch size processed. This is needed by the
14854         # logical padding routine to avoid padding the first nonblank token
14855         if ( $max_index_to_go && $max_index_to_go > $peak_batch_size ) {
14856             $peak_batch_size = $max_index_to_go;
14857         }
14858
14859         return;
14860     }
14861
14862     sub save_opening_indentation {
14863
14864         # This should be called after each batch of tokens is output. It
14865         # saves indentations of lines of all unmatched opening tokens.
14866         # These will be used by sub get_opening_indentation.
14867
14868         my ( $self, $ri_first, $ri_last, $rindentation_list ) = @_;
14869
14870         # QW INDENTATION PATCH 1:
14871         # Also save indentation for multiline qw quotes
14872         my @i_qw;
14873         my $seqno_qw_opening;
14874         if ( $types_to_go[$max_index_to_go] eq 'q' ) {
14875             my $KK = $K_to_go[$max_index_to_go];
14876             $seqno_qw_opening =
14877               $self->[_rstarting_multiline_qw_seqno_by_K_]->{$KK};
14878             if ($seqno_qw_opening) {
14879                 push @i_qw, $max_index_to_go;
14880             }
14881         }
14882
14883         # we need to save indentations of any unmatched opening tokens
14884         # in this batch because we may need them in a subsequent batch.
14885         foreach ( @unmatched_opening_indexes_in_this_batch, @i_qw ) {
14886
14887             my $seqno = $type_sequence_to_go[$_];
14888
14889             if ( !$seqno ) {
14890                 if ( $seqno_qw_opening && $_ == $max_index_to_go ) {
14891                     $seqno = $seqno_qw_opening;
14892                 }
14893                 else {
14894
14895                     # shouldn't happen
14896                     $seqno = 'UNKNOWN';
14897                 }
14898             }
14899
14900             $saved_opening_indentation{$seqno} = [
14901                 lookup_opening_indentation(
14902                     $_, $ri_first, $ri_last, $rindentation_list
14903                 )
14904             ];
14905         }
14906         return;
14907     }
14908
14909     sub get_saved_opening_indentation {
14910         my ($seqno) = @_;
14911         my ( $indent, $offset, $is_leading, $exists ) = ( 0, 0, 0, 0 );
14912
14913         if ($seqno) {
14914             if ( $saved_opening_indentation{$seqno} ) {
14915                 ( $indent, $offset, $is_leading ) =
14916                   @{ $saved_opening_indentation{$seqno} };
14917                 $exists = 1;
14918             }
14919         }
14920
14921         # some kind of serious error it doesn't exist
14922         # (example is badfile.t)
14923
14924         return ( $indent, $offset, $is_leading, $exists );
14925     }
14926 } ## end closure grind_batch_of_CODE
14927
14928 sub lookup_opening_indentation {
14929
14930     # get the indentation of the line in the current output batch
14931     # which output a selected opening token
14932     #
14933     # given:
14934     #   $i_opening - index of an opening token in the current output batch
14935     #                whose line indentation we need
14936     #   $ri_first - reference to list of the first index $i for each output
14937     #               line in this batch
14938     #   $ri_last - reference to list of the last index $i for each output line
14939     #              in this batch
14940     #   $rindentation_list - reference to a list containing the indentation
14941     #            used for each line.  (NOTE: the first slot in
14942     #            this list is the last returned line number, and this is
14943     #            followed by the list of indentations).
14944     #
14945     # return
14946     #   -the indentation of the line which contained token $i_opening
14947     #   -and its offset (number of columns) from the start of the line
14948
14949     my ( $i_opening, $ri_start, $ri_last, $rindentation_list ) = @_;
14950
14951     if ( !@{$ri_last} ) {
14952
14953         # An error here implies a bug introduced by a recent program change.
14954         # Every batch of code has lines, so this should never happen.
14955         if (DEVEL_MODE) {
14956             Fault("Error in opening_indentation: no lines");
14957         }
14958         return ( 0, 0, 0 );
14959     }
14960
14961     my $nline = $rindentation_list->[0];    # line number of previous lookup
14962
14963     # reset line location if necessary
14964     $nline = 0 if ( $i_opening < $ri_start->[$nline] );
14965
14966     # find the correct line
14967     unless ( $i_opening > $ri_last->[-1] ) {
14968         while ( $i_opening > $ri_last->[$nline] ) { $nline++; }
14969     }
14970
14971     # Error - token index is out of bounds - shouldn't happen
14972     # A program bug has been introduced in one of the calling routines.
14973     # We better stop here.
14974     else {
14975         my $i_last_line = $ri_last->[-1];
14976         if (DEVEL_MODE) {
14977             Fault(<<EOM);
14978 Program bug in call to lookup_opening_indentation - index out of range
14979  called with index i_opening=$i_opening  > $i_last_line = max index of last line
14980 This batch has max index = $max_index_to_go,
14981 EOM
14982         }
14983         $nline = $#{$ri_last};
14984     }
14985
14986     $rindentation_list->[0] =
14987       $nline;    # save line number to start looking next call
14988     my $ibeg       = $ri_start->[$nline];
14989     my $offset     = token_sequence_length( $ibeg, $i_opening ) - 1;
14990     my $is_leading = ( $ibeg == $i_opening );
14991     return ( $rindentation_list->[ $nline + 1 ], $offset, $is_leading );
14992 }
14993
14994 sub terminal_type_i {
14995
14996     #  returns type of last token on this line (terminal token), as follows:
14997     #  returns # for a full-line comment
14998     #  returns ' ' for a blank line
14999     #  otherwise returns final token type
15000
15001     my ( $ibeg, $iend ) = @_;
15002
15003     # Start at the end and work backwards
15004     my $i      = $iend;
15005     my $type_i = $types_to_go[$i];
15006
15007     # Check for side comment
15008     if ( $type_i eq '#' ) {
15009         $i--;
15010         if ( $i < $ibeg ) {
15011             return wantarray ? ( $type_i, $ibeg ) : $type_i;
15012         }
15013         $type_i = $types_to_go[$i];
15014     }
15015
15016     # Skip past a blank
15017     if ( $type_i eq 'b' ) {
15018         $i--;
15019         if ( $i < $ibeg ) {
15020             return wantarray ? ( $type_i, $ibeg ) : $type_i;
15021         }
15022         $type_i = $types_to_go[$i];
15023     }
15024
15025     # Found it..make sure it is a BLOCK termination,
15026     # but hide a terminal } after sort/map/grep/eval/do because it is not
15027     # necessarily the end of the line.  (terminal.t)
15028     my $block_type = $block_type_to_go[$i];
15029     if (
15030         $type_i eq '}'
15031         && (  !$block_type
15032             || $is_sort_map_grep_eval_do{$block_type} )
15033       )
15034     {
15035         $type_i = 'b';
15036     }
15037     return wantarray ? ( $type_i, $i ) : $type_i;
15038 }
15039
15040 sub pad_array_to_go {
15041
15042     # To simplify coding in break_lists and set_bond_strengths, it helps to
15043     # create some extra blank tokens at the end of the arrays.  We also add
15044     # some undef's to help guard against using invalid data.
15045     my ($self) = @_;
15046     $K_to_go[ $max_index_to_go + 1 ]             = undef;
15047     $tokens_to_go[ $max_index_to_go + 1 ]        = '';
15048     $tokens_to_go[ $max_index_to_go + 2 ]        = '';
15049     $tokens_to_go[ $max_index_to_go + 3 ]        = undef;
15050     $types_to_go[ $max_index_to_go + 1 ]         = 'b';
15051     $types_to_go[ $max_index_to_go + 2 ]         = 'b';
15052     $types_to_go[ $max_index_to_go + 3 ]         = undef;
15053     $nesting_depth_to_go[ $max_index_to_go + 2 ] = undef;
15054     $nesting_depth_to_go[ $max_index_to_go + 1 ] =
15055       $nesting_depth_to_go[$max_index_to_go];
15056
15057     #    /^[R\}\)\]]$/
15058     if ( $is_closing_type{ $types_to_go[$max_index_to_go] } ) {
15059         if ( $nesting_depth_to_go[$max_index_to_go] <= 0 ) {
15060
15061             # Nesting depths are set to be >=0 in sub write_line, so it should
15062             # not be possible to get here unless the code has a bracing error
15063             # which leaves a closing brace with zero nesting depth.
15064             unless ( get_saw_brace_error() ) {
15065                 if (DEVEL_MODE) {
15066                     Fault(<<EOM);
15067 Program bug in pad_array_to_go: hit nesting error which should have been caught
15068 EOM
15069                 }
15070             }
15071         }
15072         else {
15073             $nesting_depth_to_go[ $max_index_to_go + 1 ] -= 1;
15074         }
15075     }
15076
15077     #       /^[L\{\(\[]$/
15078     elsif ( $is_opening_type{ $types_to_go[$max_index_to_go] } ) {
15079         $nesting_depth_to_go[ $max_index_to_go + 1 ] += 1;
15080     }
15081     return;
15082 }
15083
15084 sub break_all_chain_tokens {
15085
15086     # scan the current breakpoints looking for breaks at certain "chain
15087     # operators" (. : && || + etc) which often occur repeatedly in a long
15088     # statement.  If we see a break at any one, break at all similar tokens
15089     # within the same container.
15090     #
15091     my ( $self, $ri_left, $ri_right ) = @_;
15092
15093     my %saw_chain_type;
15094     my %left_chain_type;
15095     my %right_chain_type;
15096     my %interior_chain_type;
15097     my $nmax = @{$ri_right} - 1;
15098
15099     # scan the left and right end tokens of all lines
15100     my $count = 0;
15101     for my $n ( 0 .. $nmax ) {
15102         my $il    = $ri_left->[$n];
15103         my $ir    = $ri_right->[$n];
15104         my $typel = $types_to_go[$il];
15105         my $typer = $types_to_go[$ir];
15106         $typel = '+' if ( $typel eq '-' );    # treat + and - the same
15107         $typer = '+' if ( $typer eq '-' );
15108         $typel = '*' if ( $typel eq '/' );    # treat * and / the same
15109         $typer = '*' if ( $typer eq '/' );
15110         my $tokenl = $tokens_to_go[$il];
15111         my $tokenr = $tokens_to_go[$ir];
15112
15113         if ( $is_chain_operator{$tokenl} && $want_break_before{$typel} ) {
15114             next if ( $typel eq '?' );
15115             push @{ $left_chain_type{$typel} }, $il;
15116             $saw_chain_type{$typel} = 1;
15117             $count++;
15118         }
15119         if ( $is_chain_operator{$tokenr} && !$want_break_before{$typer} ) {
15120             next if ( $typer eq '?' );
15121             push @{ $right_chain_type{$typer} }, $ir;
15122             $saw_chain_type{$typer} = 1;
15123             $count++;
15124         }
15125     }
15126     return unless $count;
15127
15128     # now look for any interior tokens of the same types
15129     $count = 0;
15130     for my $n ( 0 .. $nmax ) {
15131         my $il = $ri_left->[$n];
15132         my $ir = $ri_right->[$n];
15133         foreach my $i ( $il + 1 .. $ir - 1 ) {
15134             my $type = $types_to_go[$i];
15135             $type = '+' if ( $type eq '-' );
15136             $type = '*' if ( $type eq '/' );
15137             if ( $saw_chain_type{$type} ) {
15138                 push @{ $interior_chain_type{$type} }, $i;
15139                 $count++;
15140             }
15141         }
15142     }
15143     return unless $count;
15144
15145     # now make a list of all new break points
15146     my @insert_list;
15147
15148     # loop over all chain types
15149     foreach my $type ( keys %saw_chain_type ) {
15150
15151         # quit if just ONE continuation line with leading .  For example--
15152         # print LATEXFILE '\framebox{\parbox[c][' . $h . '][t]{' . $w . '}{'
15153         #  . $contents;
15154         last if ( $nmax == 1 && $type =~ /^[\.\+]$/ );
15155
15156         # loop over all interior chain tokens
15157         foreach my $itest ( @{ $interior_chain_type{$type} } ) {
15158
15159             # loop over all left end tokens of same type
15160             if ( $left_chain_type{$type} ) {
15161                 next if $nobreak_to_go[ $itest - 1 ];
15162                 foreach my $i ( @{ $left_chain_type{$type} } ) {
15163                     next unless $self->in_same_container_i( $i, $itest );
15164                     push @insert_list, $itest - 1;
15165
15166                     # Break at matching ? if this : is at a different level.
15167                     # For example, the ? before $THRf_DEAD in the following
15168                     # should get a break if its : gets a break.
15169                     #
15170                     # my $flags =
15171                     #     ( $_ & 1 ) ? ( $_ & 4 ) ? $THRf_DEAD : $THRf_ZOMBIE
15172                     #   : ( $_ & 4 ) ? $THRf_R_DETACHED
15173                     #   :              $THRf_R_JOINABLE;
15174                     if (   $type eq ':'
15175                         && $levels_to_go[$i] != $levels_to_go[$itest] )
15176                     {
15177                         my $i_question = $mate_index_to_go[$itest];
15178                         if ( $i_question > 0 ) {
15179                             push @insert_list, $i_question - 1;
15180                         }
15181                     }
15182                     last;
15183                 }
15184             }
15185
15186             # loop over all right end tokens of same type
15187             if ( $right_chain_type{$type} ) {
15188                 next if $nobreak_to_go[$itest];
15189                 foreach my $i ( @{ $right_chain_type{$type} } ) {
15190                     next unless $self->in_same_container_i( $i, $itest );
15191                     push @insert_list, $itest;
15192
15193                     # break at matching ? if this : is at a different level
15194                     if (   $type eq ':'
15195                         && $levels_to_go[$i] != $levels_to_go[$itest] )
15196                     {
15197                         my $i_question = $mate_index_to_go[$itest];
15198                         if ( $i_question >= 0 ) {
15199                             push @insert_list, $i_question;
15200                         }
15201                     }
15202                     last;
15203                 }
15204             }
15205         }
15206     }
15207
15208     # insert any new break points
15209     if (@insert_list) {
15210         $self->insert_additional_breaks( \@insert_list, $ri_left, $ri_right );
15211     }
15212     return;
15213 }
15214
15215 sub insert_additional_breaks {
15216
15217     # this routine will add line breaks at requested locations after
15218     # sub break_long_lines has made preliminary breaks.
15219
15220     my ( $self, $ri_break_list, $ri_first, $ri_last ) = @_;
15221     my $i_f;
15222     my $i_l;
15223     my $line_number = 0;
15224     foreach my $i_break_left ( sort { $a <=> $b } @{$ri_break_list} ) {
15225
15226         next if ( $nobreak_to_go[$i_break_left] );
15227
15228         $i_f = $ri_first->[$line_number];
15229         $i_l = $ri_last->[$line_number];
15230         while ( $i_break_left >= $i_l ) {
15231             $line_number++;
15232
15233             # shouldn't happen unless caller passes bad indexes
15234             if ( $line_number >= @{$ri_last} ) {
15235                 if (DEVEL_MODE) {
15236                     Fault(<<EOM);
15237 Non-fatal program bug: couldn't set break at $i_break_left
15238 EOM
15239                 }
15240                 return;
15241             }
15242             $i_f = $ri_first->[$line_number];
15243             $i_l = $ri_last->[$line_number];
15244         }
15245
15246         # Do not leave a blank at the end of a line; back up if necessary
15247         if ( $types_to_go[$i_break_left] eq 'b' ) { $i_break_left-- }
15248
15249         my $i_break_right = $inext_to_go[$i_break_left];
15250         if (   $i_break_left >= $i_f
15251             && $i_break_left < $i_l
15252             && $i_break_right > $i_f
15253             && $i_break_right <= $i_l )
15254         {
15255             splice( @{$ri_first}, $line_number, 1, ( $i_f, $i_break_right ) );
15256             splice( @{$ri_last},  $line_number, 1, ( $i_break_left, $i_l ) );
15257         }
15258     }
15259     return;
15260 }
15261
15262 {    ## begin closure in_same_container_i
15263     my $ris_break_token;
15264     my $ris_comma_token;
15265
15266     BEGIN {
15267
15268         # all cases break on seeing commas at same level
15269         my @q = qw( => );
15270         push @q, ',';
15271         @{$ris_comma_token}{@q} = (1) x scalar(@q);
15272
15273         # Non-ternary text also breaks on seeing any of qw(? : || or )
15274         # Example: we would not want to break at any of these .'s
15275         #  : "<A HREF=\"#item_" . htmlify( 0, $s2 ) . "\">$str</A>"
15276         push @q, qw( or || ? : );
15277         @{$ris_break_token}{@q} = (1) x scalar(@q);
15278     }
15279
15280     sub in_same_container_i {
15281
15282         # Check to see if tokens at i1 and i2 are in the same container, and
15283         # not separated by certain characters: => , ? : || or
15284         # This is an interface between the _to_go arrays to the rLL array
15285         my ( $self, $i1, $i2 ) = @_;
15286
15287         # quick check
15288         my $parent_seqno_1 = $parent_seqno_to_go[$i1];
15289         return if ( $parent_seqno_to_go[$i2] ne $parent_seqno_1 );
15290
15291         if ( $i2 < $i1 ) { ( $i1, $i2 ) = ( $i2, $i1 ) }
15292         my $K1  = $K_to_go[$i1];
15293         my $K2  = $K_to_go[$i2];
15294         my $rLL = $self->[_rLL_];
15295
15296         my $depth_1 = $nesting_depth_to_go[$i1];
15297         return if ( $depth_1 < 0 );
15298
15299         # Shouldn't happen since i1 and i2 have same parent:
15300         return unless ( $nesting_depth_to_go[$i2] == $depth_1 );
15301
15302         # Select character set to scan for
15303         my $type_1 = $types_to_go[$i1];
15304         my $rbreak = ( $type_1 ne ':' ) ? $ris_break_token : $ris_comma_token;
15305
15306         # Fast preliminary loop to verify that tokens are in the same container
15307         my $KK = $K1;
15308         while (1) {
15309             $KK = $rLL->[$KK]->[_KNEXT_SEQ_ITEM_];
15310             last if !defined($KK);
15311             last if ( $KK >= $K2 );
15312             my $ii      = $i1 + $KK - $K1;
15313             my $depth_i = $nesting_depth_to_go[$ii];
15314             return if ( $depth_i < $depth_1 );
15315             next   if ( $depth_i > $depth_1 );
15316             if ( $type_1 ne ':' ) {
15317                 my $tok_i = $tokens_to_go[$ii];
15318                 return if ( $tok_i eq '?' || $tok_i eq ':' );
15319             }
15320         }
15321
15322         # Slow loop checking for certain characters
15323
15324         #-----------------------------------------------------
15325         # This is potentially a slow routine and not critical.
15326         # For safety just give up for large differences.
15327         # See test file 'infinite_loop.txt'
15328         #-----------------------------------------------------
15329         return if ( $i2 - $i1 > 200 );
15330
15331         foreach my $ii ( $i1 + 1 .. $i2 - 1 ) {
15332
15333             my $depth_i = $nesting_depth_to_go[$ii];
15334             next   if ( $depth_i > $depth_1 );
15335             return if ( $depth_i < $depth_1 );
15336             my $tok_i = $tokens_to_go[$ii];
15337             return if ( $rbreak->{$tok_i} );
15338         }
15339         return 1;
15340     }
15341 } ## end closure in_same_container_i
15342
15343 sub break_equals {
15344
15345     # Look for assignment operators that could use a breakpoint.
15346     # For example, in the following snippet
15347     #
15348     #    $HOME = $ENV{HOME}
15349     #      || $ENV{LOGDIR}
15350     #      || $pw[7]
15351     #      || die "no home directory for user $<";
15352     #
15353     # we could break at the = to get this, which is a little nicer:
15354     #    $HOME =
15355     #         $ENV{HOME}
15356     #      || $ENV{LOGDIR}
15357     #      || $pw[7]
15358     #      || die "no home directory for user $<";
15359     #
15360     # The logic here follows the logic in set_logical_padding, which
15361     # will add the padding in the second line to improve alignment.
15362     #
15363     my ( $self, $ri_left, $ri_right ) = @_;
15364     my $nmax = @{$ri_right} - 1;
15365     return unless ( $nmax >= 2 );
15366
15367     # scan the left ends of first two lines
15368     my $tokbeg = "";
15369     my $depth_beg;
15370     for my $n ( 1 .. 2 ) {
15371         my $il     = $ri_left->[$n];
15372         my $typel  = $types_to_go[$il];
15373         my $tokenl = $tokens_to_go[$il];
15374
15375         my $has_leading_op = ( $tokenl =~ /^\w/ )
15376           ? $is_chain_operator{$tokenl}    # + - * / : ? && ||
15377           : $is_chain_operator{$typel};    # and, or
15378         return unless ($has_leading_op);
15379         if ( $n > 1 ) {
15380             return
15381               unless ( $tokenl eq $tokbeg
15382                 && $nesting_depth_to_go[$il] eq $depth_beg );
15383         }
15384         $tokbeg    = $tokenl;
15385         $depth_beg = $nesting_depth_to_go[$il];
15386     }
15387
15388     # now look for any interior tokens of the same types
15389     my $il = $ri_left->[0];
15390     my $ir = $ri_right->[0];
15391
15392     # now make a list of all new break points
15393     my @insert_list;
15394     for ( my $i = $ir - 1 ; $i > $il ; $i-- ) {
15395         my $type = $types_to_go[$i];
15396         if (   $is_assignment{$type}
15397             && $nesting_depth_to_go[$i] eq $depth_beg )
15398         {
15399             if ( $want_break_before{$type} ) {
15400                 push @insert_list, $i - 1;
15401             }
15402             else {
15403                 push @insert_list, $i;
15404             }
15405         }
15406     }
15407
15408     # Break after a 'return' followed by a chain of operators
15409     #  return ( $^O !~ /win32|dos/i )
15410     #    && ( $^O ne 'VMS' )
15411     #    && ( $^O ne 'OS2' )
15412     #    && ( $^O ne 'MacOS' );
15413     # To give:
15414     #  return
15415     #       ( $^O !~ /win32|dos/i )
15416     #    && ( $^O ne 'VMS' )
15417     #    && ( $^O ne 'OS2' )
15418     #    && ( $^O ne 'MacOS' );
15419     my $i = 0;
15420     if (   $types_to_go[$i] eq 'k'
15421         && $tokens_to_go[$i] eq 'return'
15422         && $ir > $il
15423         && $nesting_depth_to_go[$i] eq $depth_beg )
15424     {
15425         push @insert_list, $i;
15426     }
15427
15428     return unless (@insert_list);
15429
15430     # One final check...
15431     # scan second and third lines and be sure there are no assignments
15432     # we want to avoid breaking at an = to make something like this:
15433     #    unless ( $icon =
15434     #           $html_icons{"$type-$state"}
15435     #        or $icon = $html_icons{$type}
15436     #        or $icon = $html_icons{$state} )
15437     for my $n ( 1 .. 2 ) {
15438         my $il = $ri_left->[$n];
15439         my $ir = $ri_right->[$n];
15440         foreach my $i ( $il + 1 .. $ir ) {
15441             my $type = $types_to_go[$i];
15442             return
15443               if ( $is_assignment{$type}
15444                 && $nesting_depth_to_go[$i] eq $depth_beg );
15445         }
15446     }
15447
15448     # ok, insert any new break point
15449     if (@insert_list) {
15450         $self->insert_additional_breaks( \@insert_list, $ri_left, $ri_right );
15451     }
15452     return;
15453 }
15454
15455 {    ## begin closure recombine_breakpoints
15456
15457     # This routine is called once per batch to see if it would be better
15458     # to combine some of the lines into which the batch has been broken.
15459
15460     my %is_amp_amp;
15461     my %is_ternary;
15462     my %is_math_op;
15463     my %is_plus_minus;
15464     my %is_mult_div;
15465
15466     BEGIN {
15467
15468         my @q;
15469         @q = qw( && || );
15470         @is_amp_amp{@q} = (1) x scalar(@q);
15471
15472         @q = qw( ? : );
15473         @is_ternary{@q} = (1) x scalar(@q);
15474
15475         @q = qw( + - * / );
15476         @is_math_op{@q} = (1) x scalar(@q);
15477
15478         @q = qw( + - );
15479         @is_plus_minus{@q} = (1) x scalar(@q);
15480
15481         @q = qw( * / );
15482         @is_mult_div{@q} = (1) x scalar(@q);
15483     }
15484
15485     sub Debug_dump_breakpoints {
15486
15487         # Debug routine to dump current breakpoints...not normally called
15488         # We are given indexes to the current lines:
15489         # $ri_beg = ref to array of BEGinning indexes of each line
15490         # $ri_end = ref to array of ENDing indexes of each line
15491         my ( $self, $ri_beg, $ri_end, $msg ) = @_;
15492         print STDERR "----Dumping breakpoints from: $msg----\n";
15493         for my $n ( 0 .. @{$ri_end} - 1 ) {
15494             my $ibeg = $ri_beg->[$n];
15495             my $iend = $ri_end->[$n];
15496             my $text = "";
15497             foreach my $i ( $ibeg .. $iend ) {
15498                 $text .= $tokens_to_go[$i];
15499             }
15500             print STDERR "$n ($ibeg:$iend) $text\n";
15501         }
15502         print STDERR "----\n";
15503         return;
15504     }
15505
15506     sub delete_one_line_semicolons {
15507
15508         my ( $self, $ri_beg, $ri_end ) = @_;
15509         my $rLL                 = $self->[_rLL_];
15510         my $K_opening_container = $self->[_K_opening_container_];
15511
15512         # Walk down the lines of this batch and delete any semicolons
15513         # terminating one-line blocks;
15514         my $nmax = @{$ri_end} - 1;
15515
15516         foreach my $n ( 0 .. $nmax ) {
15517             my $i_beg    = $ri_beg->[$n];
15518             my $i_e      = $ri_end->[$n];
15519             my $K_beg    = $K_to_go[$i_beg];
15520             my $K_e      = $K_to_go[$i_e];
15521             my $K_end    = $K_e;
15522             my $type_end = $rLL->[$K_end]->[_TYPE_];
15523             if ( $type_end eq '#' ) {
15524                 $K_end = $self->K_previous_nonblank($K_end);
15525                 if ( defined($K_end) ) { $type_end = $rLL->[$K_end]->[_TYPE_]; }
15526             }
15527
15528             # we are looking for a line ending in closing brace
15529             next
15530               unless ( $type_end eq '}' && $rLL->[$K_end]->[_TOKEN_] eq '}' );
15531
15532             # ...and preceded by a semicolon on the same line
15533             my $K_semicolon = $self->K_previous_nonblank($K_end);
15534             next unless defined($K_semicolon);
15535             my $i_semicolon = $i_beg + ( $K_semicolon - $K_beg );
15536             next if ( $i_semicolon <= $i_beg );
15537             next unless ( $rLL->[$K_semicolon]->[_TYPE_] eq ';' );
15538
15539             # Safety check - shouldn't happen - not critical
15540             # This is not worth throwing a Fault, except in DEVEL_MODE
15541             if ( $types_to_go[$i_semicolon] ne ';' ) {
15542                 DEVEL_MODE
15543                   && Fault("unexpected type looking for semicolon");
15544                 next;
15545             }
15546
15547             # ... with the corresponding opening brace on the same line
15548             my $type_sequence = $rLL->[$K_end]->[_TYPE_SEQUENCE_];
15549             my $K_opening     = $K_opening_container->{$type_sequence};
15550             next unless ( defined($K_opening) );
15551             my $i_opening = $i_beg + ( $K_opening - $K_beg );
15552             next if ( $i_opening < $i_beg );
15553
15554             # ... and only one semicolon between these braces
15555             my $semicolon_count = 0;
15556             foreach my $K ( $K_opening + 1 .. $K_semicolon - 1 ) {
15557                 if ( $rLL->[$K]->[_TYPE_] eq ';' ) {
15558                     $semicolon_count++;
15559                     last;
15560                 }
15561             }
15562             next if ($semicolon_count);
15563
15564             # ...ok, then make the semicolon invisible
15565             my $len = $token_lengths_to_go[$i_semicolon];
15566             $tokens_to_go[$i_semicolon]            = "";
15567             $token_lengths_to_go[$i_semicolon]     = 0;
15568             $rLL->[$K_semicolon]->[_TOKEN_]        = "";
15569             $rLL->[$K_semicolon]->[_TOKEN_LENGTH_] = 0;
15570             foreach ( $i_semicolon .. $max_index_to_go ) {
15571                 $summed_lengths_to_go[ $_ + 1 ] -= $len;
15572             }
15573         }
15574         return;
15575     }
15576
15577     use constant DEBUG_RECOMBINE => 0;
15578
15579     sub recombine_breakpoints {
15580
15581         # We are given indexes to the current lines:
15582         #  $ri_beg = ref to array of BEGinning indexes of each line
15583         #  $ri_end = ref to array of ENDing indexes of each line
15584         my ( $self, $ri_beg, $ri_end ) = @_;
15585
15586         # sub break_long_lines is very liberal in setting line breaks
15587         # for long lines, always setting breaks at good breakpoints, even
15588         # when that creates small lines.  Sometimes small line fragments
15589         # are produced which would look better if they were combined.
15590         # That's the task of this routine.
15591
15592         # do nothing under extreme stress
15593         return if ( $stress_level_alpha < 1 && !DEVEL_MODE );
15594
15595         my $rK_weld_right = $self->[_rK_weld_right_];
15596         my $rK_weld_left  = $self->[_rK_weld_left_];
15597
15598         my $nmax = @{$ri_end} - 1;
15599         return if ( $nmax <= 0 );
15600
15601         my $nmax_start = $nmax;
15602
15603         # Make a list of all good joining tokens between the lines
15604         # n-1 and n.
15605         my @joint;
15606
15607         # Break the total batch sub-sections with lengths short enough to
15608         # recombine
15609         my $rsections = [];
15610         my $nbeg      = 0;
15611         my $nend;
15612         my $nmax_section = 0;
15613         foreach my $nn ( 1 .. $nmax ) {
15614             my $ibeg_1 = $ri_beg->[ $nn - 1 ];
15615             my $iend_1 = $ri_end->[ $nn - 1 ];
15616             my $iend_2 = $ri_end->[$nn];
15617             my $ibeg_2 = $ri_beg->[$nn];
15618
15619             # Define the joint variable
15620             my ( $itok, $itokp, $itokm );
15621             foreach my $itest ( $iend_1, $ibeg_2 ) {
15622                 my $type = $types_to_go[$itest];
15623                 if (   $is_math_op{$type}
15624                     || $is_amp_amp{$type}
15625                     || $is_assignment{$type}
15626                     || $type eq ':' )
15627                 {
15628                     $itok = $itest;
15629                 }
15630             }
15631             $joint[$nn] = [$itok];
15632
15633             # Update the section list
15634             my $excess = $self->excess_line_length( $ibeg_1, $iend_2, 1 );
15635             if (
15636                 $excess <= 1
15637
15638                 # The number 5 here is an arbitrary small number intended
15639                 # to keep most small matches in one sub-section.
15640                 || ( defined($nend) && ( $nn < 5 || $nmax - $nn < 5 ) )
15641               )
15642             {
15643                 $nend = $nn;
15644             }
15645             else {
15646                 if ( defined($nend) ) {
15647                     push @{$rsections}, [ $nbeg, $nend ];
15648                     my $num = $nend - $nbeg;
15649                     if ( $num > $nmax_section ) { $nmax_section = $num }
15650                     $nbeg = $nn;
15651                     $nend = undef;
15652                 }
15653                 $nbeg = $nn;
15654             }
15655         }
15656         if ( defined($nend) ) {
15657             push @{$rsections}, [ $nbeg, $nend ];
15658             my $num = $nend - $nbeg;
15659             if ( $num > $nmax_section ) { $nmax_section = $num }
15660         }
15661
15662         my $num_sections = @{$rsections};
15663
15664         # This is potentially an O(n-squared) loop, but not critical, so we can
15665         # put a finite limit on the total number of iterations. This is
15666         # suggested by issue c118, which pushed about 5.e5 lines through here
15667         # and caused an excessive run time.
15668
15669         # Three lines of defence have been put in place to prevent excessive
15670         # run times:
15671         #  1. do nothing if formatting under stress (c118 was under stress)
15672         #  2. break into small sub-sections to decrease the maximum n-squared.
15673         #  3. put a finite limit on the number of iterations.
15674
15675         # Testing shows that most batches only require one or two iterations.
15676         # A very large batch which is broken into sub-sections can require one
15677         # iteration per section.  This suggests the limit here, which allows
15678         # up to 10 iterations plus one pass per sub-section.
15679         my $it_count = 0;
15680         my $it_count_max =
15681           10 + int( 1000 / ( 1 + $nmax_section ) ) + $num_sections;
15682
15683         if ( DEBUG_RECOMBINE > 1 ) {
15684             my $max = 0;
15685             print STDERR "-----\n$num_sections sections found for nmax=$nmax\n";
15686             foreach my $sect ( @{$rsections} ) {
15687                 my ( $nbeg, $nend ) = @{$sect};
15688                 my $num = $nend - $nbeg;
15689                 if ( $num > $max ) { $max = $num }
15690                 print STDERR "$nbeg $nend\n";
15691             }
15692             print STDERR "max size=$max of $nmax lines\n";
15693         }
15694
15695         # Loop over all sub-sections.  Note that we have to work backwards
15696         # from the end of the batch since the sections use original line
15697         # numbers, and the line numbers change as we go.
15698         while ( my $section = pop @{$rsections} ) {
15699             my ( $nbeg, $nend ) = @{$section};
15700
15701             # number of ending lines to leave untouched in this pass
15702             $nmax = @{$ri_end} - 1;
15703             my $num_freeze = $nmax - $nend;
15704
15705             my $more_to_do = 1;
15706
15707             # We keep looping over all of the lines of this batch
15708             # until there are no more possible recombinations
15709             my $nmax_last = $nmax + 1;
15710             my $reverse   = 0;
15711
15712             while ($more_to_do) {
15713
15714                 # Safety check for excess total iterations
15715                 $it_count++;
15716                 if ( $it_count > $it_count_max ) {
15717                     goto RETURN;
15718                 }
15719
15720                 my $n_best = 0;
15721                 my $bs_best;
15722                 my $nmax = @{$ri_end} - 1;
15723
15724                 # Safety check for infinite loop: the line count must decrease
15725                 unless ( $nmax < $nmax_last ) {
15726
15727                     # Shouldn't happen because splice below decreases nmax on
15728                     # each iteration.  An error can only be due to a recent
15729                     # programming change.  We better stop here.
15730                     if (DEVEL_MODE) {
15731                         Fault(
15732 "Program bug-infinite loop in recombine breakpoints\n"
15733                         );
15734                     }
15735                     $more_to_do = 0;
15736                     last;
15737                 }
15738                 $nmax_last  = $nmax;
15739                 $more_to_do = 0;
15740                 my $skip_Section_3;
15741                 my $leading_amp_count = 0;
15742                 my $this_line_is_semicolon_terminated;
15743
15744                 # loop over all remaining lines in this batch
15745                 my $nstop = $nmax - $num_freeze;
15746                 for my $iter ( $nbeg + 1 .. $nstop ) {
15747
15748                     # alternating sweep direction gives symmetric results
15749                     # for recombining lines which exceed the line length
15750                     # such as eval {{{{.... }}}}
15751                     my $n;
15752                     if   ($reverse) { $n = $nbeg + 1 + $nstop - $iter; }
15753                     else            { $n = $iter }
15754
15755                     #----------------------------------------------------------
15756                     # If we join the current pair of lines,
15757                     # line $n-1 will become the left part of the joined line
15758                     # line $n will become the right part of the joined line
15759                     #
15760                     # Here are Indexes of the endpoint tokens of the two lines:
15761                     #
15762                     #  -----line $n-1--- | -----line $n-----
15763                     #  $ibeg_1   $iend_1 | $ibeg_2   $iend_2
15764                     #                    ^
15765                     #                    |
15766                     # We want to decide if we should remove the line break
15767                     # between the tokens at $iend_1 and $ibeg_2
15768                     #
15769                     # We will apply a number of ad-hoc tests to see if joining
15770                     # here will look ok.  The code will just issue a 'next'
15771                     # command if the join doesn't look good.  If we get through
15772                     # the gauntlet of tests, the lines will be recombined.
15773                     #----------------------------------------------------------
15774                     #
15775                     # beginning and ending tokens of the lines we are working on
15776                     my $ibeg_1    = $ri_beg->[ $n - 1 ];
15777                     my $iend_1    = $ri_end->[ $n - 1 ];
15778                     my $iend_2    = $ri_end->[$n];
15779                     my $ibeg_2    = $ri_beg->[$n];
15780                     my $ibeg_nmax = $ri_beg->[$nmax];
15781
15782                     # combined line cannot be too long
15783                     my $excess =
15784                       $self->excess_line_length( $ibeg_1, $iend_2, 1 );
15785                     next if ( $excess > 0 );
15786
15787                     my $type_iend_1 = $types_to_go[$iend_1];
15788                     my $type_iend_2 = $types_to_go[$iend_2];
15789                     my $type_ibeg_1 = $types_to_go[$ibeg_1];
15790                     my $type_ibeg_2 = $types_to_go[$ibeg_2];
15791
15792                     # terminal token of line 2 if any side comment is ignored:
15793                     my $iend_2t      = $iend_2;
15794                     my $type_iend_2t = $type_iend_2;
15795
15796                     # some beginning indexes of other lines, which may not exist
15797                     my $ibeg_0 = $n > 1          ? $ri_beg->[ $n - 2 ] : -1;
15798                     my $ibeg_3 = $n < $nmax      ? $ri_beg->[ $n + 1 ] : -1;
15799                     my $ibeg_4 = $n + 2 <= $nmax ? $ri_beg->[ $n + 2 ] : -1;
15800
15801                     my $bs_tweak = 0;
15802
15803                     #my $depth_increase=( $nesting_depth_to_go[$ibeg_2] -
15804                     #        $nesting_depth_to_go[$ibeg_1] );
15805
15806                     DEBUG_RECOMBINE > 1 && do {
15807                         print STDERR
15808 "RECOMBINE: n=$n imid=$iend_1 if=$ibeg_1 type=$type_ibeg_1 =$tokens_to_go[$ibeg_1] next_type=$type_ibeg_2 next_tok=$tokens_to_go[$ibeg_2]\n";
15809                     };
15810
15811                     # If line $n is the last line, we set some flags and
15812                     # do any special checks for it
15813                     if ( $n == $nmax ) {
15814
15815                         # a terminal '{' should stay where it is
15816                         # unless preceded by a fat comma
15817                         next if ( $type_ibeg_2 eq '{' && $type_iend_1 ne '=>' );
15818
15819                         if (   $type_iend_2 eq '#'
15820                             && $iend_2 - $ibeg_2 >= 2
15821                             && $types_to_go[ $iend_2 - 1 ] eq 'b' )
15822                         {
15823                             $iend_2t      = $iend_2 - 2;
15824                             $type_iend_2t = $types_to_go[$iend_2t];
15825                         }
15826
15827                         $this_line_is_semicolon_terminated =
15828                           $type_iend_2t eq ';';
15829                     }
15830
15831                     #----------------------------------------------------------
15832                     # Recombine Section 0:
15833                     # Examine the special token joining this line pair, if any.
15834                     # Put as many tests in this section to avoid duplicate code
15835                     # and to make formatting independent of whether breaks are
15836                     # to the left or right of an operator.
15837                     #----------------------------------------------------------
15838
15839                     my ($itok) = @{ $joint[$n] };
15840                     if ($itok) {
15841
15842                         my $type = $types_to_go[$itok];
15843
15844                         if ( $type eq ':' ) {
15845
15846                             # do not join at a colon unless it disobeys the
15847                             # break request
15848                             if ( $itok eq $iend_1 ) {
15849                                 next unless $want_break_before{$type};
15850                             }
15851                             else {
15852                                 $leading_amp_count++;
15853                                 next if $want_break_before{$type};
15854                             }
15855                         } ## end if ':'
15856
15857                         # handle math operators + - * /
15858                         elsif ( $is_math_op{$type} ) {
15859
15860                             # Combine these lines if this line is a single
15861                             # number, or if it is a short term with same
15862                             # operator as the previous line.  For example, in
15863                             # the following code we will combine all of the
15864                             # short terms $A, $B, $C, $D, $E, $F, together
15865                             # instead of leaving them one per line:
15866                             #  my $time =
15867                             #    $A * $B * $C * $D * $E * $F *
15868                             #    ( 2. * $eps * $sigma * $area ) *
15869                             #    ( 1. / $tcold**3 - 1. / $thot**3 );
15870
15871                             # This can be important in math-intensive code.
15872
15873                             my $good_combo;
15874
15875                             my $itokp  = min( $inext_to_go[$itok],  $iend_2 );
15876                             my $itokpp = min( $inext_to_go[$itokp], $iend_2 );
15877                             my $itokm  = max( $iprev_to_go[$itok],  $ibeg_1 );
15878                             my $itokmm = max( $iprev_to_go[$itokm], $ibeg_1 );
15879
15880                             # check for a number on the right
15881                             if ( $types_to_go[$itokp] eq 'n' ) {
15882
15883                                 # ok if nothing else on right
15884                                 if ( $itokp == $iend_2 ) {
15885                                     $good_combo = 1;
15886                                 }
15887                                 else {
15888
15889                                     # look one more token to right..
15890                                     # okay if math operator or some termination
15891                                     $good_combo =
15892                                       ( ( $itokpp == $iend_2 )
15893                                           && $is_math_op{ $types_to_go[$itokpp]
15894                                           } )
15895                                       || $types_to_go[$itokpp] =~ /^[#,;]$/;
15896                                 }
15897                             }
15898
15899                             # check for a number on the left
15900                             if ( !$good_combo && $types_to_go[$itokm] eq 'n' ) {
15901
15902                                 # okay if nothing else to left
15903                                 if ( $itokm == $ibeg_1 ) {
15904                                     $good_combo = 1;
15905                                 }
15906
15907                                 # otherwise look one more token to left
15908                                 else {
15909
15910                                    # okay if math operator, comma, or assignment
15911                                     $good_combo = ( $itokmm == $ibeg_1 )
15912                                       && ( $is_math_op{ $types_to_go[$itokmm] }
15913                                         || $types_to_go[$itokmm] =~ /^[,]$/
15914                                         || $is_assignment{ $types_to_go[$itokmm]
15915                                         } );
15916                                 }
15917                             }
15918
15919                             # look for a single short token either side of the
15920                             # operator
15921                             if ( !$good_combo ) {
15922
15923                                 # Slight adjustment factor to make results
15924                                 # independent of break before or after operator
15925                                 # in long summed lists.  (An operator and a
15926                                 # space make two spaces).
15927                                 my $two = ( $itok eq $iend_1 ) ? 2 : 0;
15928
15929                                 $good_combo =
15930
15931                                   # numbers or id's on both sides of this joint
15932                                   $types_to_go[$itokp] =~ /^[in]$/
15933                                   && $types_to_go[$itokm] =~ /^[in]$/
15934
15935                                   # one of the two lines must be short:
15936                                   && (
15937                                     (
15938                                         # no more than 2 nonblank tokens right
15939                                         # of joint
15940                                         $itokpp == $iend_2
15941
15942                                         # short
15943                                         && token_sequence_length(
15944                                             $itokp, $iend_2
15945                                         ) < $two +
15946                                         $rOpts_short_concatenation_item_length
15947                                     )
15948                                     || (
15949                                         # no more than 2 nonblank tokens left of
15950                                         # joint
15951                                         $itokmm == $ibeg_1
15952
15953                                         # short
15954                                         && token_sequence_length(
15955                                             $ibeg_1, $itokm
15956                                         ) < 2 - $two +
15957                                         $rOpts_short_concatenation_item_length
15958                                     )
15959
15960                                   )
15961
15962                                   # keep pure terms; don't mix +- with */
15963                                   && !(
15964                                     $is_plus_minus{$type}
15965                                     && (   $is_mult_div{ $types_to_go[$itokmm] }
15966                                         || $is_mult_div{ $types_to_go[$itokpp] }
15967                                     )
15968                                   )
15969                                   && !(
15970                                     $is_mult_div{$type}
15971                                     && ( $is_plus_minus{ $types_to_go[$itokmm] }
15972                                         || $is_plus_minus{ $types_to_go[$itokpp]
15973                                         } )
15974                                   )
15975
15976                                   ;
15977                             }
15978
15979                             # it is also good to combine if we can reduce to 2
15980                             # lines
15981                             if ( !$good_combo ) {
15982
15983                                 # index on other line where same token would be
15984                                 # in a long chain.
15985                                 my $iother =
15986                                   ( $itok == $iend_1 ) ? $iend_2 : $ibeg_1;
15987
15988                                 $good_combo =
15989                                      $n == 2
15990                                   && $n == $nmax
15991                                   && $types_to_go[$iother] ne $type;
15992                             }
15993
15994                             next unless ($good_combo);
15995
15996                         } ## end math
15997
15998                         elsif ( $is_amp_amp{$type} ) {
15999                             ##TBD
16000                         } ## end &&, ||
16001
16002                         elsif ( $is_assignment{$type} ) {
16003                             ##TBD
16004                         } ## end assignment
16005                     }
16006
16007                     #----------------------------------------------------------
16008                     # Recombine Section 1:
16009                     # Join welded nested containers immediately
16010                     #----------------------------------------------------------
16011
16012                     if (
16013                         $total_weld_count
16014                         && ( $type_sequence_to_go[$iend_1]
16015                             && defined( $rK_weld_right->{ $K_to_go[$iend_1] } )
16016                             || $type_sequence_to_go[$ibeg_2]
16017                             && defined( $rK_weld_left->{ $K_to_go[$ibeg_2] } ) )
16018                       )
16019                     {
16020                         $n_best = $n;
16021                         last;
16022                     }
16023
16024                     $reverse = 0;
16025
16026                     #----------------------------------------------------------
16027                     # Recombine Section 2:
16028                     # Examine token at $iend_1 (right end of first line of pair)
16029                     #----------------------------------------------------------
16030
16031                     # an isolated '}' may join with a ';' terminated segment
16032                     if ( $type_iend_1 eq '}' ) {
16033
16034                     # Check for cases where combining a semicolon terminated
16035                     # statement with a previous isolated closing paren will
16036                     # allow the combined line to be outdented.  This is
16037                     # generally a good move.  For example, we can join up
16038                     # the last two lines here:
16039                     #  (
16040                     #      $dev,  $ino,   $mode,  $nlink, $uid,     $gid, $rdev,
16041                     #      $size, $atime, $mtime, $ctime, $blksize, $blocks
16042                     #    )
16043                     #    = stat($file);
16044                     #
16045                     # to get:
16046                     #  (
16047                     #      $dev,  $ino,   $mode,  $nlink, $uid,     $gid, $rdev,
16048                     #      $size, $atime, $mtime, $ctime, $blksize, $blocks
16049                     #  ) = stat($file);
16050                     #
16051                     # which makes the parens line up.
16052                     #
16053                     # Another example, from Joe Matarazzo, probably looks best
16054                     # with the 'or' clause appended to the trailing paren:
16055                     #  $self->some_method(
16056                     #      PARAM1 => 'foo',
16057                     #      PARAM2 => 'bar'
16058                     #  ) or die "Some_method didn't work";
16059                     #
16060                     # But we do not want to do this for something like the -lp
16061                     # option where the paren is not outdentable because the
16062                     # trailing clause will be far to the right.
16063                     #
16064                     # The logic here is synchronized with the logic in sub
16065                     # sub final_indentation_adjustment, which actually does
16066                     # the outdenting.
16067                     #
16068                         $skip_Section_3 ||= $this_line_is_semicolon_terminated
16069
16070                           # only one token on last line
16071                           && $ibeg_1 == $iend_1
16072
16073                           # must be structural paren
16074                           && $tokens_to_go[$iend_1] eq ')'
16075
16076                           # style must allow outdenting,
16077                           && !$closing_token_indentation{')'}
16078
16079                           # only leading '&&', '||', and ':' if no others seen
16080                           # (but note: our count made below could be wrong
16081                           # due to intervening comments)
16082                           && ( $leading_amp_count == 0
16083                             || $type_ibeg_2 !~ /^(:|\&\&|\|\|)$/ )
16084
16085                           # but leading colons probably line up with a
16086                           # previous colon or question (count could be wrong).
16087                           && $type_ibeg_2 ne ':'
16088
16089                           # only one step in depth allowed.  this line must not
16090                           # begin with a ')' itself.
16091                           && ( $nesting_depth_to_go[$iend_1] ==
16092                             $nesting_depth_to_go[$iend_2] + 1 );
16093
16094                         # YVES patch 2 of 2:
16095                         # Allow cuddled eval chains, like this:
16096                         #   eval {
16097                         #       #STUFF;
16098                         #       1; # return true
16099                         #   } or do {
16100                         #       #handle error
16101                         #   };
16102                         # This patch works together with a patch in
16103                         # setting adjusted indentation (where the closing eval
16104                         # brace is outdented if possible).
16105                         # The problem is that an 'eval' block has continuation
16106                         # indentation and it looks better to undo it in some
16107                         # cases.  If we do not use this patch we would get:
16108                         #   eval {
16109                         #       #STUFF;
16110                         #       1; # return true
16111                         #       }
16112                         #       or do {
16113                         #       #handle error
16114                         #     };
16115                         # The alternative, for uncuddled style, is to create
16116                         # a patch in final_indentation_adjustment which undoes
16117                         # the indentation of a leading line like 'or do {'.
16118                         # This doesn't work well with -icb through
16119                         if (
16120                                $block_type_to_go[$iend_1] eq 'eval'
16121                             && !ref( $leading_spaces_to_go[$iend_1] )
16122                             && !$rOpts_indent_closing_brace
16123                             && $tokens_to_go[$iend_2] eq '{'
16124                             && (
16125                                 ( $type_ibeg_2 =~ /^(\&\&|\|\|)$/ )
16126                                 || (   $type_ibeg_2 eq 'k'
16127                                     && $is_and_or{ $tokens_to_go[$ibeg_2] } )
16128                                 || $is_if_unless{ $tokens_to_go[$ibeg_2] }
16129                             )
16130                           )
16131                         {
16132                             $skip_Section_3 ||= 1;
16133                         }
16134
16135                         next
16136                           unless (
16137                             $skip_Section_3
16138
16139                             # handle '.' and '?' specially below
16140                             || ( $type_ibeg_2 =~ /^[\.\?]$/ )
16141                           );
16142                     }
16143
16144                     elsif ( $type_iend_1 eq '{' ) {
16145
16146                         # YVES
16147                         # honor breaks at opening brace
16148                         # Added to prevent recombining something like this:
16149                         #  } || eval { package main;
16150                         next if $forced_breakpoint_to_go[$iend_1];
16151                     }
16152
16153                     # do not recombine lines with ending &&, ||,
16154                     elsif ( $is_amp_amp{$type_iend_1} ) {
16155                         next unless $want_break_before{$type_iend_1};
16156                     }
16157
16158                     # Identify and recombine a broken ?/: chain
16159                     elsif ( $type_iend_1 eq '?' ) {
16160
16161                         # Do not recombine different levels
16162                         next
16163                           if (
16164                             $levels_to_go[$ibeg_1] ne $levels_to_go[$ibeg_2] );
16165
16166                         # do not recombine unless next line ends in :
16167                         next unless $type_iend_2 eq ':';
16168                     }
16169
16170                     # for lines ending in a comma...
16171                     elsif ( $type_iend_1 eq ',' ) {
16172
16173                         # Do not recombine at comma which is following the
16174                         # input bias.
16175                         # TODO: might be best to make a special flag
16176                         next if ( $old_breakpoint_to_go[$iend_1] );
16177
16178                         # An isolated '},' may join with an identifier + ';'
16179                         # This is useful for the class of a 'bless' statement
16180                         # (bless.t)
16181                         if (   $type_ibeg_1 eq '}'
16182                             && $type_ibeg_2 eq 'i' )
16183                         {
16184                             next
16185                               unless ( ( $ibeg_1 == ( $iend_1 - 1 ) )
16186                                 && ( $iend_2 == ( $ibeg_2 + 1 ) )
16187                                 && $this_line_is_semicolon_terminated );
16188
16189                             # override breakpoint
16190                             $forced_breakpoint_to_go[$iend_1] = 0;
16191                         }
16192
16193                         # but otherwise ..
16194                         else {
16195
16196                             # do not recombine after a comma unless this will
16197                             # leave just 1 more line
16198                             next unless ( $n + 1 >= $nmax );
16199
16200                             # do not recombine if there is a change in
16201                             # indentation depth
16202                             next
16203                               if ( $levels_to_go[$iend_1] !=
16204                                 $levels_to_go[$iend_2] );
16205
16206                             # do not recombine a "complex expression" after a
16207                             # comma.  "complex" means no parens.
16208                             my $saw_paren;
16209                             foreach my $ii ( $ibeg_2 .. $iend_2 ) {
16210                                 if ( $tokens_to_go[$ii] eq '(' ) {
16211                                     $saw_paren = 1;
16212                                     last;
16213                                 }
16214                             }
16215                             next if $saw_paren;
16216                         }
16217                     }
16218
16219                     # opening paren..
16220                     elsif ( $type_iend_1 eq '(' ) {
16221
16222                         # No longer doing this
16223                     }
16224
16225                     elsif ( $type_iend_1 eq ')' ) {
16226
16227                         # No longer doing this
16228                     }
16229
16230                     # keep a terminal for-semicolon
16231                     elsif ( $type_iend_1 eq 'f' ) {
16232                         next;
16233                     }
16234
16235                     # if '=' at end of line ...
16236                     elsif ( $is_assignment{$type_iend_1} ) {
16237
16238                         # keep break after = if it was in input stream
16239                         # this helps prevent 'blinkers'
16240                         next
16241                           if (
16242                             $old_breakpoint_to_go[$iend_1]
16243
16244                             # don't strand an isolated '='
16245                             && $iend_1 != $ibeg_1
16246                           );
16247
16248                         my $is_short_quote =
16249                           (      $type_ibeg_2 eq 'Q'
16250                               && $ibeg_2 == $iend_2
16251                               && token_sequence_length( $ibeg_2, $ibeg_2 ) <
16252                               $rOpts_short_concatenation_item_length );
16253                         my $is_ternary = (
16254                             $type_ibeg_1 eq '?' && ( $ibeg_3 >= 0
16255                                 && $types_to_go[$ibeg_3] eq ':' )
16256                         );
16257
16258                         # always join an isolated '=', a short quote, or if this
16259                         # will put ?/: at start of adjacent lines
16260                         if (   $ibeg_1 != $iend_1
16261                             && !$is_short_quote
16262                             && !$is_ternary )
16263                         {
16264                             next
16265                               unless (
16266                                 (
16267
16268                                     # unless we can reduce this to two lines
16269                                     $nmax < $n + 2
16270
16271                                     # or three lines, the last with a leading
16272                                     # semicolon
16273                                     || (   $nmax == $n + 2
16274                                         && $types_to_go[$ibeg_nmax] eq ';' )
16275
16276                                     # or the next line ends with a here doc
16277                                     || $type_iend_2 eq 'h'
16278
16279                                     # or the next line ends in an open paren or
16280                                     # brace and the break hasn't been forced
16281                                     # [dima.t]
16282                                     || (  !$forced_breakpoint_to_go[$iend_1]
16283                                         && $type_iend_2 eq '{' )
16284                                 )
16285
16286                                 # do not recombine if the two lines might align
16287                                 # well this is a very approximate test for this
16288                                 && (
16289
16290                                     # RT#127633 - the leading tokens are not
16291                                     # operators
16292                                     ( $type_ibeg_2 ne $tokens_to_go[$ibeg_2] )
16293
16294                                     # or they are different
16295                                     || (   $ibeg_3 >= 0
16296                                         && $type_ibeg_2 ne
16297                                         $types_to_go[$ibeg_3] )
16298                                 )
16299                               );
16300
16301                             if (
16302
16303                                 # Recombine if we can make two lines
16304                                 $nmax >= $n + 2
16305
16306                                 # -lp users often prefer this:
16307                                 #  my $title = function($env, $env, $sysarea,
16308                                 #                       "bubba Borrower Entry");
16309                                 #  so we will recombine if -lp is used we have
16310                                 #  ending comma
16311                                 && !(
16312                                        $ibeg_3 > 0
16313                                     && ref( $leading_spaces_to_go[$ibeg_3] )
16314                                     && $type_iend_2 eq ','
16315                                 )
16316                               )
16317                             {
16318
16319                                 # otherwise, scan the rhs line up to last token
16320                                 # for complexity.  Note that we are not
16321                                 # counting the last token in case it is an
16322                                 # opening paren.
16323                                 my $tv    = 0;
16324                                 my $depth = $nesting_depth_to_go[$ibeg_2];
16325                                 foreach my $i ( $ibeg_2 + 1 .. $iend_2 - 1 ) {
16326                                     if ( $nesting_depth_to_go[$i] != $depth ) {
16327                                         $tv++;
16328                                         last if ( $tv > 1 );
16329                                     }
16330                                     $depth = $nesting_depth_to_go[$i];
16331                                 }
16332
16333                                 # ok to recombine if no level changes before
16334                                 # last token
16335                                 if ( $tv > 0 ) {
16336
16337                                     # otherwise, do not recombine if more than
16338                                     # two level changes.
16339                                     next if ( $tv > 1 );
16340
16341                                     # check total complexity of the two
16342                                     # adjacent lines that will occur if we do
16343                                     # this join
16344                                     my $istop =
16345                                       ( $n < $nmax )
16346                                       ? $ri_end->[ $n + 1 ]
16347                                       : $iend_2;
16348                                     foreach my $i ( $iend_2 .. $istop ) {
16349                                         if (
16350                                             $nesting_depth_to_go[$i] != $depth )
16351                                         {
16352                                             $tv++;
16353                                             last if ( $tv > 2 );
16354                                         }
16355                                         $depth = $nesting_depth_to_go[$i];
16356                                     }
16357
16358                                     # do not recombine if total is more than 2
16359                                     # level changes
16360                                     next if ( $tv > 2 );
16361                                 }
16362                             }
16363                         }
16364
16365                         unless ( $tokens_to_go[$ibeg_2] =~ /^[\{\(\[]$/ ) {
16366                             $forced_breakpoint_to_go[$iend_1] = 0;
16367                         }
16368                     }
16369
16370                     # for keywords..
16371                     elsif ( $type_iend_1 eq 'k' ) {
16372
16373                         # make major control keywords stand out
16374                         # (recombine.t)
16375                         next
16376                           if (
16377
16378                             #/^(last|next|redo|return)$/
16379                             $is_last_next_redo_return{ $tokens_to_go[$iend_1] }
16380
16381                             # but only if followed by multiple lines
16382                             && $n < $nmax
16383                           );
16384
16385                         if ( $is_and_or{ $tokens_to_go[$iend_1] } ) {
16386                             next
16387                               unless $want_break_before{ $tokens_to_go[$iend_1]
16388                               };
16389                         }
16390                     }
16391
16392                     #----------------------------------------------------------
16393                     # Recombine Section 3:
16394                     # Examine token at $ibeg_2 (left end of second line of pair)
16395                     #----------------------------------------------------------
16396
16397                     # join lines identified above as capable of
16398                     # causing an outdented line with leading closing paren
16399                     # Note that we are skipping the rest of this section
16400                     # and the rest of the loop to do the join
16401                     if ($skip_Section_3) {
16402                         $forced_breakpoint_to_go[$iend_1] = 0;
16403                         $n_best = $n;
16404                         last;
16405                     }
16406
16407                     # handle lines with leading &&, ||
16408                     elsif ( $is_amp_amp{$type_ibeg_2} ) {
16409
16410                         $leading_amp_count++;
16411
16412                         # ok to recombine if it follows a ? or :
16413                         # and is followed by an open paren..
16414                         my $ok =
16415                           (      $is_ternary{$type_ibeg_1}
16416                               && $tokens_to_go[$iend_2] eq '(' )
16417
16418                     # or is followed by a ? or : at same depth
16419                     #
16420                     # We are looking for something like this. We can
16421                     # recombine the && line with the line above to make the
16422                     # structure more clear:
16423                     #  return
16424                     #    exists $G->{Attr}->{V}
16425                     #    && exists $G->{Attr}->{V}->{$u}
16426                     #    ? %{ $G->{Attr}->{V}->{$u} }
16427                     #    : ();
16428                     #
16429                     # We should probably leave something like this alone:
16430                     #  return
16431                     #       exists $G->{Attr}->{E}
16432                     #    && exists $G->{Attr}->{E}->{$u}
16433                     #    && exists $G->{Attr}->{E}->{$u}->{$v}
16434                     #    ? %{ $G->{Attr}->{E}->{$u}->{$v} }
16435                     #    : ();
16436                     # so that we either have all of the &&'s (or ||'s)
16437                     # on one line, as in the first example, or break at
16438                     # each one as in the second example.  However, it
16439                     # sometimes makes things worse to check for this because
16440                     # it prevents multiple recombinations.  So this is not done.
16441                           || ( $ibeg_3 >= 0
16442                             && $is_ternary{ $types_to_go[$ibeg_3] }
16443                             && $nesting_depth_to_go[$ibeg_3] ==
16444                             $nesting_depth_to_go[$ibeg_2] );
16445
16446                         # Combine a trailing && term with an || term: fix for
16447                         # c060 This is rare but can happen.
16448                         $ok ||= 1
16449                           if ( $ibeg_3 < 0
16450                             && $type_ibeg_2 eq '&&'
16451                             && $type_ibeg_1 eq '||'
16452                             && $nesting_depth_to_go[$ibeg_2] ==
16453                             $nesting_depth_to_go[$ibeg_1] );
16454
16455                         next if !$ok && $want_break_before{$type_ibeg_2};
16456                         $forced_breakpoint_to_go[$iend_1] = 0;
16457
16458                         # tweak the bond strength to give this joint priority
16459                         # over ? and :
16460                         $bs_tweak = 0.25;
16461                     }
16462
16463                     # Identify and recombine a broken ?/: chain
16464                     elsif ( $type_ibeg_2 eq '?' ) {
16465
16466                         # Do not recombine different levels
16467                         my $lev = $levels_to_go[$ibeg_2];
16468                         next if ( $lev ne $levels_to_go[$ibeg_1] );
16469
16470                         # Do not recombine a '?' if either next line or
16471                         # previous line does not start with a ':'.  The reasons
16472                         # are that (1) no alignment of the ? will be possible
16473                         # and (2) the expression is somewhat complex, so the
16474                         # '?' is harder to see in the interior of the line.
16475                         my $follows_colon = $ibeg_1 >= 0 && $type_ibeg_1 eq ':';
16476                         my $precedes_colon =
16477                           $ibeg_3 >= 0 && $types_to_go[$ibeg_3] eq ':';
16478                         next unless ( $follows_colon || $precedes_colon );
16479
16480                         # we will always combining a ? line following a : line
16481                         if ( !$follows_colon ) {
16482
16483                             # ...otherwise recombine only if it looks like a
16484                             # chain.  we will just look at a few nearby lines
16485                             # to see if this looks like a chain.
16486                             my $local_count = 0;
16487                             foreach
16488                               my $ii ( $ibeg_0, $ibeg_1, $ibeg_3, $ibeg_4 )
16489                             {
16490                                 $local_count++
16491                                   if $ii >= 0
16492                                   && $types_to_go[$ii] eq ':'
16493                                   && $levels_to_go[$ii] == $lev;
16494                             }
16495                             next unless ( $local_count > 1 );
16496                         }
16497                         $forced_breakpoint_to_go[$iend_1] = 0;
16498                     }
16499
16500                     # do not recombine lines with leading '.'
16501                     elsif ( $type_ibeg_2 eq '.' ) {
16502                         my $i_next_nonblank =
16503                           min( $inext_to_go[$ibeg_2], $iend_2 );
16504                         next
16505                           unless (
16506
16507                    # ... unless there is just one and we can reduce
16508                    # this to two lines if we do.  For example, this
16509                    #
16510                    #
16511                    #  $bodyA .=
16512                    #    '($dummy, $pat) = &get_next_tex_cmd;' . '$args .= $pat;'
16513                    #
16514                    #  looks better than this:
16515                    #  $bodyA .= '($dummy, $pat) = &get_next_tex_cmd;'
16516                    #    . '$args .= $pat;'
16517
16518                             (
16519                                    $n == 2
16520                                 && $n == $nmax
16521                                 && $type_ibeg_1 ne $type_ibeg_2
16522                             )
16523
16524                             # ... or this would strand a short quote , like this
16525                             #                . "some long quote"
16526                             #                . "\n";
16527
16528                             || (   $types_to_go[$i_next_nonblank] eq 'Q'
16529                                 && $i_next_nonblank >= $iend_2 - 1
16530                                 && $token_lengths_to_go[$i_next_nonblank] <
16531                                 $rOpts_short_concatenation_item_length )
16532                           );
16533                     }
16534
16535                     # handle leading keyword..
16536                     elsif ( $type_ibeg_2 eq 'k' ) {
16537
16538                         # handle leading "or"
16539                         if ( $tokens_to_go[$ibeg_2] eq 'or' ) {
16540                             next
16541                               unless (
16542                                 $this_line_is_semicolon_terminated
16543                                 && (
16544                                     $type_ibeg_1 eq '}'
16545                                     || (
16546
16547                                         # following 'if' or 'unless' or 'or'
16548                                         $type_ibeg_1 eq 'k'
16549                                         && $is_if_unless{ $tokens_to_go[$ibeg_1]
16550                                         }
16551
16552                                         # important: only combine a very simple
16553                                         # or statement because the step below
16554                                         # may have combined a trailing 'and'
16555                                         # with this or, and we do not want to
16556                                         # then combine everything together
16557                                         && ( $iend_2 - $ibeg_2 <= 7 )
16558                                     )
16559                                 )
16560                               );
16561
16562                             #X: RT #81854
16563                             $forced_breakpoint_to_go[$iend_1] = 0
16564                               unless ( $old_breakpoint_to_go[$iend_1] );
16565                         }
16566
16567                         # handle leading 'and' and 'xor'
16568                         elsif ($tokens_to_go[$ibeg_2] eq 'and'
16569                             || $tokens_to_go[$ibeg_2] eq 'xor' )
16570                         {
16571
16572                             # Decide if we will combine a single terminal 'and'
16573                             # after an 'if' or 'unless'.
16574
16575                             #     This looks best with the 'and' on the same
16576                             #     line as the 'if':
16577                             #
16578                             #         $a = 1
16579                             #           if $seconds and $nu < 2;
16580                             #
16581                             #     But this looks better as shown:
16582                             #
16583                             #         $a = 1
16584                             #           if !$this->{Parents}{$_}
16585                             #           or $this->{Parents}{$_} eq $_;
16586                             #
16587                             next
16588                               unless (
16589                                 $this_line_is_semicolon_terminated
16590                                 && (
16591
16592                                     # following 'if' or 'unless' or 'or'
16593                                     $type_ibeg_1 eq 'k'
16594                                     && ( $is_if_unless{ $tokens_to_go[$ibeg_1] }
16595                                         || $tokens_to_go[$ibeg_1] eq 'or' )
16596                                 )
16597                               );
16598                         }
16599
16600                         # handle leading "if" and "unless"
16601                         elsif ( $is_if_unless{ $tokens_to_go[$ibeg_2] } ) {
16602
16603                             # Combine something like:
16604                             #    next
16605                             #      if ( $lang !~ /${l}$/i );
16606                             # into:
16607                             #    next if ( $lang !~ /${l}$/i );
16608                             next
16609                               unless (
16610                                 $this_line_is_semicolon_terminated
16611
16612                                 #  previous line begins with 'and' or 'or'
16613                                 && $type_ibeg_1 eq 'k'
16614                                 && $is_and_or{ $tokens_to_go[$ibeg_1] }
16615
16616                               );
16617                         }
16618
16619                         # handle all other leading keywords
16620                         else {
16621
16622                             # keywords look best at start of lines,
16623                             # but combine things like "1 while"
16624                             unless ( $is_assignment{$type_iend_1} ) {
16625                                 next
16626                                   if ( ( $type_iend_1 ne 'k' )
16627                                     && ( $tokens_to_go[$ibeg_2] ne 'while' ) );
16628                             }
16629                         }
16630                     }
16631
16632                     # similar treatment of && and || as above for 'and' and
16633                     # 'or': NOTE: This block of code is currently bypassed
16634                     # because of a previous block but is retained for possible
16635                     # future use.
16636                     elsif ( $is_amp_amp{$type_ibeg_2} ) {
16637
16638                         # maybe looking at something like:
16639                         # unless $TEXTONLY || $item =~ m%</?(hr>|p>|a|img)%i;
16640
16641                         next
16642                           unless (
16643                             $this_line_is_semicolon_terminated
16644
16645                             # previous line begins with an 'if' or 'unless'
16646                             # keyword
16647                             && $type_ibeg_1 eq 'k'
16648                             && $is_if_unless{ $tokens_to_go[$ibeg_1] }
16649
16650                           );
16651                     }
16652
16653                     # handle line with leading = or similar
16654                     elsif ( $is_assignment{$type_ibeg_2} ) {
16655                         next unless ( $n == 1 || $n == $nmax );
16656                         next if ( $old_breakpoint_to_go[$iend_1] );
16657                         next
16658                           unless (
16659
16660                             # unless we can reduce this to two lines
16661                             $nmax == 2
16662
16663                             # or three lines, the last with a leading semicolon
16664                             || ( $nmax == 3 && $types_to_go[$ibeg_nmax] eq ';' )
16665
16666                             # or the next line ends with a here doc
16667                             || $type_iend_2 eq 'h'
16668
16669                             # or this is a short line ending in ;
16670                             || (   $n == $nmax
16671                                 && $this_line_is_semicolon_terminated )
16672                           );
16673                         $forced_breakpoint_to_go[$iend_1] = 0;
16674                     }
16675
16676                     #----------------------------------------------------------
16677                     # Recombine Section 4:
16678                     # Combine the lines if we arrive here and it is possible
16679                     #----------------------------------------------------------
16680
16681                     # honor hard breakpoints
16682                     next if ( $forced_breakpoint_to_go[$iend_1] > 0 );
16683
16684                     my $bs = $bond_strength_to_go[$iend_1] + $bs_tweak;
16685
16686                  # Require a few extra spaces before recombining lines if we are
16687                  # at an old breakpoint unless this is a simple list or terminal
16688                  # line.  The goal is to avoid oscillating between two
16689                  # quasi-stable end states.  For example this snippet caused
16690                  # problems:
16691 ##    my $this =
16692 ##    bless {
16693 ##        TText => "[" . ( join ',', map { "\"$_\"" } split "\n", $_ ) . "]"
16694 ##      },
16695 ##      $type;
16696                     next
16697                       if ( $old_breakpoint_to_go[$iend_1]
16698                         && !$this_line_is_semicolon_terminated
16699                         && $n < $nmax
16700                         && $excess + 4 > 0
16701                         && $type_iend_2 ne ',' );
16702
16703                     # do not recombine if we would skip in indentation levels
16704                     if ( $n < $nmax ) {
16705                         my $if_next = $ri_beg->[ $n + 1 ];
16706                         next
16707                           if (
16708                                $levels_to_go[$ibeg_1] < $levels_to_go[$ibeg_2]
16709                             && $levels_to_go[$ibeg_2] < $levels_to_go[$if_next]
16710
16711                             # but an isolated 'if (' is undesirable
16712                             && !(
16713                                    $n == 1
16714                                 && $iend_1 - $ibeg_1 <= 2
16715                                 && $type_ibeg_1 eq 'k'
16716                                 && $tokens_to_go[$ibeg_1] eq 'if'
16717                                 && $tokens_to_go[$iend_1] ne '('
16718                             )
16719                           );
16720                     }
16721
16722                     # honor no-break's
16723                     ## next if ( $bs >= NO_BREAK - 1 );  # removed for b1257
16724
16725                     # remember the pair with the greatest bond strength
16726                     if ( !$n_best ) {
16727                         $n_best  = $n;
16728                         $bs_best = $bs;
16729                     }
16730                     else {
16731
16732                         if ( $bs > $bs_best ) {
16733                             $n_best  = $n;
16734                             $bs_best = $bs;
16735                         }
16736                     }
16737                 }
16738
16739                 # recombine the pair with the greatest bond strength
16740                 if ($n_best) {
16741                     splice @{$ri_beg}, $n_best,     1;
16742                     splice @{$ri_end}, $n_best - 1, 1;
16743                     splice @joint,     $n_best,     1;
16744
16745                     # keep going if we are still making progress
16746                     $more_to_do++;
16747                 }
16748             }    # end iteration loop
16749
16750         }    # end loop over sections
16751
16752       RETURN:
16753
16754         if (DEBUG_RECOMBINE) {
16755             my $nmax = @{$ri_end} - 1;
16756             print STDERR
16757 "exiting recombine with $nmax lines, starting lines=$nmax_start, iterations=$it_count, max_it=$it_count_max numsec=$num_sections\n";
16758         }
16759         return;
16760     }
16761 } ## end closure recombine_breakpoints
16762
16763 sub insert_final_ternary_breaks {
16764
16765     my ( $self, $ri_left, $ri_right ) = @_;
16766
16767     # Called once per batch to look for and do any final line breaks for
16768     # long ternary chains
16769
16770     my $nmax = @{$ri_right} - 1;
16771
16772     # scan the left and right end tokens of all lines
16773     my $count         = 0;
16774     my $i_first_colon = -1;
16775     for my $n ( 0 .. $nmax ) {
16776         my $il    = $ri_left->[$n];
16777         my $ir    = $ri_right->[$n];
16778         my $typel = $types_to_go[$il];
16779         my $typer = $types_to_go[$ir];
16780         return if ( $typel eq '?' );
16781         return if ( $typer eq '?' );
16782         if    ( $typel eq ':' ) { $i_first_colon = $il; last; }
16783         elsif ( $typer eq ':' ) { $i_first_colon = $ir; last; }
16784     }
16785
16786     # For long ternary chains,
16787     # if the first : we see has its ? is in the interior
16788     # of a preceding line, then see if there are any good
16789     # breakpoints before the ?.
16790     if ( $i_first_colon > 0 ) {
16791         my $i_question = $mate_index_to_go[$i_first_colon];
16792         if ( $i_question > 0 ) {
16793             my @insert_list;
16794             for ( my $ii = $i_question - 1 ; $ii >= 0 ; $ii -= 1 ) {
16795                 my $token = $tokens_to_go[$ii];
16796                 my $type  = $types_to_go[$ii];
16797
16798                 # For now, a good break is either a comma or,
16799                 # in a long chain, a 'return'.
16800                 # Patch for RT #126633: added the $nmax>1 check to avoid
16801                 # breaking after a return for a simple ternary.  For longer
16802                 # chains the break after return allows vertical alignment, so
16803                 # it is still done.  So perltidy -wba='?' will not break
16804                 # immediately after the return in the following statement:
16805                 # sub x {
16806                 #    return 0 ? 'aaaaaaaaaaaaaaaaaaaaa' :
16807                 #      'bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb';
16808                 # }
16809                 if (
16810                     (
16811                            $type eq ','
16812                         || $type eq 'k' && ( $nmax > 1 && $token eq 'return' )
16813                     )
16814                     && $self->in_same_container_i( $ii, $i_question )
16815                   )
16816                 {
16817                     push @insert_list, $ii;
16818                     last;
16819                 }
16820             }
16821
16822             # insert any new break points
16823             if (@insert_list) {
16824                 $self->insert_additional_breaks( \@insert_list, $ri_left,
16825                     $ri_right );
16826             }
16827         }
16828     }
16829     return;
16830 }
16831
16832 sub insert_breaks_before_list_opening_containers {
16833
16834     my ( $self, $ri_left, $ri_right ) = @_;
16835
16836     # This routine is called once per batch to implement the parameters
16837     # --break-before-hash-brace, etc.
16838
16839     # Nothing to do if none of these parameters has been set
16840     return unless %break_before_container_types;
16841
16842     my $nmax = @{$ri_right} - 1;
16843     return unless ( $nmax >= 0 );
16844
16845     my $rLL = $self->[_rLL_];
16846
16847     my $rbreak_before_container_by_seqno =
16848       $self->[_rbreak_before_container_by_seqno_];
16849     my $rK_weld_left = $self->[_rK_weld_left_];
16850
16851     # scan the ends of all lines
16852     my @insert_list;
16853     for my $n ( 0 .. $nmax ) {
16854         my $il = $ri_left->[$n];
16855         my $ir = $ri_right->[$n];
16856         next unless ( $ir > $il );
16857         my $Kl       = $K_to_go[$il];
16858         my $Kr       = $K_to_go[$ir];
16859         my $Kend     = $Kr;
16860         my $type_end = $rLL->[$Kr]->[_TYPE_];
16861
16862         # Backup before any side comment
16863         if ( $type_end eq '#' ) {
16864             $Kend = $self->K_previous_nonblank($Kr);
16865             next unless defined($Kend);
16866             $type_end = $rLL->[$Kend]->[_TYPE_];
16867         }
16868
16869         # Backup to the start of any weld; fix for b1173.
16870         if ($total_weld_count) {
16871             my $Kend_test = $rK_weld_left->{$Kend};
16872             if ( defined($Kend_test) && $Kend_test > $Kl ) {
16873                 $Kend      = $Kend_test;
16874                 $Kend_test = $rK_weld_left->{$Kend};
16875             }
16876
16877             # Do not break if we did not back up to the start of a weld
16878             # (shouldn't happen)
16879             next if ( defined($Kend_test) );
16880         }
16881
16882         my $token = $rLL->[$Kend]->[_TOKEN_];
16883         next unless ( $is_opening_token{$token} );
16884         next unless ( $Kl < $Kend - 1 );
16885
16886         my $seqno = $rLL->[$Kend]->[_TYPE_SEQUENCE_];
16887         next unless ( defined($seqno) );
16888
16889         # Use the flag which was previously set
16890         next unless ( $rbreak_before_container_by_seqno->{$seqno} );
16891
16892         # Install a break before this opening token.
16893         my $Kbreak = $self->K_previous_nonblank($Kend);
16894         my $ibreak = $Kbreak - $Kl + $il;
16895         next if ( $ibreak < $il );
16896         next if ( $nobreak_to_go[$ibreak] );
16897         push @insert_list, $ibreak;
16898     }
16899
16900     # insert any new break points
16901     if (@insert_list) {
16902         $self->insert_additional_breaks( \@insert_list, $ri_left, $ri_right );
16903     }
16904     return;
16905 }
16906
16907 sub note_added_semicolon {
16908     my ( $self, $line_number ) = @_;
16909     $self->[_last_added_semicolon_at_] = $line_number;
16910     if ( $self->[_added_semicolon_count_] == 0 ) {
16911         $self->[_first_added_semicolon_at_] = $line_number;
16912     }
16913     $self->[_added_semicolon_count_]++;
16914     write_logfile_entry("Added ';' here\n");
16915     return;
16916 }
16917
16918 sub note_deleted_semicolon {
16919     my ( $self, $line_number ) = @_;
16920     $self->[_last_deleted_semicolon_at_] = $line_number;
16921     if ( $self->[_deleted_semicolon_count_] == 0 ) {
16922         $self->[_first_deleted_semicolon_at_] = $line_number;
16923     }
16924     $self->[_deleted_semicolon_count_]++;
16925     write_logfile_entry("Deleted unnecessary ';' at line $line_number\n");
16926     return;
16927 }
16928
16929 sub note_embedded_tab {
16930     my ( $self, $line_number ) = @_;
16931     $self->[_embedded_tab_count_]++;
16932     $self->[_last_embedded_tab_at_] = $line_number;
16933     if ( !$self->[_first_embedded_tab_at_] ) {
16934         $self->[_first_embedded_tab_at_] = $line_number;
16935     }
16936
16937     if ( $self->[_embedded_tab_count_] <= MAX_NAG_MESSAGES ) {
16938         write_logfile_entry("Embedded tabs in quote or pattern\n");
16939     }
16940     return;
16941 }
16942
16943 use constant DEBUG_CORRECT_LP => 0;
16944
16945 sub correct_lp_indentation {
16946
16947     # When the -lp option is used, we need to make a last pass through
16948     # each line to correct the indentation positions in case they differ
16949     # from the predictions.  This is necessary because perltidy uses a
16950     # predictor/corrector method for aligning with opening parens.  The
16951     # predictor is usually good, but sometimes stumbles.  The corrector
16952     # tries to patch things up once the actual opening paren locations
16953     # are known.
16954     my ( $self, $ri_first, $ri_last ) = @_;
16955     my $K_opening_container = $self->[_K_opening_container_];
16956     my $K_closing_container = $self->[_K_closing_container_];
16957     my $do_not_pad          = 0;
16958
16959     #  Note on flag '$do_not_pad':
16960     #  We want to avoid a situation like this, where the aligner inserts
16961     #  whitespace before the '=' to align it with a previous '=', because
16962     #  otherwise the parens might become mis-aligned in a situation like
16963     #  this, where the '=' has become aligned with the previous line,
16964     #  pushing the opening '(' forward beyond where we want it.
16965     #
16966     #  $mkFloor::currentRoom = '';
16967     #  $mkFloor::c_entry     = $c->Entry(
16968     #                                 -width        => '10',
16969     #                                 -relief       => 'sunken',
16970     #                                 ...
16971     #                                 );
16972     #
16973     #  We leave it to the aligner to decide how to do this.
16974
16975     # first remove continuation indentation if appropriate
16976     my $rLL      = $self->[_rLL_];
16977     my $max_line = @{$ri_first} - 1;
16978
16979     #---------------------------------------------------------------------------
16980     # PASS 1: reduce indentation if necessary at any long one-line blocks (c098)
16981     #---------------------------------------------------------------------------
16982
16983     # The point is that sub 'starting_one_line_block' made one-line blocks based
16984     # on default indentation, not -lp indentation. So some of the one-line
16985     # blocks may be too long when given -lp indentation.  We will fix that now
16986     # if possible, using the list of these closing block indexes.
16987     my $ri_starting_one_line_block =
16988       $self->[_this_batch_]->[_ri_starting_one_line_block_];
16989     if ( @{$ri_starting_one_line_block} ) {
16990         my @ilist = @{$ri_starting_one_line_block};
16991         my $inext = shift(@ilist);
16992
16993         # loop over lines, checking length of each with a one-line block
16994         my ( $ibeg, $iend );
16995         foreach my $line ( 0 .. $max_line ) {
16996             $iend = $ri_last->[$line];
16997             next if ( $inext > $iend );
16998             $ibeg = $ri_first->[$line];
16999
17000             # This is just for lines with indentation objects (c098)
17001             my $excess =
17002               ref( $leading_spaces_to_go[$ibeg] )
17003               ? $self->excess_line_length( $ibeg, $iend )
17004               : 0;
17005
17006             if ( $excess > 0 ) {
17007                 my $available_spaces = $self->get_available_spaces_to_go($ibeg);
17008
17009                 if ( $available_spaces > 0 ) {
17010                     my $delete_want = min( $available_spaces, $excess );
17011                     my $deleted_spaces =
17012                       $self->reduce_lp_indentation( $ibeg, $delete_want );
17013                     $available_spaces =
17014                       $self->get_available_spaces_to_go($ibeg);
17015                 }
17016             }
17017
17018             # skip forward to next one-line block to check
17019             while (@ilist) {
17020                 $inext = shift @ilist;
17021                 next if ( $inext <= $iend );
17022                 last if ( $inext > $iend );
17023             }
17024             last if ( $inext <= $iend );
17025         }
17026     }
17027
17028     #-------------------------------------------------------------------
17029     # PASS 2: look for and fix other problems in each line of this batch
17030     #-------------------------------------------------------------------
17031
17032     # look at each output line ...
17033     my ( $ibeg, $iend );
17034     foreach my $line ( 0 .. $max_line ) {
17035         $ibeg = $ri_first->[$line];
17036         $iend = $ri_last->[$line];
17037
17038         # looking at each token in this output line ...
17039         foreach my $i ( $ibeg .. $iend ) {
17040
17041             # How many space characters to place before this token
17042             # for special alignment.  Actual padding is done in the
17043             # continue block.
17044
17045             # looking for next unvisited indentation item ...
17046             my $indentation = $leading_spaces_to_go[$i];
17047
17048             # This is just for indentation objects (c098)
17049             next unless ( ref($indentation) );
17050
17051             # Visit each indentation object just once
17052             next if ( $indentation->get_marked() );
17053
17054             # Mark first visit
17055             $indentation->set_marked(1);
17056
17057             # Skip indentation objects which do not align with container tokens
17058             my $align_seqno = $indentation->get_align_seqno();
17059             next unless ($align_seqno);
17060
17061             # Skip a container which is entirely on this line
17062             my $Ko = $K_opening_container->{$align_seqno};
17063             my $Kc = $K_closing_container->{$align_seqno};
17064             if ( defined($Ko) && defined($Kc) ) {
17065                 next if ( $Ko >= $K_to_go[$ibeg] && $Kc <= $K_to_go[$iend] );
17066             }
17067
17068             if ( $line == 1 && $i == $ibeg ) {
17069                 $do_not_pad = 1;
17070             }
17071
17072             #--------------------------------------------
17073             # Now see what the error is and try to fix it
17074             #--------------------------------------------
17075             my $closing_index = $indentation->get_closed();
17076             my $predicted_pos = $indentation->get_spaces();
17077
17078             # Find actual position:
17079             my $actual_pos;
17080
17081             if ( $i == $ibeg ) {
17082
17083                 # Case 1: token is first character of of batch - table lookup
17084                 if ( $line == 0 ) {
17085
17086                     $actual_pos = $predicted_pos;
17087
17088                     my ( $indent, $offset, $is_leading, $exists ) =
17089                       get_saved_opening_indentation($align_seqno);
17090                     if ( defined($indent) ) {
17091
17092                         # FIXME: should use '1' here if no space after opening
17093                         # and '2' if want space; hardwired at 1 like -gnu-style
17094                         $actual_pos = get_spaces($indent) + $offset + 1;
17095                     }
17096                 }
17097
17098                 # Case 2: token starts a new line - use length of previous line
17099                 else {
17100
17101                     my $ibegm = $ri_first->[ $line - 1 ];
17102                     my $iendm = $ri_last->[ $line - 1 ];
17103                     $actual_pos = total_line_length( $ibegm, $iendm );
17104
17105                     # follow -pt style
17106                     ++$actual_pos
17107                       if ( $types_to_go[ $iendm + 1 ] eq 'b' );
17108
17109                 }
17110             }
17111
17112             # Case 3: $i>$ibeg: token is mid-line - use length to previous token
17113             else {
17114
17115                 $actual_pos = total_line_length( $ibeg, $i - 1 );
17116
17117                 # for mid-line token, we must check to see if all
17118                 # additional lines have continuation indentation,
17119                 # and remove it if so.  Otherwise, we do not get
17120                 # good alignment.
17121                 if ( $closing_index > $iend ) {
17122                     my $ibeg_next = $ri_first->[ $line + 1 ];
17123                     if ( $ci_levels_to_go[$ibeg_next] > 0 ) {
17124                         $self->undo_lp_ci( $line, $i, $closing_index,
17125                             $ri_first, $ri_last );
17126                     }
17127                 }
17128             }
17129
17130             # By how many spaces (plus or minus) would we need to increase the
17131             # indentation to get alignment with the opening token?
17132             my $move_right = $actual_pos - $predicted_pos;
17133
17134             if (DEBUG_CORRECT_LP) {
17135                 my $tok   = substr( $tokens_to_go[$i], 0, 8 );
17136                 my $avail = $self->get_available_spaces_to_go($ibeg);
17137                 print
17138 "CORRECT_LP for seq=$align_seqno, predicted pos=$predicted_pos actual=$actual_pos => move right=$move_right available=$avail i=$i max=$max_index_to_go tok=$tok\n";
17139             }
17140
17141             # nothing more to do if no error to correct (gnu2.t)
17142             if ( $move_right == 0 ) {
17143                 $indentation->set_recoverable_spaces($move_right);
17144                 next;
17145             }
17146
17147             # Get any collapsed length defined for -xlp
17148             my $collapsed_length =
17149               $self->[_rcollapsed_length_by_seqno_]->{$align_seqno};
17150             $collapsed_length = 0 unless ( defined($collapsed_length) );
17151
17152             if (DEBUG_CORRECT_LP) {
17153                 print
17154 "CORRECT_LP for seq=$align_seqno, collapsed length is $collapsed_length\n";
17155             }
17156
17157             # if we have not seen closure for this indentation in this batch,
17158             # and do not have a collapsed length estimate, we can only pass on
17159             # a request to the vertical aligner
17160             if ( $closing_index < 0 && !$collapsed_length ) {
17161                 $indentation->set_recoverable_spaces($move_right);
17162                 next;
17163             }
17164
17165             # If necessary, look ahead to see if there is really any leading
17166             # whitespace dependent on this whitespace, and also find the
17167             # longest line using this whitespace.  Since it is always safe to
17168             # move left if there are no dependents, we only need to do this if
17169             # we may have dependent nodes or need to move right.
17170
17171             my $have_child = $indentation->get_have_child();
17172             my %saw_indentation;
17173             my $line_count = 1;
17174             $saw_indentation{$indentation} = $indentation;
17175
17176             # How far can we move right before we hit the limit?
17177             # let $right_margen = the number of spaces that we can increase
17178             # the current indentation before hitting the maximum line length.
17179             my $right_margin = 0;
17180
17181             if ( $have_child || $move_right > 0 ) {
17182                 $have_child = 0;
17183
17184                 # include estimated collapsed length for incomplete containers
17185                 my $max_length = 0;
17186                 if ( $Kc > $K_to_go[$max_index_to_go] ) {
17187                     $max_length = $collapsed_length + $predicted_pos;
17188                 }
17189
17190                 if ( $i == $ibeg ) {
17191                     my $length = total_line_length( $ibeg, $iend );
17192                     if ( $length > $max_length ) { $max_length = $length }
17193                 }
17194
17195                 # look ahead at the rest of the lines of this batch..
17196                 foreach my $line_t ( $line + 1 .. $max_line ) {
17197                     my $ibeg_t = $ri_first->[$line_t];
17198                     my $iend_t = $ri_last->[$line_t];
17199                     last if ( $closing_index <= $ibeg_t );
17200
17201                     # remember all different indentation objects
17202                     my $indentation_t = $leading_spaces_to_go[$ibeg_t];
17203                     $saw_indentation{$indentation_t} = $indentation_t;
17204                     $line_count++;
17205
17206                     # remember longest line in the group
17207                     my $length_t = total_line_length( $ibeg_t, $iend_t );
17208                     if ( $length_t > $max_length ) {
17209                         $max_length = $length_t;
17210                     }
17211                 }
17212
17213                 $right_margin =
17214                   $maximum_line_length_at_level[ $levels_to_go[$ibeg] ] -
17215                   $max_length;
17216                 if ( $right_margin < 0 ) { $right_margin = 0 }
17217             }
17218
17219             my $first_line_comma_count =
17220               grep { $_ eq ',' } @types_to_go[ $ibeg .. $iend ];
17221             my $comma_count = $indentation->get_comma_count();
17222             my $arrow_count = $indentation->get_arrow_count();
17223
17224             # This is a simple approximate test for vertical alignment:
17225             # if we broke just after an opening paren, brace, bracket,
17226             # and there are 2 or more commas in the first line,
17227             # and there are no '=>'s,
17228             # then we are probably vertically aligned.  We could set
17229             # an exact flag in sub break_lists, but this is good
17230             # enough.
17231             my $indentation_count = keys %saw_indentation;
17232             my $is_vertically_aligned =
17233               (      $i == $ibeg
17234                   && $first_line_comma_count > 1
17235                   && $indentation_count == 1
17236                   && ( $arrow_count == 0 || $arrow_count == $line_count ) );
17237
17238             # Make the move if possible ..
17239             if (
17240
17241                 # we can always move left
17242                 $move_right < 0
17243
17244                 # -xlp
17245
17246                 # incomplete container
17247                 || (   $rOpts_extended_line_up_parentheses
17248                     && $Kc > $K_to_go[$max_index_to_go] )
17249                 || $closing_index < 0
17250
17251                 # but we should only move right if we are sure it will
17252                 # not spoil vertical alignment
17253                 || ( $comma_count == 0 )
17254                 || ( $comma_count > 0 && !$is_vertically_aligned )
17255               )
17256             {
17257                 my $move =
17258                   ( $move_right <= $right_margin )
17259                   ? $move_right
17260                   : $right_margin;
17261
17262                 if (DEBUG_CORRECT_LP) {
17263                     print
17264                       "CORRECT_LP for seq=$align_seqno, moving $move spaces\n";
17265                 }
17266
17267                 foreach ( keys %saw_indentation ) {
17268                     $saw_indentation{$_}
17269                       ->permanently_decrease_available_spaces( -$move );
17270                 }
17271             }
17272
17273             # Otherwise, record what we want and the vertical aligner
17274             # will try to recover it.
17275             else {
17276                 $indentation->set_recoverable_spaces($move_right);
17277             }
17278         } ## end loop over tokens in a line
17279     } ## end loop over lines
17280     return $do_not_pad;
17281 }
17282
17283 sub undo_lp_ci {
17284
17285     # If there is a single, long parameter within parens, like this:
17286     #
17287     #  $self->command( "/msg "
17288     #        . $infoline->chan
17289     #        . " You said $1, but did you know that it's square was "
17290     #        . $1 * $1 . " ?" );
17291     #
17292     # we can remove the continuation indentation of the 2nd and higher lines
17293     # to achieve this effect, which is more pleasing:
17294     #
17295     #  $self->command("/msg "
17296     #                 . $infoline->chan
17297     #                 . " You said $1, but did you know that it's square was "
17298     #                 . $1 * $1 . " ?");
17299
17300     my ( $self, $line_open, $i_start, $closing_index, $ri_first, $ri_last ) =
17301       @_;
17302     my $max_line = @{$ri_first} - 1;
17303
17304     # must be multiple lines
17305     return unless $max_line > $line_open;
17306
17307     my $lev_start     = $levels_to_go[$i_start];
17308     my $ci_start_plus = 1 + $ci_levels_to_go[$i_start];
17309
17310     # see if all additional lines in this container have continuation
17311     # indentation
17312     my $n;
17313     my $line_1 = 1 + $line_open;
17314     for ( $n = $line_1 ; $n <= $max_line ; ++$n ) {
17315         my $ibeg = $ri_first->[$n];
17316         my $iend = $ri_last->[$n];
17317         if ( $ibeg eq $closing_index ) { $n--; last }
17318         return if ( $lev_start != $levels_to_go[$ibeg] );
17319         return if ( $ci_start_plus != $ci_levels_to_go[$ibeg] );
17320         last   if ( $closing_index <= $iend );
17321     }
17322
17323     # we can reduce the indentation of all continuation lines
17324     my $continuation_line_count = $n - $line_open;
17325     @ci_levels_to_go[ @{$ri_first}[ $line_1 .. $n ] ] =
17326       (0) x ($continuation_line_count);
17327     @leading_spaces_to_go[ @{$ri_first}[ $line_1 .. $n ] ] =
17328       @reduced_spaces_to_go[ @{$ri_first}[ $line_1 .. $n ] ];
17329     return;
17330 }
17331
17332 ###############################################
17333 # CODE SECTION 10: Code to break long statments
17334 ###############################################
17335
17336 sub break_long_lines {
17337
17338     #-----------------------------------------------------------
17339     # Break a batch of tokens into lines which do not exceed the
17340     # maximum line length.
17341     #-----------------------------------------------------------
17342
17343     # Define an array of indexes for inserting newline characters to
17344     # keep the line lengths below the maximum desired length.  There is
17345     # an implied break after the last token, so it need not be included.
17346
17347     # Method:
17348     # This routine is part of series of routines which adjust line
17349     # lengths.  It is only called if a statement is longer than the
17350     # maximum line length, or if a preliminary scanning located
17351     # desirable break points.   Sub break_lists has already looked at
17352     # these tokens and set breakpoints (in array
17353     # $forced_breakpoint_to_go[$i]) where it wants breaks (for example
17354     # after commas, after opening parens, and before closing parens).
17355     # This routine will honor these breakpoints and also add additional
17356     # breakpoints as necessary to keep the line length below the maximum
17357     # requested.  It bases its decision on where the 'bond strength' is
17358     # lowest.
17359
17360     # Output: returns references to the arrays:
17361     #  @i_first
17362     #  @i_last
17363     # which contain the indexes $i of the first and last tokens on each
17364     # line.
17365
17366     # In addition, the array:
17367     #   $forced_breakpoint_to_go[$i]
17368     # may be updated to be =1 for any index $i after which there must be
17369     # a break.  This signals later routines not to undo the breakpoint.
17370
17371     my ( $self, $saw_good_break, $rcolon_list ) = @_;
17372
17373     # @{$rcolon_list} is a list of all the ? and : tokens in the batch, in
17374     # order.
17375
17376     use constant DEBUG_BREAK_LINES => 0;
17377
17378     my @i_first        = ();    # the first index to output
17379     my @i_last         = ();    # the last index to output
17380     my @i_colon_breaks = ();    # needed to decide if we have to break at ?'s
17381     if ( $types_to_go[0] eq ':' ) { push @i_colon_breaks, 0 }
17382
17383     $self->set_bond_strengths();
17384
17385     my $imin = 0;
17386     my $imax = $max_index_to_go;
17387     if ( $types_to_go[$imin] eq 'b' ) { $imin++ }
17388     if ( $types_to_go[$imax] eq 'b' ) { $imax-- }
17389     my $i_begin = $imin;        # index for starting next iteration
17390
17391     my $leading_spaces          = leading_spaces_to_go($imin);
17392     my $line_count              = 0;
17393     my $last_break_strength     = NO_BREAK;
17394     my $i_last_break            = -1;
17395     my $max_bias                = 0.001;
17396     my $tiny_bias               = 0.0001;
17397     my $leading_alignment_token = "";
17398     my $leading_alignment_type  = "";
17399
17400     # see if any ?/:'s are in order
17401     my $colons_in_order = 1;
17402     my $last_tok        = "";
17403     foreach ( @{$rcolon_list} ) {
17404         if ( $_ eq $last_tok ) { $colons_in_order = 0; last }
17405         $last_tok = $_;
17406     }
17407
17408     # This is a sufficient but not necessary condition for colon chain
17409     my $is_colon_chain = ( $colons_in_order && @{$rcolon_list} > 2 );
17410
17411     my $Msg = "";
17412
17413     #-------------------------------------------------------
17414     # BEGINNING of main loop to set continuation breakpoints
17415     # Keep iterating until we reach the end
17416     #-------------------------------------------------------
17417     while ( $i_begin <= $imax ) {
17418         my $lowest_strength        = NO_BREAK;
17419         my $starting_sum           = $summed_lengths_to_go[$i_begin];
17420         my $i_lowest               = -1;
17421         my $i_test                 = -1;
17422         my $lowest_next_token      = '';
17423         my $lowest_next_type       = 'b';
17424         my $i_lowest_next_nonblank = -1;
17425         my $maximum_line_length =
17426           $maximum_line_length_at_level[ $levels_to_go[$i_begin] ];
17427
17428         # Do not separate an isolated bare word from an opening paren.
17429         # Alternate Fix #2 for issue b1299.  This waits as long as possible
17430         # to make the decision.
17431         if ( $types_to_go[$i_begin] eq 'i'
17432             && substr( $tokens_to_go[$i_begin], 0, 1 ) =~ /\w/ )
17433         {
17434             my $i_next_nonblank = $inext_to_go[$i_begin];
17435             if ( $tokens_to_go[$i_next_nonblank] eq '(' ) {
17436                 $bond_strength_to_go[$i_begin] = NO_BREAK;
17437             }
17438         }
17439
17440         #-------------------------------------------------------
17441         # BEGINNING of inner loop to find the best next breakpoint
17442         #-------------------------------------------------------
17443         my $strength = NO_BREAK;
17444         for ( $i_test = $i_begin ; $i_test <= $imax ; $i_test++ ) {
17445             my $type                     = $types_to_go[$i_test];
17446             my $token                    = $tokens_to_go[$i_test];
17447             my $next_type                = $types_to_go[ $i_test + 1 ];
17448             my $next_token               = $tokens_to_go[ $i_test + 1 ];
17449             my $i_next_nonblank          = $inext_to_go[$i_test];
17450             my $next_nonblank_type       = $types_to_go[$i_next_nonblank];
17451             my $next_nonblank_token      = $tokens_to_go[$i_next_nonblank];
17452             my $next_nonblank_block_type = $block_type_to_go[$i_next_nonblank];
17453
17454             # adjustments to the previous bond strength may have been made, and
17455             # we must keep the bond strength of a token and its following blank
17456             # the same;
17457             my $last_strength = $strength;
17458             $strength = $bond_strength_to_go[$i_test];
17459             if ( $type eq 'b' ) { $strength = $last_strength }
17460
17461             # reduce strength a bit to break ties at an old comma breakpoint ...
17462             if (
17463
17464                 $old_breakpoint_to_go[$i_test]
17465
17466                 # Patch: limited to just commas to avoid blinking states
17467                 && $type eq ','
17468
17469                 # which is a 'good' breakpoint, meaning ...
17470                 # we don't want to break before it
17471                 && !$want_break_before{$type}
17472
17473                 # and either we want to break before the next token
17474                 # or the next token is not short (i.e. not a '*', '/' etc.)
17475                 && $i_next_nonblank <= $imax
17476                 && (   $want_break_before{$next_nonblank_type}
17477                     || $token_lengths_to_go[$i_next_nonblank] > 2
17478                     || $next_nonblank_type eq ','
17479                     || $is_opening_type{$next_nonblank_type} )
17480               )
17481             {
17482                 $strength -= $tiny_bias;
17483                 DEBUG_BREAK_LINES && do { $Msg .= " :-bias at i=$i_test" };
17484             }
17485
17486             # otherwise increase strength a bit if this token would be at the
17487             # maximum line length.  This is necessary to avoid blinking
17488             # in the above example when the -iob flag is added.
17489             else {
17490                 my $len =
17491                   $leading_spaces +
17492                   $summed_lengths_to_go[ $i_test + 1 ] -
17493                   $starting_sum;
17494                 if ( $len >= $maximum_line_length ) {
17495                     $strength += $tiny_bias;
17496                     DEBUG_BREAK_LINES && do { $Msg .= " :+bias at i=$i_test" };
17497                 }
17498             }
17499
17500             my $must_break = 0;
17501
17502             # Force an immediate break at certain operators
17503             # with lower level than the start of the line,
17504             # unless we've already seen a better break.
17505             #
17506             #------------------------------------
17507             # Note on an issue with a preceding ?
17508             #------------------------------------
17509             # We don't include a ? in the above list, but there may
17510             # be a break at a previous ? if the line is long.
17511             # Because of this we do not want to force a break if
17512             # there is a previous ? on this line.  For now the best way
17513             # to do this is to not break if we have seen a lower strength
17514             # point, which is probably a ?.
17515             #
17516             # Example of unwanted breaks we are avoiding at a '.' following a ?
17517             # from pod2html using perltidy -gnu:
17518             # )
17519             # ? "\n&lt;A NAME=\""
17520             # . $value
17521             # . "\"&gt;\n$text&lt;/A&gt;\n"
17522             # : "\n$type$pod2.html\#" . $value . "\"&gt;$text&lt;\/A&gt;\n";
17523             if (
17524                 ( $strength <= $lowest_strength )
17525                 && ( $nesting_depth_to_go[$i_begin] >
17526                     $nesting_depth_to_go[$i_next_nonblank] )
17527                 && (
17528                     $next_nonblank_type =~ /^(\.|\&\&|\|\|)$/
17529                     || (   $next_nonblank_type eq 'k'
17530                         && $next_nonblank_token =~ /^(and|or)$/ )
17531                 )
17532               )
17533             {
17534                 $self->set_forced_breakpoint($i_next_nonblank);
17535                 DEBUG_BREAK_LINES
17536                   && do { $Msg .= " :Forced break at i=$i_next_nonblank" };
17537             }
17538
17539             if (
17540
17541                 # Try to put a break where requested by break_lists
17542                 $forced_breakpoint_to_go[$i_test]
17543
17544                 # break between ) { in a continued line so that the '{' can
17545                 # be outdented
17546                 # See similar logic in break_lists which catches instances
17547                 # where a line is just something like ') {'.  We have to
17548                 # be careful because the corresponding block keyword might
17549                 # not be on the first line, such as 'for' here:
17550                 #
17551                 # eval {
17552                 #     for ("a") {
17553                 #         for $x ( 1, 2 ) { local $_ = "b"; s/(.*)/+$1/ }
17554                 #     }
17555                 # };
17556                 #
17557                 || (
17558                        $line_count
17559                     && ( $token eq ')' )
17560                     && ( $next_nonblank_type eq '{' )
17561                     && ($next_nonblank_block_type)
17562                     && ( $next_nonblank_block_type ne $tokens_to_go[$i_begin] )
17563
17564                     # RT #104427: Dont break before opening sub brace because
17565                     # sub block breaks handled at higher level, unless
17566                     # it looks like the preceding list is long and broken
17567                     && !(
17568
17569                         (
17570                                $next_nonblank_block_type =~ /$SUB_PATTERN/
17571                             || $next_nonblank_block_type =~ /$ASUB_PATTERN/
17572                         )
17573                         && ( $nesting_depth_to_go[$i_begin] ==
17574                             $nesting_depth_to_go[$i_next_nonblank] )
17575                     )
17576
17577                     && !$rOpts_opening_brace_always_on_right
17578                 )
17579
17580                 # There is an implied forced break at a terminal opening brace
17581                 || ( ( $type eq '{' ) && ( $i_test == $imax ) )
17582               )
17583             {
17584
17585                 # Forced breakpoints must sometimes be overridden, for example
17586                 # because of a side comment causing a NO_BREAK.  It is easier
17587                 # to catch this here than when they are set.
17588                 if ( $strength < NO_BREAK - 1 ) {
17589                     $strength   = $lowest_strength - $tiny_bias;
17590                     $must_break = 1;
17591                     DEBUG_BREAK_LINES
17592                       && do { $Msg .= " :set must_break at i=$i_next_nonblank" };
17593                 }
17594             }
17595
17596             # quit if a break here would put a good terminal token on
17597             # the next line and we already have a possible break
17598             if (
17599                    !$must_break
17600                 && ( $next_nonblank_type eq ';' || $next_nonblank_type eq ',' )
17601                 && (
17602                     (
17603                         $leading_spaces +
17604                         $summed_lengths_to_go[ $i_next_nonblank + 1 ] -
17605                         $starting_sum
17606                     ) > $maximum_line_length
17607                 )
17608               )
17609             {
17610                 if ( $i_lowest >= 0 ) {
17611                     DEBUG_BREAK_LINES && do {
17612                         $Msg .= " :quit at good terminal='$next_nonblank_type'";
17613                     };
17614                     last;
17615                 }
17616             }
17617
17618             # Avoid a break which would strand a single punctuation
17619             # token.  For example, we do not want to strand a leading
17620             # '.' which is followed by a long quoted string.
17621             # But note that we do want to do this with -extrude (l=1)
17622             # so please test any changes to this code on -extrude.
17623             if (
17624                    !$must_break
17625                 && ( $i_test == $i_begin )
17626                 && ( $i_test < $imax )
17627                 && ( $token eq $type )
17628                 && (
17629                     (
17630                         $leading_spaces +
17631                         $summed_lengths_to_go[ $i_test + 1 ] -
17632                         $starting_sum
17633                     ) < $maximum_line_length
17634                 )
17635               )
17636             {
17637                 $i_test = min( $imax, $inext_to_go[$i_test] );
17638                 DEBUG_BREAK_LINES && do {
17639                     $Msg .= " :redo at i=$i_test";
17640                 };
17641                 redo;
17642             }
17643
17644             if ( ( $strength <= $lowest_strength ) && ( $strength < NO_BREAK ) )
17645             {
17646
17647                 # break at previous best break if it would have produced
17648                 # a leading alignment of certain common tokens, and it
17649                 # is different from the latest candidate break
17650                 if ($leading_alignment_type) {
17651                     DEBUG_BREAK_LINES && do {
17652                         $Msg .=
17653 " :last at leading_alignment='$leading_alignment_type'";
17654                     };
17655                     last;
17656                 }
17657
17658                 # Force at least one breakpoint if old code had good
17659                 # break It is only called if a breakpoint is required or
17660                 # desired.  This will probably need some adjustments
17661                 # over time.  A goal is to try to be sure that, if a new
17662                 # side comment is introduced into formatted text, then
17663                 # the same breakpoints will occur.  scbreak.t
17664                 if (
17665                     $i_test == $imax            # we are at the end
17666                     && !get_forced_breakpoint_count()
17667                     && $saw_good_break          # old line had good break
17668                     && $type =~ /^[#;\{]$/      # and this line ends in
17669                                                 # ';' or side comment
17670                     && $i_last_break < 0        # and we haven't made a break
17671                     && $i_lowest >= 0           # and we saw a possible break
17672                     && $i_lowest < $imax - 1    # (but not just before this ;)
17673                     && $strength - $lowest_strength < 0.5 * WEAK # and it's good
17674                   )
17675                 {
17676
17677                     DEBUG_BREAK_LINES && do {
17678                         $Msg .= " :last at good old break\n";
17679                     };
17680                     last;
17681                 }
17682
17683                 # Do not skip past an important break point in a short final
17684                 # segment.  For example, without this check we would miss the
17685                 # break at the final / in the following code:
17686                 #
17687                 #  $depth_stop =
17688                 #    ( $tau * $mass_pellet * $q_0 *
17689                 #        ( 1. - exp( -$t_stop / $tau ) ) -
17690                 #        4. * $pi * $factor * $k_ice *
17691                 #        ( $t_melt - $t_ice ) *
17692                 #        $r_pellet *
17693                 #        $t_stop ) /
17694                 #    ( $rho_ice * $Qs * $pi * $r_pellet**2 );
17695                 #
17696                 if (
17697                        $line_count > 2
17698                     && $i_lowest >= 0    # and we saw a possible break
17699                     && $i_lowest < $i_test
17700                     && $i_test > $imax - 2
17701                     && $nesting_depth_to_go[$i_begin] >
17702                     $nesting_depth_to_go[$i_lowest]
17703                     && $lowest_strength < $last_break_strength - .5 * WEAK
17704                   )
17705                 {
17706                     # Make this break for math operators for now
17707                     my $ir = $inext_to_go[$i_lowest];
17708                     my $il = $iprev_to_go[$ir];
17709                     if (   $types_to_go[$il] =~ /^[\/\*\+\-\%]$/
17710                         || $types_to_go[$ir] =~ /^[\/\*\+\-\%]$/ )
17711                     {
17712                         DEBUG_BREAK_LINES && do {
17713                             $Msg .= " :last-noskip_short";
17714                         };
17715                         last;
17716                     }
17717                 }
17718
17719                 # Update the minimum bond strength location
17720                 $lowest_strength        = $strength;
17721                 $i_lowest               = $i_test;
17722                 $lowest_next_token      = $next_nonblank_token;
17723                 $lowest_next_type       = $next_nonblank_type;
17724                 $i_lowest_next_nonblank = $i_next_nonblank;
17725                 if ($must_break) {
17726                     DEBUG_BREAK_LINES && do {
17727                         $Msg .= " :last-must_break";
17728                     };
17729                     last;
17730                 }
17731
17732                 # set flags to remember if a break here will produce a
17733                 # leading alignment of certain common tokens
17734                 if (   $line_count > 0
17735                     && $i_test < $imax
17736                     && ( $lowest_strength - $last_break_strength <= $max_bias )
17737                   )
17738                 {
17739                     my $i_last_end = $iprev_to_go[$i_begin];
17740                     my $tok_beg    = $tokens_to_go[$i_begin];
17741                     my $type_beg   = $types_to_go[$i_begin];
17742                     if (
17743
17744                         # check for leading alignment of certain tokens
17745                         (
17746                                $tok_beg eq $next_nonblank_token
17747                             && $is_chain_operator{$tok_beg}
17748                             && (   $type_beg eq 'k'
17749                                 || $type_beg eq $tok_beg )
17750                             && $nesting_depth_to_go[$i_begin] >=
17751                             $nesting_depth_to_go[$i_next_nonblank]
17752                         )
17753
17754                         || (   $tokens_to_go[$i_last_end] eq $token
17755                             && $is_chain_operator{$token}
17756                             && ( $type eq 'k' || $type eq $token )
17757                             && $nesting_depth_to_go[$i_last_end] >=
17758                             $nesting_depth_to_go[$i_test] )
17759                       )
17760                     {
17761                         $leading_alignment_token = $next_nonblank_token;
17762                         $leading_alignment_type  = $next_nonblank_type;
17763                     }
17764                 }
17765             }
17766
17767             my $too_long = ( $i_test >= $imax );
17768             if ( !$too_long ) {
17769                 my $next_length =
17770                   $leading_spaces +
17771                   $summed_lengths_to_go[ $i_test + 2 ] -
17772                   $starting_sum;
17773                 $too_long = $next_length > $maximum_line_length;
17774
17775                 # To prevent blinkers we will avoid leaving a token exactly at
17776                 # the line length limit unless it is the last token or one of
17777                 # several "good" types.
17778                 #
17779                 # The following code was a blinker with -pbp before this
17780                 # modification:
17781 ##                    $last_nonblank_token eq '('
17782 ##                        && $is_indirect_object_taker{ $paren_type
17783 ##                            [$paren_depth] }
17784                 # The issue causing the problem is that if the
17785                 # term [$paren_depth] gets broken across a line then
17786                 # the whitespace routine doesn't see both opening and closing
17787                 # brackets and will format like '[ $paren_depth ]'.  This
17788                 # leads to an oscillation in length depending if we break
17789                 # before the closing bracket or not.
17790                 if (  !$too_long
17791                     && $i_test + 1 < $imax
17792                     && $next_nonblank_type ne ','
17793                     && !$is_closing_type{$next_nonblank_type} )
17794                 {
17795                     $too_long = $next_length >= $maximum_line_length;
17796                     DEBUG_BREAK_LINES && do {
17797                         $Msg .= " :too_long=$too_long" if ($too_long);
17798                     }
17799                 }
17800             }
17801
17802             DEBUG_BREAK_LINES && do {
17803                 my $ltok     = $token;
17804                 my $rtok     = $next_nonblank_token ? $next_nonblank_token : "";
17805                 my $i_testp2 = $i_test + 2;
17806                 if ( $i_testp2 > $max_index_to_go + 1 ) {
17807                     $i_testp2 = $max_index_to_go + 1;
17808                 }
17809                 if ( length($ltok) > 6 ) { $ltok = substr( $ltok, 0, 8 ) }
17810                 if ( length($rtok) > 6 ) { $rtok = substr( $rtok, 0, 8 ) }
17811                 print STDOUT
17812 "BREAK: i=$i_test imax=$imax $types_to_go[$i_test] $next_nonblank_type sp=($leading_spaces) lnext= $summed_lengths_to_go[$i_testp2] 2long=$too_long str=$strength    $ltok $rtok\n";
17813             };
17814
17815             # allow one extra terminal token after exceeding line length
17816             # if it would strand this token.
17817             if (   $rOpts_fuzzy_line_length
17818                 && $too_long
17819                 && $i_lowest == $i_test
17820                 && $token_lengths_to_go[$i_test] > 1
17821                 && ( $next_nonblank_type eq ';' || $next_nonblank_type eq ',' )
17822               )
17823             {
17824                 $too_long = 0;
17825                 DEBUG_BREAK_LINES && do {
17826                     $Msg .= " :do_not_strand next='$next_nonblank_type'";
17827                 };
17828             }
17829
17830             # we are done if...
17831             if (
17832
17833                 # ... no more space and we have a break
17834                 $too_long && $i_lowest >= 0
17835
17836                 # ... or no more tokens
17837                 || $i_test == $imax
17838               )
17839             {
17840                 DEBUG_BREAK_LINES && do {
17841                     $Msg .=
17842 " :Done-too_long=$too_long or i_lowest=$i_lowest or $i_test==imax";
17843                 };
17844                 last;
17845             }
17846         }
17847
17848         #-------------------------------------------------------
17849         # END of inner loop to find the best next breakpoint
17850         # Now decide exactly where to put the breakpoint
17851         #-------------------------------------------------------
17852
17853         # it's always ok to break at imax if no other break was found
17854         if ( $i_lowest < 0 ) { $i_lowest = $imax }
17855
17856         # semi-final index calculation
17857         my $i_next_nonblank     = $inext_to_go[$i_lowest];
17858         my $next_nonblank_type  = $types_to_go[$i_next_nonblank];
17859         my $next_nonblank_token = $tokens_to_go[$i_next_nonblank];
17860
17861         #-------------------------------------------------------
17862         # ?/: rule 1 : if a break here will separate a '?' on this
17863         # line from its closing ':', then break at the '?' instead.
17864         #-------------------------------------------------------
17865         foreach my $i ( $i_begin + 1 .. $i_lowest - 1 ) {
17866             next unless ( $tokens_to_go[$i] eq '?' );
17867
17868             # do not break if probable sequence of ?/: statements
17869             next if ($is_colon_chain);
17870
17871             # do not break if statement is broken by side comment
17872             next
17873               if ( $tokens_to_go[$max_index_to_go] eq '#'
17874                 && terminal_type_i( 0, $max_index_to_go ) !~ /^[\;\}]$/ );
17875
17876             # no break needed if matching : is also on the line
17877             next
17878               if ( $mate_index_to_go[$i] >= 0
17879                 && $mate_index_to_go[$i] <= $i_next_nonblank );
17880
17881             $i_lowest = $i;
17882             if ( $want_break_before{'?'} ) { $i_lowest-- }
17883             last;
17884         }
17885
17886         #-------------------------------------------------------
17887         # END of inner loop to find the best next breakpoint:
17888         # Break the line after the token with index i=$i_lowest
17889         #-------------------------------------------------------
17890
17891         # final index calculation
17892         $i_next_nonblank     = $inext_to_go[$i_lowest];
17893         $next_nonblank_type  = $types_to_go[$i_next_nonblank];
17894         $next_nonblank_token = $tokens_to_go[$i_next_nonblank];
17895
17896         DEBUG_BREAK_LINES
17897           && print STDOUT
17898 "BREAK: best is i = $i_lowest strength = $lowest_strength;\nReason>> $Msg\n";
17899         $Msg = "";
17900
17901         #-------------------------------------------------------
17902         # ?/: rule 2 : if we break at a '?', then break at its ':'
17903         #
17904         # Note: this rule is also in sub break_lists to handle a break
17905         # at the start and end of a line (in case breaks are dictated
17906         # by side comments).
17907         #-------------------------------------------------------
17908         if ( $next_nonblank_type eq '?' ) {
17909             $self->set_closing_breakpoint($i_next_nonblank);
17910         }
17911         elsif ( $types_to_go[$i_lowest] eq '?' ) {
17912             $self->set_closing_breakpoint($i_lowest);
17913         }
17914
17915         #-------------------------------------------------------
17916         # ?/: rule 3 : if we break at a ':' then we save
17917         # its location for further work below.  We may need to go
17918         # back and break at its '?'.
17919         #-------------------------------------------------------
17920         if ( $next_nonblank_type eq ':' ) {
17921             push @i_colon_breaks, $i_next_nonblank;
17922         }
17923         elsif ( $types_to_go[$i_lowest] eq ':' ) {
17924             push @i_colon_breaks, $i_lowest;
17925         }
17926
17927         # here we should set breaks for all '?'/':' pairs which are
17928         # separated by this line
17929
17930         $line_count++;
17931
17932         # save this line segment, after trimming blanks at the ends
17933         push( @i_first,
17934             ( $types_to_go[$i_begin] eq 'b' ) ? $i_begin + 1 : $i_begin );
17935         push( @i_last,
17936             ( $types_to_go[$i_lowest] eq 'b' ) ? $i_lowest - 1 : $i_lowest );
17937
17938         # set a forced breakpoint at a container opening, if necessary, to
17939         # signal a break at a closing container.  Excepting '(' for now.
17940         if (
17941             (
17942                    $tokens_to_go[$i_lowest] eq '{'
17943                 || $tokens_to_go[$i_lowest] eq '['
17944             )
17945             && !$forced_breakpoint_to_go[$i_lowest]
17946           )
17947         {
17948             $self->set_closing_breakpoint($i_lowest);
17949         }
17950
17951         # get ready to go again
17952         $i_begin                 = $i_lowest + 1;
17953         $last_break_strength     = $lowest_strength;
17954         $i_last_break            = $i_lowest;
17955         $leading_alignment_token = "";
17956         $leading_alignment_type  = "";
17957         $lowest_next_token       = '';
17958         $lowest_next_type        = 'b';
17959
17960         if ( ( $i_begin <= $imax ) && ( $types_to_go[$i_begin] eq 'b' ) ) {
17961             $i_begin++;
17962         }
17963
17964         # update indentation size
17965         if ( $i_begin <= $imax ) {
17966             $leading_spaces = leading_spaces_to_go($i_begin);
17967             DEBUG_BREAK_LINES
17968               && print STDOUT
17969               "updating leading spaces to be $leading_spaces at i=$i_begin\n";
17970         }
17971     }
17972
17973     #-------------------------------------------------------
17974     # END of main loop to set continuation breakpoints
17975     # Now go back and make any necessary corrections
17976     #-------------------------------------------------------
17977
17978     #-------------------------------------------------------
17979     # ?/: rule 4 -- if we broke at a ':', then break at
17980     # corresponding '?' unless this is a chain of ?: expressions
17981     #-------------------------------------------------------
17982     if (@i_colon_breaks) {
17983
17984         # using a simple method for deciding if we are in a ?/: chain --
17985         # this is a chain if it has multiple ?/: pairs all in order;
17986         # otherwise not.
17987         # Note that if line starts in a ':' we count that above as a break
17988         my $is_chain = ( $colons_in_order && @i_colon_breaks > 1 );
17989
17990         unless ($is_chain) {
17991             my @insert_list = ();
17992             foreach (@i_colon_breaks) {
17993                 my $i_question = $mate_index_to_go[$_];
17994                 if ( $i_question >= 0 ) {
17995                     if ( $want_break_before{'?'} ) {
17996                         $i_question = $iprev_to_go[$i_question];
17997                     }
17998
17999                     if ( $i_question >= 0 ) {
18000                         push @insert_list, $i_question;
18001                     }
18002                 }
18003                 $self->insert_additional_breaks( \@insert_list, \@i_first,
18004                     \@i_last );
18005             }
18006         }
18007     }
18008     return ( \@i_first, \@i_last );
18009 }
18010
18011 ###########################################
18012 # CODE SECTION 11: Code to break long lists
18013 ###########################################
18014
18015 {    ## begin closure break_lists
18016
18017     # These routines and variables are involved in finding good
18018     # places to break long lists.
18019
18020     use constant DEBUG_BREAK_LISTS => 0;
18021
18022     my (
18023         $block_type,                $current_depth,
18024         $depth,                     $i,
18025         $i_last_nonblank_token,     $last_nonblank_token,
18026         $last_nonblank_type,        $last_nonblank_block_type,
18027         $last_old_breakpoint_count, $minimum_depth,
18028         $next_nonblank_block_type,  $next_nonblank_token,
18029         $next_nonblank_type,        $old_breakpoint_count,
18030         $starting_breakpoint_count, $starting_depth,
18031         $token,                     $type,
18032         $type_sequence,
18033     );
18034
18035     my (
18036         @breakpoint_stack,              @breakpoint_undo_stack,
18037         @comma_index,                   @container_type,
18038         @identifier_count_stack,        @index_before_arrow,
18039         @interrupted_list,              @item_count_stack,
18040         @last_comma_index,              @last_dot_index,
18041         @last_nonblank_type,            @old_breakpoint_count_stack,
18042         @opening_structure_index_stack, @rfor_semicolon_list,
18043         @has_old_logical_breakpoints,   @rand_or_list,
18044         @i_equals,                      @override_cab3,
18045         @type_sequence_stack,
18046     );
18047
18048     # these arrays must retain values between calls
18049     my ( @has_broken_sublist, @dont_align, @want_comma_break );
18050
18051     my $length_tol;
18052     my $lp_tol_boost;
18053     my $list_stress_level;
18054
18055     sub initialize_break_lists {
18056         @dont_align         = ();
18057         @has_broken_sublist = ();
18058         @want_comma_break   = ();
18059
18060         #---------------------------------------------------
18061         # Set tolerances to prevent formatting instabilities
18062         #---------------------------------------------------
18063
18064         # Define tolerances to use when checking if closed
18065         # containers will fit on one line.  This is necessary to avoid
18066         # formatting instability. The basic tolerance is based on the
18067         # following:
18068
18069         # - Always allow for at least one extra space after a closing token so
18070         # that we do not strand a comma or semicolon. (oneline.t).
18071
18072         # - Use an increased line length tolerance when -ci > -i to avoid
18073         # blinking states (case b923 and others).
18074         $length_tol =
18075           1 + max( 0, $rOpts_continuation_indentation - $rOpts_indent_columns );
18076
18077         # In addition, it may be necessary to use a few extra tolerance spaces
18078         # when -lp is used and/or when -xci is used.  The history of this
18079         # so far is as follows:
18080
18081         # FIX1: At least 3 characters were been found to be required for -lp
18082         # to fixes cases b1059 b1063 b1117.
18083
18084         # FIX2: Further testing showed that we need a total of 3 extra spaces
18085         # when -lp is set for non-lists, and at least 2 spaces when -lp and
18086         # -xci are set.
18087         # Fixes cases b1063 b1103 b1134 b1135 b1136 b1138 b1140 b1143 b1144
18088         # b1145 b1146 b1147 b1148 b1151 b1152 b1153 b1154 b1156 b1157 b1164
18089         # b1165
18090
18091         # FIX3: To fix cases b1169 b1170 b1171, an update was made in sub
18092         # 'find_token_starting_list' to go back before an initial blank space.
18093         # This fixed these three cases, and allowed the tolerances to be
18094         # reduced to continue to fix all other known cases of instability.
18095         # This gives the current tolerance formulation.
18096
18097         $lp_tol_boost = 0;
18098
18099         if ($rOpts_line_up_parentheses) {
18100
18101             # boost tol for combination -lp -xci
18102             if ($rOpts_extended_continuation_indentation) {
18103                 $lp_tol_boost = 2;
18104             }
18105
18106             # boost tol for combination -lp and any -vtc > 0, but only for
18107             # non-list containers
18108             else {
18109                 foreach ( keys %closing_vertical_tightness ) {
18110                     next
18111                       unless ( $closing_vertical_tightness{$_} );
18112                     $lp_tol_boost = 1;    # Fixes B1193;
18113                     last;
18114                 }
18115             }
18116         }
18117
18118         # Define a level where list formatting becomes highly stressed and
18119         # needs to be simplified. Introduced for case b1262.
18120         $list_stress_level = min( $stress_level_alpha, $stress_level_beta + 2 );
18121
18122         return;
18123     }
18124
18125     # routine to define essential variables when we go 'up' to
18126     # a new depth
18127     sub check_for_new_minimum_depth {
18128         my $depth = shift;
18129         if ( $depth < $minimum_depth ) {
18130
18131             $minimum_depth = $depth;
18132
18133             # these arrays need not retain values between calls
18134             $breakpoint_stack[$depth]              = $starting_breakpoint_count;
18135             $container_type[$depth]                = "";
18136             $identifier_count_stack[$depth]        = 0;
18137             $index_before_arrow[$depth]            = -1;
18138             $interrupted_list[$depth]              = 1;
18139             $item_count_stack[$depth]              = 0;
18140             $last_nonblank_type[$depth]            = "";
18141             $opening_structure_index_stack[$depth] = -1;
18142
18143             $breakpoint_undo_stack[$depth]       = undef;
18144             $comma_index[$depth]                 = undef;
18145             $last_comma_index[$depth]            = undef;
18146             $last_dot_index[$depth]              = undef;
18147             $old_breakpoint_count_stack[$depth]  = undef;
18148             $has_old_logical_breakpoints[$depth] = 0;
18149             $rand_or_list[$depth]                = [];
18150             $rfor_semicolon_list[$depth]         = [];
18151             $i_equals[$depth]                    = -1;
18152
18153             # these arrays must retain values between calls
18154             if ( !defined( $has_broken_sublist[$depth] ) ) {
18155                 $dont_align[$depth]         = 0;
18156                 $has_broken_sublist[$depth] = 0;
18157                 $want_comma_break[$depth]   = 0;
18158             }
18159         }
18160         return;
18161     }
18162
18163     # routine to decide which commas to break at within a container;
18164     # returns:
18165     #   $bp_count = number of comma breakpoints set
18166     #   $do_not_break_apart = a flag indicating if container need not
18167     #     be broken open
18168     sub set_comma_breakpoints {
18169
18170         my ( $self, $dd ) = @_;
18171         my $bp_count           = 0;
18172         my $do_not_break_apart = 0;
18173
18174         # Do not break a list unless there are some non-line-ending commas.
18175         # This avoids getting different results with only non-essential commas,
18176         # and fixes b1192.
18177         my $seqno = $type_sequence_stack[$dd];
18178         my $real_comma_count =
18179           $seqno ? $self->[_rtype_count_by_seqno_]->{$seqno}->{','} : 1;
18180
18181         # anything to do?
18182         if ( $item_count_stack[$dd] ) {
18183
18184             # handle commas not in containers...
18185             if ( $dont_align[$dd] ) {
18186                 $self->do_uncontained_comma_breaks($dd);
18187             }
18188
18189             # handle commas within containers...
18190             elsif ($real_comma_count) {
18191                 my $fbc = get_forced_breakpoint_count();
18192
18193                 # always open comma lists not preceded by keywords,
18194                 # barewords, identifiers (that is, anything that doesn't
18195                 # look like a function call)
18196                 my $must_break_open = $last_nonblank_type[$dd] !~ /^[kwiU]$/;
18197
18198                 $self->set_comma_breakpoints_do(
18199                     {
18200                         depth            => $dd,
18201                         i_opening_paren  => $opening_structure_index_stack[$dd],
18202                         i_closing_paren  => $i,
18203                         item_count       => $item_count_stack[$dd],
18204                         identifier_count => $identifier_count_stack[$dd],
18205                         rcomma_index     => $comma_index[$dd],
18206                         next_nonblank_type  => $next_nonblank_type,
18207                         list_type           => $container_type[$dd],
18208                         interrupted         => $interrupted_list[$dd],
18209                         rdo_not_break_apart => \$do_not_break_apart,
18210                         must_break_open     => $must_break_open,
18211                         has_broken_sublist  => $has_broken_sublist[$dd],
18212                     }
18213                 );
18214                 $bp_count           = get_forced_breakpoint_count() - $fbc;
18215                 $do_not_break_apart = 0 if $must_break_open;
18216             }
18217         }
18218         return ( $bp_count, $do_not_break_apart );
18219     }
18220
18221     # These types are excluded at breakpoints to prevent blinking
18222     # Switched from excluded to included as part of fix for b1214
18223     ##my %is_uncontained_comma_break_excluded_type;
18224     my %is_uncontained_comma_break_included_type;
18225
18226     BEGIN {
18227         ##my @q = qw< L { ( [ ? : + - =~ >;
18228         ##@is_uncontained_comma_break_excluded_type{@q} = (1) x scalar(@q);
18229
18230         my @q = qw< k R } ) ] Y Z U w i q Q .
18231           = **= += *= &= <<= &&= -= /= |= >>= ||= //= .= %= ^= x=>;
18232         @is_uncontained_comma_break_included_type{@q} = (1) x scalar(@q);
18233     }
18234
18235     sub do_uncontained_comma_breaks {
18236
18237         # Handle commas not in containers...
18238         # This is a catch-all routine for commas that we
18239         # don't know what to do with because the don't fall
18240         # within containers.  We will bias the bond strength
18241         # to break at commas which ended lines in the input
18242         # file.  This usually works better than just trying
18243         # to put as many items on a line as possible.  A
18244         # downside is that if the input file is garbage it
18245         # won't work very well. However, the user can always
18246         # prevent following the old breakpoints with the
18247         # -iob flag.
18248         my ( $self, $dd ) = @_;
18249         my $bias                  = -.01;
18250         my $old_comma_break_count = 0;
18251         foreach my $ii ( @{ $comma_index[$dd] } ) {
18252             if ( $old_breakpoint_to_go[$ii] ) {
18253                 $old_comma_break_count++;
18254                 $bond_strength_to_go[$ii] = $bias;
18255
18256                 # reduce bias magnitude to force breaks in order
18257                 $bias *= 0.99;
18258             }
18259         }
18260
18261         # Also put a break before the first comma if
18262         # (1) there was a break there in the input, and
18263         # (2) there was exactly one old break before the first comma break
18264         # (3) OLD: there are multiple old comma breaks
18265         # (3) NEW: there are one or more old comma breaks (see return example)
18266         # (4) the first comma is at the starting level ...
18267         #     ... fixes cases b064 b065 b068 b210 b747
18268         # (5) the batch does not start with a ci>0 [ignore a ci change by -xci]
18269         #     ... fixes b1220.  If ci>0 we are in the middle of a snippet,
18270         #     maybe because -boc has been forcing out previous lines.
18271
18272         # For example, we will follow the user and break after
18273         # 'print' in this snippet:
18274         #    print
18275         #      "conformability (Not the same dimension)\n",
18276         #      "\t", $have, " is ", text_unit($hu), "\n",
18277         #      "\t", $want, " is ", text_unit($wu), "\n",
18278         #      ;
18279         #
18280         # Another example, just one comma, where we will break after
18281         # the return:
18282         #  return
18283         #    $x * cos($a) - $y * sin($a),
18284         #    $x * sin($a) + $y * cos($a);
18285
18286         # Breaking a print statement:
18287         # print SAVEOUT
18288         #   ( $? & 127 ) ? " (SIG#" . ( $? & 127 ) . ")" : "",
18289         #   ( $? & 128 ) ? " -- core dumped" : "", "\n";
18290         #
18291         #  But we will not force a break after the opening paren here
18292         #  (causes a blinker):
18293         #        $heap->{stream}->set_output_filter(
18294         #            poe::filter::reference->new('myotherfreezer') ),
18295         #          ;
18296         #
18297         my $i_first_comma = $comma_index[$dd]->[0];
18298         my $level_comma   = $levels_to_go[$i_first_comma];
18299         my $ci_start      = $ci_levels_to_go[0];
18300
18301         # Here we want to use the value of ci before any -xci adjustment
18302         if ( $ci_start && $rOpts_extended_continuation_indentation ) {
18303             my $K0 = $K_to_go[0];
18304             if ( $self->[_rseqno_controlling_my_ci_]->{$K0} ) { $ci_start = 0 }
18305         }
18306         if (  !$ci_start
18307             && $old_breakpoint_to_go[$i_first_comma]
18308             && $level_comma == $levels_to_go[0] )
18309         {
18310             my $ibreak    = -1;
18311             my $obp_count = 0;
18312             for ( my $ii = $i_first_comma - 1 ; $ii >= 0 ; $ii -= 1 ) {
18313                 if ( $old_breakpoint_to_go[$ii] ) {
18314                     $obp_count++;
18315                     last if ( $obp_count > 1 );
18316                     $ibreak = $ii
18317                       if ( $levels_to_go[$ii] == $level_comma );
18318                 }
18319             }
18320
18321             # Changed rule from multiple old commas to just one here:
18322             if ( $ibreak >= 0 && $obp_count == 1 && $old_comma_break_count > 0 )
18323             {
18324                 my $ibreak_m = $ibreak;
18325                 $ibreak_m-- if ( $types_to_go[$ibreak_m] eq 'b' );
18326                 if ( $ibreak_m >= 0 ) {
18327
18328                     # In order to avoid blinkers we have to be fairly
18329                     # restrictive:
18330
18331                     # OLD Rules:
18332                     #  Rule 1: Do not to break before an opening token
18333                     #  Rule 2: avoid breaking at ternary operators
18334                     #  (see b931, which is similar to the above print example)
18335                     #  Rule 3: Do not break at chain operators to fix case b1119
18336                     #   - The previous test was '$typem !~ /^[\(\{\[L\?\:]$/'
18337
18338                     # NEW Rule, replaced above rules after case b1214:
18339                     #  only break at one of the included types
18340
18341                     # Be sure to test any changes to these rules against runs
18342                     # with -l=0 such as the 'bbvt' test (perltidyrc_colin)
18343                     # series.
18344                     my $type_m = $types_to_go[$ibreak_m];
18345
18346                     # Switched from excluded to included for b1214. If necessary
18347                     # the token could also be checked if type_m eq 'k'
18348                     ##if ( !$is_uncontained_comma_break_excluded_type{$type_m} ) {
18349                     ##my $token_m = $tokens_to_go[$ibreak_m];
18350                     if ( $is_uncontained_comma_break_included_type{$type_m} ) {
18351                         $self->set_forced_breakpoint($ibreak);
18352                     }
18353                 }
18354             }
18355         }
18356         return;
18357     }
18358
18359     my %is_logical_container;
18360     my %quick_filter;
18361
18362     BEGIN {
18363         my @q = qw# if elsif unless while and or err not && | || ? : ! #;
18364         @is_logical_container{@q} = (1) x scalar(@q);
18365
18366         # This filter will allow most tokens to skip past a section of code
18367         %quick_filter = %is_assignment;
18368         @q            = qw# => . ; < > ~ #;
18369         push @q, ',';
18370         @quick_filter{@q} = (1) x scalar(@q);
18371     }
18372
18373     sub set_for_semicolon_breakpoints {
18374         my ( $self, $dd ) = @_;
18375         foreach ( @{ $rfor_semicolon_list[$dd] } ) {
18376             $self->set_forced_breakpoint($_);
18377         }
18378         return;
18379     }
18380
18381     sub set_logical_breakpoints {
18382         my ( $self, $dd ) = @_;
18383         if (
18384                $item_count_stack[$dd] == 0
18385             && $is_logical_container{ $container_type[$dd] }
18386
18387             || $has_old_logical_breakpoints[$dd]
18388           )
18389         {
18390
18391             # Look for breaks in this order:
18392             # 0   1    2   3
18393             # or  and  ||  &&
18394             foreach my $i ( 0 .. 3 ) {
18395                 if ( $rand_or_list[$dd][$i] ) {
18396                     foreach ( @{ $rand_or_list[$dd][$i] } ) {
18397                         $self->set_forced_breakpoint($_);
18398                     }
18399
18400                     # break at any 'if' and 'unless' too
18401                     foreach ( @{ $rand_or_list[$dd][4] } ) {
18402                         $self->set_forced_breakpoint($_);
18403                     }
18404                     $rand_or_list[$dd] = [];
18405                     last;
18406                 }
18407             }
18408         }
18409         return;
18410     }
18411
18412     sub is_unbreakable_container {
18413
18414         # never break a container of one of these types
18415         # because bad things can happen (map1.t)
18416         my $dd = shift;
18417         return $is_sort_map_grep{ $container_type[$dd] };
18418     }
18419
18420     sub break_lists {
18421
18422         my ( $self, $is_long_line ) = @_;
18423
18424         #----------------------------------------------------------------------
18425         # This routine is called once per batch, if the batch is a list, to set
18426         # line breaks so that hierarchical structure can be displayed and so
18427         # that list items can be vertically aligned.  The output of this
18428         # routine is stored in the array @forced_breakpoint_to_go, which is
18429         # used by sub 'break_long_lines' to set final breakpoints.
18430         #----------------------------------------------------------------------
18431
18432         my $rLL                  = $self->[_rLL_];
18433         my $ris_list_by_seqno    = $self->[_ris_list_by_seqno_];
18434         my $ris_broken_container = $self->[_ris_broken_container_];
18435         my $rbreak_before_container_by_seqno =
18436           $self->[_rbreak_before_container_by_seqno_];
18437
18438         $starting_depth = $nesting_depth_to_go[0];
18439
18440         $block_type                = ' ';
18441         $current_depth             = $starting_depth;
18442         $i                         = -1;
18443         $last_nonblank_token       = ';';
18444         $last_nonblank_type        = ';';
18445         $last_nonblank_block_type  = ' ';
18446         $last_old_breakpoint_count = 0;
18447         $minimum_depth = $current_depth + 1;    # forces update in check below
18448         $old_breakpoint_count      = 0;
18449         $starting_breakpoint_count = get_forced_breakpoint_count();
18450         $token                     = ';';
18451         $type                      = ';';
18452         $type_sequence             = '';
18453
18454         my $total_depth_variation = 0;
18455         my $i_old_assignment_break;
18456         my $depth_last = $starting_depth;
18457         my $comma_follows_last_closing_token;
18458
18459         check_for_new_minimum_depth($current_depth);
18460
18461         my $want_previous_breakpoint = -1;
18462
18463         my $saw_good_breakpoint;
18464         my $i_line_end   = -1;
18465         my $i_line_start = -1;
18466         my $i_last_colon = -1;
18467
18468         #----------------------------------------
18469         # Main loop over all tokens in this batch
18470         #----------------------------------------
18471         while ( ++$i <= $max_index_to_go ) {
18472             if ( $type ne 'b' ) {
18473                 $i_last_nonblank_token    = $i - 1;
18474                 $last_nonblank_type       = $type;
18475                 $last_nonblank_token      = $token;
18476                 $last_nonblank_block_type = $block_type;
18477             } ## end if ( $type ne 'b' )
18478             $type          = $types_to_go[$i];
18479             $block_type    = $block_type_to_go[$i];
18480             $token         = $tokens_to_go[$i];
18481             $type_sequence = $type_sequence_to_go[$i];
18482             my $next_type       = $types_to_go[ $i + 1 ];
18483             my $next_token      = $tokens_to_go[ $i + 1 ];
18484             my $i_next_nonblank = ( ( $next_type eq 'b' ) ? $i + 2 : $i + 1 );
18485             $next_nonblank_type       = $types_to_go[$i_next_nonblank];
18486             $next_nonblank_token      = $tokens_to_go[$i_next_nonblank];
18487             $next_nonblank_block_type = $block_type_to_go[$i_next_nonblank];
18488
18489             # set break if flag was set
18490             if ( $want_previous_breakpoint >= 0 ) {
18491                 $self->set_forced_breakpoint($want_previous_breakpoint);
18492                 $want_previous_breakpoint = -1;
18493             }
18494
18495             $last_old_breakpoint_count = $old_breakpoint_count;
18496
18497             # Fixed for case b1097 to not consider old breaks at highly
18498             # stressed locations, such as types 'L' and 'R'.  It might be
18499             # useful to generalize this concept in the future by looking at
18500             # actual bond strengths.
18501             if (   $old_breakpoint_to_go[$i]
18502                 && $type ne 'L'
18503                 && $next_nonblank_type ne 'R' )
18504             {
18505                 $i_line_end   = $i;
18506                 $i_line_start = $i_next_nonblank;
18507
18508                 $old_breakpoint_count++;
18509
18510                 # Break before certain keywords if user broke there and
18511                 # this is a 'safe' break point. The idea is to retain
18512                 # any preferred breaks for sequential list operations,
18513                 # like a schwartzian transform.
18514                 if ($rOpts_break_at_old_keyword_breakpoints) {
18515                     if (
18516                            $next_nonblank_type eq 'k'
18517                         && $is_keyword_returning_list{$next_nonblank_token}
18518                         && (   $type =~ /^[=\)\]\}Riw]$/
18519                             || $type eq 'k'
18520                             && $is_keyword_returning_list{$token} )
18521                       )
18522                     {
18523
18524                         # we actually have to set this break next time through
18525                         # the loop because if we are at a closing token (such
18526                         # as '}') which forms a one-line block, this break might
18527                         # get undone.
18528
18529                         # And do not do this at an equals if the user wants
18530                         # breaks before an equals (blinker cases b434 b903)
18531                         unless ( $type eq '=' && $want_break_before{$type} ) {
18532                             $want_previous_breakpoint = $i;
18533                         }
18534                     } ## end if ( $next_nonblank_type...)
18535                 } ## end if ($rOpts_break_at_old_keyword_breakpoints)
18536
18537                 # Break before attributes if user broke there
18538                 if ($rOpts_break_at_old_attribute_breakpoints) {
18539                     if ( $next_nonblank_type eq 'A' ) {
18540                         $want_previous_breakpoint = $i;
18541                     }
18542                 }
18543
18544                 # remember an = break as possible good break point
18545                 if ( $is_assignment{$type} ) {
18546                     $i_old_assignment_break = $i;
18547                 }
18548                 elsif ( $is_assignment{$next_nonblank_type} ) {
18549                     $i_old_assignment_break = $i_next_nonblank;
18550                 }
18551             } ## end if ( $old_breakpoint_to_go...)
18552
18553             next if ( $type eq 'b' );
18554             $depth = $nesting_depth_to_go[ $i + 1 ];
18555
18556             $total_depth_variation += abs( $depth - $depth_last );
18557             $depth_last = $depth;
18558
18559             # safety check - be sure we always break after a comment
18560             # Shouldn't happen .. an error here probably means that the
18561             # nobreak flag did not get turned off correctly during
18562             # formatting.
18563             if ( $type eq '#' ) {
18564                 if ( $i != $max_index_to_go ) {
18565                     if (DEVEL_MODE) {
18566                         Fault(<<EOM);
18567 Non-fatal program bug: backup logic required to break after a comment
18568 EOM
18569                     }
18570                     $nobreak_to_go[$i] = 0;
18571                     $self->set_forced_breakpoint($i);
18572                 } ## end if ( $i != $max_index_to_go)
18573             } ## end if ( $type eq '#' )
18574
18575             # Force breakpoints at certain tokens in long lines.
18576             # Note that such breakpoints will be undone later if these tokens
18577             # are fully contained within parens on a line.
18578             if (
18579
18580                 # break before a keyword within a line
18581                 $type eq 'k'
18582                 && $i > 0
18583
18584                 # if one of these keywords:
18585                 && $is_if_unless_while_until_for_foreach{$token}
18586
18587                 # but do not break at something like '1 while'
18588                 && ( $last_nonblank_type ne 'n' || $i > 2 )
18589
18590                 # and let keywords follow a closing 'do' brace
18591                 && $last_nonblank_block_type ne 'do'
18592
18593                 && (
18594                     $is_long_line
18595
18596                     # or container is broken (by side-comment, etc)
18597                     || (   $next_nonblank_token eq '('
18598                         && $mate_index_to_go[$i_next_nonblank] < $i )
18599                 )
18600               )
18601             {
18602                 $self->set_forced_breakpoint( $i - 1 );
18603             } ## end if ( $type eq 'k' && $i...)
18604
18605             # remember locations of '||'  and '&&' for possible breaks if we
18606             # decide this is a long logical expression.
18607             if ( $type eq '||' ) {
18608                 push @{ $rand_or_list[$depth][2] }, $i;
18609                 ++$has_old_logical_breakpoints[$depth]
18610                   if ( ( $i == $i_line_start || $i == $i_line_end )
18611                     && $rOpts_break_at_old_logical_breakpoints );
18612             } ## end elsif ( $type eq '||' )
18613             elsif ( $type eq '&&' ) {
18614                 push @{ $rand_or_list[$depth][3] }, $i;
18615                 ++$has_old_logical_breakpoints[$depth]
18616                   if ( ( $i == $i_line_start || $i == $i_line_end )
18617                     && $rOpts_break_at_old_logical_breakpoints );
18618             } ## end elsif ( $type eq '&&' )
18619             elsif ( $type eq 'f' ) {
18620                 push @{ $rfor_semicolon_list[$depth] }, $i;
18621             }
18622             elsif ( $type eq 'k' ) {
18623                 if ( $token eq 'and' ) {
18624                     push @{ $rand_or_list[$depth][1] }, $i;
18625                     ++$has_old_logical_breakpoints[$depth]
18626                       if ( ( $i == $i_line_start || $i == $i_line_end )
18627                         && $rOpts_break_at_old_logical_breakpoints );
18628                 } ## end if ( $token eq 'and' )
18629
18630                 # break immediately at 'or's which are probably not in a logical
18631                 # block -- but we will break in logical breaks below so that
18632                 # they do not add to the forced_breakpoint_count
18633                 elsif ( $token eq 'or' ) {
18634                     push @{ $rand_or_list[$depth][0] }, $i;
18635                     ++$has_old_logical_breakpoints[$depth]
18636                       if ( ( $i == $i_line_start || $i == $i_line_end )
18637                         && $rOpts_break_at_old_logical_breakpoints );
18638                     if ( $is_logical_container{ $container_type[$depth] } ) {
18639                     }
18640                     else {
18641                         if ($is_long_line) { $self->set_forced_breakpoint($i) }
18642                         elsif ( ( $i == $i_line_start || $i == $i_line_end )
18643                             && $rOpts_break_at_old_logical_breakpoints )
18644                         {
18645                             $saw_good_breakpoint = 1;
18646                         }
18647                     } ## end else [ if ( $is_logical_container...)]
18648                 } ## end elsif ( $token eq 'or' )
18649                 elsif ( $token eq 'if' || $token eq 'unless' ) {
18650                     push @{ $rand_or_list[$depth][4] }, $i;
18651                     if ( ( $i == $i_line_start || $i == $i_line_end )
18652                         && $rOpts_break_at_old_logical_breakpoints )
18653                     {
18654                         $self->set_forced_breakpoint($i);
18655                     }
18656                 } ## end elsif ( $token eq 'if' ||...)
18657             } ## end elsif ( $type eq 'k' )
18658             elsif ( $is_assignment{$type} ) {
18659                 $i_equals[$depth] = $i;
18660             }
18661
18662             if ($type_sequence) {
18663
18664                 # handle any postponed closing breakpoints
18665                 if ( $is_closing_sequence_token{$token} ) {
18666                     if ( $type eq ':' ) {
18667                         $i_last_colon = $i;
18668
18669                         # retain break at a ':' line break
18670                         if (   ( $i == $i_line_start || $i == $i_line_end )
18671                             && $rOpts_break_at_old_ternary_breakpoints
18672                             && $levels_to_go[$i] < $list_stress_level )
18673                         {
18674
18675                             $self->set_forced_breakpoint($i);
18676
18677                             # Break at a previous '=', but only if it is before
18678                             # the mating '?'. Mate_index test fixes b1287.
18679                             my $ieq = $i_equals[$depth];
18680                             if ( $ieq > 0 && $ieq < $mate_index_to_go[$i] ) {
18681                                 $self->set_forced_breakpoint(
18682                                     $i_equals[$depth] );
18683                                 $i_equals[$depth] = -1;
18684                             }
18685                         } ## end if ( ( $i == $i_line_start...))
18686                     } ## end if ( $type eq ':' )
18687                     if ( has_postponed_breakpoint($type_sequence) ) {
18688                         my $inc = ( $type eq ':' ) ? 0 : 1;
18689                         if ( $i >= $inc ) {
18690                             $self->set_forced_breakpoint( $i - $inc );
18691                         }
18692                     }
18693                 } ## end if ( $is_closing_sequence_token{$token} )
18694
18695                 # set breaks at ?/: if they will get separated (and are
18696                 # not a ?/: chain), or if the '?' is at the end of the
18697                 # line
18698                 elsif ( $token eq '?' ) {
18699                     my $i_colon = $mate_index_to_go[$i];
18700                     if (
18701                         $i_colon <= 0  # the ':' is not in this batch
18702                         || $i == 0     # this '?' is the first token of the line
18703                         || $i ==
18704                         $max_index_to_go    # or this '?' is the last token
18705                       )
18706                     {
18707
18708                         # don't break if # this has a side comment, and
18709                         # don't break at a '?' if preceded by ':' on
18710                         # this line of previous ?/: pair on this line.
18711                         # This is an attempt to preserve a chain of ?/:
18712                         # expressions (elsif2.t).
18713                         if (
18714                             (
18715                                    $i_last_colon < 0
18716                                 || $parent_seqno_to_go[$i_last_colon] !=
18717                                 $parent_seqno_to_go[$i]
18718                             )
18719                             && $tokens_to_go[$max_index_to_go] ne '#'
18720                           )
18721                         {
18722                             $self->set_forced_breakpoint($i);
18723                         }
18724                         $self->set_closing_breakpoint($i);
18725                     } ## end if ( $i_colon <= 0  ||...)
18726                 } ## end elsif ( $token eq '?' )
18727
18728                 elsif ( $is_opening_token{$token} ) {
18729
18730                     # do requeste -lp breaks at the OPENING token for BROKEN
18731                     # blocks.  NOTE: this can be done for both -lp and -xlp,
18732                     # but only -xlp can really take advantage of this.  So this
18733                     # is currently restricted to -xlp to avoid excess changes to
18734                     # existing -lp formatting.
18735                     if (   $rOpts_extended_line_up_parentheses
18736                         && $mate_index_to_go[$i] < 0 )
18737                     {
18738                         my $lp_object =
18739                           $self->[_rlp_object_by_seqno_]->{$type_sequence};
18740                         if ($lp_object) {
18741                             my $K_begin_line = $lp_object->get_K_begin_line();
18742                             my $i_begin_line = $K_begin_line - $K_to_go[0];
18743                             $self->set_forced_lp_break( $i_begin_line, $i );
18744                         }
18745                     }
18746                 }
18747
18748             } ## end if ($type_sequence)
18749
18750 #print "LISTX sees: i=$i type=$type  tok=$token  block=$block_type depth=$depth\n";
18751
18752             #------------------------------------------------------------
18753             # Handle Increasing Depth..
18754             #
18755             # prepare for a new list when depth increases
18756             # token $i is a '(','{', or '['
18757             #------------------------------------------------------------
18758             # hardened against bad input syntax: depth jump must be 1 and type
18759             # must be opening..fixes c102
18760             if ( $depth == $current_depth + 1 && $is_opening_type{$type} ) {
18761
18762                 $type_sequence_stack[$depth] = $type_sequence;
18763                 $override_cab3[$depth] =
18764                      $rOpts_comma_arrow_breakpoints == 3
18765                   && $type_sequence
18766                   && $self->[_roverride_cab3_]->{$type_sequence};
18767                 $breakpoint_stack[$depth] = get_forced_breakpoint_count();
18768                 $breakpoint_undo_stack[$depth] =
18769                   get_forced_breakpoint_undo_count();
18770                 $has_broken_sublist[$depth]            = 0;
18771                 $identifier_count_stack[$depth]        = 0;
18772                 $index_before_arrow[$depth]            = -1;
18773                 $interrupted_list[$depth]              = 0;
18774                 $item_count_stack[$depth]              = 0;
18775                 $last_comma_index[$depth]              = undef;
18776                 $last_dot_index[$depth]                = undef;
18777                 $last_nonblank_type[$depth]            = $last_nonblank_type;
18778                 $old_breakpoint_count_stack[$depth]    = $old_breakpoint_count;
18779                 $opening_structure_index_stack[$depth] = $i;
18780                 $rand_or_list[$depth]                  = [];
18781                 $rfor_semicolon_list[$depth]           = [];
18782                 $i_equals[$depth]                      = -1;
18783                 $want_comma_break[$depth]              = 0;
18784                 $container_type[$depth] =
18785
18786                   #      k => && || ? : .
18787                   $is_container_label_type{$last_nonblank_type}
18788                   ? $last_nonblank_token
18789                   : "";
18790                 $has_old_logical_breakpoints[$depth] = 0;
18791
18792                 # if line ends here then signal closing token to break
18793                 if ( $next_nonblank_type eq 'b' || $next_nonblank_type eq '#' )
18794                 {
18795                     $self->set_closing_breakpoint($i);
18796                 }
18797
18798                 # Not all lists of values should be vertically aligned..
18799                 $dont_align[$depth] =
18800
18801                   # code BLOCKS are handled at a higher level
18802                   ( $block_type ne "" )
18803
18804                   # certain paren lists
18805                   || ( $type eq '(' ) && (
18806
18807                     # it does not usually look good to align a list of
18808                     # identifiers in a parameter list, as in:
18809                     #    my($var1, $var2, ...)
18810                     # (This test should probably be refined, for now I'm just
18811                     # testing for any keyword)
18812                     ( $last_nonblank_type eq 'k' )
18813
18814                     # a trailing '(' usually indicates a non-list
18815                     || ( $next_nonblank_type eq '(' )
18816                   );
18817
18818                 # patch to outdent opening brace of long if/for/..
18819                 # statements (like this one).  See similar coding in
18820                 # set_continuation breaks.  We have also catch it here for
18821                 # short line fragments which otherwise will not go through
18822                 # break_long_lines.
18823                 if (
18824                     $block_type
18825
18826                     # if we have the ')' but not its '(' in this batch..
18827                     && ( $last_nonblank_token eq ')' )
18828                     && $mate_index_to_go[$i_last_nonblank_token] < 0
18829
18830                     # and user wants brace to left
18831                     && !$rOpts_opening_brace_always_on_right
18832
18833                     && ( $type eq '{' )     # should be true
18834                     && ( $token eq '{' )    # should be true
18835                   )
18836                 {
18837                     $self->set_forced_breakpoint( $i - 1 );
18838                 } ## end if ( $block_type && ( ...))
18839             } ## end if ( $depth > $current_depth)
18840
18841             #------------------------------------------------------------
18842             # Handle Decreasing Depth..
18843             #
18844             # finish off any old list when depth decreases
18845             # token $i is a ')','}', or ']'
18846             #------------------------------------------------------------
18847             # hardened against bad input syntax: depth jump must be 1 and type
18848             # must be closing .. fixes c102
18849             elsif ( $depth == $current_depth - 1 && $is_closing_type{$type} ) {
18850
18851                 check_for_new_minimum_depth($depth);
18852
18853                 $comma_follows_last_closing_token =
18854                   $next_nonblank_type eq ',' || $next_nonblank_type eq '=>';
18855
18856                 # force all outer logical containers to break after we see on
18857                 # old breakpoint
18858                 $has_old_logical_breakpoints[$depth] ||=
18859                   $has_old_logical_breakpoints[$current_depth];
18860
18861                 # Patch to break between ') {' if the paren list is broken.
18862                 # There is similar logic in break_long_lines for
18863                 # non-broken lists.
18864                 if (   $token eq ')'
18865                     && $next_nonblank_block_type
18866                     && $interrupted_list[$current_depth]
18867                     && $next_nonblank_type eq '{'
18868                     && !$rOpts_opening_brace_always_on_right )
18869                 {
18870                     $self->set_forced_breakpoint($i);
18871                 } ## end if ( $token eq ')' && ...
18872
18873 #print "LISTY sees: i=$i type=$type  tok=$token  block=$block_type depth=$depth next=$next_nonblank_type next_block=$next_nonblank_block_type inter=$interrupted_list[$current_depth]\n";
18874
18875                 # set breaks at commas if necessary
18876                 my ( $bp_count, $do_not_break_apart ) =
18877                   $self->set_comma_breakpoints($current_depth);
18878
18879                 my $i_opening = $opening_structure_index_stack[$current_depth];
18880                 my $saw_opening_structure = ( $i_opening >= 0 );
18881                 my $lp_object;
18882                 if ( $rOpts_line_up_parentheses && $saw_opening_structure ) {
18883                     $lp_object = $self->[_rlp_object_by_seqno_]
18884                       ->{ $type_sequence_to_go[$i_opening] };
18885                 }
18886
18887                 # this term is long if we had to break at interior commas..
18888                 my $is_long_term = $bp_count > 0;
18889
18890                 # If this is a short container with one or more comma arrows,
18891                 # then we will mark it as a long term to open it if requested.
18892                 # $rOpts_comma_arrow_breakpoints =
18893                 #    0 - open only if comma precedes closing brace
18894                 #    1 - stable: except for one line blocks
18895                 #    2 - try to form 1 line blocks
18896                 #    3 - ignore =>
18897                 #    4 - always open up if vt=0
18898                 #    5 - stable: even for one line blocks if vt=0
18899
18900                 # PATCH: Modify the -cab flag if we are not processing a list:
18901                 # We only want the -cab flag to apply to list containers, so
18902                 # for non-lists we use the default and stable -cab=5 value.
18903                 # Fixes case b939a.
18904                 my $cab_flag = $rOpts_comma_arrow_breakpoints;
18905                 if ( $type_sequence && !$ris_list_by_seqno->{$type_sequence} ) {
18906                     $cab_flag = 5;
18907                 }
18908
18909                 # Ignore old breakpoints when under stress.
18910                 # Fixes b1203 b1204 as well as b1197-b1200.
18911                 # But not if -lp: fixes b1264, b1265.  NOTE: rechecked with
18912                 # b1264 to see if this check is still required at all, and
18913                 # these still require a check, but at higher level beta+3
18914                 # instead of beta:  b1193 b780
18915                 if (   $saw_opening_structure
18916                     && !$lp_object
18917                     && $levels_to_go[$i_opening] >= $list_stress_level )
18918                 {
18919                     $cab_flag = 2;
18920
18921                     # Do not break hash braces under stress (fixes b1238)
18922                     $do_not_break_apart ||= $types_to_go[$i_opening] eq 'L';
18923
18924                    # This option fixes b1235, b1237, b1240 with old and new -lp,
18925                    # but formatting is nicer with next option.
18926                     ## $is_long_term ||=
18927                     ##  $levels_to_go[$i_opening] > $stress_level_beta + 1;
18928
18929                     # This option fixes b1240 but not b1235, b1237 with new -lp,
18930                     # but this gives better formatting than the previous option.
18931                     $do_not_break_apart ||=
18932                       $levels_to_go[$i_opening] > $stress_level_beta;
18933                 }
18934
18935                 if (  !$is_long_term
18936                     && $saw_opening_structure
18937                     && $is_opening_token{ $tokens_to_go[$i_opening] }
18938                     && $index_before_arrow[ $depth + 1 ] > 0
18939                     && !$opening_vertical_tightness{ $tokens_to_go[$i_opening] }
18940                   )
18941                 {
18942                     $is_long_term =
18943                          $cab_flag == 4
18944                       || $cab_flag == 0 && $last_nonblank_token eq ','
18945                       || $cab_flag == 5 && $old_breakpoint_to_go[$i_opening];
18946                 } ## end if ( !$is_long_term &&...)
18947
18948                 # mark term as long if the length between opening and closing
18949                 # parens exceeds allowed line length
18950                 if ( !$is_long_term && $saw_opening_structure ) {
18951
18952                     my $i_opening_minus =
18953                       $self->find_token_starting_list($i_opening);
18954
18955                     my $excess =
18956                       $self->excess_line_length( $i_opening_minus, $i );
18957
18958                     # Use standard spaces for indentation of lists in -lp mode
18959                     # if it gives a longer line length. This helps to avoid an
18960                     # instability due to forming and breaking one-line blocks.
18961                     # This fixes case b1314.
18962                     my $indentation = $leading_spaces_to_go[$i_opening_minus];
18963                     if ( ref($indentation)
18964                         && $ris_broken_container->{$type_sequence} )
18965                     {
18966                         my $lp_spaces = $indentation->get_spaces();
18967                         my $std_spaces =
18968                           $standard_spaces_to_go[$i_opening_minus];
18969                         my $diff = $std_spaces - $lp_spaces;
18970                         if ( $diff > 0 ) { $excess += $diff }
18971                     }
18972
18973                     my $tol = $length_tol;
18974
18975                     # boost tol for an -lp container
18976                     if (
18977                            $lp_tol_boost
18978                         && $lp_object
18979                         && ( $rOpts_extended_continuation_indentation
18980                             || !$ris_list_by_seqno->{$type_sequence} )
18981                       )
18982                     {
18983                         $tol += $lp_tol_boost;
18984                     }
18985
18986                     # Patch to avoid blinking with -bbxi=2 and -cab=2
18987                     # in which variations in -ci cause unstable formatting
18988                     # in edge cases. We just always add one ci level so that
18989                     # the formatting is independent of the -BBX results.
18990                     # Fixes cases b1137 b1149 b1150 b1155 b1158 b1159 b1160
18991                     # b1161 b1166 b1167 b1168
18992                     if (  !$ci_levels_to_go[$i_opening]
18993                         && $rbreak_before_container_by_seqno->{$type_sequence} )
18994                     {
18995                         $tol += $rOpts->{'continuation-indentation'};
18996                     }
18997
18998                     $is_long_term = $excess + $tol > 0;
18999
19000                 } ## end if ( !$is_long_term &&...)
19001
19002                 # We've set breaks after all comma-arrows.  Now we have to
19003                 # undo them if this can be a one-line block
19004                 # (the only breakpoints set will be due to comma-arrows)
19005
19006                 if (
19007
19008                     # user doesn't require breaking after all comma-arrows
19009                     ( $cab_flag != 0 ) && ( $cab_flag != 4 )
19010
19011                     # and if the opening structure is in this batch
19012                     && $saw_opening_structure
19013
19014                     # and either on the same old line
19015                     && (
19016                         $old_breakpoint_count_stack[$current_depth] ==
19017                         $last_old_breakpoint_count
19018
19019                         # or user wants to form long blocks with arrows
19020                         || $cab_flag == 2
19021
19022                         # if -cab=3 is overridden then use -cab=2 behavior
19023                         || $cab_flag == 3 && $override_cab3[$current_depth]
19024                     )
19025
19026                     # and we made breakpoints between the opening and closing
19027                     && ( $breakpoint_undo_stack[$current_depth] <
19028                         get_forced_breakpoint_undo_count() )
19029
19030                     # and this block is short enough to fit on one line
19031                     # Note: use < because need 1 more space for possible comma
19032                     && !$is_long_term
19033
19034                   )
19035                 {
19036                     $self->undo_forced_breakpoint_stack(
19037                         $breakpoint_undo_stack[$current_depth] );
19038                 } ## end if ( ( $rOpts_comma_arrow_breakpoints...))
19039
19040                 # now see if we have any comma breakpoints left
19041                 my $has_comma_breakpoints =
19042                   ( $breakpoint_stack[$current_depth] !=
19043                       get_forced_breakpoint_count() );
19044
19045                 # update broken-sublist flag of the outer container
19046                 $has_broken_sublist[$depth] =
19047                      $has_broken_sublist[$depth]
19048                   || $has_broken_sublist[$current_depth]
19049                   || $is_long_term
19050                   || $has_comma_breakpoints;
19051
19052 # Having come to the closing ')', '}', or ']', now we have to decide if we
19053 # should 'open up' the structure by placing breaks at the opening and
19054 # closing containers.  This is a tricky decision.  Here are some of the
19055 # basic considerations:
19056 #
19057 # -If this is a BLOCK container, then any breakpoints will have already
19058 # been set (and according to user preferences), so we need do nothing here.
19059 #
19060 # -If we have a comma-separated list for which we can align the list items,
19061 # then we need to do so because otherwise the vertical aligner cannot
19062 # currently do the alignment.
19063 #
19064 # -If this container does itself contain a container which has been broken
19065 # open, then it should be broken open to properly show the structure.
19066 #
19067 # -If there is nothing to align, and no other reason to break apart,
19068 # then do not do it.
19069 #
19070 # We will not break open the parens of a long but 'simple' logical expression.
19071 # For example:
19072 #
19073 # This is an example of a simple logical expression and its formatting:
19074 #
19075 #     if ( $bigwasteofspace1 && $bigwasteofspace2
19076 #         || $bigwasteofspace3 && $bigwasteofspace4 )
19077 #
19078 # Most people would prefer this than the 'spacey' version:
19079 #
19080 #     if (
19081 #         $bigwasteofspace1 && $bigwasteofspace2
19082 #         || $bigwasteofspace3 && $bigwasteofspace4
19083 #     )
19084 #
19085 # To illustrate the rules for breaking logical expressions, consider:
19086 #
19087 #             FULLY DENSE:
19088 #             if ( $opt_excl
19089 #                 and ( exists $ids_excl_uc{$id_uc}
19090 #                     or grep $id_uc =~ /$_/, @ids_excl_uc ))
19091 #
19092 # This is on the verge of being difficult to read.  The current default is to
19093 # open it up like this:
19094 #
19095 #             DEFAULT:
19096 #             if (
19097 #                 $opt_excl
19098 #                 and ( exists $ids_excl_uc{$id_uc}
19099 #                     or grep $id_uc =~ /$_/, @ids_excl_uc )
19100 #               )
19101 #
19102 # This is a compromise which tries to avoid being too dense and to spacey.
19103 # A more spaced version would be:
19104 #
19105 #             SPACEY:
19106 #             if (
19107 #                 $opt_excl
19108 #                 and (
19109 #                     exists $ids_excl_uc{$id_uc}
19110 #                     or grep $id_uc =~ /$_/, @ids_excl_uc
19111 #                 )
19112 #               )
19113 #
19114 # Some people might prefer the spacey version -- an option could be added.  The
19115 # innermost expression contains a long block '( exists $ids_...  ')'.
19116 #
19117 # Here is how the logic goes: We will force a break at the 'or' that the
19118 # innermost expression contains, but we will not break apart its opening and
19119 # closing containers because (1) it contains no multi-line sub-containers itself,
19120 # and (2) there is no alignment to be gained by breaking it open like this
19121 #
19122 #             and (
19123 #                 exists $ids_excl_uc{$id_uc}
19124 #                 or grep $id_uc =~ /$_/, @ids_excl_uc
19125 #             )
19126 #
19127 # (although this looks perfectly ok and might be good for long expressions).  The
19128 # outer 'if' container, though, contains a broken sub-container, so it will be
19129 # broken open to avoid too much density.  Also, since it contains no 'or's, there
19130 # will be a forced break at its 'and'.
19131
19132                 # set some flags telling something about this container..
19133                 my $is_simple_logical_expression = 0;
19134                 if (   $item_count_stack[$current_depth] == 0
19135                     && $saw_opening_structure
19136                     && $tokens_to_go[$i_opening] eq '('
19137                     && $is_logical_container{ $container_type[$current_depth] }
19138                   )
19139                 {
19140
19141                     # This seems to be a simple logical expression with
19142                     # no existing breakpoints.  Set a flag to prevent
19143                     # opening it up.
19144                     if ( !$has_comma_breakpoints ) {
19145                         $is_simple_logical_expression = 1;
19146                     }
19147
19148                     # This seems to be a simple logical expression with
19149                     # breakpoints (broken sublists, for example).  Break
19150                     # at all 'or's and '||'s.
19151                     else {
19152                         $self->set_logical_breakpoints($current_depth);
19153                     }
19154                 } ## end if ( $item_count_stack...)
19155
19156                 if ( $is_long_term
19157                     && @{ $rfor_semicolon_list[$current_depth] } )
19158                 {
19159                     $self->set_for_semicolon_breakpoints($current_depth);
19160
19161                     # open up a long 'for' or 'foreach' container to allow
19162                     # leading term alignment unless -lp is used.
19163                     $has_comma_breakpoints = 1 unless ($lp_object);
19164                 } ## end if ( $is_long_term && ...)
19165
19166                 if (
19167
19168                     # breaks for code BLOCKS are handled at a higher level
19169                     !$block_type
19170
19171                     # we do not need to break at the top level of an 'if'
19172                     # type expression
19173                     && !$is_simple_logical_expression
19174
19175                     ## modification to keep ': (' containers vertically tight;
19176                     ## but probably better to let user set -vt=1 to avoid
19177                     ## inconsistency with other paren types
19178                     ## && ($container_type[$current_depth] ne ':')
19179
19180                     # otherwise, we require one of these reasons for breaking:
19181                     && (
19182
19183                         # - this term has forced line breaks
19184                         $has_comma_breakpoints
19185
19186                        # - the opening container is separated from this batch
19187                        #   for some reason (comment, blank line, code block)
19188                        # - this is a non-paren container spanning multiple lines
19189                         || !$saw_opening_structure
19190
19191                         # - this is a long block contained in another breakable
19192                         #   container
19193                         || $is_long_term && !$self->is_in_block_by_i($i_opening)
19194                     )
19195                   )
19196                 {
19197
19198                     # do special -lp breaks at the CLOSING token for INTACT
19199                     # blocks (because we might not do them if the block does
19200                     # not break open)
19201                     if ($lp_object) {
19202                         my $K_begin_line = $lp_object->get_K_begin_line();
19203                         my $i_begin_line = $K_begin_line - $K_to_go[0];
19204                         $self->set_forced_lp_break( $i_begin_line, $i_opening );
19205                     }
19206
19207                     # break after opening structure.
19208                     # note: break before closing structure will be automatic
19209                     if ( $minimum_depth <= $current_depth ) {
19210
19211                         if ( $i_opening >= 0 ) {
19212                             $self->set_forced_breakpoint($i_opening)
19213                               unless ( $do_not_break_apart
19214                                 || is_unbreakable_container($current_depth) );
19215                         }
19216
19217                         # break at ',' of lower depth level before opening token
19218                         if ( $last_comma_index[$depth] ) {
19219                             $self->set_forced_breakpoint(
19220                                 $last_comma_index[$depth] );
19221                         }
19222
19223                         # break at '.' of lower depth level before opening token
19224                         if ( $last_dot_index[$depth] ) {
19225                             $self->set_forced_breakpoint(
19226                                 $last_dot_index[$depth] );
19227                         }
19228
19229                         # break before opening structure if preceded by another
19230                         # closing structure and a comma.  This is normally
19231                         # done by the previous closing brace, but not
19232                         # if it was a one-line block.
19233                         if ( $i_opening > 2 ) {
19234                             my $i_prev =
19235                               ( $types_to_go[ $i_opening - 1 ] eq 'b' )
19236                               ? $i_opening - 2
19237                               : $i_opening - 1;
19238
19239                             if (
19240                                 $types_to_go[$i_prev] eq ','
19241                                 && (   $types_to_go[ $i_prev - 1 ] eq ')'
19242                                     || $types_to_go[ $i_prev - 1 ] eq '}' )
19243                               )
19244                             {
19245                                 $self->set_forced_breakpoint($i_prev);
19246                             }
19247
19248                             # also break before something like ':('  or '?('
19249                             # if appropriate.
19250                             elsif (
19251                                 $types_to_go[$i_prev] =~ /^([k\:\?]|&&|\|\|)$/ )
19252                             {
19253                                 my $token_prev = $tokens_to_go[$i_prev];
19254                                 if ( $want_break_before{$token_prev} ) {
19255                                     $self->set_forced_breakpoint($i_prev);
19256                                 }
19257                             } ## end elsif ( $types_to_go[$i_prev...])
19258                         } ## end if ( $i_opening > 2 )
19259                     } ## end if ( $minimum_depth <=...)
19260
19261                     # break after comma following closing structure
19262                     if ( $next_type eq ',' ) {
19263                         $self->set_forced_breakpoint( $i + 1 );
19264                     }
19265
19266                     # break before an '=' following closing structure
19267                     if (
19268                         $is_assignment{$next_nonblank_type}
19269                         && ( $breakpoint_stack[$current_depth] !=
19270                             get_forced_breakpoint_count() )
19271                       )
19272                     {
19273                         $self->set_forced_breakpoint($i);
19274                     } ## end if ( $is_assignment{$next_nonblank_type...})
19275
19276                     # break at any comma before the opening structure Added
19277                     # for -lp, but seems to be good in general.  It isn't
19278                     # obvious how far back to look; the '5' below seems to
19279                     # work well and will catch the comma in something like
19280                     #  push @list, myfunc( $param, $param, ..
19281
19282                     my $icomma = $last_comma_index[$depth];
19283                     if ( defined($icomma) && ( $i_opening - $icomma ) < 5 ) {
19284                         unless ( $forced_breakpoint_to_go[$icomma] ) {
19285                             $self->set_forced_breakpoint($icomma);
19286                         }
19287                     }
19288                 } ## end logic to open up a container
19289
19290                 # Break open a logical container open if it was already open
19291                 elsif ($is_simple_logical_expression
19292                     && $has_old_logical_breakpoints[$current_depth] )
19293                 {
19294                     $self->set_logical_breakpoints($current_depth);
19295                 }
19296
19297                 # Handle long container which does not get opened up
19298                 elsif ($is_long_term) {
19299
19300                     # must set fake breakpoint to alert outer containers that
19301                     # they are complex
19302                     set_fake_breakpoint();
19303                 } ## end elsif ($is_long_term)
19304
19305             } ## end elsif ( $depth < $current_depth)
19306
19307             #------------------------------------------------------------
19308             # Handle this token
19309             #------------------------------------------------------------
19310
19311             $current_depth = $depth;
19312
19313             # most token types can skip the rest of this loop
19314             next unless ( $quick_filter{$type} );
19315
19316             # handle comma-arrow
19317             if ( $type eq '=>' ) {
19318                 next if ( $last_nonblank_type eq '=>' );
19319                 next if $rOpts_break_at_old_comma_breakpoints;
19320                 next
19321                   if ( $rOpts_comma_arrow_breakpoints == 3
19322                     && !$override_cab3[$depth] );
19323                 $want_comma_break[$depth]   = 1;
19324                 $index_before_arrow[$depth] = $i_last_nonblank_token;
19325                 next;
19326             } ## end if ( $type eq '=>' )
19327
19328             elsif ( $type eq '.' ) {
19329                 $last_dot_index[$depth] = $i;
19330             }
19331
19332             # Turn off alignment if we are sure that this is not a list
19333             # environment.  To be safe, we will do this if we see certain
19334             # non-list tokens, such as ';', and also the environment is
19335             # not a list.  Note that '=' could be in any of the = operators
19336             # (lextest.t). We can't just use the reported environment
19337             # because it can be incorrect in some cases.
19338             elsif ( ( $type =~ /^[\;\<\>\~]$/ || $is_assignment{$type} )
19339                 && !$self->is_in_list_by_i($i) )
19340             {
19341                 $dont_align[$depth]         = 1;
19342                 $want_comma_break[$depth]   = 0;
19343                 $index_before_arrow[$depth] = -1;
19344             } ## end elsif ( ( $type =~ /^[\;\<\>\~]$/...))
19345
19346             # now just handle any commas
19347             next unless ( $type eq ',' );
19348
19349             $last_dot_index[$depth]   = undef;
19350             $last_comma_index[$depth] = $i;
19351
19352             # break here if this comma follows a '=>'
19353             # but not if there is a side comment after the comma
19354             if ( $want_comma_break[$depth] ) {
19355
19356                 if ( $next_nonblank_type =~ /^[\)\}\]R]$/ ) {
19357                     if ($rOpts_comma_arrow_breakpoints) {
19358                         $want_comma_break[$depth] = 0;
19359                         next;
19360                     }
19361                 }
19362
19363                 $self->set_forced_breakpoint($i)
19364                   unless ( $next_nonblank_type eq '#' );
19365
19366                 # break before the previous token if it looks safe
19367                 # Example of something that we will not try to break before:
19368                 #   DBI::SQL_SMALLINT() => $ado_consts->{adSmallInt},
19369                 # Also we don't want to break at a binary operator (like +):
19370                 # $c->createOval(
19371                 #    $x + $R, $y +
19372                 #    $R => $x - $R,
19373                 #    $y - $R, -fill   => 'black',
19374                 # );
19375                 my $ibreak = $index_before_arrow[$depth] - 1;
19376                 if (   $ibreak > 0
19377                     && $tokens_to_go[ $ibreak + 1 ] !~ /^[\)\}\]]$/ )
19378                 {
19379                     if ( $tokens_to_go[$ibreak] eq '-' ) { $ibreak-- }
19380                     if ( $types_to_go[$ibreak] eq 'b' )  { $ibreak-- }
19381                     if ( $types_to_go[$ibreak] =~ /^[,wiZCUG\(\{\[]$/ ) {
19382
19383                         # don't break pointer calls, such as the following:
19384                         #  File::Spec->curdir  => 1,
19385                         # (This is tokenized as adjacent 'w' tokens)
19386                         ##if ( $tokens_to_go[ $ibreak + 1 ] !~ /^->/ ) {
19387
19388                         # And don't break before a comma, as in the following:
19389                         # ( LONGER_THAN,=> 1,
19390                         #    EIGHTY_CHARACTERS,=> 2,
19391                         #    CAUSES_FORMATTING,=> 3,
19392                         #    LIKE_THIS,=> 4,
19393                         # );
19394                         # This example is for -tso but should be general rule
19395                         if (   $tokens_to_go[ $ibreak + 1 ] ne '->'
19396                             && $tokens_to_go[ $ibreak + 1 ] ne ',' )
19397                         {
19398                             $self->set_forced_breakpoint($ibreak);
19399                         }
19400                     } ## end if ( $types_to_go[$ibreak...])
19401                 } ## end if ( $ibreak > 0 && $tokens_to_go...)
19402
19403                 $want_comma_break[$depth]   = 0;
19404                 $index_before_arrow[$depth] = -1;
19405
19406                 # handle list which mixes '=>'s and ','s:
19407                 # treat any list items so far as an interrupted list
19408                 $interrupted_list[$depth] = 1;
19409                 next;
19410             } ## end if ( $want_comma_break...)
19411
19412             # Break after all commas above starting depth...
19413             # But only if the last closing token was followed by a comma,
19414             #   to avoid breaking a list operator (issue c119)
19415             if (   $depth < $starting_depth
19416                 && $comma_follows_last_closing_token
19417                 && !$dont_align[$depth] )
19418             {
19419                 $self->set_forced_breakpoint($i)
19420                   unless ( $next_nonblank_type eq '#' );
19421                 next;
19422             }
19423
19424             # add this comma to the list..
19425             my $item_count = $item_count_stack[$depth];
19426             if ( $item_count == 0 ) {
19427
19428                 # but do not form a list with no opening structure
19429                 # for example:
19430
19431                 #            open INFILE_COPY, ">$input_file_copy"
19432                 #              or die ("very long message");
19433                 if ( ( $opening_structure_index_stack[$depth] < 0 )
19434                     && $self->is_in_block_by_i($i) )
19435                 {
19436                     $dont_align[$depth] = 1;
19437                 }
19438             } ## end if ( $item_count == 0 )
19439
19440             $comma_index[$depth][$item_count] = $i;
19441             ++$item_count_stack[$depth];
19442             if ( $last_nonblank_type =~ /^[iR\]]$/ ) {
19443                 $identifier_count_stack[$depth]++;
19444             }
19445         } ## end while ( ++$i <= $max_index_to_go)
19446
19447         #-------------------------------------------
19448         # end of loop over all tokens in this batch
19449         #-------------------------------------------
19450
19451         # set breaks for any unfinished lists ..
19452         for ( my $dd = $current_depth ; $dd >= $minimum_depth ; $dd-- ) {
19453
19454             $interrupted_list[$dd]   = 1;
19455             $has_broken_sublist[$dd] = 1 if ( $dd < $current_depth );
19456             $self->set_comma_breakpoints($dd);
19457             $self->set_logical_breakpoints($dd)
19458               if ( $has_old_logical_breakpoints[$dd] );
19459             $self->set_for_semicolon_breakpoints($dd);
19460
19461             # break open container...
19462             my $i_opening = $opening_structure_index_stack[$dd];
19463             if ( defined($i_opening) && $i_opening >= 0 ) {
19464                 $self->set_forced_breakpoint($i_opening)
19465                   unless (
19466                     is_unbreakable_container($dd)
19467
19468                     # Avoid a break which would place an isolated ' or "
19469                     # on a line
19470                     || (   $type eq 'Q'
19471                         && $i_opening >= $max_index_to_go - 2
19472                         && ( $token eq "'" || $token eq '"' ) )
19473                   );
19474             }
19475         } ## end for ( my $dd = $current_depth...)
19476
19477         # Return a flag indicating if the input file had some good breakpoints.
19478         # This flag will be used to force a break in a line shorter than the
19479         # allowed line length.
19480         if ( $has_old_logical_breakpoints[$current_depth] ) {
19481             $saw_good_breakpoint = 1;
19482         }
19483
19484         # A complex line with one break at an = has a good breakpoint.
19485         # This is not complex ($total_depth_variation=0):
19486         # $res1
19487         #   = 10;
19488         #
19489         # This is complex ($total_depth_variation=6):
19490         # $res2 =
19491         #  (is_boundp("a", 'self-insert') && is_boundp("b", 'self-insert'));
19492         elsif ($i_old_assignment_break
19493             && $total_depth_variation > 4
19494             && $old_breakpoint_count == 1 )
19495         {
19496             $saw_good_breakpoint = 1;
19497         } ## end elsif ( $i_old_assignment_break...)
19498
19499         return $saw_good_breakpoint;
19500     } ## end sub break_lists
19501 } ## end closure break_lists
19502
19503 my %is_kwiZ;
19504 my %is_key_type;
19505
19506 BEGIN {
19507
19508     # Added 'w' to fix b1172
19509     my @q = qw(k w i Z ->);
19510     @is_kwiZ{@q} = (1) x scalar(@q);
19511
19512     # added = for b1211
19513     @q = qw<( [ { L R } ] ) = b>;
19514     push @q, ',';
19515     @is_key_type{@q} = (1) x scalar(@q);
19516 }
19517
19518 use constant DEBUG_FIND_START => 0;
19519
19520 sub find_token_starting_list {
19521
19522     # When testing to see if a block will fit on one line, some
19523     # previous token(s) may also need to be on the line; particularly
19524     # if this is a sub call.  So we will look back at least one
19525     # token.
19526     my ( $self, $i_opening_paren ) = @_;
19527
19528     # This will be the return index
19529     my $i_opening_minus = $i_opening_paren;
19530
19531     goto RETURN if ( $i_opening_minus <= 0 );
19532
19533     my $im1 = $i_opening_paren - 1;
19534     my ( $iprev_nb, $type_prev_nb ) = ( $im1, $types_to_go[$im1] );
19535     if ( $type_prev_nb eq 'b' && $iprev_nb > 0 ) {
19536         $iprev_nb -= 1;
19537         $type_prev_nb = $types_to_go[$iprev_nb];
19538     }
19539
19540     if ( $type_prev_nb eq ',' ) {
19541
19542         # a previous comma is a good break point
19543         # $i_opening_minus = $i_opening_paren;
19544     }
19545
19546     elsif (
19547         $tokens_to_go[$i_opening_paren] eq '('
19548
19549         # non-parens added here to fix case b1186
19550         || $is_kwiZ{$type_prev_nb}
19551       )
19552     {
19553         $i_opening_minus = $im1;
19554
19555         # Walk back to improve length estimate...
19556         # FIX for cases b1169 b1170 b1171: start walking back
19557         # at the previous nonblank. This makes the result insensitive
19558         # to the flag --space-function-paren, and similar.
19559         # previous loop: for ( my $j = $im1 ; $j >= 0 ; $j-- ) {
19560         for ( my $j = $iprev_nb ; $j >= 0 ; $j-- ) {
19561             ##last if ( $types_to_go[$j] =~ /^[\(\[\{L\}\]\)Rb,]$/ );
19562             ##last if ( $is_key_type{ $types_to_go[$j] } );
19563             if ( $is_key_type{ $types_to_go[$j] } ) {
19564
19565                 # fix for b1211
19566                 if ( $types_to_go[$j] eq '=' ) { $i_opening_minus = $j }
19567                 last;
19568             }
19569             $i_opening_minus = $j;
19570         }
19571         if ( $types_to_go[$i_opening_minus] eq 'b' ) { $i_opening_minus++ }
19572     }
19573
19574   RETURN:
19575
19576     DEBUG_FIND_START && print <<EOM;
19577 FIND_START: i=$i_opening_paren tok=$tokens_to_go[$i_opening_paren] => im=$i_opening_minus tok=$tokens_to_go[$i_opening_minus]
19578 EOM
19579
19580     return $i_opening_minus;
19581 }
19582
19583 {    ## begin closure set_comma_breakpoints_do
19584
19585     my %is_keyword_with_special_leading_term;
19586
19587     BEGIN {
19588
19589         # These keywords have prototypes which allow a special leading item
19590         # followed by a list
19591         my @q =
19592           qw(formline grep kill map printf sprintf push chmod join pack unshift);
19593         @is_keyword_with_special_leading_term{@q} = (1) x scalar(@q);
19594     }
19595
19596     use constant DEBUG_SPARSE => 0;
19597
19598     sub set_comma_breakpoints_do {
19599
19600         # Given a list with some commas, set breakpoints at some of the
19601         # commas, if necessary, to make it easy to read.
19602
19603         my ( $self, $rinput_hash ) = @_;
19604
19605         my $depth               = $rinput_hash->{depth};
19606         my $i_opening_paren     = $rinput_hash->{i_opening_paren};
19607         my $i_closing_paren     = $rinput_hash->{i_closing_paren};
19608         my $item_count          = $rinput_hash->{item_count};
19609         my $identifier_count    = $rinput_hash->{identifier_count};
19610         my $rcomma_index        = $rinput_hash->{rcomma_index};
19611         my $next_nonblank_type  = $rinput_hash->{next_nonblank_type};
19612         my $list_type           = $rinput_hash->{list_type};
19613         my $interrupted         = $rinput_hash->{interrupted};
19614         my $rdo_not_break_apart = $rinput_hash->{rdo_not_break_apart};
19615         my $must_break_open     = $rinput_hash->{must_break_open};
19616         my $has_broken_sublist  = $rinput_hash->{has_broken_sublist};
19617
19618         # nothing to do if no commas seen
19619         return if ( $item_count < 1 );
19620
19621         my $i_first_comma     = $rcomma_index->[0];
19622         my $i_true_last_comma = $rcomma_index->[ $item_count - 1 ];
19623         my $i_last_comma      = $i_true_last_comma;
19624         if ( $i_last_comma >= $max_index_to_go ) {
19625             $i_last_comma = $rcomma_index->[ --$item_count - 1 ];
19626             return if ( $item_count < 1 );
19627         }
19628         my $is_lp_formatting = ref( $leading_spaces_to_go[$i_first_comma] );
19629
19630         #---------------------------------------------------------------
19631         # find lengths of all items in the list to calculate page layout
19632         #---------------------------------------------------------------
19633         my $comma_count = $item_count;
19634         my @item_lengths;
19635         my @i_term_begin;
19636         my @i_term_end;
19637         my @i_term_comma;
19638         my $i_prev_plus;
19639         my @max_length = ( 0, 0 );
19640         my $first_term_length;
19641         my $i      = $i_opening_paren;
19642         my $is_odd = 1;
19643
19644         foreach my $j ( 0 .. $comma_count - 1 ) {
19645             $is_odd      = 1 - $is_odd;
19646             $i_prev_plus = $i + 1;
19647             $i           = $rcomma_index->[$j];
19648
19649             my $i_term_end =
19650               ( $i == 0 || $types_to_go[ $i - 1 ] eq 'b' ) ? $i - 2 : $i - 1;
19651             my $i_term_begin =
19652               ( $types_to_go[$i_prev_plus] eq 'b' )
19653               ? $i_prev_plus + 1
19654               : $i_prev_plus;
19655             push @i_term_begin, $i_term_begin;
19656             push @i_term_end,   $i_term_end;
19657             push @i_term_comma, $i;
19658
19659             # note: currently adding 2 to all lengths (for comma and space)
19660             my $length =
19661               2 + token_sequence_length( $i_term_begin, $i_term_end );
19662             push @item_lengths, $length;
19663
19664             if ( $j == 0 ) {
19665                 $first_term_length = $length;
19666             }
19667             else {
19668
19669                 if ( $length > $max_length[$is_odd] ) {
19670                     $max_length[$is_odd] = $length;
19671                 }
19672             }
19673         }
19674
19675         # now we have to make a distinction between the comma count and item
19676         # count, because the item count will be one greater than the comma
19677         # count if the last item is not terminated with a comma
19678         my $i_b =
19679           ( $types_to_go[ $i_last_comma + 1 ] eq 'b' )
19680           ? $i_last_comma + 1
19681           : $i_last_comma;
19682         my $i_e =
19683           ( $types_to_go[ $i_closing_paren - 1 ] eq 'b' )
19684           ? $i_closing_paren - 2
19685           : $i_closing_paren - 1;
19686         my $i_effective_last_comma = $i_last_comma;
19687
19688         my $last_item_length = token_sequence_length( $i_b + 1, $i_e );
19689
19690         if ( $last_item_length > 0 ) {
19691
19692             # add 2 to length because other lengths include a comma and a blank
19693             $last_item_length += 2;
19694             push @item_lengths, $last_item_length;
19695             push @i_term_begin, $i_b + 1;
19696             push @i_term_end,   $i_e;
19697             push @i_term_comma, undef;
19698
19699             my $i_odd = $item_count % 2;
19700
19701             if ( $last_item_length > $max_length[$i_odd] ) {
19702                 $max_length[$i_odd] = $last_item_length;
19703             }
19704
19705             $item_count++;
19706             $i_effective_last_comma = $i_e + 1;
19707
19708             if ( $types_to_go[ $i_b + 1 ] =~ /^[iR\]]$/ ) {
19709                 $identifier_count++;
19710             }
19711         }
19712
19713         #---------------------------------------------------------------
19714         # End of length calculations
19715         #---------------------------------------------------------------
19716
19717         #---------------------------------------------------------------
19718         # Compound List Rule 1:
19719         # Break at (almost) every comma for a list containing a broken
19720         # sublist.  This has higher priority than the Interrupted List
19721         # Rule.
19722         #---------------------------------------------------------------
19723         if ($has_broken_sublist) {
19724
19725             # Break at every comma except for a comma between two
19726             # simple, small terms.  This prevents long vertical
19727             # columns of, say, just 0's.
19728             my $small_length = 10;    # 2 + actual maximum length wanted
19729
19730             # We'll insert a break in long runs of small terms to
19731             # allow alignment in uniform tables.
19732             my $skipped_count = 0;
19733             my $columns       = table_columns_available($i_first_comma);
19734             my $fields        = int( $columns / $small_length );
19735             if (   $rOpts_maximum_fields_per_table
19736                 && $fields > $rOpts_maximum_fields_per_table )
19737             {
19738                 $fields = $rOpts_maximum_fields_per_table;
19739             }
19740             my $max_skipped_count = $fields - 1;
19741
19742             my $is_simple_last_term = 0;
19743             my $is_simple_next_term = 0;
19744             foreach my $j ( 0 .. $item_count ) {
19745                 $is_simple_last_term = $is_simple_next_term;
19746                 $is_simple_next_term = 0;
19747                 if (   $j < $item_count
19748                     && $i_term_end[$j] == $i_term_begin[$j]
19749                     && $item_lengths[$j] <= $small_length )
19750                 {
19751                     $is_simple_next_term = 1;
19752                 }
19753                 next if $j == 0;
19754                 if (   $is_simple_last_term
19755                     && $is_simple_next_term
19756                     && $skipped_count < $max_skipped_count )
19757                 {
19758                     $skipped_count++;
19759                 }
19760                 else {
19761                     $skipped_count = 0;
19762                     my $i = $i_term_comma[ $j - 1 ];
19763                     last unless defined $i;
19764                     $self->set_forced_breakpoint($i);
19765                 }
19766             }
19767
19768             # always break at the last comma if this list is
19769             # interrupted; we wouldn't want to leave a terminal '{', for
19770             # example.
19771             if ($interrupted) {
19772                 $self->set_forced_breakpoint($i_true_last_comma);
19773             }
19774             return;
19775         }
19776
19777 #my ( $a, $b, $c ) = caller();
19778 #print "LISTX: in set_list $a $c interrupt=$interrupted count=$item_count
19779 #i_first = $i_first_comma  i_last=$i_last_comma max=$max_index_to_go\n";
19780 #print "depth=$depth has_broken=$has_broken_sublist[$depth] is_multi=$is_multiline opening_paren=($i_opening_paren) \n";
19781
19782         #---------------------------------------------------------------
19783         # Interrupted List Rule:
19784         # A list is forced to use old breakpoints if it was interrupted
19785         # by side comments or blank lines, or requested by user.
19786         #---------------------------------------------------------------
19787         if (   $rOpts_break_at_old_comma_breakpoints
19788             || $interrupted
19789             || $i_opening_paren < 0 )
19790         {
19791             $self->copy_old_breakpoints( $i_first_comma, $i_true_last_comma );
19792             return;
19793         }
19794
19795         #---------------------------------------------------------------
19796         # Looks like a list of items.  We have to look at it and size it up.
19797         #---------------------------------------------------------------
19798
19799         my $opening_token       = $tokens_to_go[$i_opening_paren];
19800         my $opening_is_in_block = $self->is_in_block_by_i($i_opening_paren);
19801
19802         #-------------------------------------------------------------------
19803         # Return if this will fit on one line
19804         #-------------------------------------------------------------------
19805
19806         # The -bbxi=2 parameters can add an extra hidden level of indentation;
19807         # this needs a tolerance to avoid instability.  Fixes b1259, 1260.
19808         my $tol = 0;
19809         if (   $break_before_container_types{$opening_token}
19810             && $container_indentation_options{$opening_token}
19811             && $container_indentation_options{$opening_token} == 2 )
19812         {
19813             $tol = $rOpts_indent_columns;
19814         }
19815
19816         my $i_opening_minus = $self->find_token_starting_list($i_opening_paren);
19817         return
19818           unless $self->excess_line_length( $i_opening_minus, $i_closing_paren )
19819           + $tol > 0;
19820
19821         #-------------------------------------------------------------------
19822         # Now we know that this block spans multiple lines; we have to set
19823         # at least one breakpoint -- real or fake -- as a signal to break
19824         # open any outer containers.
19825         #-------------------------------------------------------------------
19826         set_fake_breakpoint();
19827
19828         # be sure we do not extend beyond the current list length
19829         if ( $i_effective_last_comma >= $max_index_to_go ) {
19830             $i_effective_last_comma = $max_index_to_go - 1;
19831         }
19832
19833         # Set a flag indicating if we need to break open to keep -lp
19834         # items aligned.  This is necessary if any of the list terms
19835         # exceeds the available space after the '('.
19836         my $need_lp_break_open = $must_break_open;
19837         if ( $is_lp_formatting && !$must_break_open ) {
19838             my $columns_if_unbroken =
19839               $maximum_line_length_at_level[ $levels_to_go[$i_opening_minus] ]
19840               - total_line_length( $i_opening_minus, $i_opening_paren );
19841             $need_lp_break_open =
19842                  ( $max_length[0] > $columns_if_unbroken )
19843               || ( $max_length[1] > $columns_if_unbroken )
19844               || ( $first_term_length > $columns_if_unbroken );
19845         }
19846
19847         # Specify if the list must have an even number of fields or not.
19848         # It is generally safest to assume an even number, because the
19849         # list items might be a hash list.  But if we can be sure that
19850         # it is not a hash, then we can allow an odd number for more
19851         # flexibility.
19852         my $odd_or_even = 2;    # 1 = odd field count ok, 2 = want even count
19853
19854         if (   $identifier_count >= $item_count - 1
19855             || $is_assignment{$next_nonblank_type}
19856             || ( $list_type && $list_type ne '=>' && $list_type !~ /^[\:\?]$/ )
19857           )
19858         {
19859             $odd_or_even = 1;
19860         }
19861
19862         # do we have a long first term which should be
19863         # left on a line by itself?
19864         my $use_separate_first_term = (
19865             $odd_or_even == 1           # only if we can use 1 field/line
19866               && $item_count > 3        # need several items
19867               && $first_term_length >
19868               2 * $max_length[0] - 2    # need long first term
19869               && $first_term_length >
19870               2 * $max_length[1] - 2    # need long first term
19871         );
19872
19873         # or do we know from the type of list that the first term should
19874         # be placed alone?
19875         if ( !$use_separate_first_term ) {
19876             if ( $is_keyword_with_special_leading_term{$list_type} ) {
19877                 $use_separate_first_term = 1;
19878
19879                 # should the container be broken open?
19880                 if ( $item_count < 3 ) {
19881                     if ( $i_first_comma - $i_opening_paren < 4 ) {
19882                         ${$rdo_not_break_apart} = 1;
19883                     }
19884                 }
19885                 elsif ($first_term_length < 20
19886                     && $i_first_comma - $i_opening_paren < 4 )
19887                 {
19888                     my $columns = table_columns_available($i_first_comma);
19889                     if ( $first_term_length < $columns ) {
19890                         ${$rdo_not_break_apart} = 1;
19891                     }
19892                 }
19893             }
19894         }
19895
19896         # if so,
19897         if ($use_separate_first_term) {
19898
19899             # ..set a break and update starting values
19900             $use_separate_first_term = 1;
19901             $self->set_forced_breakpoint($i_first_comma);
19902             $i_opening_paren = $i_first_comma;
19903             $i_first_comma   = $rcomma_index->[1];
19904             $item_count--;
19905             return if $comma_count == 1;
19906             shift @item_lengths;
19907             shift @i_term_begin;
19908             shift @i_term_end;
19909             shift @i_term_comma;
19910         }
19911
19912         # if not, update the metrics to include the first term
19913         else {
19914             if ( $first_term_length > $max_length[0] ) {
19915                 $max_length[0] = $first_term_length;
19916             }
19917         }
19918
19919         # Field width parameters
19920         my $pair_width = ( $max_length[0] + $max_length[1] );
19921         my $max_width =
19922           ( $max_length[0] > $max_length[1] ) ? $max_length[0] : $max_length[1];
19923
19924         # Number of free columns across the page width for laying out tables
19925         my $columns = table_columns_available($i_first_comma);
19926
19927         # Patch for b1210 and b1216-b1218 when -vmll is set.  If we are unable
19928         # to break after an opening paren, then the maximum line length for the
19929         # first line could be less than the later lines.  So we need to reduce
19930         # the line length.  Normally, we will get a break after an opening
19931         # paren, but in some cases we might not.
19932         if (   $rOpts_variable_maximum_line_length
19933             && $tokens_to_go[$i_opening_paren] eq '('
19934             && @i_term_begin )
19935           ##&& !$old_breakpoint_to_go[$i_opening_paren] )  ## in b1210 patch
19936         {
19937             my $ib   = $i_term_begin[0];
19938             my $type = $types_to_go[$ib];
19939
19940             # So far, the only known instance of this problem is when
19941             # a bareword follows an opening paren with -vmll
19942             if ( $type eq 'w' ) {
19943
19944                 # If a line starts with paren+space+terms, then its max length
19945                 # could be up to ci+2-i spaces less than if the term went out
19946                 # on a line after the paren.  So..
19947                 my $tol = max( 0,
19948                     2 + $rOpts_continuation_indentation -
19949                       $rOpts_indent_columns );
19950                 $columns = max( 0, $columns - $tol );
19951
19952                 ## Here is the original b1210 fix, but it failed on b1216-b1218
19953                 ##my $columns2 = table_columns_available($i_opening_paren);
19954                 ##$columns = min( $columns, $columns2 );
19955             }
19956         }
19957
19958         # Estimated maximum number of fields which fit this space
19959         # This will be our first guess
19960         my $number_of_fields_max =
19961           maximum_number_of_fields( $columns, $odd_or_even, $max_width,
19962             $pair_width );
19963         my $number_of_fields = $number_of_fields_max;
19964
19965         # Find the best-looking number of fields
19966         # and make this our second guess if possible
19967         my ( $number_of_fields_best, $ri_ragged_break_list,
19968             $new_identifier_count )
19969           = $self->study_list_complexity( \@i_term_begin, \@i_term_end,
19970             \@item_lengths, $max_width );
19971
19972         if (   $number_of_fields_best != 0
19973             && $number_of_fields_best < $number_of_fields_max )
19974         {
19975             $number_of_fields = $number_of_fields_best;
19976         }
19977
19978         # ----------------------------------------------------------------------
19979         # If we are crowded and the -lp option is being used, try to
19980         # undo some indentation
19981         # ----------------------------------------------------------------------
19982         if (
19983             $is_lp_formatting
19984             && (
19985                 $number_of_fields == 0
19986                 || (   $number_of_fields == 1
19987                     && $number_of_fields != $number_of_fields_best )
19988             )
19989           )
19990         {
19991             my $available_spaces =
19992               $self->get_available_spaces_to_go($i_first_comma);
19993             if ( $available_spaces > 0 ) {
19994
19995                 my $spaces_wanted = $max_width - $columns;    # for 1 field
19996
19997                 if ( $number_of_fields_best == 0 ) {
19998                     $number_of_fields_best =
19999                       get_maximum_fields_wanted( \@item_lengths );
20000                 }
20001
20002                 if ( $number_of_fields_best != 1 ) {
20003                     my $spaces_wanted_2 =
20004                       1 + $pair_width - $columns;    # for 2 fields
20005                     if ( $available_spaces > $spaces_wanted_2 ) {
20006                         $spaces_wanted = $spaces_wanted_2;
20007                     }
20008                 }
20009
20010                 if ( $spaces_wanted > 0 ) {
20011                     my $deleted_spaces =
20012                       $self->reduce_lp_indentation( $i_first_comma,
20013                         $spaces_wanted );
20014
20015                     # redo the math
20016                     if ( $deleted_spaces > 0 ) {
20017                         $columns = table_columns_available($i_first_comma);
20018                         $number_of_fields_max =
20019                           maximum_number_of_fields( $columns, $odd_or_even,
20020                             $max_width, $pair_width );
20021                         $number_of_fields = $number_of_fields_max;
20022
20023                         if (   $number_of_fields_best == 1
20024                             && $number_of_fields >= 1 )
20025                         {
20026                             $number_of_fields = $number_of_fields_best;
20027                         }
20028                     }
20029                 }
20030             }
20031         }
20032
20033         # try for one column if two won't work
20034         if ( $number_of_fields <= 0 ) {
20035             $number_of_fields = int( $columns / $max_width );
20036         }
20037
20038         # The user can place an upper bound on the number of fields,
20039         # which can be useful for doing maintenance on tables
20040         if (   $rOpts_maximum_fields_per_table
20041             && $number_of_fields > $rOpts_maximum_fields_per_table )
20042         {
20043             $number_of_fields = $rOpts_maximum_fields_per_table;
20044         }
20045
20046         # How many columns (characters) and lines would this container take
20047         # if no additional whitespace were added?
20048         my $packed_columns = token_sequence_length( $i_opening_paren + 1,
20049             $i_effective_last_comma + 1 );
20050         if ( $columns <= 0 ) { $columns = 1 }    # avoid divide by zero
20051         my $packed_lines = 1 + int( $packed_columns / $columns );
20052
20053         # are we an item contained in an outer list?
20054         my $in_hierarchical_list = $next_nonblank_type =~ /^[\}\,]$/;
20055
20056         if ( $number_of_fields <= 0 ) {
20057
20058 #         #---------------------------------------------------------------
20059 #         # We're in trouble.  We can't find a single field width that works.
20060 #         # There is no simple answer here; we may have a single long list
20061 #         # item, or many.
20062 #         #---------------------------------------------------------------
20063 #
20064 #         In many cases, it may be best to not force a break if there is just one
20065 #         comma, because the standard continuation break logic will do a better
20066 #         job without it.
20067 #
20068 #         In the common case that all but one of the terms can fit
20069 #         on a single line, it may look better not to break open the
20070 #         containing parens.  Consider, for example
20071 #
20072 #             $color =
20073 #               join ( '/',
20074 #                 sort { $color_value{$::a} <=> $color_value{$::b}; }
20075 #                 keys %colors );
20076 #
20077 #         which will look like this with the container broken:
20078 #
20079 #             $color = join (
20080 #                 '/',
20081 #                 sort { $color_value{$::a} <=> $color_value{$::b}; } keys %colors
20082 #             );
20083 #
20084 #         Here is an example of this rule for a long last term:
20085 #
20086 #             log_message( 0, 256, 128,
20087 #                 "Number of routes in adj-RIB-in to be considered: $peercount" );
20088 #
20089 #         And here is an example with a long first term:
20090 #
20091 #         $s = sprintf(
20092 # "%2d wallclock secs (%$f usr %$f sys + %$f cusr %$f csys = %$f CPU)",
20093 #             $r, $pu, $ps, $cu, $cs, $tt
20094 #           )
20095 #           if $style eq 'all';
20096
20097             my $i_last_comma = $rcomma_index->[ $comma_count - 1 ];
20098             my $long_last_term =
20099               $self->excess_line_length( 0, $i_last_comma ) <= 0;
20100             my $long_first_term =
20101               $self->excess_line_length( $i_first_comma + 1, $max_index_to_go )
20102               <= 0;
20103
20104             # break at every comma ...
20105             if (
20106
20107                 # if requested by user or is best looking
20108                 $number_of_fields_best == 1
20109
20110                 # or if this is a sublist of a larger list
20111                 || $in_hierarchical_list
20112
20113                 # or if multiple commas and we don't have a long first or last
20114                 # term
20115                 || ( $comma_count > 1
20116                     && !( $long_last_term || $long_first_term ) )
20117               )
20118             {
20119                 foreach ( 0 .. $comma_count - 1 ) {
20120                     $self->set_forced_breakpoint( $rcomma_index->[$_] );
20121                 }
20122             }
20123             elsif ($long_last_term) {
20124
20125                 $self->set_forced_breakpoint($i_last_comma);
20126                 ${$rdo_not_break_apart} = 1 unless $must_break_open;
20127             }
20128             elsif ($long_first_term) {
20129
20130                 $self->set_forced_breakpoint($i_first_comma);
20131             }
20132             else {
20133
20134                 # let breaks be defined by default bond strength logic
20135             }
20136             return;
20137         }
20138
20139         # --------------------------------------------------------
20140         # We have a tentative field count that seems to work.
20141         # How many lines will this require?
20142         # --------------------------------------------------------
20143         my $formatted_lines = $item_count / ($number_of_fields);
20144         if ( $formatted_lines != int $formatted_lines ) {
20145             $formatted_lines = 1 + int $formatted_lines;
20146         }
20147
20148         # So far we've been trying to fill out to the right margin.  But
20149         # compact tables are easier to read, so let's see if we can use fewer
20150         # fields without increasing the number of lines.
20151         $number_of_fields =
20152           compactify_table( $item_count, $number_of_fields, $formatted_lines,
20153             $odd_or_even );
20154
20155         # How many spaces across the page will we fill?
20156         my $columns_per_line =
20157           ( int $number_of_fields / 2 ) * $pair_width +
20158           ( $number_of_fields % 2 ) * $max_width;
20159
20160         my $formatted_columns;
20161
20162         if ( $number_of_fields > 1 ) {
20163             $formatted_columns =
20164               ( $pair_width * ( int( $item_count / 2 ) ) +
20165                   ( $item_count % 2 ) * $max_width );
20166         }
20167         else {
20168             $formatted_columns = $max_width * $item_count;
20169         }
20170         if ( $formatted_columns < $packed_columns ) {
20171             $formatted_columns = $packed_columns;
20172         }
20173
20174         my $unused_columns = $formatted_columns - $packed_columns;
20175
20176         # set some empirical parameters to help decide if we should try to
20177         # align; high sparsity does not look good, especially with few lines
20178         my $sparsity = ($unused_columns) / ($formatted_columns);
20179         my $max_allowed_sparsity =
20180             ( $item_count < 3 )    ? 0.1
20181           : ( $packed_lines == 1 ) ? 0.15
20182           : ( $packed_lines == 2 ) ? 0.4
20183           :                          0.7;
20184
20185         my $two_line_word_wrap_ok;
20186         if ( $opening_token eq '(' ) {
20187
20188             # default is to allow wrapping of short paren lists
20189             $two_line_word_wrap_ok = 1;
20190
20191             # but turn off word wrap where requested
20192             if ($rOpts_break_open_paren_list) {
20193
20194                 # This parameter is a one-character flag, as follows:
20195                 #  '0' matches no parens  -> break open NOT OK -> word wrap OK
20196                 #  '1' matches all parens -> break open OK -> word wrap NOT OK
20197                 #  Other values are the same as used by the weld-exclusion-list
20198                 my $flag = $rOpts_break_open_paren_list;
20199                 if (   $flag eq '*'
20200                     || $flag eq '1' )
20201                 {
20202                     $two_line_word_wrap_ok = 0;
20203                 }
20204                 elsif ( $flag eq '0' ) {
20205                     $two_line_word_wrap_ok = 1;
20206                 }
20207                 else {
20208                     my $KK = $K_to_go[$i_opening_paren];
20209                     $two_line_word_wrap_ok =
20210                       !$self->match_paren_flag( $KK, $flag );
20211                 }
20212             }
20213         }
20214
20215         # Begin check for shortcut methods, which avoid treating a list
20216         # as a table for relatively small parenthesized lists.  These
20217         # are usually easier to read if not formatted as tables.
20218         if (
20219             $packed_lines <= 2           # probably can fit in 2 lines
20220             && $item_count < 9           # doesn't have too many items
20221             && $opening_is_in_block      # not a sub-container
20222             && $two_line_word_wrap_ok    # ok to wrap this paren list
20223             ##&& $opening_token eq '('    # is paren list
20224           )
20225         {
20226
20227             # Shortcut method 1: for -lp and just one comma:
20228             # This is a no-brainer, just break at the comma.
20229             if (
20230                 $is_lp_formatting      # -lp
20231                 && $item_count == 2    # two items, one comma
20232                 && !$must_break_open
20233               )
20234             {
20235                 my $i_break = $rcomma_index->[0];
20236                 $self->set_forced_breakpoint($i_break);
20237                 ${$rdo_not_break_apart} = 1;
20238                 return;
20239
20240             }
20241
20242             # method 2 is for most small ragged lists which might look
20243             # best if not displayed as a table.
20244             if (
20245                 ( $number_of_fields == 2 && $item_count == 3 )
20246                 || (
20247                     $new_identifier_count > 0    # isn't all quotes
20248                     && $sparsity > 0.15
20249                 )    # would be fairly spaced gaps if aligned
20250               )
20251             {
20252
20253                 my $break_count = $self->set_ragged_breakpoints( \@i_term_comma,
20254                     $ri_ragged_break_list );
20255                 ++$break_count if ($use_separate_first_term);
20256
20257                 # NOTE: we should really use the true break count here,
20258                 # which can be greater if there are large terms and
20259                 # little space, but usually this will work well enough.
20260                 unless ($must_break_open) {
20261
20262                     if ( $break_count <= 1 ) {
20263                         ${$rdo_not_break_apart} = 1;
20264                     }
20265                     elsif ( $is_lp_formatting && !$need_lp_break_open ) {
20266                         ${$rdo_not_break_apart} = 1;
20267                     }
20268                 }
20269                 return;
20270             }
20271
20272         } ## end shortcut methods
20273
20274         # debug stuff
20275         DEBUG_SPARSE && do {
20276             print STDOUT
20277 "SPARSE:cols=$columns commas=$comma_count items:$item_count ids=$identifier_count pairwidth=$pair_width fields=$number_of_fields lines packed: $packed_lines packed_cols=$packed_columns fmtd:$formatted_lines cols /line:$columns_per_line  unused:$unused_columns fmtd:$formatted_columns sparsity=$sparsity allow=$max_allowed_sparsity\n";
20278
20279         };
20280
20281         #---------------------------------------------------------------
20282         # Compound List Rule 2:
20283         # If this list is too long for one line, and it is an item of a
20284         # larger list, then we must format it, regardless of sparsity
20285         # (ian.t).  One reason that we have to do this is to trigger
20286         # Compound List Rule 1, above, which causes breaks at all commas of
20287         # all outer lists.  In this way, the structure will be properly
20288         # displayed.
20289         #---------------------------------------------------------------
20290
20291         # Decide if this list is too long for one line unless broken
20292         my $total_columns = table_columns_available($i_opening_paren);
20293         my $too_long      = $packed_columns > $total_columns;
20294
20295         # For a paren list, include the length of the token just before the
20296         # '(' because this is likely a sub call, and we would have to
20297         # include the sub name on the same line as the list.  This is still
20298         # imprecise, but not too bad.  (steve.t)
20299         if ( !$too_long && $i_opening_paren > 0 && $opening_token eq '(' ) {
20300
20301             $too_long = $self->excess_line_length( $i_opening_minus,
20302                 $i_effective_last_comma + 1 ) > 0;
20303         }
20304
20305         # FIXME: For an item after a '=>', try to include the length of the
20306         # thing before the '=>'.  This is crude and should be improved by
20307         # actually looking back token by token.
20308         if ( !$too_long && $i_opening_paren > 0 && $list_type eq '=>' ) {
20309             my $i_opening_minus = $i_opening_paren - 4;
20310             if ( $i_opening_minus >= 0 ) {
20311                 $too_long = $self->excess_line_length( $i_opening_minus,
20312                     $i_effective_last_comma + 1 ) > 0;
20313             }
20314         }
20315
20316         # Always break lists contained in '[' and '{' if too long for 1 line,
20317         # and always break lists which are too long and part of a more complex
20318         # structure.
20319         my $must_break_open_container = $must_break_open
20320           || ( $too_long
20321             && ( $in_hierarchical_list || !$two_line_word_wrap_ok ) );
20322
20323 #print "LISTX: next=$next_nonblank_type  avail cols=$columns packed=$packed_columns must format = $must_break_open_container too-long=$too_long  opening=$opening_token list_type=$list_type formatted_lines=$formatted_lines  packed=$packed_lines max_sparsity= $max_allowed_sparsity sparsity=$sparsity \n";
20324
20325         #---------------------------------------------------------------
20326         # The main decision:
20327         # Now decide if we will align the data into aligned columns.  Do not
20328         # attempt to align columns if this is a tiny table or it would be
20329         # too spaced.  It seems that the more packed lines we have, the
20330         # sparser the list that can be allowed and still look ok.
20331         #---------------------------------------------------------------
20332
20333         if (   ( $formatted_lines < 3 && $packed_lines < $formatted_lines )
20334             || ( $formatted_lines < 2 )
20335             || ( $unused_columns > $max_allowed_sparsity * $formatted_columns )
20336           )
20337         {
20338
20339             #---------------------------------------------------------------
20340             # too sparse: would look ugly if aligned in a table;
20341             #---------------------------------------------------------------
20342
20343             # use old breakpoints if this is a 'big' list
20344             if ( $packed_lines > 2 && $item_count > 10 ) {
20345                 write_logfile_entry("List sparse: using old breakpoints\n");
20346                 $self->copy_old_breakpoints( $i_first_comma, $i_last_comma );
20347             }
20348
20349             # let the continuation logic handle it if 2 lines
20350             else {
20351
20352                 my $break_count = $self->set_ragged_breakpoints( \@i_term_comma,
20353                     $ri_ragged_break_list );
20354                 ++$break_count if ($use_separate_first_term);
20355
20356                 unless ($must_break_open_container) {
20357                     if ( $break_count <= 1 ) {
20358                         ${$rdo_not_break_apart} = 1;
20359                     }
20360                     elsif ( $is_lp_formatting && !$need_lp_break_open ) {
20361                         ${$rdo_not_break_apart} = 1;
20362                     }
20363                 }
20364             }
20365             return;
20366         }
20367
20368         #---------------------------------------------------------------
20369         # go ahead and format as a table
20370         #---------------------------------------------------------------
20371         write_logfile_entry(
20372             "List: auto formatting with $number_of_fields fields/row\n");
20373
20374         my $j_first_break =
20375           $use_separate_first_term ? $number_of_fields : $number_of_fields - 1;
20376
20377         for (
20378             my $j = $j_first_break ;
20379             $j < $comma_count ;
20380             $j += $number_of_fields
20381           )
20382         {
20383             my $i = $rcomma_index->[$j];
20384             $self->set_forced_breakpoint($i);
20385         }
20386         return;
20387     }
20388 } ## end closure set_comma_breakpoints_do
20389
20390 sub study_list_complexity {
20391
20392     # Look for complex tables which should be formatted with one term per line.
20393     # Returns the following:
20394     #
20395     #  \@i_ragged_break_list = list of good breakpoints to avoid lines
20396     #    which are hard to read
20397     #  $number_of_fields_best = suggested number of fields based on
20398     #    complexity; = 0 if any number may be used.
20399     #
20400     my ( $self, $ri_term_begin, $ri_term_end, $ritem_lengths, $max_width ) = @_;
20401     my $item_count            = @{$ri_term_begin};
20402     my $complex_item_count    = 0;
20403     my $number_of_fields_best = $rOpts_maximum_fields_per_table;
20404     my $i_max                 = @{$ritem_lengths} - 1;
20405     ##my @item_complexity;
20406
20407     my $i_last_last_break = -3;
20408     my $i_last_break      = -2;
20409     my @i_ragged_break_list;
20410
20411     my $definitely_complex = 30;
20412     my $definitely_simple  = 12;
20413     my $quote_count        = 0;
20414
20415     for my $i ( 0 .. $i_max ) {
20416         my $ib = $ri_term_begin->[$i];
20417         my $ie = $ri_term_end->[$i];
20418
20419         # define complexity: start with the actual term length
20420         my $weighted_length = ( $ritem_lengths->[$i] - 2 );
20421
20422         ##TBD: join types here and check for variations
20423         ##my $str=join "", @tokens_to_go[$ib..$ie];
20424
20425         my $is_quote = 0;
20426         if ( $types_to_go[$ib] =~ /^[qQ]$/ ) {
20427             $is_quote = 1;
20428             $quote_count++;
20429         }
20430         elsif ( $types_to_go[$ib] =~ /^[w\-]$/ ) {
20431             $quote_count++;
20432         }
20433
20434         if ( $ib eq $ie ) {
20435             if ( $is_quote && $tokens_to_go[$ib] =~ /\s/ ) {
20436                 $complex_item_count++;
20437                 $weighted_length *= 2;
20438             }
20439             else {
20440             }
20441         }
20442         else {
20443             if ( grep { $_ eq 'b' } @types_to_go[ $ib .. $ie ] ) {
20444                 $complex_item_count++;
20445                 $weighted_length *= 2;
20446             }
20447             if ( grep { $_ eq '..' } @types_to_go[ $ib .. $ie ] ) {
20448                 $weighted_length += 4;
20449             }
20450         }
20451
20452         # add weight for extra tokens.
20453         $weighted_length += 2 * ( $ie - $ib );
20454
20455 ##        my $BUB = join '', @tokens_to_go[$ib..$ie];
20456 ##        print "# COMPLEXITY:$weighted_length   $BUB\n";
20457
20458 ##push @item_complexity, $weighted_length;
20459
20460         # now mark a ragged break after this item it if it is 'long and
20461         # complex':
20462         if ( $weighted_length >= $definitely_complex ) {
20463
20464             # if we broke after the previous term
20465             # then break before it too
20466             if (   $i_last_break == $i - 1
20467                 && $i > 1
20468                 && $i_last_last_break != $i - 2 )
20469             {
20470
20471                 ## FIXME: don't strand a small term
20472                 pop @i_ragged_break_list;
20473                 push @i_ragged_break_list, $i - 2;
20474                 push @i_ragged_break_list, $i - 1;
20475             }
20476
20477             push @i_ragged_break_list, $i;
20478             $i_last_last_break = $i_last_break;
20479             $i_last_break      = $i;
20480         }
20481
20482         # don't break before a small last term -- it will
20483         # not look good on a line by itself.
20484         elsif ($i == $i_max
20485             && $i_last_break == $i - 1
20486             && $weighted_length <= $definitely_simple )
20487         {
20488             pop @i_ragged_break_list;
20489         }
20490     }
20491
20492     my $identifier_count = $i_max + 1 - $quote_count;
20493
20494     # Need more tuning here..
20495     if (   $max_width > 12
20496         && $complex_item_count > $item_count / 2
20497         && $number_of_fields_best != 2 )
20498     {
20499         $number_of_fields_best = 1;
20500     }
20501
20502     return ( $number_of_fields_best, \@i_ragged_break_list, $identifier_count );
20503 }
20504
20505 sub get_maximum_fields_wanted {
20506
20507     # Not all tables look good with more than one field of items.
20508     # This routine looks at a table and decides if it should be
20509     # formatted with just one field or not.
20510     # This coding is still under development.
20511     my ($ritem_lengths) = @_;
20512
20513     my $number_of_fields_best = 0;
20514
20515     # For just a few items, we tentatively assume just 1 field.
20516     my $item_count = @{$ritem_lengths};
20517     if ( $item_count <= 5 ) {
20518         $number_of_fields_best = 1;
20519     }
20520
20521     # For larger tables, look at it both ways and see what looks best
20522     else {
20523
20524         my $is_odd            = 1;
20525         my @max_length        = ( 0,     0 );
20526         my @last_length_2     = ( undef, undef );
20527         my @first_length_2    = ( undef, undef );
20528         my $last_length       = undef;
20529         my $total_variation_1 = 0;
20530         my $total_variation_2 = 0;
20531         my @total_variation_2 = ( 0, 0 );
20532
20533         foreach my $j ( 0 .. $item_count - 1 ) {
20534
20535             $is_odd = 1 - $is_odd;
20536             my $length = $ritem_lengths->[$j];
20537             if ( $length > $max_length[$is_odd] ) {
20538                 $max_length[$is_odd] = $length;
20539             }
20540
20541             if ( defined($last_length) ) {
20542                 my $dl = abs( $length - $last_length );
20543                 $total_variation_1 += $dl;
20544             }
20545             $last_length = $length;
20546
20547             my $ll = $last_length_2[$is_odd];
20548             if ( defined($ll) ) {
20549                 my $dl = abs( $length - $ll );
20550                 $total_variation_2[$is_odd] += $dl;
20551             }
20552             else {
20553                 $first_length_2[$is_odd] = $length;
20554             }
20555             $last_length_2[$is_odd] = $length;
20556         }
20557         $total_variation_2 = $total_variation_2[0] + $total_variation_2[1];
20558
20559         my $factor = ( $item_count > 10 ) ? 1 : ( $item_count > 5 ) ? 0.75 : 0;
20560         unless ( $total_variation_2 < $factor * $total_variation_1 ) {
20561             $number_of_fields_best = 1;
20562         }
20563     }
20564     return ($number_of_fields_best);
20565 }
20566
20567 sub table_columns_available {
20568     my $i_first_comma = shift;
20569     my $columns =
20570       $maximum_line_length_at_level[ $levels_to_go[$i_first_comma] ] -
20571       leading_spaces_to_go($i_first_comma);
20572
20573     # Patch: the vertical formatter does not line up lines whose lengths
20574     # exactly equal the available line length because of allowances
20575     # that must be made for side comments.  Therefore, the number of
20576     # available columns is reduced by 1 character.
20577     $columns -= 1;
20578     return $columns;
20579 }
20580
20581 sub maximum_number_of_fields {
20582
20583     # how many fields will fit in the available space?
20584     my ( $columns, $odd_or_even, $max_width, $pair_width ) = @_;
20585     my $max_pairs        = int( $columns / $pair_width );
20586     my $number_of_fields = $max_pairs * 2;
20587     if (   $odd_or_even == 1
20588         && $max_pairs * $pair_width + $max_width <= $columns )
20589     {
20590         $number_of_fields++;
20591     }
20592     return $number_of_fields;
20593 }
20594
20595 sub compactify_table {
20596
20597     # given a table with a certain number of fields and a certain number
20598     # of lines, see if reducing the number of fields will make it look
20599     # better.
20600     my ( $item_count, $number_of_fields, $formatted_lines, $odd_or_even ) = @_;
20601     if ( $number_of_fields >= $odd_or_even * 2 && $formatted_lines > 0 ) {
20602         my $min_fields;
20603
20604         for (
20605             $min_fields = $number_of_fields ;
20606             $min_fields >= $odd_or_even
20607             && $min_fields * $formatted_lines >= $item_count ;
20608             $min_fields -= $odd_or_even
20609           )
20610         {
20611             $number_of_fields = $min_fields;
20612         }
20613     }
20614     return $number_of_fields;
20615 }
20616
20617 sub set_ragged_breakpoints {
20618
20619     # Set breakpoints in a list that cannot be formatted nicely as a
20620     # table.
20621     my ( $self, $ri_term_comma, $ri_ragged_break_list ) = @_;
20622
20623     my $break_count = 0;
20624     foreach ( @{$ri_ragged_break_list} ) {
20625         my $j = $ri_term_comma->[$_];
20626         if ($j) {
20627             $self->set_forced_breakpoint($j);
20628             $break_count++;
20629         }
20630     }
20631     return $break_count;
20632 }
20633
20634 sub copy_old_breakpoints {
20635     my ( $self, $i_first_comma, $i_last_comma ) = @_;
20636     for my $i ( $i_first_comma .. $i_last_comma ) {
20637         if ( $old_breakpoint_to_go[$i] ) {
20638             $self->set_forced_breakpoint($i);
20639         }
20640     }
20641     return;
20642 }
20643
20644 sub set_nobreaks {
20645     my ( $self, $i, $j ) = @_;
20646     if ( $i >= 0 && $i <= $j && $j <= $max_index_to_go ) {
20647
20648         0 && do {
20649             my ( $a, $b, $c ) = caller();
20650             my $forced_breakpoint_count = get_forced_breakpoint_count();
20651             print STDOUT
20652 "NOBREAK: forced_breakpoint $forced_breakpoint_count from $a $c with i=$i max=$max_index_to_go type=$types_to_go[$i]\n";
20653         };
20654
20655         @nobreak_to_go[ $i .. $j ] = (1) x ( $j - $i + 1 );
20656     }
20657
20658     # shouldn't happen; non-critical error
20659     else {
20660         0 && do {
20661             my ( $a, $b, $c ) = caller();
20662             print STDOUT
20663               "NOBREAK ERROR: from $a $c with i=$i j=$j max=$max_index_to_go\n";
20664         };
20665     }
20666     return;
20667 }
20668
20669 ###############################################
20670 # CODE SECTION 12: Code for setting indentation
20671 ###############################################
20672
20673 sub token_sequence_length {
20674
20675     # return length of tokens ($ibeg .. $iend) including $ibeg & $iend
20676     # returns 0 if $ibeg > $iend (shouldn't happen)
20677     my ( $ibeg, $iend ) = @_;
20678     return 0 if ( !defined($iend) || $iend < 0 || $ibeg > $iend );
20679     return $summed_lengths_to_go[ $iend + 1 ] if ( $ibeg < 0 );
20680     return $summed_lengths_to_go[ $iend + 1 ] - $summed_lengths_to_go[$ibeg];
20681 }
20682
20683 sub total_line_length {
20684
20685     # return length of a line of tokens ($ibeg .. $iend)
20686     my ( $ibeg, $iend ) = @_;
20687
20688     # original coding:
20689     #return leading_spaces_to_go($ibeg) + token_sequence_length( $ibeg, $iend );
20690
20691     # this is basically sub 'leading_spaces_to_go':
20692     my $indentation = $leading_spaces_to_go[$ibeg];
20693     if ( ref($indentation) ) { $indentation = $indentation->get_spaces() }
20694
20695     return $indentation + $summed_lengths_to_go[ $iend + 1 ] -
20696       $summed_lengths_to_go[$ibeg];
20697 }
20698
20699 sub excess_line_length {
20700
20701     # return number of characters by which a line of tokens ($ibeg..$iend)
20702     # exceeds the allowable line length.
20703
20704     # NOTE: Profiling shows that this is a critical routine for efficiency.
20705     # Therefore I have eliminated additional calls to subs from it.
20706     my ( $self, $ibeg, $iend, $ignore_right_weld ) = @_;
20707
20708     # Original expression for line length
20709     ##$length = leading_spaces_to_go($ibeg) + token_sequence_length( $ibeg, $iend );
20710
20711     # This is basically sub 'leading_spaces_to_go':
20712     my $indentation = $leading_spaces_to_go[$ibeg];
20713     if ( ref($indentation) ) { $indentation = $indentation->get_spaces() }
20714
20715     my $length =
20716       $indentation +
20717       $summed_lengths_to_go[ $iend + 1 ] -
20718       $summed_lengths_to_go[$ibeg];
20719
20720     # Include right weld lengths unless requested not to.
20721     if (   $total_weld_count
20722         && !$ignore_right_weld
20723         && $type_sequence_to_go[$iend] )
20724     {
20725         my $wr = $self->[_rweld_len_right_at_K_]->{ $K_to_go[$iend] };
20726         $length += $wr if defined($wr);
20727     }
20728
20729     # return the excess
20730     return $length - $maximum_line_length_at_level[ $levels_to_go[$ibeg] ];
20731 }
20732
20733 sub get_spaces {
20734
20735     # return the number of leading spaces associated with an indentation
20736     # variable $indentation is either a constant number of spaces or an object
20737     # with a get_spaces method.
20738     my $indentation = shift;
20739     return ref($indentation) ? $indentation->get_spaces() : $indentation;
20740 }
20741
20742 sub get_recoverable_spaces {
20743
20744     # return the number of spaces (+ means shift right, - means shift left)
20745     # that we would like to shift a group of lines with the same indentation
20746     # to get them to line up with their opening parens
20747     my $indentation = shift;
20748     return ref($indentation) ? $indentation->get_recoverable_spaces() : 0;
20749 }
20750
20751 sub get_available_spaces_to_go {
20752
20753     my ( $self, $ii ) = @_;
20754     my $item = $leading_spaces_to_go[$ii];
20755
20756     # return the number of available leading spaces associated with an
20757     # indentation variable.  $indentation is either a constant number of
20758     # spaces or an object with a get_available_spaces method.
20759     return ref($item) ? $item->get_available_spaces() : 0;
20760 }
20761
20762 {    ## begin closure set_lp_indentation
20763
20764     use constant DEBUG_LP => 0;
20765
20766     # Stack of -lp index objects which survives between batches.
20767     my $rLP;
20768     my $max_lp_stack;
20769
20770     # The predicted position of the next opening container which may start
20771     # an -lp indentation level.  This survives between batches.
20772     my $lp_position_predictor;
20773
20774     # A level at which the lp format becomes too highly stressed to continue
20775     my $lp_cutoff_level;
20776
20777     BEGIN {
20778
20779         # Index names for the -lp stack variables.
20780         # Do not combine with other BEGIN blocks (c101).
20781
20782         my $i = 0;
20783         use constant {
20784             _lp_ci_level_        => $i++,
20785             _lp_level_           => $i++,
20786             _lp_object_          => $i++,
20787             _lp_container_seqno_ => $i++,
20788             _lp_space_count_     => $i++,
20789         };
20790     }
20791
20792     sub initialize_lp_vars {
20793
20794         # initialize gnu variables for a new file;
20795         # must be called once at the start of a new file.
20796
20797         $lp_position_predictor = 0;
20798         $max_lp_stack          = 0;
20799         $lp_cutoff_level = min( $stress_level_alpha, $stress_level_beta + 2 );
20800
20801         # we can turn off -lp if all levels will be at or above the cutoff
20802         if ( $lp_cutoff_level <= 1 ) {
20803             $rOpts_line_up_parentheses          = 0;
20804             $rOpts_extended_line_up_parentheses = 0;
20805         }
20806
20807         $rLP = [];
20808
20809         # initialize the leading whitespace stack to negative levels
20810         # so that we can never run off the end of the stack
20811         $rLP->[$max_lp_stack]->[_lp_ci_level_]        = -1;
20812         $rLP->[$max_lp_stack]->[_lp_level_]           = -1;
20813         $rLP->[$max_lp_stack]->[_lp_object_]          = undef;
20814         $rLP->[$max_lp_stack]->[_lp_container_seqno_] = SEQ_ROOT;
20815         $rLP->[$max_lp_stack]->[_lp_space_count_]     = 0;
20816
20817         return;
20818     }
20819
20820     # hashes for efficient testing
20821     my %hash_test1;
20822     my %hash_test2;
20823     my %hash_test3;
20824
20825     BEGIN {
20826         my @q = qw< } ) ] >;
20827         @hash_test1{@q} = (1) x scalar(@q);
20828         @q = qw(: ? f);
20829         push @q, ',';
20830         @hash_test2{@q} = (1) x scalar(@q);
20831         @q              = qw( . || && );
20832         @hash_test3{@q} = (1) x scalar(@q);
20833     }
20834
20835     sub set_lp_indentation {
20836
20837         #------------------------------------------------------------------
20838         # Define the leading whitespace for all tokens in the current batch
20839         # when the -lp formatting is selected.
20840         #------------------------------------------------------------------
20841
20842         my ($self) = @_;
20843
20844         return unless ($rOpts_line_up_parentheses);
20845         return unless ( defined($max_index_to_go) && $max_index_to_go >= 0 );
20846
20847         # List of -lp indentation objects created in this batch
20848         my $rlp_object_list    = [];
20849         my $max_lp_object_list = UNDEFINED_INDEX;
20850
20851         my %last_lp_equals;
20852         my %lp_comma_count;
20853         my %lp_arrow_count;
20854         my $ii_begin_line = 0;
20855
20856         my $rLL                       = $self->[_rLL_];
20857         my $Klimit                    = $self->[_Klimit_];
20858         my $rbreak_container          = $self->[_rbreak_container_];
20859         my $rshort_nested             = $self->[_rshort_nested_];
20860         my $ris_excluded_lp_container = $self->[_ris_excluded_lp_container_];
20861         my $rblock_type_of_seqno      = $self->[_rblock_type_of_seqno_];
20862         my $starting_in_quote   = $self->[_this_batch_]->[_starting_in_quote_];
20863         my $K_opening_container = $self->[_K_opening_container_];    ##TESTING
20864         my $K_closing_container = $self->[_K_closing_container_];
20865         my $rlp_object_by_seqno = $self->[_rlp_object_by_seqno_];
20866         my $radjusted_levels    = $self->[_radjusted_levels_];
20867         my $rbreak_before_container_by_seqno =
20868           $self->[_rbreak_before_container_by_seqno_];
20869         my $rcollapsed_length_by_seqno = $self->[_rcollapsed_length_by_seqno_];
20870
20871         my $nws  = @{$radjusted_levels};
20872         my $imin = 0;
20873
20874         # The 'starting_in_quote' flag means that the first token is the first
20875         # token of a line and it is also the continuation of some kind of
20876         # multi-line quote or pattern.  It must have no added leading
20877         # whitespace, so we can skip it.
20878         if ($starting_in_quote) {
20879             $imin += 1;
20880         }
20881
20882         my $K_last_nonblank;
20883         my $Kpnb = $K_to_go[0] - 1;
20884         if ( $Kpnb > 0 && $rLL->[$Kpnb]->[_TYPE_] eq 'b' ) {
20885             $Kpnb -= 1;
20886         }
20887         if ( $Kpnb >= 0 && $rLL->[$Kpnb]->[_TYPE_] ne 'b' ) {
20888             $K_last_nonblank = $Kpnb;
20889         }
20890
20891         my $last_nonblank_token     = '';
20892         my $last_nonblank_type      = '';
20893         my $last_last_nonblank_type = '';
20894
20895         if ( defined($K_last_nonblank) ) {
20896             $last_nonblank_token = $rLL->[$K_last_nonblank]->[_TOKEN_];
20897             $last_nonblank_type  = $rLL->[$K_last_nonblank]->[_TYPE_];
20898         }
20899
20900         my ( $space_count, $current_level, $current_ci_level, $in_lp_mode );
20901         my $stack_changed = 1;
20902
20903         #-----------------------------------
20904         # Loop over all tokens in this batch
20905         #-----------------------------------
20906         foreach my $ii ( $imin .. $max_index_to_go ) {
20907
20908             my $KK          = $K_to_go[$ii];
20909             my $type        = $types_to_go[$ii];
20910             my $token       = $tokens_to_go[$ii];
20911             my $level       = $levels_to_go[$ii];
20912             my $ci_level    = $ci_levels_to_go[$ii];
20913             my $total_depth = $nesting_depth_to_go[$ii];
20914
20915             #--------------------------------------------------
20916             # Adjust levels if necessary to recycle whitespace:
20917             #--------------------------------------------------
20918             if ( defined($radjusted_levels) && @{$radjusted_levels} == $Klimit )
20919             {
20920                 $level = $radjusted_levels->[$KK];
20921                 if ( $level < 0 ) { $level = 0 }  # note: this should not happen
20922             }
20923
20924             # get the top state from the stack if it has changed
20925             if ($stack_changed) {
20926                 my $rLP_top   = $rLP->[$max_lp_stack];
20927                 my $lp_object = $rLP_top->[_lp_object_];
20928                 if ($lp_object) {
20929                     ( $space_count, $current_level, $current_ci_level ) =
20930                       @{ $lp_object->get_spaces_level_ci() };
20931                 }
20932                 else {
20933                     $current_ci_level = $rLP_top->[_lp_ci_level_];
20934                     $current_level    = $rLP_top->[_lp_level_];
20935                     $space_count      = $rLP_top->[_lp_space_count_];
20936                 }
20937                 $stack_changed = 0;
20938             }
20939
20940             #------------------------------
20941             # update the position predictor
20942             #------------------------------
20943             if ( $type eq '{' || $type eq '(' ) {
20944
20945                 $lp_comma_count{ $total_depth + 1 } = 0;
20946                 $lp_arrow_count{ $total_depth + 1 } = 0;
20947
20948                 # If we come to an opening token after an '=' token of some
20949                 # type, see if it would be helpful to 'break' after the '=' to
20950                 # save space
20951                 my $last_equals = $last_lp_equals{$total_depth};
20952                 if ( $last_equals && $last_equals > $ii_begin_line ) {
20953
20954                     my $seqno = $type_sequence_to_go[$ii];
20955
20956                     # find the position if we break at the '='
20957                     my $i_test = $last_equals;
20958
20959                     # Fix for issue b1229, check for break before
20960                     if ( $want_break_before{ $types_to_go[$i_test] } ) {
20961                         if ( $i_test > 0 ) { $i_test-- }
20962                     }
20963                     elsif ( $types_to_go[ $i_test + 1 ] eq 'b' ) { $i_test++ }
20964
20965                     # TESTING
20966                     ##my $too_close = ($i_test==$ii-1);
20967
20968                     my $test_position = total_line_length( $i_test, $ii );
20969                     my $mll =
20970                       $maximum_line_length_at_level[ $levels_to_go[$i_test] ];
20971
20972                     #------------------------------------------------------
20973                     # Break if structure will reach the maximum line length
20974                     #------------------------------------------------------
20975
20976                     # Historically, -lp just used one-half line length here
20977                     my $len_increase = $rOpts_maximum_line_length / 2;
20978
20979                     # For -xlp, we can also use the pre-computed lengths
20980                     my $min_len = $rcollapsed_length_by_seqno->{$seqno};
20981                     if ( $min_len && $min_len > $len_increase ) {
20982                         $len_increase = $min_len;
20983                     }
20984
20985                     if (
20986
20987                         # the equals is not just before an open paren (testing)
20988                         ##!$too_close &&
20989
20990                         # if we might exceed the maximum line length
20991                         $lp_position_predictor + $len_increase > $mll
20992
20993                         # if a -bbx flag WANTS a break before this opening token
20994                         || (   $seqno
20995                             && $rbreak_before_container_by_seqno->{$seqno} )
20996
20997                         # or we are beyond the 1/4 point and there was an old
20998                         # break at an assignment (not '=>') [fix for b1035]
20999                         || (
21000                             $lp_position_predictor >
21001                             $mll - $rOpts_maximum_line_length * 3 / 4
21002                             && $types_to_go[$last_equals] ne '=>'
21003                             && (
21004                                 $old_breakpoint_to_go[$last_equals]
21005                                 || (   $last_equals > 0
21006                                     && $old_breakpoint_to_go[ $last_equals - 1 ]
21007                                 )
21008                                 || (   $last_equals > 1
21009                                     && $types_to_go[ $last_equals - 1 ] eq 'b'
21010                                     && $old_breakpoint_to_go[ $last_equals - 2 ]
21011                                 )
21012                             )
21013                         )
21014                       )
21015                     {
21016
21017                         # then make the switch -- note that we do not set a
21018                         # real breakpoint here because we may not really need
21019                         # one; sub break_lists will do that if necessary.
21020
21021                         my $Kc = $K_closing_container->{$seqno};
21022                         if (
21023
21024                             # For -lp, only if the closing token is in this
21025                             # batch (c117).  Otherwise it cannot be done by sub
21026                             # break_lists.
21027                             defined($Kc) && $Kc <= $K_to_go[$max_index_to_go]
21028
21029                             # For -xlp, we only need one nonblank token after
21030                             # the opening token.
21031                             || $rOpts_extended_line_up_parentheses
21032                           )
21033                         {
21034                             $ii_begin_line         = $i_test + 1;
21035                             $lp_position_predictor = $test_position;
21036
21037                             #--------------------------------------------------
21038                             # Fix for an opening container terminating a batch:
21039                             #--------------------------------------------------
21040                             # To get alignment of a -lp container with its
21041                             # contents, we have to put a break after $i_test.
21042                             # For $ii<$max_index_to_go, this will be done by
21043                             # sub break_lists based on the indentation object.
21044                             # But for $ii=$max_index_to_go, the indentation
21045                             # object for this seqno will not be created until
21046                             # the next batch, so we have to set a break at
21047                             # $i_test right now in order to get one.
21048                             if (   $ii == $max_index_to_go
21049                                 && !$block_type_to_go[$ii]
21050                                 && $type eq '{'
21051                                 && $seqno
21052                                 && !$ris_excluded_lp_container->{$seqno} )
21053                             {
21054                                 $self->set_forced_lp_break( $ii_begin_line,
21055                                     $ii );
21056                             }
21057                         }
21058                     }
21059                 }
21060             } ## end update position predictor
21061
21062             #------------------------
21063             # Handle decreasing depth
21064             #------------------------
21065             # Note that one token may have both decreasing and then increasing
21066             # depth. For example, (level, ci) can go from (1,1) to (2,0).  So,
21067             # in this example we would first go back to (1,0) then up to (2,0)
21068             # in a single call.
21069             if ( $level < $current_level || $ci_level < $current_ci_level ) {
21070
21071                 # loop to find the first entry at or completely below this level
21072                 my ( $lev, $ci_lev );
21073                 while (1) {
21074                     if ($max_lp_stack) {
21075
21076                         # save index of token which closes this level
21077                         if ( $rLP->[$max_lp_stack]->[_lp_object_] ) {
21078                             my $lp_object =
21079                               $rLP->[$max_lp_stack]->[_lp_object_];
21080
21081                             $lp_object->set_closed($ii);
21082
21083                             my $comma_count = 0;
21084                             my $arrow_count = 0;
21085                             if ( $type eq '}' || $type eq ')' ) {
21086                                 $comma_count = $lp_comma_count{$total_depth};
21087                                 $arrow_count = $lp_arrow_count{$total_depth};
21088                                 $comma_count = 0 unless $comma_count;
21089                                 $arrow_count = 0 unless $arrow_count;
21090                             }
21091
21092                             $lp_object->set_comma_count($comma_count);
21093                             $lp_object->set_arrow_count($arrow_count);
21094
21095                             # Undo any extra indentation if we saw no commas
21096                             my $available_spaces =
21097                               $lp_object->get_available_spaces();
21098                             my $K_start = $lp_object->get_K_begin_line();
21099
21100                             if (   $available_spaces > 0
21101                                 && $K_start >= $K_to_go[0]
21102                                 && ( $comma_count <= 0 || $arrow_count > 0 ) )
21103                             {
21104
21105                                 my $i = $lp_object->get_lp_item_index();
21106
21107                                 # Safety check for a valid stack index. It
21108                                 # should be ok because we just checked that the
21109                                 # index K of the token associated with this
21110                                 # indentation is in this batch.
21111                                 if ( $i < 0 || $i > $max_lp_object_list ) {
21112                                     if (DEVEL_MODE) {
21113                                         my $lno = $rLL->[$KK]->[_LINE_INDEX_];
21114                                         Fault(<<EOM);
21115 Program bug with -lp near line $lno.  Stack index i=$i should be >=0 and <= max=$max_lp_object_list
21116 EOM
21117                                     }
21118                                 }
21119                                 else {
21120                                     if ( $arrow_count == 0 ) {
21121                                         $rlp_object_list->[$i]
21122                                           ->permanently_decrease_available_spaces
21123                                           ($available_spaces);
21124                                     }
21125                                     else {
21126                                         $rlp_object_list->[$i]
21127                                           ->tentatively_decrease_available_spaces
21128                                           ($available_spaces);
21129                                     }
21130                                     foreach
21131                                       my $j ( $i + 1 .. $max_lp_object_list )
21132                                     {
21133                                         $rlp_object_list->[$j]
21134                                           ->decrease_SPACES($available_spaces);
21135                                     }
21136                                 }
21137                             }
21138                         }
21139
21140                         # go down one level
21141                         --$max_lp_stack;
21142
21143                         my $rLP_top = $rLP->[$max_lp_stack];
21144                         my $ci_lev  = $rLP_top->[_lp_ci_level_];
21145                         my $lev     = $rLP_top->[_lp_level_];
21146                         my $spaces  = $rLP_top->[_lp_space_count_];
21147                         if ( $rLP_top->[_lp_object_] ) {
21148                             my $lp_obj = $rLP_top->[_lp_object_];
21149                             ( $spaces, $lev, $ci_lev ) =
21150                               @{ $lp_obj->get_spaces_level_ci() };
21151                         }
21152
21153                         # stop when we reach a level at or below the current
21154                         # level
21155                         if ( $lev <= $level && $ci_lev <= $ci_level ) {
21156                             $space_count      = $spaces;
21157                             $current_level    = $lev;
21158                             $current_ci_level = $ci_lev;
21159                             last;
21160                         }
21161                     }
21162
21163                     # reached bottom of stack .. should never happen because
21164                     # only negative levels can get here, and $level was forced
21165                     # to be positive above.
21166                     else {
21167
21168                         # non-fatal, keep going except in DEVEL_MODE
21169                         if (DEVEL_MODE) {
21170                             Fault(<<EOM);
21171 program bug with -lp: stack_error. level=$level; lev=$lev; ci_level=$ci_level; ci_lev=$ci_lev; rerun with -nlp
21172 EOM
21173                         }
21174                         last;
21175                     }
21176                 }
21177             } ## end decreasing depth
21178
21179             #------------------------
21180             # handle increasing depth
21181             #------------------------
21182             if ( $level > $current_level || $ci_level > $current_ci_level ) {
21183
21184                 $stack_changed = 1;
21185
21186                 # Compute the standard incremental whitespace.  This will be
21187                 # the minimum incremental whitespace that will be used.  This
21188                 # choice results in a smooth transition between the gnu-style
21189                 # and the standard style.
21190                 my $standard_increment =
21191                   ( $level - $current_level ) *
21192                   $rOpts_indent_columns +
21193                   ( $ci_level - $current_ci_level ) *
21194                   $rOpts_continuation_indentation;
21195
21196                 # Now we have to define how much extra incremental space
21197                 # ("$available_space") we want.  This extra space will be
21198                 # reduced as necessary when long lines are encountered or when
21199                 # it becomes clear that we do not have a good list.
21200                 my $available_spaces = 0;
21201                 my $align_seqno      = 0;
21202                 my $excess           = 0;
21203
21204                 my $last_nonblank_seqno;
21205                 my $last_nonblank_block_type;
21206                 if ( defined($K_last_nonblank) ) {
21207                     $last_nonblank_seqno =
21208                       $rLL->[$K_last_nonblank]->[_TYPE_SEQUENCE_];
21209                     $last_nonblank_block_type =
21210                         $last_nonblank_seqno
21211                       ? $rblock_type_of_seqno->{$last_nonblank_seqno}
21212                       : undef;
21213                 }
21214
21215                 $in_lp_mode = $rLP->[$max_lp_stack]->[_lp_object_];
21216
21217                 #-----------------------------------------------
21218                 # Initialize indentation spaces on empty stack..
21219                 #-----------------------------------------------
21220                 if ( $max_lp_stack == 0 ) {
21221                     $space_count = $level * $rOpts_indent_columns;
21222                 }
21223
21224                 #----------------------------------------
21225                 # Add the standard space increment if ...
21226                 #----------------------------------------
21227                 elsif (
21228
21229                     # if this is a BLOCK, add the standard increment
21230                     $last_nonblank_block_type
21231
21232                     # or if this is not a sequenced item
21233                     || !$last_nonblank_seqno
21234
21235                     # or this continer is excluded by user rules
21236                     # or contains here-docs or multiline qw text
21237                     || defined($last_nonblank_seqno)
21238                     && $ris_excluded_lp_container->{$last_nonblank_seqno}
21239
21240                     # or if last nonblank token was not structural indentation
21241                     || $last_nonblank_type ne '{'
21242
21243                     # and do not start -lp under stress .. fixes b1244, b1255
21244                     || !$in_lp_mode && $level >= $lp_cutoff_level
21245
21246                   )
21247                 {
21248
21249                     # If we have entered lp mode, use the top lp object to get
21250                     # the current indentation spaces because it may have
21251                     # changed.  Fixes b1285, b1286.
21252                     if ($in_lp_mode) {
21253                         $space_count = $in_lp_mode->get_spaces();
21254                     }
21255                     $space_count += $standard_increment;
21256                 }
21257
21258                 #---------------------------------------------------------------
21259                 # -lp mode: try to use space to the first non-blank level change
21260                 #---------------------------------------------------------------
21261                 else {
21262
21263                     # see how much space we have available
21264                     my $test_space_count = $lp_position_predictor;
21265                     my $excess           = 0;
21266                     my $min_len =
21267                       $rcollapsed_length_by_seqno->{$last_nonblank_seqno};
21268                     my $next_opening_too_far;
21269
21270                     if ( defined($min_len) ) {
21271                         $excess =
21272                           $test_space_count +
21273                           $min_len -
21274                           $maximum_line_length_at_level[$level];
21275                         if ( $excess > 0 ) {
21276                             $test_space_count -= $excess;
21277
21278                             # will the next opening token be a long way out?
21279                             $next_opening_too_far =
21280                               $lp_position_predictor + $excess >
21281                               $maximum_line_length_at_level[$level];
21282                         }
21283                     }
21284
21285                     my $rLP_top             = $rLP->[$max_lp_stack];
21286                     my $min_gnu_indentation = $rLP_top->[_lp_space_count_];
21287                     if ( $rLP_top->[_lp_object_] ) {
21288                         $min_gnu_indentation =
21289                           $rLP_top->[_lp_object_]->get_spaces();
21290                     }
21291                     $available_spaces =
21292                       $test_space_count - $min_gnu_indentation;
21293
21294                     # Do not startup -lp indentation mode if no space ...
21295                     # ... or if it puts the opening far to the right
21296                     if ( !$in_lp_mode
21297                         && ( $available_spaces <= 0 || $next_opening_too_far ) )
21298                     {
21299                         $space_count += $standard_increment;
21300                         $available_spaces = 0;
21301                     }
21302
21303                     # Use -lp mode
21304                     else {
21305                         $space_count = $test_space_count;
21306
21307                         $in_lp_mode = 1;
21308                         if ( $available_spaces >= $standard_increment ) {
21309                             $min_gnu_indentation += $standard_increment;
21310                         }
21311                         elsif ( $available_spaces > 1 ) {
21312                             $min_gnu_indentation += $available_spaces + 1;
21313                         }
21314                         elsif ( $last_nonblank_token =~ /^[\{\[\(]$/ ) {
21315                             if ( ( $tightness{$last_nonblank_token} < 2 ) ) {
21316                                 $min_gnu_indentation += 2;
21317                             }
21318                             else {
21319                                 $min_gnu_indentation += 1;
21320                             }
21321                         }
21322                         else {
21323                             $min_gnu_indentation += $standard_increment;
21324                         }
21325                         $available_spaces = $space_count - $min_gnu_indentation;
21326
21327                         if ( $available_spaces < 0 ) {
21328                             $space_count      = $min_gnu_indentation;
21329                             $available_spaces = 0;
21330                         }
21331                         $align_seqno = $last_nonblank_seqno;
21332                     }
21333                 }
21334
21335                 #-------------------------------------------
21336                 # update the state, but not on a blank token
21337                 #-------------------------------------------
21338                 if ( $type ne 'b' ) {
21339
21340                     if ( $rLP->[$max_lp_stack]->[_lp_object_] ) {
21341                         $rLP->[$max_lp_stack]->[_lp_object_]->set_have_child(1);
21342                         $in_lp_mode = 1;
21343                     }
21344
21345                     #----------------------------------------
21346                     # Create indentation object if in lp-mode
21347                     #----------------------------------------
21348                     ++$max_lp_stack;
21349                     my $lp_object;
21350                     if ($in_lp_mode) {
21351
21352                         # A negative level implies not to store the item in the
21353                         # item_list
21354                         my $lp_item_index = 0;
21355                         if ( $level >= 0 ) {
21356                             $lp_item_index = ++$max_lp_object_list;
21357                         }
21358
21359                         my $K_begin_line = 0;
21360                         if (   $ii_begin_line >= 0
21361                             && $ii_begin_line <= $max_index_to_go )
21362                         {
21363                             $K_begin_line = $K_to_go[$ii_begin_line];
21364                         }
21365
21366                         # Minor Fix: when creating indentation at a side
21367                         # comment we don't know what the space to the actual
21368                         # next code token will be.  We will allow a space for
21369                         # sub correct_lp to move it in if necessary.
21370                         if (   $type eq '#'
21371                             && $max_index_to_go > 0
21372                             && $align_seqno )
21373                         {
21374                             $available_spaces += 1;
21375                         }
21376
21377                         $lp_object = Perl::Tidy::IndentationItem->new(
21378                             spaces           => $space_count,
21379                             level            => $level,
21380                             ci_level         => $ci_level,
21381                             available_spaces => $available_spaces,
21382                             lp_item_index    => $lp_item_index,
21383                             align_seqno      => $align_seqno,
21384                             stack_depth      => $max_lp_stack,
21385                             K_begin_line     => $K_begin_line,
21386                         );
21387
21388                         DEBUG_LP && do {
21389                             my $tok_beg = $rLL->[$K_begin_line]->[_TOKEN_];
21390                             print STDERR <<EOM;
21391 DEBUG_LP: Created object at tok=$token type=$type for seqno $align_seqno level=$level ci=$ci_level spaces=$space_count avail=$available_spaces kbeg=$K_begin_line tokbeg=$tok_beg lp=$lp_position_predictor
21392 EOM
21393                         };
21394
21395                         if ( $level >= 0 ) {
21396                             $rlp_object_list->[$max_lp_object_list] =
21397                               $lp_object;
21398                         }
21399
21400                         if (   $last_nonblank_token =~ /^[\{\[\(]$/
21401                             && $last_nonblank_seqno )
21402                         {
21403                             $rlp_object_by_seqno->{$last_nonblank_seqno} =
21404                               $lp_object;
21405                         }
21406                     }
21407
21408                     #------------------------------------
21409                     # Store this indentation on the stack
21410                     #------------------------------------
21411                     $rLP->[$max_lp_stack]->[_lp_ci_level_] = $ci_level;
21412                     $rLP->[$max_lp_stack]->[_lp_level_]    = $level;
21413                     $rLP->[$max_lp_stack]->[_lp_object_]   = $lp_object;
21414                     $rLP->[$max_lp_stack]->[_lp_container_seqno_] =
21415                       $last_nonblank_seqno;
21416                     $rLP->[$max_lp_stack]->[_lp_space_count_] = $space_count;
21417
21418                     # If the opening paren is beyond the half-line length, then
21419                     # we will use the minimum (standard) indentation.  This will
21420                     # help avoid problems associated with running out of space
21421                     # near the end of a line.  As a result, in deeply nested
21422                     # lists, there will be some indentations which are limited
21423                     # to this minimum standard indentation. But the most deeply
21424                     # nested container will still probably be able to shift its
21425                     # parameters to the right for proper alignment, so in most
21426                     # cases this will not be noticeable.
21427                     if ( $available_spaces > 0 && $lp_object ) {
21428                         my $halfway =
21429                           $maximum_line_length_at_level[$level] -
21430                           $rOpts_maximum_line_length / 2;
21431                         $lp_object->tentatively_decrease_available_spaces(
21432                             $available_spaces)
21433                           if ( $space_count > $halfway );
21434                     }
21435                 }
21436             } ## end increasing depth
21437
21438             #------------------
21439             # Handle all tokens
21440             #------------------
21441             if ( $type ne 'b' ) {
21442
21443                 # Count commas and look for non-list characters.  Once we see a
21444                 # non-list character, we give up and don't look for any more
21445                 # commas.
21446                 if ( $type eq '=>' ) {
21447                     $lp_arrow_count{$total_depth}++;
21448
21449                     # remember '=>' like '=' for estimating breaks (but see
21450                     # above note for b1035)
21451                     $last_lp_equals{$total_depth} = $ii;
21452                 }
21453
21454                 elsif ( $type eq ',' ) {
21455                     $lp_comma_count{$total_depth}++;
21456                 }
21457
21458                 elsif ( $is_assignment{$type} ) {
21459                     $last_lp_equals{$total_depth} = $ii;
21460                 }
21461
21462                 # this token might start a new line if ..
21463                 if (
21464
21465                     # this is the first nonblank token of the line
21466                     $ii == 1 && $types_to_go[0] eq 'b'
21467
21468                     # or previous character was one of these:
21469                     #  /^([\:\?\,f])$/
21470                     || $hash_test2{$last_nonblank_type}
21471
21472                     # or previous character was opening and this is not closing
21473                     || ( $last_nonblank_type eq '{' && $type ne '}' )
21474                     || ( $last_nonblank_type eq '(' and $type ne ')' )
21475
21476                     # or this token is one of these:
21477                     #  /^([\.]|\|\||\&\&)$/
21478                     || $hash_test3{$type}
21479
21480                     # or this is a closing structure
21481                     || (   $last_nonblank_type eq '}'
21482                         && $last_nonblank_token eq $last_nonblank_type )
21483
21484                     # or previous token was keyword 'return'
21485                     || (
21486                         $last_nonblank_type eq 'k'
21487                         && (   $last_nonblank_token eq 'return'
21488                             && $type ne '{' )
21489                     )
21490
21491                     # or starting a new line at certain keywords is fine
21492                     || (   $type eq 'k'
21493                         && $is_if_unless_and_or_last_next_redo_return{$token} )
21494
21495                     # or this is after an assignment after a closing structure
21496                     || (
21497                         $is_assignment{$last_nonblank_type}
21498                         && (
21499                             # /^[\}\)\]]$/
21500                             $hash_test1{$last_last_nonblank_type}
21501
21502                             # and it is significantly to the right
21503                             || $lp_position_predictor > (
21504                                 $maximum_line_length_at_level[$level] -
21505                                   $rOpts_maximum_line_length / 2
21506                             )
21507                         )
21508                     )
21509                   )
21510                 {
21511                     check_for_long_gnu_style_lines( $ii, $rlp_object_list );
21512                     $ii_begin_line = $ii;
21513
21514                     # back up 1 token if we want to break before that type
21515                     # otherwise, we may strand tokens like '?' or ':' on a line
21516                     if ( $ii_begin_line > 0 ) {
21517                         if ( $last_nonblank_type eq 'k' ) {
21518
21519                             if ( $want_break_before{$last_nonblank_token} ) {
21520                                 $ii_begin_line--;
21521                             }
21522                         }
21523                         elsif ( $want_break_before{$last_nonblank_type} ) {
21524                             $ii_begin_line--;
21525                         }
21526                     }
21527                 } ## end if ( $ii == 1 && $types_to_go...)
21528
21529                 $K_last_nonblank = $KK;
21530
21531                 $last_last_nonblank_type = $last_nonblank_type;
21532                 $last_nonblank_type      = $type;
21533                 $last_nonblank_token     = $token;
21534
21535             } ## end if ( $type ne 'b' )
21536
21537             # remember the predicted position of this token on the output line
21538             if ( $ii > $ii_begin_line ) {
21539
21540                 ## NOTE: this is a critical loop - the following call has been
21541                 ## expanded for about 2x speedup:
21542                 ## $lp_position_predictor =
21543                 ##    total_line_length( $ii_begin_line, $ii );
21544
21545                 my $indentation = $leading_spaces_to_go[$ii_begin_line];
21546                 if ( ref($indentation) ) {
21547                     $indentation = $indentation->get_spaces();
21548                 }
21549                 $lp_position_predictor =
21550                   $indentation +
21551                   $summed_lengths_to_go[ $ii + 1 ] -
21552                   $summed_lengths_to_go[$ii_begin_line];
21553             }
21554             else {
21555                 $lp_position_predictor =
21556                   $space_count + $token_lengths_to_go[$ii];
21557             }
21558
21559             # Store the indentation object for this token.
21560             # This allows us to manipulate the leading whitespace
21561             # (in case we have to reduce indentation to fit a line) without
21562             # having to change any token values.
21563
21564             #---------------------------------------------------------------
21565             # replace leading whitespace with indentation objects where used
21566             #---------------------------------------------------------------
21567             if ( $rLP->[$max_lp_stack]->[_lp_object_] ) {
21568                 my $lp_object = $rLP->[$max_lp_stack]->[_lp_object_];
21569                 $leading_spaces_to_go[$ii] = $lp_object;
21570                 if (   $max_lp_stack > 0
21571                     && $ci_level
21572                     && $rLP->[ $max_lp_stack - 1 ]->[_lp_object_] )
21573                 {
21574                     $reduced_spaces_to_go[$ii] =
21575                       $rLP->[ $max_lp_stack - 1 ]->[_lp_object_];
21576                 }
21577                 else {
21578                     $reduced_spaces_to_go[$ii] = $lp_object;
21579                 }
21580             }
21581         } ## end loop over all tokens in this batch
21582
21583         undo_incomplete_lp_indentation($rlp_object_list)
21584           if ( !$rOpts_extended_line_up_parentheses );
21585
21586         return;
21587     }
21588
21589     sub check_for_long_gnu_style_lines {
21590
21591         # look at the current estimated maximum line length, and
21592         # remove some whitespace if it exceeds the desired maximum
21593         my ( $mx_index_to_go, $rlp_object_list ) = @_;
21594
21595         my $max_lp_object_list = @{$rlp_object_list} - 1;
21596
21597         # nothing can be done if no stack items defined for this line
21598         return if ( $max_lp_object_list < 0 );
21599
21600         # see if we have exceeded the maximum desired line length
21601         # keep 2 extra free because they are needed in some cases
21602         # (result of trial-and-error testing)
21603         my $spaces_needed =
21604           $lp_position_predictor -
21605           $maximum_line_length_at_level[ $levels_to_go[$mx_index_to_go] ] + 2;
21606
21607         return if ( $spaces_needed <= 0 );
21608
21609         # We are over the limit, so try to remove a requested number of
21610         # spaces from leading whitespace.  We are only allowed to remove
21611         # from whitespace items created on this batch, since others have
21612         # already been used and cannot be undone.
21613         my @candidates = ();
21614         my $i;
21615
21616         # loop over all whitespace items created for the current batch
21617         for ( $i = 0 ; $i <= $max_lp_object_list ; $i++ ) {
21618             my $item = $rlp_object_list->[$i];
21619
21620             # item must still be open to be a candidate (otherwise it
21621             # cannot influence the current token)
21622             next if ( $item->get_closed() >= 0 );
21623
21624             my $available_spaces = $item->get_available_spaces();
21625
21626             if ( $available_spaces > 0 ) {
21627                 push( @candidates, [ $i, $available_spaces ] );
21628             }
21629         }
21630
21631         return unless (@candidates);
21632
21633         # sort by available whitespace so that we can remove whitespace
21634         # from the maximum available first
21635         @candidates = sort { $b->[1] <=> $a->[1] } @candidates;
21636
21637         # keep removing whitespace until we are done or have no more
21638         foreach my $candidate (@candidates) {
21639             my ( $i, $available_spaces ) = @{$candidate};
21640             my $deleted_spaces =
21641               ( $available_spaces > $spaces_needed )
21642               ? $spaces_needed
21643               : $available_spaces;
21644
21645             # remove the incremental space from this item
21646             $rlp_object_list->[$i]->decrease_available_spaces($deleted_spaces);
21647
21648             my $i_debug = $i;
21649
21650             # update the leading whitespace of this item and all items
21651             # that came after it
21652             for ( ; $i <= $max_lp_object_list ; $i++ ) {
21653
21654                 my $old_spaces = $rlp_object_list->[$i]->get_spaces();
21655                 if ( $old_spaces >= $deleted_spaces ) {
21656                     $rlp_object_list->[$i]->decrease_SPACES($deleted_spaces);
21657                 }
21658
21659                 # shouldn't happen except for code bug:
21660                 else {
21661                     # non-fatal, keep going except in DEVEL_MODE
21662                     if (DEVEL_MODE) {
21663                         my $level = $rlp_object_list->[$i_debug]->get_level();
21664                         my $ci_level =
21665                           $rlp_object_list->[$i_debug]->get_ci_level();
21666                         my $old_level = $rlp_object_list->[$i]->get_level();
21667                         my $old_ci_level =
21668                           $rlp_object_list->[$i]->get_ci_level();
21669                         Fault(<<EOM);
21670 program bug with -lp: want to delete $deleted_spaces from item $i, but old=$old_spaces deleted: lev=$level ci=$ci_level  deleted: level=$old_level ci=$ci_level
21671 EOM
21672                     }
21673                 }
21674             }
21675             $lp_position_predictor -= $deleted_spaces;
21676             $spaces_needed         -= $deleted_spaces;
21677             last unless ( $spaces_needed > 0 );
21678         }
21679         return;
21680     }
21681
21682     sub undo_incomplete_lp_indentation {
21683
21684         #------------------------------------------------------------------
21685         # Undo indentation for all incomplete -lp indentation levels of the
21686         # current batch unless -xlp is set.
21687         #------------------------------------------------------------------
21688
21689         # This routine is called once after each output stream batch is
21690         # finished to undo indentation for all incomplete -lp indentation
21691         # levels.  If this routine is called then comments and blank lines will
21692         # disrupt this indentation style.  In older versions of perltidy this
21693         # was always done because it could cause problems otherwise, but recent
21694         # improvements allow fairly good results to be obtained by skipping
21695         # this step with the -xlp flag.
21696         my ($rlp_object_list) = @_;
21697
21698         my $max_lp_object_list = @{$rlp_object_list} - 1;
21699
21700         # nothing to do if no stack items defined for this line
21701         return if ( $max_lp_object_list < 0 );
21702
21703         # loop over all whitespace items created for the current batch
21704         foreach my $i ( 0 .. $max_lp_object_list ) {
21705             my $item = $rlp_object_list->[$i];
21706
21707             # only look for open items
21708             next if ( $item->get_closed() >= 0 );
21709
21710             # Tentatively remove all of the available space
21711             # (The vertical aligner will try to get it back later)
21712             my $available_spaces = $item->get_available_spaces();
21713             if ( $available_spaces > 0 ) {
21714
21715                 # delete incremental space for this item
21716                 $rlp_object_list->[$i]
21717                   ->tentatively_decrease_available_spaces($available_spaces);
21718
21719                 # Reduce the total indentation space of any nodes that follow
21720                 # Note that any such nodes must necessarily be dependents
21721                 # of this node.
21722                 foreach ( $i + 1 .. $max_lp_object_list ) {
21723                     $rlp_object_list->[$_]->decrease_SPACES($available_spaces);
21724                 }
21725             }
21726         }
21727         return;
21728     }
21729 } ## end closure set_lp_indentation
21730
21731 #----------------------------------------------------------------------
21732 # sub to set a requested break before an opening container in -lp mode.
21733 #----------------------------------------------------------------------
21734 sub set_forced_lp_break {
21735
21736     my ( $self, $i_begin_line, $i_opening ) = @_;
21737
21738     # Given:
21739     #   $i_begin_line = index of break in the _to_go arrays
21740     #   $i_opening = index of the opening container
21741
21742     # Set any requested break at a token before this opening container
21743     # token. This is often an '=' or '=>' but can also be things like
21744     # '.', ',', 'return'.  It was defined by sub set_lp_indentation.
21745
21746     # Important:
21747     #   For intact containers, call this at the closing token.
21748     #   For broken containers, call this at the opening token.
21749     # This will avoid needless breaks when it turns out that the
21750     # container does not actually get broken.  This isn't known until
21751     # the closing container for intact blocks.
21752
21753     return
21754       if ( $i_begin_line < 0
21755         || $i_begin_line > $max_index_to_go );
21756
21757     # Handle request to put a break break immediately before this token.
21758     # We may not want to do that since we are also breaking after it.
21759     if ( $i_begin_line == $i_opening ) {
21760
21761         # The following rules should be reviewed.  We may want to always
21762         # allow the break.  If we do not do the break, the indentation
21763         # may be off.
21764
21765         # RULE: don't break before it unless it is welded to a qw.
21766         # This works well, but we may want to relax this to allow
21767         # breaks in additional cases.
21768         return
21769           if ( !$self->[_rK_weld_right_]->{ $K_to_go[$i_opening] } );
21770         return unless ( $types_to_go[$max_index_to_go] eq 'q' );
21771     }
21772
21773     # Only break for breakpoints at the same
21774     # indentation level as the opening paren
21775     my $test1 = $nesting_depth_to_go[$i_opening];
21776     my $test2 = $nesting_depth_to_go[$i_begin_line];
21777     return if ( $test2 != $test1 );
21778
21779     # Back up at a blank (fixes case b932)
21780     my $ibr = $i_begin_line - 1;
21781     if (   $ibr > 0
21782         && $types_to_go[$ibr] eq 'b' )
21783     {
21784         $ibr--;
21785     }
21786     if ( $ibr >= 0 ) {
21787         my $i_nonblank = $self->set_forced_breakpoint($ibr);
21788
21789         # Crude patch to prevent sub recombine_breakpoints from undoing
21790         # this break, especially after an '='.  It will leave old
21791         # breakpoints alone. See c098/x045 for some examples.
21792         if ( defined($i_nonblank) ) {
21793             $old_breakpoint_to_go[$i_nonblank] = 1;
21794         }
21795     }
21796     return;
21797 }
21798
21799 sub reduce_lp_indentation {
21800
21801     # reduce the leading whitespace at token $i if possible by $spaces_needed
21802     # (a large value of $spaces_needed will remove all excess space)
21803     # NOTE: to be called from break_lists only for a sequence of tokens
21804     # contained between opening and closing parens/braces/brackets
21805
21806     my ( $self, $i, $spaces_wanted ) = @_;
21807     my $deleted_spaces = 0;
21808
21809     my $item             = $leading_spaces_to_go[$i];
21810     my $available_spaces = $item->get_available_spaces();
21811
21812     if (
21813         $available_spaces > 0
21814         && ( ( $spaces_wanted <= $available_spaces )
21815             || !$item->get_have_child() )
21816       )
21817     {
21818
21819         # we'll remove these spaces, but mark them as recoverable
21820         $deleted_spaces =
21821           $item->tentatively_decrease_available_spaces($spaces_wanted);
21822     }
21823
21824     return $deleted_spaces;
21825 }
21826
21827 ###########################################################
21828 # CODE SECTION 13: Preparing batches for vertical alignment
21829 ###########################################################
21830
21831 sub check_convey_batch_input {
21832
21833     # Check for valid input to sub convey_batch_to_vertical_aligner.  An
21834     # error here would most likely be due to an error in the calling
21835     # routine 'sub grind_batch_of_CODE'.
21836     my ( $self, $ri_first, $ri_last ) = @_;
21837
21838     if ( !defined($ri_first) || !defined($ri_last) ) {
21839         Fault(<<EOM);
21840 Undefined line ranges ri_first and/r ri_last
21841 EOM
21842     }
21843
21844     my $nmax       = @{$ri_first} - 1;
21845     my $nmax_check = @{$ri_last} - 1;
21846     if ( $nmax < 0 || $nmax_check < 0 || $nmax != $nmax_check ) {
21847         Fault(<<EOM);
21848 Line range index error: nmax=$nmax but nmax_check=$nmax_check
21849 These should be equal and >=0
21850 EOM
21851     }
21852     my ( $ibeg, $iend );
21853     foreach my $n ( 0 .. $nmax ) {
21854         my $ibeg_m = $ibeg;
21855         my $iend_m = $iend;
21856         $ibeg = $ri_first->[$n];
21857         $iend = $ri_last->[$n];
21858         if ( $ibeg < 0 || $iend < $ibeg || $iend > $max_index_to_go ) {
21859             Fault(<<EOM);
21860 Bad line range at line index $n of $nmax: ibeg=$ibeg, iend=$iend
21861 These should have iend >= ibeg and be in the range (0..$max_index_to_go)
21862 EOM
21863         }
21864         next if ( $n == 0 );
21865         if ( $ibeg <= $iend_m ) {
21866             Fault(<<EOM);
21867 Line ranges overlap: iend=$iend_m at line $n-1 but ibeg=$ibeg for line $n
21868 EOM
21869         }
21870     }
21871     return;
21872 }
21873
21874 sub convey_batch_to_vertical_aligner {
21875
21876     my ($self) = @_;
21877
21878     # This routine receives a batch of code for which the final line breaks
21879     # have been defined. Here we prepare the lines for passing to the vertical
21880     # aligner.  We do the following tasks:
21881     # - mark certain vertical alignment tokens, such as '=', in each line
21882     # - make minor indentation adjustments
21883     # - do logical padding: insert extra blank spaces to help display certain
21884     #   logical constructions
21885
21886     my $this_batch = $self->[_this_batch_];
21887     my $ri_first   = $this_batch->[_ri_first_];
21888     my $ri_last    = $this_batch->[_ri_last_];
21889
21890     $self->check_convey_batch_input( $ri_first, $ri_last ) if (DEVEL_MODE);
21891
21892     my $n_last_line = @{$ri_first} - 1;
21893
21894     my $do_not_pad               = $this_batch->[_do_not_pad_];
21895     my $peak_batch_size          = $this_batch->[_peak_batch_size_];
21896     my $starting_in_quote        = $this_batch->[_starting_in_quote_];
21897     my $ending_in_quote          = $this_batch->[_ending_in_quote_];
21898     my $is_static_block_comment  = $this_batch->[_is_static_block_comment_];
21899     my $rix_seqno_controlling_ci = $this_batch->[_rix_seqno_controlling_ci_];
21900     my $batch_CODE_type          = $this_batch->[_batch_CODE_type_];
21901
21902     my $rLL                  = $self->[_rLL_];
21903     my $Klimit               = $self->[_Klimit_];
21904     my $rblock_type_of_seqno = $self->[_rblock_type_of_seqno_];
21905     my $ris_list_by_seqno    = $self->[_ris_list_by_seqno_];
21906
21907     my $ibeg_next = $ri_first->[0];
21908     my $iend_next = $ri_last->[0];
21909
21910     my $type_beg_next  = $types_to_go[$ibeg_next];
21911     my $type_end_next  = $types_to_go[$iend_next];
21912     my $token_beg_next = $tokens_to_go[$ibeg_next];
21913
21914     my $is_block_comment = $max_index_to_go == 0 && $types_to_go[0] eq '#';
21915
21916     my $rindentation_list = [0];    # ref to indentations for each line
21917     my ( $cscw_block_comment, $closing_side_comment );
21918     if ($rOpts_closing_side_comments) {
21919         ( $closing_side_comment, $cscw_block_comment ) =
21920           $self->add_closing_side_comment( $ri_first, $ri_last );
21921     }
21922
21923     # flush before a long if statement to avoid unwanted alignment
21924     if (   $n_last_line > 0
21925         && $type_beg_next eq 'k'
21926         && $token_beg_next =~ /^(if|unless)$/ )
21927     {
21928         $self->flush_vertical_aligner();
21929     }
21930
21931     $self->undo_ci( $ri_first, $ri_last, $rix_seqno_controlling_ci )
21932       if ( $n_last_line > 0 || $rOpts_extended_continuation_indentation );
21933
21934     $self->set_logical_padding( $ri_first, $ri_last, $peak_batch_size,
21935         $starting_in_quote )
21936       if ( $n_last_line > 0 && $rOpts_logical_padding );
21937
21938     if (DEVEL_MODE) { $self->check_batch_summed_lengths() }
21939
21940     # ----------------------------------------------------------
21941     # define the vertical alignments for all lines of this batch
21942     # ----------------------------------------------------------
21943     my $rline_alignments =
21944       $self->make_vertical_alignments( $ri_first, $ri_last );
21945
21946     # ----------------------------------------------
21947     # loop to send each line to the vertical aligner
21948     # ----------------------------------------------
21949     my ( $type_beg, $token_beg );
21950     my ($type_end);
21951     my ( $ibeg, $iend );
21952     for my $n ( 0 .. $n_last_line ) {
21953
21954         # ----------------------------------------------------------------
21955         # This hash will hold the args for vertical alignment of this line
21956         # We will populate it as we go.
21957         # ----------------------------------------------------------------
21958         my $rvao_args = {};
21959
21960         my $type_beg_last = $type_beg;
21961         my $type_end_last = $type_end;
21962
21963         my $ibeg = $ibeg_next;
21964         my $iend = $iend_next;
21965         my $Kbeg = $K_to_go[$ibeg];
21966         my $Kend = $K_to_go[$iend];
21967
21968         $type_beg  = $type_beg_next;
21969         $type_end  = $type_end_next;
21970         $token_beg = $token_beg_next;
21971
21972         # ---------------------------------------------------
21973         # Define the check value 'Kend' to send for this line
21974         # ---------------------------------------------------
21975         # The 'Kend' value is an integer for checking that lines come out of
21976         # the far end of the pipeline in the right order.  It increases
21977         # linearly along the token stream.  But we only send ending K values of
21978         # non-comments down the pipeline.  This is equivalent to checking that
21979         # the last CODE_type is blank or equal to 'VER'. See also sub
21980         # resync_lines_and_tokens for related coding.  Note that
21981         # '$batch_CODE_type' is the code type of the line to which the ending
21982         # token belongs.
21983         my $Kend_code =
21984           $batch_CODE_type && $batch_CODE_type ne 'VER' ? undef : $Kend;
21985
21986         #  $ljump is a level jump needed by 'sub final_indentation_adjustment'
21987         my $ljump = 0;
21988
21989         # Get some vars on line [n+1], if any:
21990         if ( $n < $n_last_line ) {
21991             $ibeg_next = $ri_first->[ $n + 1 ];
21992             $iend_next = $ri_last->[ $n + 1 ];
21993
21994             $type_beg_next  = $types_to_go[$ibeg_next];
21995             $type_end_next  = $types_to_go[$iend_next];
21996             $token_beg_next = $tokens_to_go[$ibeg_next];
21997
21998             my $Kbeg_next = $K_to_go[$ibeg_next];
21999             $ljump = $rLL->[$Kbeg_next]->[_LEVEL_] - $rLL->[$Kend]->[_LEVEL_];
22000         }
22001         elsif ( !$is_block_comment && $Kend < $Klimit ) {
22002
22003             # Patch for git #51, a bare closing qw paren was not outdented
22004             # if the flag '-nodelete-old-newlines is set
22005             # Note that we are just looking ahead for the next nonblank
22006             # character. We could scan past an arbitrary number of block
22007             # comments or hanging side comments by calling K_next_code, but it
22008             # could add significant run time with very little to be gained.
22009             my $Kbeg_next = $Kend + 1;
22010             if (   $Kbeg_next < $Klimit
22011                 && $rLL->[$Kbeg_next]->[_TYPE_] eq 'b' )
22012             {
22013                 $Kbeg_next += 1;
22014             }
22015             $ljump =
22016               $rLL->[$Kbeg_next]->[_LEVEL_] - $rLL->[$Kend]->[_LEVEL_];
22017         }
22018
22019         # ---------------------------------------------
22020         # get the vertical alignment info for this line
22021         # ---------------------------------------------
22022
22023         # The lines are broken into fields which can be spaced by the vertical
22024         # to achieve vertical alignment.  These fields are the actual text
22025         # which will be output, so from here on no more changes can be made to
22026         # the text.
22027         my $rline_alignment = $rline_alignments->[$n];
22028         my ( $rtokens, $rfields, $rpatterns, $rfield_lengths ) =
22029           @{$rline_alignment};
22030
22031         # Programming check: (shouldn't happen)
22032         # The number of tokens which separate the fields must always be
22033         # one less than the number of fields. If this is not true then
22034         # an error has been introduced in sub make_alignment_patterns.
22035         if (DEVEL_MODE) {
22036             if ( @{$rfields} && ( @{$rtokens} != ( @{$rfields} - 1 ) ) ) {
22037                 my $nt  = @{$rtokens};
22038                 my $nf  = @{$rfields};
22039                 my $msg = <<EOM;
22040 Program bug in Perl::Tidy::Formatter, probably in sub 'make_alignment_patterns':
22041 The number of tokens = $nt should be one less than number of fields: $nf
22042 EOM
22043                 Fault($msg);
22044             }
22045         }
22046
22047         # --------------------------------------
22048         # get the final indentation of this line
22049         # --------------------------------------
22050         my ( $indentation, $lev, $level_end, $terminal_type,
22051             $terminal_block_type, $is_semicolon_terminated, $is_outdented_line )
22052           = $self->final_indentation_adjustment( $ibeg, $iend, $rfields,
22053             $rpatterns,         $ri_first, $ri_last,
22054             $rindentation_list, $ljump,    $starting_in_quote,
22055             $is_static_block_comment, );
22056
22057         # --------------------------------
22058         # define flag 'outdent_long_lines'
22059         # --------------------------------
22060         if (
22061             # we will allow outdenting of long lines..
22062             # which are long quotes, if allowed
22063             ( $type_beg eq 'Q' && $rOpts_outdent_long_quotes )
22064
22065             # which are long block comments, if allowed
22066             || (
22067                    $type_beg eq '#'
22068                 && $rOpts_outdent_long_comments
22069
22070                 # but not if this is a static block comment
22071                 && !$is_static_block_comment
22072             )
22073           )
22074         {
22075             $rvao_args->{outdent_long_lines} = 1;
22076
22077             # convert -lp indentation objects to spaces to allow outdenting
22078             if ( ref($indentation) ) {
22079                 $indentation = $indentation->get_spaces();
22080             }
22081         }
22082
22083         # --------------------------------------------------
22084         # define flags 'break_alignment_before' and '_after'
22085         # --------------------------------------------------
22086
22087         # These flags tell the vertical aligner to stop alignment before or
22088         # after this line.
22089         if ($is_outdented_line) {
22090             $rvao_args->{break_alignment_before} = 1;
22091             $rvao_args->{break_alignment_after}  = 1;
22092         }
22093         elsif ($do_not_pad) {
22094             $rvao_args->{break_alignment_before} = 1;
22095         }
22096
22097         # flush at an 'if' which follows a line with (1) terminal semicolon
22098         # or (2) terminal block_type which is not an 'if'.  This prevents
22099         # unwanted alignment between the lines.
22100         elsif ( $type_beg eq 'k' && $token_beg eq 'if' ) {
22101             my $type_m = 'b';
22102             my $block_type_m;
22103
22104             if ( $Kbeg > 0 ) {
22105                 my $Km = $Kbeg - 1;
22106                 $type_m = $rLL->[$Km]->[_TYPE_];
22107                 if ( $type_m eq 'b' && $Km > 0 ) {
22108                     $Km -= 1;
22109                     $type_m = $rLL->[$Km]->[_TYPE_];
22110                 }
22111                 if ( $type_m eq '#' && $Km > 0 ) {
22112                     $Km -= 1;
22113                     $type_m = $rLL->[$Km]->[_TYPE_];
22114                     if ( $type_m eq 'b' && $Km > 0 ) {
22115                         $Km -= 1;
22116                         $type_m = $rLL->[$Km]->[_TYPE_];
22117                     }
22118                 }
22119
22120                 my $seqno_m = $rLL->[$Km]->[_TYPE_SEQUENCE_];
22121                 if ($seqno_m) {
22122                     $block_type_m = $rblock_type_of_seqno->{$seqno_m};
22123                 }
22124             }
22125
22126             # break after anything that is not if-like
22127             if (
22128                 $type_m eq ';'
22129                 || (   $type_m eq '}'
22130                     && $block_type_m
22131                     && $block_type_m ne 'if'
22132                     && $block_type_m ne 'unless'
22133                     && $block_type_m ne 'elsif'
22134                     && $block_type_m ne 'else' )
22135               )
22136             {
22137                 $rvao_args->{break_alignment_before} = 1;
22138             }
22139         }
22140
22141         # ----------------------------------
22142         # define 'rvertical_tightness_flags'
22143         # ----------------------------------
22144         # These flags tell the vertical aligner if/when to combine consecutive
22145         # lines, based on the user input parameters.
22146         $rvao_args->{rvertical_tightness_flags} =
22147           $self->set_vertical_tightness_flags( $n, $n_last_line, $ibeg, $iend,
22148             $ri_first, $ri_last, $ending_in_quote, $closing_side_comment )
22149           if ( !$is_block_comment );
22150
22151         # ----------------------------------
22152         # define 'is_terminal_ternary'  flag
22153         # ----------------------------------
22154
22155         # This flag is set at the final ':' of a ternary chain to request
22156         # vertical alignment of the final term.  Here is a slightly complex
22157         # example:
22158         #
22159         # $self->{_text} = (
22160         #    !$section        ? ''
22161         #   : $type eq 'item' ? "the $section entry"
22162         #   :                   "the section on $section"
22163         # )
22164         # . (
22165         #   $page
22166         #   ? ( $section ? ' in ' : '' ) . "the $page$page_ext manpage"
22167         #   : ' elsewhere in this document'
22168         # );
22169         #
22170         if ( $type_beg eq ':' || $n > 0 && $type_end_last eq ':' ) {
22171
22172             my $is_terminal_ternary = 0;
22173             my $last_leading_type   = $n > 0 ? $type_beg_last : ':';
22174             if (   $terminal_type ne ';'
22175                 && $n_last_line > $n
22176                 && $level_end == $lev )
22177             {
22178                 my $Kbeg_next = $K_to_go[$ibeg_next];
22179                 $level_end     = $rLL->[$Kbeg_next]->[_LEVEL_];
22180                 $terminal_type = $rLL->[$Kbeg_next]->[_TYPE_];
22181             }
22182             if (
22183                 $last_leading_type eq ':'
22184                 && (   ( $terminal_type eq ';' && $level_end <= $lev )
22185                     || ( $terminal_type ne ':' && $level_end < $lev ) )
22186               )
22187             {
22188
22189                 # the terminal term must not contain any ternary terms, as in
22190                 # my $ECHO = (
22191                 #       $Is_MSWin32 ? ".\\echo$$"
22192                 #     : $Is_MacOS   ? ":echo$$"
22193                 #     : ( $Is_NetWare ? "echo$$" : "./echo$$" )
22194                 # );
22195                 $is_terminal_ternary = 1;
22196
22197                 my $KP = $rLL->[$Kbeg]->[_KNEXT_SEQ_ITEM_];
22198                 while ( defined($KP) && $KP <= $Kend ) {
22199                     my $type_KP = $rLL->[$KP]->[_TYPE_];
22200                     if ( $type_KP eq '?' || $type_KP eq ':' ) {
22201                         $is_terminal_ternary = 0;
22202                         last;
22203                     }
22204                     $KP = $rLL->[$KP]->[_KNEXT_SEQ_ITEM_];
22205                 }
22206             }
22207             $rvao_args->{is_terminal_ternary} = $is_terminal_ternary;
22208         }
22209
22210         # -------------------------------------------------
22211         # add any new closing side comment to the last line
22212         # -------------------------------------------------
22213         if ( $closing_side_comment && $n == $n_last_line && @{$rfields} ) {
22214
22215             $rfields->[-1] .= " $closing_side_comment";
22216
22217             # NOTE: Patch for csc. We can just use 1 for the length of the csc
22218             # because its length should not be a limiting factor from here on.
22219             $rfield_lengths->[-1] += 2;
22220
22221             # repack
22222             $rline_alignment =
22223               [ $rtokens, $rfields, $rpatterns, $rfield_lengths ];
22224         }
22225
22226         # ------------------------
22227         # define flag 'list_seqno'
22228         # ------------------------
22229
22230         # This flag indicates if this line is contained in a multi-line list
22231         if ( !$is_block_comment ) {
22232             my $parent_seqno = $parent_seqno_to_go[$ibeg];
22233             $rvao_args->{list_seqno} = $ris_list_by_seqno->{$parent_seqno};
22234         }
22235
22236         # The alignment tokens have been marked with nesting_depths, so we need
22237         # to pass nesting depths to the vertical aligner. They remain invariant
22238         # under all formatting operations.  Previously, level values were sent
22239         # to the aligner.  But they can be altered in welding and other
22240         # opeartions, and this can lead to alignement errors.
22241         my $nesting_depth_beg = $nesting_depth_to_go[$ibeg];
22242         my $nesting_depth_end = $nesting_depth_to_go[$iend];
22243
22244         # A quirk in the definition of nesting depths is that the closing token
22245         # has the same depth as internal tokens.  The vertical aligner is
22246         # programmed to expect them to have the lower depth, so we fix this.
22247         if ( $is_closing_type{ $types_to_go[$ibeg] } ) { $nesting_depth_beg-- }
22248         if ( $is_closing_type{ $types_to_go[$iend] } ) { $nesting_depth_end-- }
22249
22250         # Adjust nesting depths to keep -lp indentation for qw lists.  This is
22251         # required because qw lists contained in brackets do not get nesting
22252         # depths, but the vertical aligner is watching nesting depth changes to
22253         # decide if a -lp block is intact.  Without this patch, qw lists
22254         # enclosed in angle brackets will not get the correct -lp indentation.
22255
22256         # Looking for line with isolated qw ...
22257         if (   $rOpts_line_up_parentheses
22258             && $type_beg eq 'q'
22259             && $ibeg == $iend )
22260         {
22261
22262             # ... which is part of a multiline qw
22263             my $Km = $self->K_previous_nonblank($Kbeg);
22264             my $Kp = $self->K_next_nonblank($Kbeg);
22265             if (   defined($Km) && $rLL->[$Km]->[_TYPE_] eq 'q'
22266                 || defined($Kp) && $rLL->[$Kp]->[_TYPE_] eq 'q' )
22267             {
22268                 $nesting_depth_beg++;
22269                 $nesting_depth_end++;
22270             }
22271         }
22272
22273         # ---------------------------------
22274         # define flag 'forget_side_comment'
22275         # ---------------------------------
22276
22277         # This flag tells the vertical aligner to reset the side comment
22278         # location if we are entering a new block from level 0.  This is
22279         # intended to keep side comments from drifting too far to the right.
22280         if (   $terminal_block_type
22281             && $nesting_depth_end > $nesting_depth_beg )
22282         {
22283             my $level_adj        = $lev;
22284             my $radjusted_levels = $self->[_radjusted_levels_];
22285             if ( defined($radjusted_levels) && @{$radjusted_levels} == @{$rLL} )
22286             {
22287                 $level_adj = $radjusted_levels->[$Kbeg];
22288                 if ( $level_adj < 0 ) { $level_adj = 0 }
22289             }
22290             if ( $level_adj == 0 ) {
22291                 $rvao_args->{forget_side_comment} = 1;
22292             }
22293         }
22294
22295         # -----------------------------------
22296         # Store the remaining non-flag values
22297         # -----------------------------------
22298         $rvao_args->{Kend}            = $Kend_code;
22299         $rvao_args->{ci_level}        = $ci_levels_to_go[$ibeg];
22300         $rvao_args->{indentation}     = $indentation;
22301         $rvao_args->{level_end}       = $nesting_depth_end;
22302         $rvao_args->{level}           = $nesting_depth_beg;
22303         $rvao_args->{rline_alignment} = $rline_alignment;
22304         $rvao_args->{maximum_line_length} =
22305           $maximum_line_length_at_level[ $levels_to_go[$ibeg] ];
22306
22307         # --------------------------------------
22308         # send this line to the vertical aligner
22309         # --------------------------------------
22310         my $vao = $self->[_vertical_aligner_object_];
22311         $vao->valign_input($rvao_args);
22312
22313         $do_not_pad = 0;
22314
22315         # Set flag indicating if this line ends in an opening
22316         # token and is very short, so that a blank line is not
22317         # needed if the subsequent line is a comment.
22318         # Examples of what we are looking for:
22319         #   {
22320         #   && (
22321         #   BEGIN {
22322         #   default {
22323         #   sub {
22324         $self->[_last_output_short_opening_token_]
22325
22326           # line ends in opening token
22327           #              /^[\{\(\[L]$/
22328           = $is_opening_type{$type_end}
22329
22330           # and either
22331           && (
22332             # line has either single opening token
22333             $Kend == $Kbeg
22334
22335             # or is a single token followed by opening token.
22336             # Note that sub identifiers have blanks like 'sub doit'
22337             #                                 $token_beg !~ /\s+/
22338             || ( $Kend - $Kbeg <= 2 && index( $token_beg, ' ' ) < 0 )
22339           )
22340
22341           # and limit total to 10 character widths
22342           && token_sequence_length( $ibeg, $iend ) <= 10;
22343
22344     } ## end of loop to output each line
22345
22346     # remember indentation of lines containing opening containers for
22347     # later use by sub final_indentation_adjustment
22348     $self->save_opening_indentation( $ri_first, $ri_last, $rindentation_list )
22349       if ( !$is_block_comment );
22350
22351     # output any new -cscw block comment
22352     if ($cscw_block_comment) {
22353         $self->flush_vertical_aligner();
22354         my $file_writer_object = $self->[_file_writer_object_];
22355         $file_writer_object->write_code_line( $cscw_block_comment . "\n" );
22356     }
22357     return;
22358 }
22359
22360 sub check_batch_summed_lengths {
22361
22362     my ( $self, $msg ) = @_;
22363     $msg = "" unless defined($msg);
22364     my $rLL = $self->[_rLL_];
22365
22366     # Verify that the summed lengths are correct. We want to be sure that
22367     # errors have not been introduced by programming changes.  Summed lengths
22368     # are defined in sub $store_token.  Operations like padding and unmasking
22369     # semicolons can change token lengths, but those operations are expected to
22370     # update the summed lengths when they make changes.  So the summed lengths
22371     # should always be correct.
22372     foreach my $i ( 0 .. $max_index_to_go ) {
22373         my $len_by_sum =
22374           $summed_lengths_to_go[ $i + 1 ] - $summed_lengths_to_go[$i];
22375         my $len_tok_i = $token_lengths_to_go[$i];
22376         my $KK        = $K_to_go[$i];
22377         my $len_tok_K;
22378         if ( defined($KK) ) { $len_tok_K = $rLL->[$KK]->[_TOKEN_LENGTH_] }
22379         if ( $len_by_sum != $len_tok_i
22380             || defined($len_tok_K) && $len_by_sum != $len_tok_K )
22381         {
22382             my $lno = defined($KK) ? $rLL->[$KK]->[_LINE_INDEX_] + 1 : "undef";
22383             $KK = 'undef' unless defined($KK);
22384             my $tok  = $tokens_to_go[$i];
22385             my $type = $types_to_go[$i];
22386             Fault(<<EOM);
22387 Summed lengths are appear to be incorrect.  $msg
22388 lengths disagree: token length by sum=$len_by_sum but token_length_to_go[$i] = $len_tok_i and rLL->[$KK]->[_TOKEN_LENGTH_]=$len_tok_K
22389 near line $lno starting with '$tokens_to_go[0]..' at token i=$i K=$KK token_type='$type' token='$tok'
22390 EOM
22391         }
22392     }
22393     return;
22394 }
22395
22396 {    ## begin closure set_vertical_alignment_markers
22397     my %is_vertical_alignment_type;
22398     my %is_not_vertical_alignment_token;
22399     my %is_vertical_alignment_keyword;
22400     my %is_terminal_alignment_type;
22401     my %is_low_level_alignment_token;
22402
22403     BEGIN {
22404
22405         my @q;
22406
22407         # Replaced =~ and // in the list.  // had been removed in RT 119588
22408         @q = qw#
22409           = **= += *= &= <<= &&= -= /= |= >>= ||= //= .= %= ^= x=
22410           { ? : => && || ~~ !~~ =~ !~ // <=> ->
22411           #;
22412         @is_vertical_alignment_type{@q} = (1) x scalar(@q);
22413
22414         # These 'tokens' are not aligned. We need this to remove [
22415         # from the above list because it has type ='{'
22416         @q = qw([);
22417         @is_not_vertical_alignment_token{@q} = (1) x scalar(@q);
22418
22419         # these are the only types aligned at a line end
22420         @q = qw(&& || =>);
22421         @is_terminal_alignment_type{@q} = (1) x scalar(@q);
22422
22423         # these tokens only align at line level
22424         @q = ( '{', '(' );
22425         @is_low_level_alignment_token{@q} = (1) x scalar(@q);
22426
22427         # eq and ne were removed from this list to improve alignment chances
22428         @q = qw(if unless and or err for foreach while until);
22429         @is_vertical_alignment_keyword{@q} = (1) x scalar(@q);
22430     }
22431
22432     sub set_vertical_alignment_markers {
22433
22434         # This routine takes the first step toward vertical alignment of the
22435         # lines of output text.  It looks for certain tokens which can serve as
22436         # vertical alignment markers (such as an '=').
22437         #
22438         # Method: We look at each token $i in this output batch and set
22439         # $ralignment_type_to_go->[$i] equal to those tokens at which we would
22440         # accept vertical alignment.
22441
22442         my ( $self, $ri_first, $ri_last ) = @_;
22443
22444         my $ralignment_type_to_go;
22445         my $ralignment_counts       = [];
22446         my $ralignment_hash_by_line = [];
22447
22448         # NOTE: closing side comments can insert up to 2 additional tokens
22449         # beyond the original $max_index_to_go, so we need to check ri_last for
22450         # the last index.
22451         my $max_line = @{$ri_first} - 1;
22452         my $max_i    = $ri_last->[$max_line];
22453         if ( $max_i < $max_index_to_go ) { $max_i = $max_index_to_go }
22454
22455         # -----------------------------------------------------------------
22456         # Shortcut:
22457         #    - no alignments if there is only 1 token.
22458         #    - and nothing to do if we aren't allowed to change whitespace.
22459         # -----------------------------------------------------------------
22460         if ( $max_i <= 0 || !$rOpts_add_whitespace ) {
22461             return ( $ralignment_type_to_go, $ralignment_counts,
22462                 $ralignment_hash_by_line );
22463         }
22464
22465         my $rspecial_side_comment_type = $self->[_rspecial_side_comment_type_];
22466         my $ris_function_call_paren    = $self->[_ris_function_call_paren_];
22467         my $rLL                        = $self->[_rLL_];
22468
22469         # -------------------------------
22470         # First handle any side comment.
22471         # -------------------------------
22472         my $i_terminal = $max_i;
22473         if ( $types_to_go[$max_i] eq '#' ) {
22474
22475             # We know $max_i > 0 if we get here.
22476             $i_terminal -= 1;
22477             if ( $i_terminal > 0 && $types_to_go[$i_terminal] eq 'b' ) {
22478                 $i_terminal -= 1;
22479             }
22480
22481             my $token = $tokens_to_go[$max_i];
22482             my $KK    = $K_to_go[$max_i];
22483
22484             # Do not align various special side comments
22485             my $do_not_align = (
22486
22487                 # it is any specially marked side comment
22488                 ( defined($KK) && $rspecial_side_comment_type->{$KK} )
22489
22490                 # or it is a static side comment
22491                   || ( $rOpts->{'static-side-comments'}
22492                     && $token =~ /$static_side_comment_pattern/ )
22493
22494                   # or a closing side comment
22495                   || ( $types_to_go[$i_terminal] eq '}'
22496                     && $tokens_to_go[$i_terminal] eq '}'
22497                     && $token =~ /$closing_side_comment_prefix_pattern/ )
22498             );
22499
22500             # - For the specific combination -vc -nvsc, we put all side comments
22501             #   at fixed locations. Note that we will lose hanging side comment
22502             #   alignments. Otherwise, hsc's can move to strange locations.
22503             # - For -nvc -nvsc we make all side comments vertical alignments
22504             #   because the vertical aligner will check for -nvsc and be able
22505             #   to reduce the final padding to the side comments for long lines.
22506             #   and keep hanging side comments aligned.
22507             if (   !$do_not_align
22508                 && !$rOpts_valign_side_comments
22509                 && $rOpts_valign_code )
22510             {
22511
22512                 $do_not_align = 1;
22513                 my $ipad = $max_i - 1;
22514                 if ( $types_to_go[$ipad] eq 'b' ) {
22515                     my $pad_spaces =
22516                       $rOpts->{'minimum-space-to-comment'} -
22517                       $token_lengths_to_go[$ipad];
22518                     $self->pad_token( $ipad, $pad_spaces );
22519                 }
22520             }
22521
22522             if ( !$do_not_align ) {
22523                 $ralignment_type_to_go->[$max_i] = '#';
22524                 $ralignment_hash_by_line->[$max_line]->{$max_i} = '#';
22525                 $ralignment_counts->[$max_line]++;
22526             }
22527         }
22528
22529         # ----------------------------------------------
22530         # Nothing more to do on this line if -nvc is set
22531         # ----------------------------------------------
22532         if ( !$rOpts_valign_code ) {
22533             return ( $ralignment_type_to_go, $ralignment_counts,
22534                 $ralignment_hash_by_line );
22535         }
22536
22537         # -------------------------------------
22538         # Loop over each line of this batch ...
22539         # -------------------------------------
22540         my $last_vertical_alignment_BEFORE_index;
22541         my $vert_last_nonblank_type;
22542         my $vert_last_nonblank_token;
22543         my $vert_last_nonblank_block_type;
22544
22545         foreach my $line ( 0 .. $max_line ) {
22546
22547             my $ibeg = $ri_first->[$line];
22548             my $iend = $ri_last->[$line];
22549
22550             next if ( $iend <= $ibeg );
22551
22552             # back up before any side comment
22553             if ( $iend > $i_terminal ) { $iend = $i_terminal }
22554
22555             my $level_beg = $levels_to_go[$ibeg];
22556             my $token_beg = $tokens_to_go[$ibeg];
22557             my $type_beg  = $types_to_go[$ibeg];
22558             my $type_beg_special_char =
22559               ( $type_beg eq '.' || $type_beg eq ':' || $type_beg eq '?' );
22560
22561             $last_vertical_alignment_BEFORE_index = -1;
22562             $vert_last_nonblank_type              = $type_beg;
22563             $vert_last_nonblank_token             = $token_beg;
22564
22565             # ----------------------------------------------------------------
22566             # Initialization code merged from 'sub delete_needless_alignments'
22567             # ----------------------------------------------------------------
22568             my $i_good_paren  = -1;
22569             my $i_elsif_close = $ibeg - 1;
22570             my $i_elsif_open  = $iend + 1;
22571             my @imatch_list;
22572             if ( $type_beg eq 'k' ) {
22573
22574                 # Initialization for paren patch: mark a location of a paren we
22575                 # should keep, such as one following something like a leading
22576                 # 'if', 'elsif',
22577                 $i_good_paren = $ibeg + 1;
22578                 if ( $types_to_go[$i_good_paren] eq 'b' ) {
22579                     $i_good_paren++;
22580                 }
22581
22582                 # Initializtion for 'elsif' patch: remember the paren range of
22583                 # an elsif, and do not make alignments within them because this
22584                 # can cause loss of padding and overall brace alignment in the
22585                 # vertical aligner.
22586                 if (   $token_beg eq 'elsif'
22587                     && $i_good_paren < $iend
22588                     && $tokens_to_go[$i_good_paren] eq '(' )
22589                 {
22590                     $i_elsif_open  = $i_good_paren;
22591                     $i_elsif_close = $mate_index_to_go[$i_good_paren];
22592                 }
22593             } ## end if ( $type_beg eq 'k' )
22594
22595             # --------------------------------------------
22596             # Loop over each token in this output line ...
22597             # --------------------------------------------
22598             foreach my $i ( $ibeg + 1 .. $iend ) {
22599
22600                 next if ( $types_to_go[$i] eq 'b' );
22601
22602                 my $type           = $types_to_go[$i];
22603                 my $token          = $tokens_to_go[$i];
22604                 my $alignment_type = '';
22605
22606                 # ----------------------------------------------
22607                 # Check for 'paren patch' : Remove excess parens
22608                 # ----------------------------------------------
22609
22610                 # Excess alignment of parens can prevent other good alignments.
22611                 # For example, note the parens in the first two rows of the
22612                 # following snippet.  They would normally get marked for
22613                 # alignment and aligned as follows:
22614
22615                 #    my $w = $columns * $cell_w + ( $columns + 1 ) * $border;
22616                 #    my $h = $rows * $cell_h +    ( $rows + 1 ) * $border;
22617                 #    my $img = new Gimp::Image( $w, $h, RGB );
22618
22619                 # This causes unnecessary paren alignment and prevents the
22620                 # third equals from aligning. If we remove the unwanted
22621                 # alignments we get:
22622
22623                 #    my $w   = $columns * $cell_w + ( $columns + 1 ) * $border;
22624                 #    my $h   = $rows * $cell_h + ( $rows + 1 ) * $border;
22625                 #    my $img = new Gimp::Image( $w, $h, RGB );
22626
22627                 # A rule for doing this which works well is to remove alignment
22628                 # of parens whose containers do not contain other aligning
22629                 # tokens, with the exception that we always keep alignment of
22630                 # the first opening paren on a line (for things like 'if' and
22631                 # 'elsif' statements).
22632                 if ( $token eq ')' && @imatch_list ) {
22633
22634                     # undo the corresponding opening paren if:
22635                     # - it is at the top of the stack
22636                     # - and not the first overall opening paren
22637                     # - does not follow a leading keyword on this line
22638                     my $imate = $mate_index_to_go[$i];
22639                     if (   $imatch_list[-1] eq $imate
22640                         && ( $ibeg > 1 || @imatch_list > 1 )
22641                         && $imate > $i_good_paren )
22642                     {
22643                         if ( $ralignment_type_to_go->[$imate] ) {
22644                             $ralignment_type_to_go->[$imate] = '';
22645                             $ralignment_counts->[$line]--;
22646                             delete $ralignment_hash_by_line->[$line]->{$imate};
22647                         }
22648                         pop @imatch_list;
22649                     }
22650                 }
22651
22652                 # do not align tokens at lower level than start of line
22653                 # except for side comments
22654                 if ( $levels_to_go[$i] < $level_beg ) {
22655                     next;
22656                 }
22657
22658                 #--------------------------------------------------------
22659                 # First see if we want to align BEFORE this token
22660                 #--------------------------------------------------------
22661
22662                 # The first possible token that we can align before
22663                 # is index 2 because: 1) it doesn't normally make sense to
22664                 # align before the first token and 2) the second
22665                 # token must be a blank if we are to align before
22666                 # the third
22667                 if ( $i < $ibeg + 2 ) { }
22668
22669                 # must follow a blank token
22670                 elsif ( $types_to_go[ $i - 1 ] ne 'b' ) { }
22671
22672                 # otherwise, do not align two in a row to create a
22673                 # blank field
22674                 elsif ( $last_vertical_alignment_BEFORE_index == $i - 2 ) { }
22675
22676                 # align before one of these keywords
22677                 # (within a line, since $i>1)
22678                 elsif ( $type eq 'k' ) {
22679
22680                     #  /^(if|unless|and|or|eq|ne)$/
22681                     if ( $is_vertical_alignment_keyword{$token} ) {
22682                         $alignment_type = $token;
22683                     }
22684                 }
22685
22686                 # align before one of these types..
22687                 elsif ( $is_vertical_alignment_type{$type}
22688                     && !$is_not_vertical_alignment_token{$token} )
22689                 {
22690                     $alignment_type = $token;
22691
22692                     # Do not align a terminal token.  Although it might
22693                     # occasionally look ok to do this, this has been found to be
22694                     # a good general rule.  The main problems are:
22695                     # (1) that the terminal token (such as an = or :) might get
22696                     # moved far to the right where it is hard to see because
22697                     # nothing follows it, and
22698                     # (2) doing so may prevent other good alignments.
22699                     # Current exceptions are && and || and =>
22700                     if ( $i == $iend ) {
22701                         $alignment_type = ""
22702                           unless ( $is_terminal_alignment_type{$type} );
22703                     }
22704
22705                     # Do not align leading ': (' or '. ('.  This would prevent
22706                     # alignment in something like the following:
22707                     #   $extra_space .=
22708                     #       ( $input_line_number < 10 )  ? "  "
22709                     #     : ( $input_line_number < 100 ) ? " "
22710                     #     :                                "";
22711                     # or
22712                     #  $code =
22713                     #      ( $case_matters ? $accessor : " lc($accessor) " )
22714                     #    . ( $yesno        ? " eq "       : " ne " )
22715
22716                     # Also, do not align a ( following a leading ? so we can
22717                     # align something like this:
22718                     #   $converter{$_}->{ushortok} =
22719                     #     $PDL::IO::Pic::biggrays
22720                     #     ? ( m/GIF/          ? 0 : 1 )
22721                     #     : ( m/GIF|RAST|IFF/ ? 0 : 1 );
22722                     if (   $type_beg_special_char
22723                         && $i == $ibeg + 2
22724                         && $types_to_go[ $i - 1 ] eq 'b' )
22725                     {
22726                         $alignment_type = "";
22727                     }
22728
22729                     # Certain tokens only align at the same level as the
22730                     # initial line level
22731                     if (   $is_low_level_alignment_token{$token}
22732                         && $levels_to_go[$i] != $level_beg )
22733                     {
22734                         $alignment_type = "";
22735                     }
22736
22737                     # For a paren after keyword, only align something like this:
22738                     #    if    ( $a ) { &a }
22739                     #    elsif ( $b ) { &b }
22740                     if ( $token eq '(' ) {
22741
22742                         if ( $vert_last_nonblank_type eq 'k' ) {
22743                             $alignment_type = ""
22744                               unless $vert_last_nonblank_token =~
22745                               /^(if|unless|elsif)$/;
22746                         }
22747
22748                         # Do not align a spaced-function-paren if requested.
22749                         # Issue git #53, #73.
22750                         if ( !$rOpts_function_paren_vertical_alignment ) {
22751                             my $seqno = $type_sequence_to_go[$i];
22752                             if ( $ris_function_call_paren->{$seqno} ) {
22753                                 $alignment_type = "";
22754                             }
22755                         }
22756                     }
22757
22758                     # be sure the alignment tokens are unique
22759                     # This didn't work well: reason not determined
22760                     # if ($token ne $type) {$alignment_type .= $type}
22761                 }
22762
22763                 # NOTE: This is deactivated because it causes the previous
22764                 # if/elsif alignment to fail
22765                 #elsif ( $type eq '}' && $token eq '}' && $block_type_to_go[$i])
22766                 #{ $alignment_type = $type; }
22767
22768                 if ($alignment_type) {
22769                     $last_vertical_alignment_BEFORE_index = $i;
22770                 }
22771
22772                 #--------------------------------------------------------
22773                 # Next see if we want to align AFTER the previous nonblank
22774                 #--------------------------------------------------------
22775
22776                 # We want to line up ',' and interior ';' tokens, with the added
22777                 # space AFTER these tokens.  (Note: interior ';' is included
22778                 # because it may occur in short blocks).
22779                 elsif (
22780
22781                     # we haven't already set it
22782                     ##!$alignment_type
22783
22784                     # previous token IS one of these:
22785                     (
22786                            $vert_last_nonblank_type eq ','
22787                         || $vert_last_nonblank_type eq ';'
22788                     )
22789
22790                     # and its not the first token of the line
22791                     ## && $i > $ibeg
22792
22793                     # and it follows a blank
22794                     && $types_to_go[ $i - 1 ] eq 'b'
22795
22796                     # and it's NOT one of these
22797                     && !$is_closing_token{$type}
22798
22799                     # then go ahead and align
22800                   )
22801
22802                 {
22803                     $alignment_type = $vert_last_nonblank_type;
22804                 }
22805
22806                 #-----------------------
22807                 # Set the alignment type
22808                 #-----------------------
22809                 if ($alignment_type) {
22810
22811                     # but do not align the opening brace of an anonymous sub
22812                     if (   $token eq '{'
22813                         && $block_type_to_go[$i] =~ /$ASUB_PATTERN/ )
22814                     {
22815
22816                     }
22817
22818                     # and do not make alignments within 'elsif' parens
22819                     elsif ( $i > $i_elsif_open && $i < $i_elsif_close ) {
22820
22821                     }
22822
22823                     # and ignore any tokens which have leading padded spaces
22824                     # example: perl527/lop.t
22825                     elsif ( substr( $alignment_type, 0, 1 ) eq ' ' ) {
22826
22827                     }
22828
22829                     else {
22830                         $ralignment_type_to_go->[$i] = $alignment_type;
22831                         $ralignment_hash_by_line->[$line]->{$i} =
22832                           $alignment_type;
22833                         $ralignment_counts->[$line]++;
22834                         push @imatch_list, $i;
22835                     }
22836                 }
22837
22838                 $vert_last_nonblank_type  = $type;
22839                 $vert_last_nonblank_token = $token;
22840             }
22841         }
22842
22843         return ( $ralignment_type_to_go, $ralignment_counts,
22844             $ralignment_hash_by_line );
22845     } ## end sub set_vertical_alignment_markers
22846 } ## end closure set_vertical_alignment_markers
22847
22848 sub make_vertical_alignments {
22849     my ( $self, $ri_first, $ri_last ) = @_;
22850
22851     #----------------------------
22852     # Shortcut for a single token
22853     #----------------------------
22854     if ( $max_index_to_go == 0 ) {
22855         if ( @{$ri_first} == 1 && $ri_last->[0] == 0 ) {
22856             my $rtokens   = [];
22857             my $rfields   = [ $tokens_to_go[0] ];
22858             my $rpatterns = [ $types_to_go[0] ];
22859             my $rfield_lengths =
22860               [ $summed_lengths_to_go[1] - $summed_lengths_to_go[0] ];
22861             return [ [ $rtokens, $rfields, $rpatterns, $rfield_lengths ] ];
22862         }
22863
22864         # Strange line packing, not fatal but should not happen
22865         elsif (DEVEL_MODE) {
22866             my $max_line = @{$ri_first} - 1;
22867             my $ibeg     = $ri_first->[0];
22868             my $iend     = $ri_last->[0];
22869             my $tok_b    = $tokens_to_go[$ibeg];
22870             my $tok_e    = $tokens_to_go[$iend];
22871             my $type_b   = $types_to_go[$ibeg];
22872             my $type_e   = $types_to_go[$iend];
22873             Fault(
22874 "Strange..max_index=0 but nlines=$max_line ibeg=$ibeg tok=$tok_b type=$type_b iend=$iend tok=$tok_e type=$type_e; please check\n"
22875             );
22876         }
22877     }
22878
22879     #---------------------------------------------------------
22880     # Step 1: Define the alignment tokens for the entire batch
22881     #---------------------------------------------------------
22882     my ( $ralignment_type_to_go, $ralignment_counts, $ralignment_hash_by_line )
22883       = $self->set_vertical_alignment_markers( $ri_first, $ri_last );
22884
22885     #----------------------------------------------
22886     # Step 2: Break each line into alignment fields
22887     #----------------------------------------------
22888     my $rline_alignments = [];
22889     my $max_line         = @{$ri_first} - 1;
22890     foreach my $line ( 0 .. $max_line ) {
22891
22892         my $ibeg = $ri_first->[$line];
22893         my $iend = $ri_last->[$line];
22894
22895         my $rtok_fld_pat_len = $self->make_alignment_patterns(
22896             $ibeg, $iend, $ralignment_type_to_go,
22897             $ralignment_counts->[$line],
22898             $ralignment_hash_by_line->[$line]
22899         );
22900         push @{$rline_alignments}, $rtok_fld_pat_len;
22901     }
22902     return $rline_alignments;
22903 } ## end sub make_vertical_alignments
22904
22905 sub get_seqno {
22906
22907     # get opening and closing sequence numbers of a token for the vertical
22908     # aligner.  Assign qw quotes a value to allow qw opening and closing tokens
22909     # to be treated somewhat like opening and closing tokens for stacking
22910     # tokens by the vertical aligner.
22911     my ( $self, $ii, $ending_in_quote ) = @_;
22912
22913     my $rLL = $self->[_rLL_];
22914
22915     my $KK    = $K_to_go[$ii];
22916     my $seqno = $rLL->[$KK]->[_TYPE_SEQUENCE_];
22917
22918     if ( $rLL->[$KK]->[_TYPE_] eq 'q' ) {
22919         my $SEQ_QW = -1;
22920         my $token  = $rLL->[$KK]->[_TOKEN_];
22921         if ( $ii > 0 ) {
22922             $seqno = $SEQ_QW if ( $token =~ /^qw\s*[\(\{\[]/ );
22923         }
22924         else {
22925             if ( !$ending_in_quote ) {
22926                 $seqno = $SEQ_QW if ( $token =~ /[\)\}\]]$/ );
22927             }
22928         }
22929     }
22930     return ($seqno);
22931 }
22932
22933 {
22934     my %undo_extended_ci;
22935
22936     sub initialize_undo_ci {
22937         %undo_extended_ci = ();
22938         return;
22939     }
22940
22941     sub undo_ci {
22942
22943         # Undo continuation indentation in certain sequences
22944         my ( $self, $ri_first, $ri_last, $rix_seqno_controlling_ci ) = @_;
22945         my ( $line_1, $line_2, $lev_last );
22946         my $this_line_is_semicolon_terminated;
22947         my $max_line = @{$ri_first} - 1;
22948
22949         my $rseqno_controlling_my_ci = $self->[_rseqno_controlling_my_ci_];
22950
22951         # Prepare a list of controlling indexes for each line if required.
22952         # This is used for efficient processing below.  Note: this is
22953         # critical for speed. In the initial implementation I just looped
22954         # through the @$rix_seqno_controlling_ci list below. Using NYT_prof, I
22955         # found that this routine was causing a huge run time in large lists.
22956         # On a very large list test case, this new coding dropped the run time
22957         # of this routine from 30 seconds to 169 milliseconds.
22958         my @i_controlling_ci;
22959         if ( @{$rix_seqno_controlling_ci} ) {
22960             my @tmp     = reverse @{$rix_seqno_controlling_ci};
22961             my $ix_next = pop @tmp;
22962             foreach my $line ( 0 .. $max_line ) {
22963                 my $iend = $ri_last->[$line];
22964                 while ( defined($ix_next) && $ix_next <= $iend ) {
22965                     push @{ $i_controlling_ci[$line] }, $ix_next;
22966                     $ix_next = pop @tmp;
22967                 }
22968             }
22969         }
22970
22971         # Loop over all lines of the batch ...
22972
22973         # Workaround originally created for problem c007, in which the
22974         # combination -lp -xci could produce a "Program bug" message in unusual
22975         # circumstances.
22976         my $skip_SECTION_1;
22977         if (   $rOpts_line_up_parentheses
22978             && $rOpts_extended_continuation_indentation )
22979         {
22980
22981             # Only set this flag if -lp is actually used here
22982             foreach my $line ( 0 .. $max_line ) {
22983                 my $ibeg = $ri_first->[$line];
22984                 if ( ref( $leading_spaces_to_go[$ibeg] ) ) {
22985                     $skip_SECTION_1 = 1;
22986                     last;
22987                 }
22988             }
22989         }
22990
22991         foreach my $line ( 0 .. $max_line ) {
22992
22993             my $ibeg = $ri_first->[$line];
22994             my $iend = $ri_last->[$line];
22995             my $lev  = $levels_to_go[$ibeg];
22996
22997             #-----------------------------------
22998             # SECTION 1: Undo needless common CI
22999             #-----------------------------------
23000
23001             # We are looking at leading tokens and looking for a sequence all
23002             # at the same level and all at a higher level than enclosing lines.
23003
23004             # For example, we can undo continuation indentation in sort/map/grep
23005             # chains
23006
23007             #    my $dat1 = pack( "n*",
23008             #        map { $_, $lookup->{$_} }
23009             #          sort { $a <=> $b }
23010             #          grep { $lookup->{$_} ne $default } keys %$lookup );
23011
23012             # to become
23013
23014             #    my $dat1 = pack( "n*",
23015             #        map { $_, $lookup->{$_} }
23016             #        sort { $a <=> $b }
23017             #        grep { $lookup->{$_} ne $default } keys %$lookup );
23018
23019             if ( $line > 0 && !$skip_SECTION_1 ) {
23020
23021                 # if we have started a chain..
23022                 if ($line_1) {
23023
23024                     # see if it continues..
23025                     if ( $lev == $lev_last ) {
23026                         if (   $types_to_go[$ibeg] eq 'k'
23027                             && $is_sort_map_grep{ $tokens_to_go[$ibeg] } )
23028                         {
23029
23030                             # chain continues...
23031                             # check for chain ending at end of a statement
23032                             if ( $line == $max_line ) {
23033
23034                                 # see of this line ends a statement
23035                                 $this_line_is_semicolon_terminated =
23036                                   $types_to_go[$iend] eq ';'
23037
23038                                   # with possible side comment
23039                                   || ( $types_to_go[$iend] eq '#'
23040                                     && $iend - $ibeg >= 2
23041                                     && $types_to_go[ $iend - 2 ] eq ';'
23042                                     && $types_to_go[ $iend - 1 ] eq 'b' );
23043                             }
23044                             $line_2 = $line
23045                               if ($this_line_is_semicolon_terminated);
23046                         }
23047                         else {
23048
23049                             # kill chain
23050                             $line_1 = undef;
23051                         }
23052                     }
23053                     elsif ( $lev < $lev_last ) {
23054
23055                         # chain ends with previous line
23056                         $line_2 = $line - 1;
23057                     }
23058                     elsif ( $lev > $lev_last ) {
23059
23060                         # kill chain
23061                         $line_1 = undef;
23062                     }
23063
23064                     # undo the continuation indentation if a chain ends
23065                     if ( defined($line_2) && defined($line_1) ) {
23066                         my $continuation_line_count = $line_2 - $line_1 + 1;
23067                         @ci_levels_to_go[ @{$ri_first}[ $line_1 .. $line_2 ] ]
23068                           = (0) x ($continuation_line_count)
23069                           if ( $continuation_line_count >= 0 );
23070                         @leading_spaces_to_go[ @{$ri_first}
23071                           [ $line_1 .. $line_2 ] ] =
23072                           @reduced_spaces_to_go[ @{$ri_first}
23073                           [ $line_1 .. $line_2 ] ];
23074                         $line_1 = undef;
23075                     }
23076                 }
23077
23078                 # not in a chain yet..
23079                 else {
23080
23081                     # look for start of a new sort/map/grep chain
23082                     if ( $lev > $lev_last ) {
23083                         if (   $types_to_go[$ibeg] eq 'k'
23084                             && $is_sort_map_grep{ $tokens_to_go[$ibeg] } )
23085                         {
23086                             $line_1 = $line;
23087                         }
23088                     }
23089                 }
23090             }
23091
23092             #-------------------------------------
23093             # SECTION 2: Undo ci at cuddled blocks
23094             #-------------------------------------
23095
23096             # Note that sub final_indentation_adjustment will be called later to
23097             # actually do this, but for now we will tentatively mark cuddled
23098             # lines with ci=0 so that the the -xci loop which follows will be
23099             # correct at cuddles.
23100             if (
23101                 $types_to_go[$ibeg] eq '}'
23102                 && ( $nesting_depth_to_go[$iend] + 1 ==
23103                     $nesting_depth_to_go[$ibeg] )
23104               )
23105             {
23106                 my $terminal_type = $types_to_go[$iend];
23107                 if ( $terminal_type eq '#' && $iend > $ibeg ) {
23108                     $terminal_type = $types_to_go[ $iend - 1 ];
23109                     if ( $terminal_type eq '#' && $iend - 1 > $ibeg ) {
23110                         $terminal_type = $types_to_go[ $iend - 2 ];
23111                     }
23112                 }
23113                 if ( $terminal_type eq '{' ) {
23114                     my $Kbeg = $K_to_go[$ibeg];
23115                     $ci_levels_to_go[$ibeg] = 0;
23116                 }
23117             }
23118
23119             #--------------------------------------------------------
23120             # SECTION 3: Undo ci set by sub extended_ci if not needed
23121             #--------------------------------------------------------
23122
23123             # Undo the ci of the leading token if its controlling token
23124             # went out on a previous line without ci
23125             if ( $ci_levels_to_go[$ibeg] ) {
23126                 my $Kbeg  = $K_to_go[$ibeg];
23127                 my $seqno = $rseqno_controlling_my_ci->{$Kbeg};
23128                 if ( $seqno && $undo_extended_ci{$seqno} ) {
23129
23130                     # but do not undo ci set by the -lp flag
23131                     if ( !ref( $reduced_spaces_to_go[$ibeg] ) ) {
23132                         $ci_levels_to_go[$ibeg] = 0;
23133                         $leading_spaces_to_go[$ibeg] =
23134                           $reduced_spaces_to_go[$ibeg];
23135                     }
23136                 }
23137             }
23138
23139             # Flag any controlling opening tokens in lines without ci.  This
23140             # will be used later in the above if statement to undo the ci which
23141             # they added.  The array i_controlling_ci[$line] was prepared at
23142             # the top of this routine.
23143             if ( !$ci_levels_to_go[$ibeg]
23144                 && defined( $i_controlling_ci[$line] ) )
23145             {
23146                 foreach my $i ( @{ $i_controlling_ci[$line] } ) {
23147                     my $seqno = $type_sequence_to_go[$i];
23148                     $undo_extended_ci{$seqno} = 1;
23149                 }
23150             }
23151
23152             $lev_last = $lev;
23153         }
23154
23155         return;
23156     }
23157 }
23158
23159 {    ## begin closure set_logical_padding
23160     my %is_math_op;
23161
23162     BEGIN {
23163
23164         my @q = qw( + - * / );
23165         @is_math_op{@q} = (1) x scalar(@q);
23166     }
23167
23168     sub set_logical_padding {
23169
23170         # Look at a batch of lines and see if extra padding can improve the
23171         # alignment when there are certain leading operators. Here is an
23172         # example, in which some extra space is introduced before
23173         # '( $year' to make it line up with the subsequent lines:
23174         #
23175         #       if (   ( $Year < 1601 )
23176         #           || ( $Year > 2899 )
23177         #           || ( $EndYear < 1601 )
23178         #           || ( $EndYear > 2899 ) )
23179         #       {
23180         #           &Error_OutOfRange;
23181         #       }
23182         #
23183         my ( $self, $ri_first, $ri_last, $peak_batch_size, $starting_in_quote )
23184           = @_;
23185         my $max_line = @{$ri_first} - 1;
23186
23187         my ( $ibeg, $ibeg_next, $ibegm, $iend, $iendm, $ipad, $pad_spaces,
23188             $tok_next, $type_next, $has_leading_op_next, $has_leading_op );
23189
23190         # Patch to produce padding in the first line of short code blocks.
23191         # This is part of an update to fix cases b562 .. b983.
23192         # This is needed to compensate for a change which was made in 'sub
23193         # starting_one_line_block' to prevent blinkers.  Previously, that sub
23194         # would not look at the total block size and rely on sub
23195         # break_long_lines to break up long blocks. Consequently, the
23196         # first line of those batches would end in the opening block brace of a
23197         # sort/map/grep/eval block.  When this was changed to immediately check
23198         # for blocks which were too long, the opening block brace would go out
23199         # in a single batch, and the block contents would go out as the next
23200         # batch.  This caused the logic in this routine which decides if the
23201         # first line should be padded to be incorrect.  To fix this, we set a
23202         # flag if the previous batch ended in an opening sort/map/grep/eval
23203         # block brace, and use it to adjust the logic to compensate.
23204
23205         # For example, the following would have previously been a single batch
23206         # but now is two batches.  We want to pad the line starting in '$dir':
23207         #    my (@indices) =                      # batch n-1  (prev batch n)
23208         #      sort {                             # batch n-1  (prev batch n)
23209         #            $dir eq 'left'               # batch n
23210         #          ? $cells[$a] <=> $cells[$b]    # batch n
23211         #          : $cells[$b] <=> $cells[$a];   # batch n
23212         #      } ( 0 .. $#cells );                # batch n
23213
23214         my $rLL                  = $self->[_rLL_];
23215         my $rblock_type_of_seqno = $self->[_rblock_type_of_seqno_];
23216
23217         my $is_short_block;
23218         if ( $K_to_go[0] > 0 ) {
23219             my $Kp = $K_to_go[0] - 1;
23220             if ( $Kp > 0 && $rLL->[$Kp]->[_TYPE_] eq 'b' ) {
23221                 $Kp -= 1;
23222             }
23223             if ( $Kp > 0 && $rLL->[$Kp]->[_TYPE_] eq '#' ) {
23224                 $Kp -= 1;
23225                 if ( $Kp > 0 && $rLL->[$Kp]->[_TYPE_] eq 'b' ) {
23226                     $Kp -= 1;
23227                 }
23228             }
23229             my $seqno = $rLL->[$Kp]->[_TYPE_SEQUENCE_];
23230             if ($seqno) {
23231                 my $block_type = $rblock_type_of_seqno->{$seqno};
23232                 if ($block_type) {
23233                     $is_short_block = $is_sort_map_grep_eval{$block_type};
23234                     $is_short_block ||= $want_one_line_block{$block_type};
23235                 }
23236             }
23237         }
23238
23239         # looking at each line of this batch..
23240         foreach my $line ( 0 .. $max_line - 1 ) {
23241
23242             # see if the next line begins with a logical operator
23243             $ibeg      = $ri_first->[$line];
23244             $iend      = $ri_last->[$line];
23245             $ibeg_next = $ri_first->[ $line + 1 ];
23246             $tok_next  = $tokens_to_go[$ibeg_next];
23247             $type_next = $types_to_go[$ibeg_next];
23248
23249             $has_leading_op_next = ( $tok_next =~ /^\w/ )
23250               ? $is_chain_operator{$tok_next}      # + - * / : ? && ||
23251               : $is_chain_operator{$type_next};    # and, or
23252
23253             next unless ($has_leading_op_next);
23254
23255             # next line must not be at lesser depth
23256             next
23257               if ( $nesting_depth_to_go[$ibeg] >
23258                 $nesting_depth_to_go[$ibeg_next] );
23259
23260             # identify the token in this line to be padded on the left
23261             $ipad = undef;
23262
23263             # handle lines at same depth...
23264             if ( $nesting_depth_to_go[$ibeg] ==
23265                 $nesting_depth_to_go[$ibeg_next] )
23266             {
23267
23268                 # if this is not first line of the batch ...
23269                 if ( $line > 0 ) {
23270
23271                     # and we have leading operator..
23272                     next if $has_leading_op;
23273
23274                     # Introduce padding if..
23275                     # 1. the previous line is at lesser depth, or
23276                     # 2. the previous line ends in an assignment
23277                     # 3. the previous line ends in a 'return'
23278                     # 4. the previous line ends in a comma
23279                     # Example 1: previous line at lesser depth
23280                     #       if (   ( $Year < 1601 )      # <- we are here but
23281                     #           || ( $Year > 2899 )      #  list has not yet
23282                     #           || ( $EndYear < 1601 )   # collapsed vertically
23283                     #           || ( $EndYear > 2899 ) )
23284                     #       {
23285                     #
23286                     # Example 2: previous line ending in assignment:
23287                     #    $leapyear =
23288                     #        $year % 4   ? 0     # <- We are here
23289                     #      : $year % 100 ? 1
23290                     #      : $year % 400 ? 0
23291                     #      : 1;
23292                     #
23293                     # Example 3: previous line ending in comma:
23294                     #    push @expr,
23295                     #        /test/   ? undef
23296                     #      : eval($_) ? 1
23297                     #      : eval($_) ? 1
23298                     #      :            0;
23299
23300                    # be sure levels agree (do not indent after an indented 'if')
23301                     next
23302                       if ( $levels_to_go[$ibeg] ne $levels_to_go[$ibeg_next] );
23303
23304                     # allow padding on first line after a comma but only if:
23305                     # (1) this is line 2 and
23306                     # (2) there are at more than three lines and
23307                     # (3) lines 3 and 4 have the same leading operator
23308                     # These rules try to prevent padding within a long
23309                     # comma-separated list.
23310                     my $ok_comma;
23311                     if (   $types_to_go[$iendm] eq ','
23312                         && $line == 1
23313                         && $max_line > 2 )
23314                     {
23315                         my $ibeg_next_next = $ri_first->[ $line + 2 ];
23316                         my $tok_next_next  = $tokens_to_go[$ibeg_next_next];
23317                         $ok_comma = $tok_next_next eq $tok_next;
23318                     }
23319
23320                     next
23321                       unless (
23322                            $is_assignment{ $types_to_go[$iendm] }
23323                         || $ok_comma
23324                         || ( $nesting_depth_to_go[$ibegm] <
23325                             $nesting_depth_to_go[$ibeg] )
23326                         || (   $types_to_go[$iendm] eq 'k'
23327                             && $tokens_to_go[$iendm] eq 'return' )
23328                       );
23329
23330                     # we will add padding before the first token
23331                     $ipad = $ibeg;
23332                 }
23333
23334                 # for first line of the batch..
23335                 else {
23336
23337                     # WARNING: Never indent if first line is starting in a
23338                     # continued quote, which would change the quote.
23339                     next if $starting_in_quote;
23340
23341                     # if this is text after closing '}'
23342                     # then look for an interior token to pad
23343                     if ( $types_to_go[$ibeg] eq '}' ) {
23344
23345                     }
23346
23347                     # otherwise, we might pad if it looks really good
23348                     elsif ($is_short_block) {
23349                         $ipad = $ibeg;
23350                     }
23351                     else {
23352
23353                         # we might pad token $ibeg, so be sure that it
23354                         # is at the same depth as the next line.
23355                         next
23356                           if ( $nesting_depth_to_go[$ibeg] !=
23357                             $nesting_depth_to_go[$ibeg_next] );
23358
23359                         # We can pad on line 1 of a statement if at least 3
23360                         # lines will be aligned. Otherwise, it
23361                         # can look very confusing.
23362
23363                  # We have to be careful not to pad if there are too few
23364                  # lines.  The current rule is:
23365                  # (1) in general we require at least 3 consecutive lines
23366                  # with the same leading chain operator token,
23367                  # (2) but an exception is that we only require two lines
23368                  # with leading colons if there are no more lines.  For example,
23369                  # the first $i in the following snippet would get padding
23370                  # by the second rule:
23371                  #
23372                  #   $i == 1 ? ( "First", "Color" )
23373                  # : $i == 2 ? ( "Then",  "Rarity" )
23374                  # :           ( "Then",  "Name" );
23375
23376                         if ( $max_line > 1 ) {
23377                             my $leading_token = $tokens_to_go[$ibeg_next];
23378                             my $tokens_differ;
23379
23380                             # never indent line 1 of a '.' series because
23381                             # previous line is most likely at same level.
23382                             # TODO: we should also look at the leading_spaces
23383                             # of the last output line and skip if it is same
23384                             # as this line.
23385                             next if ( $leading_token eq '.' );
23386
23387                             my $count = 1;
23388                             foreach my $l ( 2 .. 3 ) {
23389                                 last if ( $line + $l > $max_line );
23390                                 my $ibeg_next_next = $ri_first->[ $line + $l ];
23391                                 if ( $tokens_to_go[$ibeg_next_next] ne
23392                                     $leading_token )
23393                                 {
23394                                     $tokens_differ = 1;
23395                                     last;
23396                                 }
23397                                 $count++;
23398                             }
23399                             next if ($tokens_differ);
23400                             next if ( $count < 3 && $leading_token ne ':' );
23401                             $ipad = $ibeg;
23402                         }
23403                         else {
23404                             next;
23405                         }
23406                     }
23407                 }
23408             }
23409
23410             # find interior token to pad if necessary
23411             if ( !defined($ipad) ) {
23412
23413                 for ( my $i = $ibeg ; ( $i < $iend ) && !$ipad ; $i++ ) {
23414
23415                     # find any unclosed container
23416                     next
23417                       unless ( $type_sequence_to_go[$i]
23418                         && $mate_index_to_go[$i] > $iend );
23419
23420                     # find next nonblank token to pad
23421                     $ipad = $inext_to_go[$i];
23422                     last if ( $ipad > $iend );
23423                 }
23424                 last unless $ipad;
23425             }
23426
23427             # We cannot pad the first leading token of a file because
23428             # it could cause a bug in which the starting indentation
23429             # level is guessed incorrectly each time the code is run
23430             # though perltidy, thus causing the code to march off to
23431             # the right.  For example, the following snippet would have
23432             # this problem:
23433
23434 ##     ov_method mycan( $package, '(""' ),       $package
23435 ##  or ov_method mycan( $package, '(0+' ),       $package
23436 ##  or ov_method mycan( $package, '(bool' ),     $package
23437 ##  or ov_method mycan( $package, '(nomethod' ), $package;
23438
23439             # If this snippet is within a block this won't happen
23440             # unless the user just processes the snippet alone within
23441             # an editor.  In that case either the user will see and
23442             # fix the problem or it will be corrected next time the
23443             # entire file is processed with perltidy.
23444             next if ( $ipad == 0 && $peak_batch_size <= 1 );
23445
23446 ## THIS PATCH REMOVES THE FOLLOWING POOR PADDING (math.t) with -pbp, BUT
23447 ## IT DID MORE HARM THAN GOOD
23448 ##            ceil(
23449 ##                      $font->{'loca'}->{'glyphs'}[$x]->read->{'xMin'} * 1000
23450 ##                    / $upem
23451 ##            ),
23452 ##?            # do not put leading padding for just 2 lines of math
23453 ##?            if (   $ipad == $ibeg
23454 ##?                && $line > 0
23455 ##?                && $levels_to_go[$ipad] > $levels_to_go[ $ipad - 1 ]
23456 ##?                && $is_math_op{$type_next}
23457 ##?                && $line + 2 <= $max_line )
23458 ##?            {
23459 ##?                my $ibeg_next_next = $ri_first->[ $line + 2 ];
23460 ##?                my $type_next_next = $types_to_go[$ibeg_next_next];
23461 ##?                next if !$is_math_op{$type_next_next};
23462 ##?            }
23463
23464             # next line must not be at greater depth
23465             my $iend_next = $ri_last->[ $line + 1 ];
23466             next
23467               if ( $nesting_depth_to_go[ $iend_next + 1 ] >
23468                 $nesting_depth_to_go[$ipad] );
23469
23470             # lines must be somewhat similar to be padded..
23471             my $inext_next = $inext_to_go[$ibeg_next];
23472             my $type       = $types_to_go[$ipad];
23473             my $type_next  = $types_to_go[ $ipad + 1 ];
23474
23475             # see if there are multiple continuation lines
23476             my $logical_continuation_lines = 1;
23477             if ( $line + 2 <= $max_line ) {
23478                 my $leading_token  = $tokens_to_go[$ibeg_next];
23479                 my $ibeg_next_next = $ri_first->[ $line + 2 ];
23480                 if (   $tokens_to_go[$ibeg_next_next] eq $leading_token
23481                     && $nesting_depth_to_go[$ibeg_next] eq
23482                     $nesting_depth_to_go[$ibeg_next_next] )
23483                 {
23484                     $logical_continuation_lines++;
23485                 }
23486             }
23487
23488             # see if leading types match
23489             my $types_match = $types_to_go[$inext_next] eq $type;
23490             my $matches_without_bang;
23491
23492             # if first line has leading ! then compare the following token
23493             if ( !$types_match && $type eq '!' ) {
23494                 $types_match = $matches_without_bang =
23495                   $types_to_go[$inext_next] eq $types_to_go[ $ipad + 1 ];
23496             }
23497             if (
23498
23499                 # either we have multiple continuation lines to follow
23500                 # and we are not padding the first token
23501                 (
23502                     $logical_continuation_lines > 1
23503                     && ( $ipad > 0 || $is_short_block )
23504                 )
23505
23506                 # or..
23507                 || (
23508
23509                     # types must match
23510                     $types_match
23511
23512                     # and keywords must match if keyword
23513                     && !(
23514                            $type eq 'k'
23515                         && $tokens_to_go[$ipad] ne $tokens_to_go[$inext_next]
23516                     )
23517                 )
23518               )
23519             {
23520
23521                 #----------------------begin special checks--------------
23522                 #
23523                 # SPECIAL CHECK 1:
23524                 # A check is needed before we can make the pad.
23525                 # If we are in a list with some long items, we want each
23526                 # item to stand out.  So in the following example, the
23527                 # first line beginning with '$casefold->' would look good
23528                 # padded to align with the next line, but then it
23529                 # would be indented more than the last line, so we
23530                 # won't do it.
23531                 #
23532                 #  ok(
23533                 #      $casefold->{code}         eq '0041'
23534                 #        && $casefold->{status}  eq 'C'
23535                 #        && $casefold->{mapping} eq '0061',
23536                 #      'casefold 0x41'
23537                 #  );
23538                 #
23539                 # Note:
23540                 # It would be faster, and almost as good, to use a comma
23541                 # count, and not pad if comma_count > 1 and the previous
23542                 # line did not end with a comma.
23543                 #
23544                 my $ok_to_pad = 1;
23545
23546                 my $ibg   = $ri_first->[ $line + 1 ];
23547                 my $depth = $nesting_depth_to_go[ $ibg + 1 ];
23548
23549                 # just use simplified formula for leading spaces to avoid
23550                 # needless sub calls
23551                 my $lsp = $levels_to_go[$ibg] + $ci_levels_to_go[$ibg];
23552
23553                 # look at each line beyond the next ..
23554                 my $l = $line + 1;
23555                 foreach my $ltest ( $line + 2 .. $max_line ) {
23556                     $l = $ltest;
23557                     my $ibg = $ri_first->[$l];
23558
23559                     # quit looking at the end of this container
23560                     last
23561                       if ( $nesting_depth_to_go[ $ibg + 1 ] < $depth )
23562                       || ( $nesting_depth_to_go[$ibg] < $depth );
23563
23564                     # cannot do the pad if a later line would be
23565                     # outdented more
23566                     if ( $levels_to_go[$ibg] + $ci_levels_to_go[$ibg] < $lsp ) {
23567                         $ok_to_pad = 0;
23568                         last;
23569                     }
23570                 }
23571
23572                 # don't pad if we end in a broken list
23573                 if ( $l == $max_line ) {
23574                     my $i2 = $ri_last->[$l];
23575                     if ( $types_to_go[$i2] eq '#' ) {
23576                         my $i1 = $ri_first->[$l];
23577                         next if terminal_type_i( $i1, $i2 ) eq ',';
23578                     }
23579                 }
23580
23581                 # SPECIAL CHECK 2:
23582                 # a minus may introduce a quoted variable, and we will
23583                 # add the pad only if this line begins with a bare word,
23584                 # such as for the word 'Button' here:
23585                 #    [
23586                 #         Button      => "Print letter \"~$_\"",
23587                 #        -command     => [ sub { print "$_[0]\n" }, $_ ],
23588                 #        -accelerator => "Meta+$_"
23589                 #    ];
23590                 #
23591                 #  On the other hand, if 'Button' is quoted, it looks best
23592                 #  not to pad:
23593                 #    [
23594                 #        'Button'     => "Print letter \"~$_\"",
23595                 #        -command     => [ sub { print "$_[0]\n" }, $_ ],
23596                 #        -accelerator => "Meta+$_"
23597                 #    ];
23598                 if ( $types_to_go[$ibeg_next] eq 'm' ) {
23599                     $ok_to_pad = 0 if $types_to_go[$ibeg] eq 'Q';
23600                 }
23601
23602                 next unless $ok_to_pad;
23603
23604                 #----------------------end special check---------------
23605
23606                 my $length_1 = total_line_length( $ibeg,      $ipad - 1 );
23607                 my $length_2 = total_line_length( $ibeg_next, $inext_next - 1 );
23608                 $pad_spaces = $length_2 - $length_1;
23609
23610                 # If the first line has a leading ! and the second does
23611                 # not, then remove one space to try to align the next
23612                 # leading characters, which are often the same.  For example:
23613                 #  if (  !$ts
23614                 #      || $ts == $self->Holder
23615                 #      || $self->Holder->Type eq "Arena" )
23616                 #
23617                 # This usually helps readability, but if there are subsequent
23618                 # ! operators things will still get messed up.  For example:
23619                 #
23620                 #  if (  !exists $Net::DNS::typesbyname{$qtype}
23621                 #      && exists $Net::DNS::classesbyname{$qtype}
23622                 #      && !exists $Net::DNS::classesbyname{$qclass}
23623                 #      && exists $Net::DNS::typesbyname{$qclass} )
23624                 # We can't fix that.
23625                 if ($matches_without_bang) { $pad_spaces-- }
23626
23627                 # make sure this won't change if -lp is used
23628                 my $indentation_1 = $leading_spaces_to_go[$ibeg];
23629                 if ( ref($indentation_1)
23630                     && $indentation_1->get_recoverable_spaces() == 0 )
23631                 {
23632                     my $indentation_2 = $leading_spaces_to_go[$ibeg_next];
23633                     if ( ref($indentation_2)
23634                         && $indentation_2->get_recoverable_spaces() != 0 )
23635                     {
23636                         $pad_spaces = 0;
23637                     }
23638                 }
23639
23640                 # we might be able to handle a pad of -1 by removing a blank
23641                 # token
23642                 if ( $pad_spaces < 0 ) {
23643
23644                     # Deactivated for -kpit due to conflict. This block deletes
23645                     # a space in an attempt to improve alignment in some cases,
23646                     # but it may conflict with user spacing requests.  For now
23647                     # it is just deactivated if the -kpit option is used.
23648                     if ( $pad_spaces == -1 ) {
23649                         if (   $ipad > $ibeg
23650                             && $types_to_go[ $ipad - 1 ] eq 'b'
23651                             && !%keyword_paren_inner_tightness )
23652                         {
23653                             $self->pad_token( $ipad - 1, $pad_spaces );
23654                         }
23655                     }
23656                     $pad_spaces = 0;
23657                 }
23658
23659                 # now apply any padding for alignment
23660                 if ( $ipad >= 0 && $pad_spaces ) {
23661
23662                     my $length_t = total_line_length( $ibeg, $iend );
23663                     if ( $pad_spaces + $length_t <=
23664                         $maximum_line_length_at_level[ $levels_to_go[$ibeg] ] )
23665                     {
23666                         $self->pad_token( $ipad, $pad_spaces );
23667                     }
23668                 }
23669             }
23670         }
23671         continue {
23672             $iendm          = $iend;
23673             $ibegm          = $ibeg;
23674             $has_leading_op = $has_leading_op_next;
23675         } ## end of loop over lines
23676         return;
23677     }
23678 } ## end closure set_logical_padding
23679
23680 sub pad_token {
23681
23682     # insert $pad_spaces before token number $ipad
23683     my ( $self, $ipad, $pad_spaces ) = @_;
23684     my $rLL     = $self->[_rLL_];
23685     my $KK      = $K_to_go[$ipad];
23686     my $tok     = $rLL->[$KK]->[_TOKEN_];
23687     my $tok_len = $rLL->[$KK]->[_TOKEN_LENGTH_];
23688
23689     if ( $pad_spaces > 0 ) {
23690         $tok = ' ' x $pad_spaces . $tok;
23691         $tok_len += $pad_spaces;
23692     }
23693     elsif ( $pad_spaces == -1 && $tokens_to_go[$ipad] eq ' ' ) {
23694         $tok     = "";
23695         $tok_len = 0;
23696     }
23697     else {
23698
23699         # shouldn't happen
23700         return;
23701     }
23702
23703     $tok     = $rLL->[$KK]->[_TOKEN_]        = $tok;
23704     $tok_len = $rLL->[$KK]->[_TOKEN_LENGTH_] = $tok_len;
23705
23706     $token_lengths_to_go[$ipad] += $pad_spaces;
23707     $tokens_to_go[$ipad] = $tok;
23708
23709     foreach my $i ( $ipad .. $max_index_to_go ) {
23710         $summed_lengths_to_go[ $i + 1 ] += $pad_spaces;
23711     }
23712     return;
23713 }
23714
23715 {    ## begin closure make_alignment_patterns
23716
23717     my %keyword_map;
23718     my %operator_map;
23719     my %is_w_n_C;
23720     my %is_my_local_our;
23721     my %is_kwU;
23722     my %is_use_like;
23723     my %is_binary_type;
23724     my %is_binary_keyword;
23725     my %name_map;
23726
23727     BEGIN {
23728
23729         # Note: %block_type_map is now global to enable the -gal=s option
23730
23731         # map certain keywords to the same 'if' class to align
23732         # long if/elsif sequences. [elsif.pl]
23733         %keyword_map = (
23734             'unless'  => 'if',
23735             'else'    => 'if',
23736             'elsif'   => 'if',
23737             'when'    => 'given',
23738             'default' => 'given',
23739             'case'    => 'switch',
23740
23741             # treat an 'undef' similar to numbers and quotes
23742             'undef' => 'Q',
23743         );
23744
23745         # map certain operators to the same class for pattern matching
23746         %operator_map = (
23747             '!~' => '=~',
23748             '+=' => '+=',
23749             '-=' => '+=',
23750             '*=' => '+=',
23751             '/=' => '+=',
23752         );
23753
23754         %is_w_n_C = (
23755             'w' => 1,
23756             'n' => 1,
23757             'C' => 1,
23758         );
23759
23760         # leading keywords which to skip for efficiency when making parenless
23761         # container names
23762         my @q = qw( my local our return );
23763         @{is_my_local_our}{@q} = (1) x scalar(@q);
23764
23765         # leading keywords where we should just join one token to form
23766         # parenless name
23767         @q = qw( use );
23768         @{is_use_like}{@q} = (1) x scalar(@q);
23769
23770         # leading token types which may be used to make a container name
23771         @q = qw( k w U );
23772         @{is_kwU}{@q} = (1) x scalar(@q);
23773
23774         # token types which prevent using leading word as a container name
23775         @q = qw(
23776           x / : % . | ^ < = > || >= != *= => !~ == && |= .= -= =~ += <= %= ^= x= ~~ ** << /=
23777           &= // >> ~. &. |. ^.
23778           **= <<= >>= &&= ||= //= <=> !~~ &.= |.= ^.= <<~
23779         );
23780         push @q, ',';
23781         @{is_binary_type}{@q} = (1) x scalar(@q);
23782
23783         # token keywords which prevent using leading word as a container name
23784         @_ = qw(and or err eq ne cmp);
23785         @is_binary_keyword{@_} = (1) x scalar(@_);
23786
23787         # Some common function calls whose args can be aligned.  These do not
23788         # give good alignments if the lengths differ significantly.
23789         %name_map = (
23790             'unlike' => 'like',
23791             'isnt'   => 'is',
23792             ##'is_deeply' => 'is',   # poor; names lengths too different
23793         );
23794
23795     }
23796
23797     sub make_alignment_patterns {
23798
23799         # Here we do some important preliminary work for the
23800         # vertical aligner.  We create four arrays for one
23801         # output line. These arrays contain strings that can
23802         # be tested by the vertical aligner to see if
23803         # consecutive lines can be aligned vertically.
23804         #
23805         # The four arrays are indexed on the vertical
23806         # alignment fields and are:
23807         # @tokens - a list of any vertical alignment tokens for this line.
23808         #   These are tokens, such as '=' '&&' '#' etc which
23809         #   we want to might align vertically.  These are
23810         #   decorated with various information such as
23811         #   nesting depth to prevent unwanted vertical
23812         #   alignment matches.
23813         # @fields - the actual text of the line between the vertical alignment
23814         #   tokens.
23815         # @patterns - a modified list of token types, one for each alignment
23816         #   field.  These should normally each match before alignment is
23817         #   allowed, even when the alignment tokens match.
23818         # @field_lengths - the display width of each field
23819
23820         my ( $self, $ibeg, $iend, $ralignment_type_to_go, $alignment_count,
23821             $ralignment_hash )
23822           = @_;
23823
23824         # The var $ralignment_hash contains all of the alignments for this
23825         # line.  It is not yet used but is available for future coding in case
23826         # there is a need to do a preliminary scan of the alignment tokens.
23827         if (DEVEL_MODE) {
23828             my $new_count = 0;
23829             if ( defined($ralignment_hash) ) {
23830                 $new_count = keys %{$ralignment_hash};
23831             }
23832             my $old_count = $alignment_count;
23833             $old_count = 0 unless ($old_count);
23834             if ( $new_count != $old_count ) {
23835                 my $K   = $K_to_go[$ibeg];
23836                 my $rLL = $self->[_rLL_];
23837                 my $lnl = $rLL->[$K]->[_LINE_INDEX_];
23838                 Fault(
23839 "alignment hash token count gives count=$new_count but old count is $old_count near line=$lnl\n"
23840                 );
23841             }
23842         }
23843
23844         # -------------------------------------
23845         # Shortcut for lines without alignments
23846         # -------------------------------------
23847         if ( !$alignment_count ) {
23848             my $rtokens        = [];
23849             my $rfield_lengths = [ $summed_lengths_to_go[ $iend + 1 ] -
23850                   $summed_lengths_to_go[$ibeg] ];
23851             my $rpatterns;
23852             my $rfields;
23853             if ( $ibeg == $iend ) {
23854                 $rfields   = [ $tokens_to_go[$ibeg] ];
23855                 $rpatterns = [ $types_to_go[$ibeg] ];
23856             }
23857             else {
23858                 $rfields   = [ join( '', @tokens_to_go[ $ibeg .. $iend ] ) ];
23859                 $rpatterns = [ join( '', @types_to_go[ $ibeg .. $iend ] ) ];
23860             }
23861             return [ $rtokens, $rfields, $rpatterns, $rfield_lengths ];
23862         }
23863
23864         my $i_start        = $ibeg;
23865         my $depth          = 0;
23866         my %container_name = ( 0 => "" );
23867
23868         my @tokens        = ();
23869         my @fields        = ();
23870         my @patterns      = ();
23871         my @field_lengths = ();
23872
23873         #-------------------------------------------------------------
23874         # Make a container name for any uncontained commas, issue c089
23875         #-------------------------------------------------------------
23876         # This is a generalization of the fix for rt136416 which was a
23877         # specialized patch just for 'use Module' statements.
23878         # We restrict this to semicolon-terminated statements; that way
23879         # we know that the top level commas are not in a list container.
23880         if ( $ibeg == 0 && $iend == $max_index_to_go ) {
23881             my $iterm = $max_index_to_go;
23882             if ( $types_to_go[$iterm] eq '#' ) {
23883                 $iterm = $iprev_to_go[$iterm];
23884             }
23885
23886             # Alignment lines ending like '=> sub {';  fixes issue c093
23887             my $term_type_ok = $types_to_go[$iterm] eq ';';
23888             $term_type_ok ||=
23889               $tokens_to_go[$iterm] eq '{' && $block_type_to_go[$iterm];
23890
23891             if (   $iterm > $ibeg
23892                 && $term_type_ok
23893                 && !$is_my_local_our{ $tokens_to_go[$ibeg] }
23894                 && $levels_to_go[$ibeg] eq $levels_to_go[$iterm] )
23895             {
23896
23897                 # Make a container name by combining all leading barewords,
23898                 # keywords and functions.
23899                 my $name  = "";
23900                 my $count = 0;
23901                 my $count_max;
23902                 my $iname_end;
23903                 my $ilast_blank;
23904                 for ( $ibeg .. $iterm ) {
23905                     my $type = $types_to_go[$_];
23906
23907                     if ( $type eq 'b' ) {
23908                         $ilast_blank = $_;
23909                         next;
23910                     }
23911
23912                     my $token = $tokens_to_go[$_];
23913
23914                     # Give up if we find an opening paren, binary operator or
23915                     # comma within or after the proposed container name.
23916                     if (   $token eq '('
23917                         || $is_binary_type{$type}
23918                         || $type eq 'k' && $is_binary_keyword{$token} )
23919                     {
23920                         $name = "";
23921                         last;
23922                     }
23923
23924                     # The container name is only built of certain types:
23925                     last if ( !$is_kwU{$type} );
23926
23927                     # Normally it is made of one word, but two words for 'use'
23928                     if ( $count == 0 ) {
23929                         if (   $type eq 'k'
23930                             && $is_use_like{ $tokens_to_go[$_] } )
23931                         {
23932                             $count_max = 2;
23933                         }
23934                         else {
23935                             $count_max = 1;
23936                         }
23937                     }
23938                     elsif ( defined($count_max) && $count >= $count_max ) {
23939                         last;
23940                     }
23941
23942                     if ( defined( $name_map{$token} ) ) {
23943                         $token = $name_map{$token};
23944                     }
23945
23946                     $name .= ' ' . $token;
23947                     $iname_end = $_;
23948                     $count++;
23949                 }
23950
23951                 # Require a space after the container name token(s)
23952                 if (   $name
23953                     && defined($ilast_blank)
23954                     && $ilast_blank > $iname_end )
23955                 {
23956                     $name = substr( $name, 1 );
23957                     $container_name{'0'} = $name;
23958                 }
23959             }
23960         }
23961
23962         # --------------------
23963         # Loop over all tokens
23964         # --------------------
23965         my $j = 0;    # field index
23966
23967         $patterns[0] = "";
23968         my %token_count;
23969         for my $i ( $ibeg .. $iend ) {
23970
23971             # Keep track of containers balanced on this line only.
23972             # These are used below to prevent unwanted cross-line alignments.
23973             # Unbalanced containers already avoid aligning across
23974             # container boundaries.
23975
23976             my $type       = $types_to_go[$i];
23977             my $token      = $tokens_to_go[$i];
23978             my $depth_last = $depth;
23979             if ( $type_sequence_to_go[$i] ) {
23980                 if ( $is_opening_token{$token} ) {
23981
23982                     # if container is balanced on this line...
23983                     my $i_mate = $mate_index_to_go[$i];
23984                     if ( $i_mate > $i && $i_mate <= $iend ) {
23985                         $depth++;
23986
23987                      # Append the previous token name to make the container name
23988                      # more unique.  This name will also be given to any commas
23989                      # within this container, and it helps avoid undesirable
23990                      # alignments of different types of containers.
23991
23992                      # Containers beginning with { and [ are given those names
23993                      # for uniqueness. That way commas in different containers
23994                      # will not match. Here is an example of what this prevents:
23995                      #   a => [ 1,       2, 3 ],
23996                      #   b => { b1 => 4, b2 => 5 },
23997                      # Here is another example of what we avoid by labeling the
23998                      # commas properly:
23999
24000                    # is_d( [ $a,        $a ], [ $b,               $c ] );
24001                    # is_d( { foo => $a, bar => $a }, { foo => $b, bar => $c } );
24002                    # is_d( [ \$a,       \$a ], [ \$b,             \$c ] );
24003
24004                         my $name = $token;
24005                         if ( $token eq '(' ) {
24006                             $name = $self->make_paren_name($i);
24007                         }
24008
24009                         # name cannot be '.', so change to something else if so
24010                         if ( $name eq '.' ) { $name = 'dot' }
24011
24012                         $container_name{$depth} = "+" . $name;
24013
24014                         # Make the container name even more unique if necessary.
24015                         # If we are not vertically aligning this opening paren,
24016                         # append a character count to avoid bad alignment since
24017                         # it usually looks bad to align commas within containers
24018                         # for which the opening parens do not align.  Here
24019                         # is an example very BAD alignment of commas (because
24020                         # the atan2 functions are not all aligned):
24021                         #    $XY =
24022                         #      $X * $RTYSQP1 * atan2( $X, $RTYSQP1 ) +
24023                         #      $Y * $RTXSQP1 * atan2( $Y, $RTXSQP1 ) -
24024                         #      $X * atan2( $X,            1 ) -
24025                         #      $Y * atan2( $Y,            1 );
24026                         #
24027                         # On the other hand, it is usually okay to align commas
24028                         # if opening parens align, such as:
24029                         #    glVertex3d( $cx + $s * $xs, $cy,            $z );
24030                         #    glVertex3d( $cx,            $cy + $s * $ys, $z );
24031                         #    glVertex3d( $cx - $s * $xs, $cy,            $z );
24032                         #    glVertex3d( $cx,            $cy - $s * $ys, $z );
24033                         #
24034                         # To distinguish between these situations, we append
24035                         # the length of the line from the previous matching
24036                         # token, or beginning of line, to the function name.
24037                         # This will allow the vertical aligner to reject
24038                         # undesirable matches.
24039
24040                         # if we are not aligning on this paren...
24041                         if ( !$ralignment_type_to_go->[$i] ) {
24042
24043                             # Sum length from previous alignment
24044                             my $len = token_sequence_length( $i_start, $i - 1 );
24045
24046                             # Minor patch: do not include the length of any '!'.
24047                             # Otherwise, commas in the following line will not
24048                             # match
24049                             #  ok( 20, tapprox( ( pdl 2,  3 ), ( pdl 2, 3 ) ) );
24050                             #  ok( 21, !tapprox( ( pdl 2, 3 ), ( pdl 2, 4 ) ) );
24051                             if ( grep { $_ eq '!' }
24052                                 @types_to_go[ $i_start .. $i - 1 ] )
24053                             {
24054                                 $len -= 1;
24055                             }
24056
24057                             if ( $i_start == $ibeg ) {
24058
24059                                 # For first token, use distance from start of
24060                                 # line but subtract off the indentation due to
24061                                 # level.  Otherwise, results could vary with
24062                                 # indentation.
24063                                 $len +=
24064                                   leading_spaces_to_go($ibeg) -
24065                                   $levels_to_go[$i_start] *
24066                                   $rOpts_indent_columns;
24067                                 if ( $len < 0 ) { $len = 0 }
24068                             }
24069
24070                             # tack this length onto the container name to try
24071                             # to make a unique token name
24072                             $container_name{$depth} .= "-" . $len;
24073                         } ## end if ( !$ralignment_type_to_go...)
24074                     } ## end if ( $i_mate > $i && $i_mate...)
24075                 } ## end if ( $is_opening_token...)
24076
24077                 elsif ( $is_closing_type{$token} ) {
24078                     $depth-- if $depth > 0;
24079                 }
24080             } ## end if ( $type_sequence_to_go...)
24081
24082             # if we find a new synchronization token, we are done with
24083             # a field
24084             if ( $i > $i_start && $ralignment_type_to_go->[$i] ) {
24085
24086                 my $tok = my $raw_tok = $ralignment_type_to_go->[$i];
24087
24088                 # map similar items
24089                 my $tok_map = $operator_map{$tok};
24090                 $tok = $tok_map if ($tok_map);
24091
24092                 # make separators in different nesting depths unique
24093                 # by appending the nesting depth digit.
24094                 if ( $raw_tok ne '#' ) {
24095                     $tok .= "$nesting_depth_to_go[$i]";
24096                 }
24097
24098                 # also decorate commas with any container name to avoid
24099                 # unwanted cross-line alignments.
24100                 if ( $raw_tok eq ',' || $raw_tok eq '=>' ) {
24101
24102                   # If we are at an opening token which increased depth, we have
24103                   # to use the name from the previous depth.
24104                     my $depth_p =
24105                       ( $depth_last < $depth ? $depth_last : $depth );
24106                     if ( $container_name{$depth_p} ) {
24107                         $tok .= $container_name{$depth_p};
24108                     }
24109                 }
24110
24111                 # Patch to avoid aligning leading and trailing if, unless.
24112                 # Mark trailing if, unless statements with container names.
24113                 # This makes them different from leading if, unless which
24114                 # are not so marked at present.  If we ever need to name
24115                 # them too, we could use ci to distinguish them.
24116                 # Example problem to avoid:
24117                 #    return ( 2, "DBERROR" )
24118                 #      if ( $retval == 2 );
24119                 #    if   ( scalar @_ ) {
24120                 #        my ( $a, $b, $c, $d, $e, $f ) = @_;
24121                 #    }
24122                 if ( $raw_tok eq '(' ) {
24123                     if (   $ci_levels_to_go[$ibeg]
24124                         && $container_name{$depth} =~ /^\+(if|unless)/ )
24125                     {
24126                         $tok .= $container_name{$depth};
24127                     }
24128                 }
24129
24130                 # Decorate block braces with block types to avoid
24131                 # unwanted alignments such as the following:
24132                 # foreach ( @{$routput_array} ) { $fh->print($_) }
24133                 # eval                          { $fh->close() };
24134                 if ( $raw_tok eq '{' && $block_type_to_go[$i] ) {
24135                     my $block_type = $block_type_to_go[$i];
24136
24137                     # map certain related block types to allow
24138                     # else blocks to align
24139                     $block_type = $block_type_map{$block_type}
24140                       if ( defined( $block_type_map{$block_type} ) );
24141
24142                     # remove sub names to allow one-line sub braces to align
24143                     # regardless of name
24144                     if ( $block_type =~ /$SUB_PATTERN/ ) { $block_type = 'sub' }
24145
24146                     # allow all control-type blocks to align
24147                     if ( $block_type =~ /^[A-Z]+$/ ) { $block_type = 'BEGIN' }
24148
24149                     $tok .= $block_type;
24150                 }
24151
24152                 # Mark multiple copies of certain tokens with the copy number
24153                 # This will allow the aligner to decide if they are matched.
24154                 # For now, only do this for equals. For example, the two
24155                 # equals on the next line will be labeled '=0' and '=0.2'.
24156                 # Later, the '=0.2' will be ignored in alignment because it
24157                 # has no match.
24158
24159                 # $|          = $debug = 1 if $opt_d;
24160                 # $full_index = 1          if $opt_i;
24161
24162                 if ( $raw_tok eq '=' || $raw_tok eq '=>' ) {
24163                     $token_count{$tok}++;
24164                     if ( $token_count{$tok} > 1 ) {
24165                         $tok .= '.' . $token_count{$tok};
24166                     }
24167                 }
24168
24169                 # concatenate the text of the consecutive tokens to form
24170                 # the field
24171                 push( @fields,
24172                     join( '', @tokens_to_go[ $i_start .. $i - 1 ] ) );
24173
24174                 push @field_lengths,
24175                   $summed_lengths_to_go[$i] - $summed_lengths_to_go[$i_start];
24176
24177                 # store the alignment token for this field
24178                 push( @tokens, $tok );
24179
24180                 # get ready for the next batch
24181                 $i_start = $i;
24182                 $j++;
24183                 $patterns[$j] = "";
24184             } ## end if ( new synchronization token
24185
24186             # continue accumulating tokens
24187
24188             # for keywords we have to use the actual text
24189             if ( $type eq 'k' ) {
24190
24191                 my $tok_fix = $tokens_to_go[$i];
24192
24193                 # but map certain keywords to a common string to allow
24194                 # alignment.
24195                 $tok_fix = $keyword_map{$tok_fix}
24196                   if ( defined( $keyword_map{$tok_fix} ) );
24197                 $patterns[$j] .= $tok_fix;
24198             }
24199
24200             elsif ( $type eq 'b' ) {
24201                 $patterns[$j] .= $type;
24202             }
24203
24204             # Mark most things before arrows as a quote to
24205             # get them to line up. Testfile: mixed.pl.
24206
24207             # handle $type =~ /^[wnC]$/
24208             elsif ( $is_w_n_C{$type} ) {
24209
24210                 my $type_fix = $type;
24211
24212                 if ( $i < $iend - 1 ) {
24213                     my $next_type = $types_to_go[ $i + 1 ];
24214                     my $i_next_nonblank =
24215                       ( ( $next_type eq 'b' ) ? $i + 2 : $i + 1 );
24216
24217                     if ( $types_to_go[$i_next_nonblank] eq '=>' ) {
24218                         $type_fix = 'Q';
24219
24220                         # Patch to ignore leading minus before words,
24221                         # by changing pattern 'mQ' into just 'Q',
24222                         # so that we can align things like this:
24223                         #  Button   => "Print letter \"~$_\"",
24224                         #  -command => [ sub { print "$_[0]\n" }, $_ ],
24225                         if ( $patterns[$j] eq 'm' ) { $patterns[$j] = "" }
24226                     }
24227                 }
24228
24229                 # Convert a bareword within braces into a quote for
24230                 # matching.  This will allow alignment of expressions like
24231                 # this:
24232                 #    local ( $SIG{'INT'} ) = IGNORE;
24233                 #    local ( $SIG{ALRM} )  = 'POSTMAN';
24234                 if (   $type eq 'w'
24235                     && $i > $ibeg
24236                     && $i < $iend
24237                     && $types_to_go[ $i - 1 ] eq 'L'
24238                     && $types_to_go[ $i + 1 ] eq 'R' )
24239                 {
24240                     $type_fix = 'Q';
24241                 }
24242
24243                 # patch to make numbers and quotes align
24244                 if ( $type eq 'n' ) { $type_fix = 'Q' }
24245
24246                 $patterns[$j] .= $type_fix;
24247             } ## end elsif ( $is_w_n_C{$type} )
24248
24249             # ignore any ! in patterns
24250             elsif ( $type eq '!' ) { }
24251
24252             # everything else
24253             else {
24254                 $patterns[$j] .= $type;
24255             }
24256
24257             # remove any zero-level name at first fat comma
24258             if ( $depth == 0 && $type eq '=>' ) {
24259                 $container_name{$depth} = "";
24260             }
24261         } ## end for my $i ( $ibeg .. $iend)
24262
24263         # done with this line .. join text of tokens to make the last field
24264         push( @fields, join( '', @tokens_to_go[ $i_start .. $iend ] ) );
24265         push @field_lengths,
24266           $summed_lengths_to_go[ $iend + 1 ] - $summed_lengths_to_go[$i_start];
24267
24268         return [ \@tokens, \@fields, \@patterns, \@field_lengths ];
24269     } ## end sub make_alignment_patterns
24270
24271 } ## end closure make_alignment_patterns
24272
24273 sub make_paren_name {
24274     my ( $self, $i ) = @_;
24275
24276     # The token at index $i is a '('.
24277     # Create an alignment name for it to avoid incorrect alignments.
24278
24279     # Start with the name of the previous nonblank token...
24280     my $name = "";
24281     my $im   = $i - 1;
24282     return "" if ( $im < 0 );
24283     if ( $types_to_go[$im] eq 'b' ) { $im--; }
24284     return "" if ( $im < 0 );
24285     $name = $tokens_to_go[$im];
24286
24287     # Prepend any sub name to an isolated -> to avoid unwanted alignments
24288     # [test case is test8/penco.pl]
24289     if ( $name eq '->' ) {
24290         $im--;
24291         if ( $im >= 0 && $types_to_go[$im] ne 'b' ) {
24292             $name = $tokens_to_go[$im] . $name;
24293         }
24294     }
24295
24296     # Finally, remove any leading arrows
24297     if ( substr( $name, 0, 2 ) eq '->' ) {
24298         $name = substr( $name, 2 );
24299     }
24300     return $name;
24301 }
24302
24303 {    ## begin closure final_indentation_adjustment
24304
24305     my ( $last_indentation_written, $last_unadjusted_indentation,
24306         $last_leading_token );
24307
24308     sub initialize_final_indentation_adjustment {
24309         $last_indentation_written    = 0;
24310         $last_unadjusted_indentation = 0;
24311         $last_leading_token          = "";
24312         return;
24313     }
24314
24315     sub final_indentation_adjustment {
24316
24317         #--------------------------------------------------------------------
24318         # This routine sets the final indentation of a line in the Formatter.
24319         #--------------------------------------------------------------------
24320
24321         # It starts with the basic indentation which has been defined for the
24322         # leading token, and then takes into account any options that the user
24323         # has set regarding special indenting and outdenting.
24324
24325         # This routine has to resolve a number of complex interacting issues,
24326         # including:
24327         # 1. The various -cti=n type flags, which contain the desired change in
24328         #    indentation for lines ending in commas and semicolons, should be
24329         #    followed,
24330         # 2. qw quotes require special processing and do not fit perfectly
24331         #    with normal containers,
24332         # 3. formatting with -wn can complicate things, especially with qw
24333         #    quotes,
24334         # 4. formatting with the -lp option is complicated, and does not
24335         #    work well with qw quotes and with -wn formatting.
24336         # 5. a number of special situations, such as 'cuddled' formatting.
24337         # 6. This routine is mainly concerned with outdenting closing tokens
24338         #    but note that there is some overlap with the functions of sub
24339         #    undo_ci, which was processed earlier, so care has to be taken to
24340         #    keep them coordinated.
24341
24342         my (
24343             $self,       $ibeg,
24344             $iend,       $rfields,
24345             $rpatterns,  $ri_first,
24346             $ri_last,    $rindentation_list,
24347             $level_jump, $starting_in_quote,
24348             $is_static_block_comment,
24349         ) = @_;
24350
24351         my $rLL                      = $self->[_rLL_];
24352         my $Klimit                   = $self->[_Klimit_];
24353         my $ris_bli_container        = $self->[_ris_bli_container_];
24354         my $rseqno_controlling_my_ci = $self->[_rseqno_controlling_my_ci_];
24355         my $rwant_reduced_ci         = $self->[_rwant_reduced_ci_];
24356         my $rK_weld_left             = $self->[_rK_weld_left_];
24357
24358         # Find the last code token of this line
24359         my $i_terminal    = $iend;
24360         my $terminal_type = $types_to_go[$iend];
24361         if ( $terminal_type eq '#' && $i_terminal > $ibeg ) {
24362             $i_terminal -= 1;
24363             $terminal_type = $types_to_go[$i_terminal];
24364             if ( $terminal_type eq 'b' && $i_terminal > $ibeg ) {
24365                 $i_terminal -= 1;
24366                 $terminal_type = $types_to_go[$i_terminal];
24367             }
24368         }
24369
24370         my $terminal_block_type = $block_type_to_go[$i_terminal];
24371         my $is_outdented_line   = 0;
24372
24373         my $type_beg            = $types_to_go[$ibeg];
24374         my $token_beg           = $tokens_to_go[$ibeg];
24375         my $block_type_beg      = $block_type_to_go[$ibeg];
24376         my $level_beg           = $levels_to_go[$ibeg];
24377         my $leading_spaces_beg  = $leading_spaces_to_go[$ibeg];
24378         my $K_beg               = $K_to_go[$ibeg];
24379         my $seqno_beg           = $type_sequence_to_go[$ibeg];
24380         my $ibeg_weld_fix       = $ibeg;
24381         my $is_closing_type_beg = $is_closing_type{$type_beg};
24382         my $is_bli_beg = $seqno_beg ? $ris_bli_container->{$seqno_beg} : 0;
24383
24384         # QW INDENTATION PATCH 3:
24385         my $seqno_qw_closing;
24386         if ( $type_beg eq 'q' && $ibeg == 0 ) {
24387             my $KK = $K_to_go[$ibeg];
24388             $seqno_qw_closing =
24389               $self->[_rending_multiline_qw_seqno_by_K_]->{$KK};
24390         }
24391
24392         my $is_semicolon_terminated = $terminal_type eq ';'
24393           && ( $nesting_depth_to_go[$iend] < $nesting_depth_to_go[$ibeg]
24394             || $seqno_qw_closing );
24395
24396         # NOTE: A future improvement would be to make it semicolon terminated
24397         # even if it does not have a semicolon but is followed by a closing
24398         # block brace. This would undo ci even for something like the
24399         # following, in which the final paren does not have a semicolon because
24400         # it is a possible weld location:
24401
24402         # if ($BOLD_MATH) {
24403         #     (
24404         #         $labels, $comment,
24405         #         join( '', '<B>', &make_math( $mode, '', '', $_ ), '</B>' )
24406         #     )
24407         # }
24408         #
24409
24410         # MOJO: Set a flag if this lines begins with ')->'
24411         my $leading_paren_arrow = (
24412                  $is_closing_type_beg
24413               && $token_beg eq ')'
24414               && (
24415                 ( $ibeg < $i_terminal && $types_to_go[ $ibeg + 1 ] eq '->' )
24416                 || (   $ibeg < $i_terminal - 1
24417                     && $types_to_go[ $ibeg + 1 ] eq 'b'
24418                     && $types_to_go[ $ibeg + 2 ] eq '->' )
24419               )
24420         );
24421
24422         #---------------------------------------------------------
24423         # Section 1: set a flag and a default indentation
24424         #
24425         # Most lines are indented according to the initial token.
24426         # But it is common to outdent to the level just after the
24427         # terminal token in certain cases...
24428         # adjust_indentation flag:
24429         #       0 - do not adjust
24430         #       1 - outdent
24431         #       2 - vertically align with opening token
24432         #       3 - indent
24433         #---------------------------------------------------------
24434         my $adjust_indentation         = 0;
24435         my $default_adjust_indentation = $adjust_indentation;
24436
24437         my (
24438             $opening_indentation, $opening_offset,
24439             $is_leading,          $opening_exists
24440         );
24441
24442         # Honor any flag to reduce -ci set by the -bbxi=n option
24443         if ( $seqno_beg && $rwant_reduced_ci->{$seqno_beg} ) {
24444
24445             # if this is an opening, it must be alone on the line ...
24446             if ( $is_closing_type{$type_beg} || $ibeg == $i_terminal ) {
24447                 $adjust_indentation = 1;
24448             }
24449
24450             # ... or a single welded unit (fix for b1173)
24451             elsif ($total_weld_count) {
24452                 my $Kterm      = $K_to_go[$i_terminal];
24453                 my $Kterm_test = $rK_weld_left->{$Kterm};
24454                 if ( defined($Kterm_test) && $Kterm_test >= $K_beg ) {
24455                     $Kterm = $Kterm_test;
24456                 }
24457                 if ( $Kterm == $K_beg ) { $adjust_indentation = 1 }
24458             }
24459         }
24460
24461         # Update the $is_bli flag as we go. It is initially 1.
24462         # We note seeing a leading opening brace by setting it to 2.
24463         # If we get to the closing brace without seeing the opening then we
24464         # turn it off.  This occurs if the opening brace did not get output
24465         # at the start of a line, so we will then indent the closing brace
24466         # in the default way.
24467         if ( $is_bli_beg && $is_bli_beg == 1 ) {
24468             my $K_opening_container = $self->[_K_opening_container_];
24469             my $K_opening           = $K_opening_container->{$seqno_beg};
24470             if ( $K_beg eq $K_opening ) {
24471                 $ris_bli_container->{$seqno_beg} = $is_bli_beg = 2;
24472             }
24473             else { $is_bli_beg = 0 }
24474         }
24475
24476         # QW PATCH for the combination -lp -wn
24477         # For -lp formatting use $ibeg_weld_fix to get around the problem
24478         # that with -lp type formatting the opening and closing tokens to not
24479         # have sequence numbers.
24480         if ( $seqno_qw_closing && $total_weld_count ) {
24481             my $i_plus = $inext_to_go[$ibeg];
24482             if ( $i_plus <= $max_index_to_go ) {
24483                 my $K_plus = $K_to_go[$i_plus];
24484                 if ( defined( $rK_weld_left->{$K_plus} ) ) {
24485                     $ibeg_weld_fix = $i_plus;
24486                 }
24487             }
24488         }
24489
24490         # if we are at a closing token of some type..
24491         if ( $is_closing_type_beg || $seqno_qw_closing ) {
24492
24493             # get the indentation of the line containing the corresponding
24494             # opening token
24495             (
24496                 $opening_indentation, $opening_offset,
24497                 $is_leading,          $opening_exists
24498               )
24499               = $self->get_opening_indentation( $ibeg_weld_fix, $ri_first,
24500                 $ri_last, $rindentation_list, $seqno_qw_closing );
24501
24502             my $terminal_is_in_list = $self->is_in_list_by_i($i_terminal);
24503
24504             # First set the default behavior:
24505             if (
24506
24507                 # default behavior is to outdent closing lines
24508                 # of the form:   ");  };  ];  )->xxx;"
24509                 $is_semicolon_terminated
24510
24511                 # and 'cuddled parens' of the form:   ")->pack("
24512                 # Bug fix for RT #123749]: the types here were
24513                 # incorrectly '(' and ')'.  Corrected to be '{' and '}'
24514                 || (
24515                        $terminal_type eq '{'
24516                     && $type_beg eq '}'
24517                     && ( $nesting_depth_to_go[$iend] + 1 ==
24518                         $nesting_depth_to_go[$ibeg] )
24519                 )
24520
24521                 # remove continuation indentation for any line like
24522                 #       } ... {
24523                 # or without ending '{' and unbalanced, such as
24524                 #       such as '}->{$operator}'
24525                 || (
24526                     $type_beg eq '}'
24527
24528                     && (   $types_to_go[$iend] eq '{'
24529                         || $levels_to_go[$iend] < $level_beg )
24530                 )
24531
24532                 # and when the next line is at a lower indentation level...
24533
24534                 # PATCH #1: and only if the style allows undoing continuation
24535                 # for all closing token types. We should really wait until
24536                 # the indentation of the next line is known and then make
24537                 # a decision, but that would require another pass.
24538
24539                 # PATCH #2: and not if this token is under -xci control
24540                 || (   $level_jump < 0
24541                     && !$some_closing_token_indentation
24542                     && !$rseqno_controlling_my_ci->{$K_beg} )
24543
24544                 # Patch for -wn=2, multiple welded closing tokens
24545                 || (   $i_terminal > $ibeg
24546                     && $is_closing_type{ $types_to_go[$iend] } )
24547
24548                 # Alternate Patch for git #51, isolated closing qw token not
24549                 # outdented if no-delete-old-newlines is set. This works, but
24550                 # a more general patch elsewhere fixes the real problem: ljump.
24551                 # || ( $seqno_qw_closing && $ibeg == $i_terminal )
24552
24553               )
24554             {
24555                 $adjust_indentation = 1;
24556             }
24557
24558             # outdent something like '),'
24559             if (
24560                 $terminal_type eq ','
24561
24562                 # Removed this constraint for -wn
24563                 # OLD: allow just one character before the comma
24564                 # && $i_terminal == $ibeg + 1
24565
24566                 # require LIST environment; otherwise, we may outdent too much -
24567                 # this can happen in calls without parentheses (overload.t);
24568                 && $terminal_is_in_list
24569               )
24570             {
24571                 $adjust_indentation = 1;
24572             }
24573
24574             # undo continuation indentation of a terminal closing token if
24575             # it is the last token before a level decrease.  This will allow
24576             # a closing token to line up with its opening counterpart, and
24577             # avoids an indentation jump larger than 1 level.
24578             if (   $i_terminal == $ibeg
24579                 && $is_closing_type_beg
24580                 && defined($K_beg)
24581                 && $K_beg < $Klimit )
24582             {
24583                 my $K_plus    = $K_beg + 1;
24584                 my $type_plus = $rLL->[$K_plus]->[_TYPE_];
24585
24586                 if ( $type_plus eq 'b' && $K_plus < $Klimit ) {
24587                     $type_plus = $rLL->[ ++$K_plus ]->[_TYPE_];
24588                 }
24589
24590                 if ( $type_plus eq '#' && $K_plus < $Klimit ) {
24591                     $type_plus = $rLL->[ ++$K_plus ]->[_TYPE_];
24592                     if ( $type_plus eq 'b' && $K_plus < $Klimit ) {
24593                         $type_plus = $rLL->[ ++$K_plus ]->[_TYPE_];
24594                     }
24595
24596                     # Note: we have skipped past just one comment (perhaps a
24597                     # side comment).  There could be more, and we could easily
24598                     # skip past all the rest with the following code, or with a
24599                     # while loop.  It would be rare to have to do this, and
24600                     # those block comments would still be indented, so it would
24601                     # to leave them indented.  So it seems best to just stop at
24602                     # a maximum of one comment.
24603                     ##if ($type_plus eq '#') {
24604                     ##   $K_plus = $self->K_next_code($K_plus);
24605                     ##}
24606                 }
24607
24608                 if ( !$is_bli_beg && defined($K_plus) ) {
24609                     my $lev        = $level_beg;
24610                     my $level_next = $rLL->[$K_plus]->[_LEVEL_];
24611
24612                     # and do not undo ci if it was set by the -xci option
24613                     $adjust_indentation = 1
24614                       if ( $level_next < $lev
24615                         && !$rseqno_controlling_my_ci->{$K_beg} );
24616                 }
24617
24618                 # Patch for RT #96101, in which closing brace of anonymous subs
24619                 # was not outdented.  We should look ahead and see if there is
24620                 # a level decrease at the next token (i.e., a closing token),
24621                 # but right now we do not have that information.  For now
24622                 # we see if we are in a list, and this works well.
24623                 # See test files 'sub*.t' for good test cases.
24624                 if (   $terminal_is_in_list
24625                     && !$rOpts_indent_closing_brace
24626                     && $block_type_beg
24627                     && $block_type_beg =~ /$ASUB_PATTERN/ )
24628                 {
24629                     (
24630                         $opening_indentation, $opening_offset,
24631                         $is_leading,          $opening_exists
24632                       )
24633                       = $self->get_opening_indentation( $ibeg, $ri_first,
24634                         $ri_last, $rindentation_list );
24635                     my $indentation = $leading_spaces_beg;
24636                     if ( defined($opening_indentation)
24637                         && get_spaces($indentation) >
24638                         get_spaces($opening_indentation) )
24639                     {
24640                         $adjust_indentation = 1;
24641                     }
24642                 }
24643             }
24644
24645             # YVES patch 1 of 2:
24646             # Undo ci of line with leading closing eval brace,
24647             # but not beyond the indention of the line with
24648             # the opening brace.
24649             if (
24650                 $block_type_beg eq 'eval'
24651                 ##&& !$rOpts_line_up_parentheses
24652                 && !ref($leading_spaces_beg)
24653                 && !$rOpts_indent_closing_brace
24654               )
24655             {
24656                 (
24657                     $opening_indentation, $opening_offset,
24658                     $is_leading,          $opening_exists
24659                   )
24660                   = $self->get_opening_indentation( $ibeg, $ri_first, $ri_last,
24661                     $rindentation_list );
24662                 my $indentation = $leading_spaces_beg;
24663                 if ( defined($opening_indentation)
24664                     && get_spaces($indentation) >
24665                     get_spaces($opening_indentation) )
24666                 {
24667                     $adjust_indentation = 1;
24668                 }
24669             }
24670
24671             # patch for issue git #40: -bli setting has priority
24672             $adjust_indentation = 0 if ($is_bli_beg);
24673
24674             $default_adjust_indentation = $adjust_indentation;
24675
24676             # Now modify default behavior according to user request:
24677             # handle option to indent non-blocks of the form );  };  ];
24678             # But don't do special indentation to something like ')->pack('
24679             if ( !$block_type_beg ) {
24680
24681                 # Note that logical padding has already been applied, so we may
24682                 # need to remove some spaces to get a valid hash key.
24683                 my $tok = $token_beg;
24684                 my $cti = $closing_token_indentation{$tok};
24685
24686                 # Fix the value of 'cti' for an isloated non-welded closing qw
24687                 # delimiter.
24688                 if ( $seqno_qw_closing && $ibeg_weld_fix == $ibeg ) {
24689
24690                     # A quote delimiter which is not a container will not have
24691                     # a cti value defined.  In this case use the style of a
24692                     # paren. For example
24693                     #   my @fars = (
24694                     #      qw<
24695                     #        far
24696                     #        farfar
24697                     #        farfars-far
24698                     #      >,
24699                     #   );
24700                     if ( !defined($cti) && length($tok) == 1 ) {
24701
24702                         # something other than ')', '}', ']' ; use flag for ')'
24703                         $cti = $closing_token_indentation{')'};
24704
24705                         # But for now, do not outdent non-container qw
24706                         # delimiters because it would would change existing
24707                         # formatting.
24708                         if ( $tok ne '>' ) { $cti = 3 }
24709                     }
24710
24711                     # A non-welded closing qw cannot currently use -cti=1
24712                     # because that option requires a sequence number to find
24713                     # the opening indentation, and qw quote delimiters are not
24714                     # sequenced items.
24715                     if ( defined($cti) && $cti == 1 ) { $cti = 0 }
24716                 }
24717
24718                 if ( !defined($cti) ) {
24719
24720                     # $cti may not be defined for several reasons.
24721                     # -padding may have been applied so the character
24722                     #  has a length > 1
24723                     # - we may have welded to a closing quote token.
24724                     #   Here is an example (perltidy -wn):
24725                     #       __PACKAGE__->load_components( qw(
24726                     #  >         Core
24727                     #  >
24728                     #  >     ) );
24729                     $adjust_indentation = 0;
24730
24731                 }
24732                 elsif ( $cti == 1 ) {
24733                     if (   $i_terminal <= $ibeg + 1
24734                         || $is_semicolon_terminated )
24735                     {
24736                         $adjust_indentation = 2;
24737                     }
24738                     else {
24739                         $adjust_indentation = 0;
24740                     }
24741                 }
24742                 elsif ( $cti == 2 ) {
24743                     if ($is_semicolon_terminated) {
24744                         $adjust_indentation = 3;
24745                     }
24746                     else {
24747                         $adjust_indentation = 0;
24748                     }
24749                 }
24750                 elsif ( $cti == 3 ) {
24751                     $adjust_indentation = 3;
24752                 }
24753             }
24754
24755             # handle option to indent blocks
24756             else {
24757                 if (
24758                     $rOpts_indent_closing_brace
24759                     && (
24760                         $i_terminal == $ibeg    #  isolated terminal '}'
24761                         || $is_semicolon_terminated
24762                     )
24763                   )                             #  } xxxx ;
24764                 {
24765                     $adjust_indentation = 3;
24766                 }
24767             }
24768         }
24769
24770         # if at ');', '};', '>;', and '];' of a terminal qw quote
24771         elsif (
24772                substr( $rpatterns->[0], 0, 2 ) eq 'qb'
24773             && substr( $rfields->[0], -1, 1 ) eq ';'
24774             ##&& $rpatterns->[0] =~ /^qb*;$/
24775             && $rfields->[0] =~ /^([\)\}\]\>]);$/
24776           )
24777         {
24778             if ( $closing_token_indentation{$1} == 0 ) {
24779                 $adjust_indentation = 1;
24780             }
24781             else {
24782                 $adjust_indentation = 3;
24783             }
24784         }
24785
24786         # if line begins with a ':', align it with any
24787         # previous line leading with corresponding ?
24788         elsif ( $type_beg eq ':' ) {
24789             (
24790                 $opening_indentation, $opening_offset,
24791                 $is_leading,          $opening_exists
24792               )
24793               = $self->get_opening_indentation( $ibeg, $ri_first, $ri_last,
24794                 $rindentation_list );
24795             if ($is_leading) { $adjust_indentation = 2; }
24796         }
24797
24798         #---------------------------------------------------------
24799         # Section 2: set indentation according to flag set above
24800         #
24801         # Select the indentation object to define leading
24802         # whitespace.  If we are outdenting something like '} } );'
24803         # then we want to use one level below the last token
24804         # ($i_terminal) in order to get it to fully outdent through
24805         # all levels.
24806         #---------------------------------------------------------
24807         my $indentation;
24808         my $lev;
24809         my $level_end = $levels_to_go[$iend];
24810
24811         if ( $adjust_indentation == 0 ) {
24812             $indentation = $leading_spaces_beg;
24813             $lev         = $level_beg;
24814         }
24815         elsif ( $adjust_indentation == 1 ) {
24816
24817             # Change the indentation to be that of a different token on the line
24818             # Previously, the indentation of the terminal token was used:
24819             # OLD CODING:
24820             # $indentation = $reduced_spaces_to_go[$i_terminal];
24821             # $lev         = $levels_to_go[$i_terminal];
24822
24823             # Generalization for MOJO:
24824             # Use the lowest level indentation of the tokens on the line.
24825             # For example, here we can use the indentation of the ending ';':
24826             #    } until ($selection > 0 and $selection < 10);   # ok to use ';'
24827             # But this will not outdent if we use the terminal indentation:
24828             #    )->then( sub {      # use indentation of the ->, not the {
24829             # Warning: reduced_spaces_to_go[] may be a reference, do not
24830             # do numerical checks with it
24831
24832             my $i_ind = $ibeg;
24833             $indentation = $reduced_spaces_to_go[$i_ind];
24834             $lev         = $levels_to_go[$i_ind];
24835             while ( $i_ind < $i_terminal ) {
24836                 $i_ind++;
24837                 if ( $levels_to_go[$i_ind] < $lev ) {
24838                     $indentation = $reduced_spaces_to_go[$i_ind];
24839                     $lev         = $levels_to_go[$i_ind];
24840                 }
24841             }
24842         }
24843
24844         # handle indented closing token which aligns with opening token
24845         elsif ( $adjust_indentation == 2 ) {
24846
24847             # handle option to align closing token with opening token
24848             $lev = $level_beg;
24849
24850             # calculate spaces needed to align with opening token
24851             my $space_count =
24852               get_spaces($opening_indentation) + $opening_offset;
24853
24854             # Indent less than the previous line.
24855             #
24856             # Problem: For -lp we don't exactly know what it was if there
24857             # were recoverable spaces sent to the aligner.  A good solution
24858             # would be to force a flush of the vertical alignment buffer, so
24859             # that we would know.  For now, this rule is used for -lp:
24860             #
24861             # When the last line did not start with a closing token we will
24862             # be optimistic that the aligner will recover everything wanted.
24863             #
24864             # This rule will prevent us from breaking a hierarchy of closing
24865             # tokens, and in a worst case will leave a closing paren too far
24866             # indented, but this is better than frequently leaving it not
24867             # indented enough.
24868             my $last_spaces = get_spaces($last_indentation_written);
24869
24870             if ( ref($last_indentation_written)
24871                 && !$is_closing_token{$last_leading_token} )
24872             {
24873                 $last_spaces +=
24874                   get_recoverable_spaces($last_indentation_written);
24875             }
24876
24877             # reset the indentation to the new space count if it works
24878             # only options are all or none: nothing in-between looks good
24879             $lev = $level_beg;
24880
24881             my $diff = $last_spaces - $space_count;
24882             if ( $diff > 0 ) {
24883                 $indentation = $space_count;
24884             }
24885             else {
24886
24887                 # We need to fix things ... but there is no good way to do it.
24888                 # The best solution is for the user to use a longer maximum
24889                 # line length.  We could get a smooth variation if we just move
24890                 # the paren in using
24891                 #    $space_count -= ( 1 - $diff );
24892                 # But unfortunately this can give a rather unbalanced look.
24893
24894                 # For -xlp we currently allow a tolerance of one indentation
24895                 # level and then revert to a simpler default.  This will jump
24896                 # suddenly but keeps a balanced look.
24897                 if (   $rOpts_extended_line_up_parentheses
24898                     && $diff >= -$rOpts_indent_columns
24899                     && $space_count > $leading_spaces_beg )
24900                 {
24901                     $indentation = $space_count;
24902                 }
24903
24904                 # Otherwise revert to defaults
24905                 elsif ( $default_adjust_indentation == 0 ) {
24906                     $indentation = $leading_spaces_beg;
24907                 }
24908                 elsif ( $default_adjust_indentation == 1 ) {
24909                     $indentation = $reduced_spaces_to_go[$i_terminal];
24910                     $lev         = $levels_to_go[$i_terminal];
24911                 }
24912             }
24913         }
24914
24915         # Full indentaion of closing tokens (-icb and -icp or -cti=2)
24916         else {
24917
24918             # handle -icb (indented closing code block braces)
24919             # Updated method for indented block braces: indent one full level if
24920             # there is no continuation indentation.  This will occur for major
24921             # structures such as sub, if, else, but not for things like map
24922             # blocks.
24923             #
24924             # Note: only code blocks without continuation indentation are
24925             # handled here (if, else, unless, ..). In the following snippet,
24926             # the terminal brace of the sort block will have continuation
24927             # indentation as shown so it will not be handled by the coding
24928             # here.  We would have to undo the continuation indentation to do
24929             # this, but it probably looks ok as is.  This is a possible future
24930             # update for semicolon terminated lines.
24931             #
24932             #     if ($sortby eq 'date' or $sortby eq 'size') {
24933             #         @files = sort {
24934             #             $file_data{$a}{$sortby} <=> $file_data{$b}{$sortby}
24935             #                 or $a cmp $b
24936             #                 } @files;
24937             #         }
24938             #
24939             if (   $block_type_beg
24940                 && $ci_levels_to_go[$i_terminal] == 0 )
24941             {
24942                 my $spaces = get_spaces( $leading_spaces_to_go[$i_terminal] );
24943                 $indentation = $spaces + $rOpts_indent_columns;
24944
24945                 # NOTE: for -lp we could create a new indentation object, but
24946                 # there is probably no need to do it
24947             }
24948
24949             # handle -icp and any -icb block braces which fall through above
24950             # test such as the 'sort' block mentioned above.
24951             else {
24952
24953                 # There are currently two ways to handle -icp...
24954                 # One way is to use the indentation of the previous line:
24955                 # $indentation = $last_indentation_written;
24956
24957                 # The other way is to use the indentation that the previous line
24958                 # would have had if it hadn't been adjusted:
24959                 $indentation = $last_unadjusted_indentation;
24960
24961                 # Current method: use the minimum of the two. This avoids
24962                 # inconsistent indentation.
24963                 if ( get_spaces($last_indentation_written) <
24964                     get_spaces($indentation) )
24965                 {
24966                     $indentation = $last_indentation_written;
24967                 }
24968             }
24969
24970             # use previous indentation but use own level
24971             # to cause list to be flushed properly
24972             $lev = $level_beg;
24973         }
24974
24975         # remember indentation except for multi-line quotes, which get
24976         # no indentation
24977         unless ( $ibeg == 0 && $starting_in_quote ) {
24978             $last_indentation_written    = $indentation;
24979             $last_unadjusted_indentation = $leading_spaces_beg;
24980             $last_leading_token          = $token_beg;
24981
24982             # Patch to make a line which is the end of a qw quote work with the
24983             # -lp option.  Make $token_beg look like a closing token as some
24984             # type even if it is not.  This veriable will become
24985             # $last_leading_token at the end of this loop.  Then, if the -lp
24986             # style is selected, and the next line is also a
24987             # closing token, it will not get more indentation than this line.
24988             # We need to do this because qw quotes (at present) only get
24989             # continuation indentation, not one level of indentation, so we
24990             # need to turn off the -lp indentation.
24991
24992             # ... a picture is worth a thousand words:
24993
24994             # perltidy -wn -gnu (Without this patch):
24995             #   ok(defined(
24996             #       $seqio = $gb->get_Stream_by_batch([qw(J00522 AF303112
24997             #       2981014)])
24998             #             ));
24999
25000             # perltidy -wn -gnu (With this patch):
25001             #  ok(defined(
25002             #      $seqio = $gb->get_Stream_by_batch([qw(J00522 AF303112
25003             #      2981014)])
25004             #  ));
25005             if ( $seqno_qw_closing
25006                 && ( length($token_beg) > 1 || $token_beg eq '>' ) )
25007             {
25008                 $last_leading_token = ')';
25009             }
25010         }
25011
25012         # be sure lines with leading closing tokens are not outdented more
25013         # than the line which contained the corresponding opening token.
25014
25015         #--------------------------------------------------------
25016         # updated per bug report in alex_bug.pl: we must not
25017         # mess with the indentation of closing logical braces so
25018         # we must treat something like '} else {' as if it were
25019         # an isolated brace
25020         #--------------------------------------------------------
25021         my $is_isolated_block_brace = $block_type_beg
25022           && ( $i_terminal == $ibeg
25023             || $is_if_elsif_else_unless_while_until_for_foreach{$block_type_beg}
25024           );
25025
25026         # only do this for a ':; which is aligned with its leading '?'
25027         my $is_unaligned_colon = $type_beg eq ':' && !$is_leading;
25028
25029         if (
25030             defined($opening_indentation)
25031             && !$leading_paren_arrow    # MOJO
25032             && !$is_isolated_block_brace
25033             && !$is_unaligned_colon
25034           )
25035         {
25036             if ( get_spaces($opening_indentation) > get_spaces($indentation) ) {
25037                 $indentation = $opening_indentation;
25038             }
25039         }
25040
25041         # remember the indentation of each line of this batch
25042         push @{$rindentation_list}, $indentation;
25043
25044         # outdent lines with certain leading tokens...
25045         if (
25046
25047             # must be first word of this batch
25048             $ibeg == 0
25049
25050             # and ...
25051             && (
25052
25053                 # certain leading keywords if requested
25054                 $rOpts_outdent_keywords
25055                 && $type_beg eq 'k'
25056                 && $outdent_keyword{$token_beg}
25057
25058                 # or labels if requested
25059                 || $rOpts_outdent_labels && $type_beg eq 'J'
25060
25061                 # or static block comments if requested
25062                 || $is_static_block_comment
25063                 && $rOpts_outdent_static_block_comments
25064             )
25065           )
25066         {
25067             my $space_count = leading_spaces_to_go($ibeg);
25068             if ( $space_count > 0 ) {
25069                 $space_count -= $rOpts_continuation_indentation;
25070                 $is_outdented_line = 1;
25071                 if ( $space_count < 0 ) { $space_count = 0 }
25072
25073                 # do not promote a spaced static block comment to non-spaced;
25074                 # this is not normally necessary but could be for some
25075                 # unusual user inputs (such as -ci = -i)
25076                 if ( $type_beg eq '#' && $space_count == 0 ) {
25077                     $space_count = 1;
25078                 }
25079
25080                 $indentation = $space_count;
25081             }
25082         }
25083
25084         return ( $indentation, $lev, $level_end, $terminal_type,
25085             $terminal_block_type, $is_semicolon_terminated,
25086             $is_outdented_line );
25087     }
25088 } ## end closure final_indentation_adjustment
25089
25090 sub get_opening_indentation {
25091
25092     # get the indentation of the line which output the opening token
25093     # corresponding to a given closing token in the current output batch.
25094     #
25095     # given:
25096     # $i_closing - index in this line of a closing token ')' '}' or ']'
25097     #
25098     # $ri_first - reference to list of the first index $i for each output
25099     #               line in this batch
25100     # $ri_last - reference to list of the last index $i for each output line
25101     #              in this batch
25102     # $rindentation_list - reference to a list containing the indentation
25103     #            used for each line.
25104     # $qw_seqno - optional sequence number to use if normal seqno not defined
25105     #           (TODO: would be more general to just look this up from index i)
25106     #
25107     # return:
25108     #   -the indentation of the line which contained the opening token
25109     #    which matches the token at index $i_opening
25110     #   -and its offset (number of columns) from the start of the line
25111     #
25112     my ( $self, $i_closing, $ri_first, $ri_last, $rindentation_list, $qw_seqno )
25113       = @_;
25114
25115     # first, see if the opening token is in the current batch
25116     my $i_opening = $mate_index_to_go[$i_closing];
25117     my ( $indent, $offset, $is_leading, $exists );
25118     $exists = 1;
25119     if ( defined($i_opening) && $i_opening >= 0 ) {
25120
25121         # it is..look up the indentation
25122         ( $indent, $offset, $is_leading ) =
25123           lookup_opening_indentation( $i_opening, $ri_first, $ri_last,
25124             $rindentation_list );
25125     }
25126
25127     # if not, it should have been stored in the hash by a previous batch
25128     else {
25129         my $seqno = $type_sequence_to_go[$i_closing];
25130         $seqno = $qw_seqno unless ($seqno);
25131         ( $indent, $offset, $is_leading, $exists ) =
25132           get_saved_opening_indentation($seqno);
25133     }
25134     return ( $indent, $offset, $is_leading, $exists );
25135 }
25136
25137 sub set_vertical_tightness_flags {
25138
25139     my ( $self, $n, $n_last_line, $ibeg, $iend, $ri_first, $ri_last,
25140         $ending_in_quote, $closing_side_comment )
25141       = @_;
25142
25143     # Define vertical tightness controls for the nth line of a batch.
25144
25145     # These parameters are passed to the vertical aligner to indicated
25146     # if we should combine this line with the next line to achieve the
25147     # desired vertical tightness.  This was previously an array but
25148     # has been converted to a hash:
25149
25150     # old   hash              Meaning
25151     # index key
25152     #
25153     # 0   _vt_type:           1=opening non-block    2=closing non-block
25154     #                         3=opening block brace  4=closing block brace
25155     #
25156     # 1a  _vt_opening_flag:   1=no multiple steps, 2=multiple steps ok
25157     # 1b  _vt_closing_flag:   spaces of padding to use if closing
25158     # 2   _vt_seqno:          sequence number of container
25159     # 3   _vt_valid flag:     do not append if this flag is false. Will be
25160     #           true if appropriate -vt flag is set.  Otherwise, Will be
25161     #           made true only for 2 line container in parens with -lp
25162     # 4   _vt_seqno_beg:      sequence number of first token of line
25163     # 5   _vt_seqno_end:      sequence number of last token of line
25164     # 6   _vt_min_lines:      min number of lines for joining opening cache,
25165     #                           0=no constraint
25166     # 7   _vt_max_lines:      max number of lines for joining opening cache,
25167     #                           0=no constraint
25168
25169     # The vertical tightness mechanism can add whitespace, so whitespace can
25170     # continually increase if we allowed it when the -fws flag is set.
25171     # See case b499 for an example.
25172
25173     # Speedup: just return for a comment
25174     if ( $max_index_to_go == 0 && $types_to_go[0] eq '#' ) {
25175         return;
25176     }
25177
25178     # Define these values...
25179     my $vt_type         = 0;
25180     my $vt_opening_flag = 0;
25181     my $vt_closing_flag = 0;
25182     my $vt_seqno        = 0;
25183     my $vt_valid_flag   = 0;
25184     my $vt_seqno_beg    = 0;
25185     my $vt_seqno_end    = 0;
25186     my $vt_min_lines    = 0;
25187     my $vt_max_lines    = 0;
25188
25189     goto RETURN
25190       if ($rOpts_freeze_whitespace);
25191
25192     # Uses these global parameters:
25193     #   $rOpts_block_brace_tightness
25194     #   $rOpts_block_brace_vertical_tightness
25195     #   $rOpts_stack_closing_block_brace
25196     #   %opening_vertical_tightness
25197     #   %closing_vertical_tightness
25198     #   %opening_token_right
25199     #   %stack_closing_token
25200     #   %stack_opening_token
25201
25202     #--------------------------------------------------------------
25203     # Vertical Tightness Flags Section 1:
25204     # Handle Lines 1 .. n-1 but not the last line
25205     # For non-BLOCK tokens, we will need to examine the next line
25206     # too, so we won't consider the last line.
25207     #--------------------------------------------------------------
25208     if ( $n < $n_last_line ) {
25209
25210         #--------------------------------------------------------------
25211         # Vertical Tightness Flags Section 1a:
25212         # Look for Type 1, last token of this line is a non-block opening token
25213         #--------------------------------------------------------------
25214         my $ibeg_next = $ri_first->[ $n + 1 ];
25215         my $token_end = $tokens_to_go[$iend];
25216         my $iend_next = $ri_last->[ $n + 1 ];
25217
25218         if (
25219                $type_sequence_to_go[$iend]
25220             && !$block_type_to_go[$iend]
25221             && $is_opening_token{$token_end}
25222             && (
25223                 $opening_vertical_tightness{$token_end} > 0
25224
25225                 # allow 2-line method call to be closed up
25226                 || (   $rOpts_line_up_parentheses
25227                     && $token_end eq '('
25228                     && $self->[_rlp_object_by_seqno_]
25229                     ->{ $type_sequence_to_go[$iend] }
25230                     && $iend > $ibeg
25231                     && $types_to_go[ $iend - 1 ] ne 'b' )
25232             )
25233           )
25234         {
25235             # avoid multiple jumps in nesting depth in one line if
25236             # requested
25237             my $ovt       = $opening_vertical_tightness{$token_end};
25238             my $iend_next = $ri_last->[ $n + 1 ];
25239
25240             # Turn off the -vt flag if the next line ends in a weld.
25241             # This avoids an instability with one-line welds (fixes b1183).
25242             my $type_end_next = $types_to_go[$iend_next];
25243             $ovt = 0
25244               if ( $self->[_rK_weld_left_]->{ $K_to_go[$iend_next] }
25245                 && $is_closing_type{$type_end_next} );
25246
25247             # Avoid conflict of -bom and -pt=1 or -pt=2, fixes b1270
25248             # See similar patch above for $cvt.
25249             my $seqno = $type_sequence_to_go[$iend];
25250             if ( $ovt && $self->[_rwant_container_open_]->{$seqno} ) {
25251                 $ovt = 0;
25252             }
25253
25254             unless (
25255                 $ovt < 2
25256                 && ( $nesting_depth_to_go[ $iend_next + 1 ] !=
25257                     $nesting_depth_to_go[$ibeg_next] )
25258               )
25259             {
25260
25261                 # If -vt flag has not been set, mark this as invalid
25262                 # and aligner will validate it if it sees the closing paren
25263                 # within 2 lines.
25264                 my $valid_flag = $ovt;
25265
25266                 $vt_type         = 1;
25267                 $vt_opening_flag = $ovt;
25268                 $vt_seqno        = $type_sequence_to_go[$iend];
25269                 $vt_valid_flag   = $valid_flag;
25270             }
25271         }
25272
25273         #--------------------------------------------------------------
25274         # Vertical Tightness Flags Section 1b:
25275         # Look for Type 2, first token of next line is a non-block closing
25276         # token .. and be sure this line does not have a side comment
25277         #--------------------------------------------------------------
25278         my $token_next = $tokens_to_go[$ibeg_next];
25279         if (   $type_sequence_to_go[$ibeg_next]
25280             && !$block_type_to_go[$ibeg_next]
25281             && $is_closing_token{$token_next}
25282             && $types_to_go[$iend] ne '#' )    # for safety, shouldn't happen!
25283         {
25284             my $ovt = $opening_vertical_tightness{$token_next};
25285             my $cvt = $closing_vertical_tightness{$token_next};
25286
25287             # Avoid conflict of -bom and -pvt=1 or -pvt=2, fixes b977, b1303
25288             # See similar patch above for $ovt.
25289             my $seqno = $type_sequence_to_go[$ibeg_next];
25290             if ( $cvt && $self->[_rwant_container_open_]->{$seqno} ) {
25291                 $cvt = 0;
25292             }
25293
25294             # Implement cvt=3: like cvt=0 for assigned structures, like cvt=1
25295             # otherwise.  Added for rt136417.
25296             if ( $cvt == 3 ) {
25297                 my $seqno = $type_sequence_to_go[$ibeg_next];
25298                 $cvt = $self->[_ris_assigned_structure_]->{$seqno} ? 0 : 1;
25299             }
25300
25301             # The unusual combination -pvtc=2 -dws -naws can be unstable.
25302             # This fixes b1282, b1283.  This can be moved to set_options.
25303             if (   $cvt == 2
25304                 && $rOpts_delete_old_whitespace
25305                 && !$rOpts_add_whitespace )
25306             {
25307                 $cvt = 1;
25308             }
25309
25310             if (
25311
25312                 # Never append a trailing line like   ')->pack(' because it
25313                 # will throw off later alignment.  So this line must start at a
25314                 # deeper level than the next line (fix1 for welding, git #45).
25315                 (
25316                     $nesting_depth_to_go[$ibeg_next] >=
25317                     $nesting_depth_to_go[ $iend_next + 1 ] + 1
25318                 )
25319                 && (
25320                     $cvt == 2
25321                     || (
25322                         !$self->is_in_list_by_i($ibeg_next)
25323                         && (
25324                             $cvt == 1
25325
25326                             # allow closing up 2-line method calls
25327                             || (   $rOpts_line_up_parentheses
25328                                 && $token_next eq ')'
25329                                 && $self->[_rlp_object_by_seqno_]
25330                                 ->{ $type_sequence_to_go[$ibeg_next] } )
25331                         )
25332                     )
25333                 )
25334               )
25335             {
25336
25337                 # decide which trailing closing tokens to append..
25338                 my $ok = 0;
25339                 if ( $cvt == 2 || $iend_next == $ibeg_next ) { $ok = 1 }
25340                 else {
25341                     my $str = join( '',
25342                         @types_to_go[ $ibeg_next + 1 .. $ibeg_next + 2 ] );
25343
25344                     # append closing token if followed by comment or ';'
25345                     # or another closing token (fix2 for welding, git #45)
25346                     if ( $str =~ /^b?[\)\]\}R#;]/ ) { $ok = 1 }
25347                 }
25348
25349                 if ($ok) {
25350                     my $valid_flag = $cvt;
25351                     my $min_lines  = 0;
25352                     my $max_lines  = 0;
25353
25354                     # Fix for b1187 and b1188: Blinking can occur if we allow
25355                     # welded tokens to re-form into one-line blocks during
25356                     # vertical alignment when -lp used.  So for this case we
25357                     # set the minimum number of lines to be 1 instead of 0.
25358                     # The maximum should be 1 if -vtc is not used.  If -vtc is
25359                     # used, we turn the valid
25360                     # flag off and set the maximum to 0. This is equivalent to
25361                     # using a large number.
25362                     my $seqno_ibeg_next = $type_sequence_to_go[$ibeg_next];
25363                     if (   $rOpts_line_up_parentheses
25364                         && $total_weld_count
25365                         && $self->[_rlp_object_by_seqno_]->{$seqno_ibeg_next}
25366                         && $self->is_welded_at_seqno($seqno_ibeg_next) )
25367                     {
25368                         $min_lines  = 1;
25369                         $max_lines  = $cvt ? 0 : 1;
25370                         $valid_flag = 0;
25371                     }
25372
25373                     $vt_type         = 2;
25374                     $vt_closing_flag = $tightness{$token_next} == 2 ? 0 : 1;
25375                     $vt_seqno        = $type_sequence_to_go[$ibeg_next];
25376                     $vt_valid_flag   = $valid_flag;
25377                     $vt_min_lines    = $min_lines;
25378                     $vt_max_lines    = $max_lines;
25379                 }
25380             }
25381         }
25382
25383         #--------------------------------------------------------------
25384         # Vertical Tightness Flags Section 1c:
25385         # Implement the Opening Token Right flag (Type 2)..
25386         # If requested, move an isolated trailing opening token to the end of
25387         # the previous line which ended in a comma.  We could do this
25388         # in sub recombine_breakpoints but that would cause problems
25389         # with -lp formatting.  The problem is that indentation will
25390         # quickly move far to the right in nested expressions.  By
25391         # doing it after indentation has been set, we avoid changes
25392         # to the indentation.  Actual movement of the token takes place
25393         # in sub valign_output_step_B.
25394
25395         # Note added 4 May 2021: the man page suggests that the -otr flags
25396         # are mainly for opening tokens following commas.  But this seems
25397         # to have been generalized long ago to include other situations.
25398         # I checked the coding back to 2012 and it is essentially the same
25399         # as here, so it is best to leave this unchanged for now.
25400         #--------------------------------------------------------------
25401         if (
25402             $opening_token_right{ $tokens_to_go[$ibeg_next] }
25403
25404             # previous line is not opening
25405             # (use -sot to combine with it)
25406             && !$is_opening_token{$token_end}
25407
25408             # previous line ended in one of these
25409             # (add other cases if necessary; '=>' and '.' are not necessary
25410             && !$block_type_to_go[$ibeg_next]
25411
25412             # this is a line with just an opening token
25413             && (   $iend_next == $ibeg_next
25414                 || $iend_next == $ibeg_next + 2
25415                 && $types_to_go[$iend_next] eq '#' )
25416
25417             # Fix for case b1060 when both -baoo and -otr are set:
25418             # to avoid blinking, honor the -baoo flag over the -otr flag.
25419             && $token_end ne '||' && $token_end ne '&&'
25420
25421             # Keep break after '=' if -lp. Fixes b964 b1040 b1062 b1083 b1089.
25422             && !(
25423                    $token_end eq '='
25424                 && $rOpts_line_up_parentheses
25425                 && $self->[_rlp_object_by_seqno_]
25426                 ->{ $type_sequence_to_go[$ibeg_next] }
25427             )
25428
25429             # looks bad if we align vertically with the wrong container
25430             && $tokens_to_go[$ibeg] ne $tokens_to_go[$ibeg_next]
25431           )
25432         {
25433             my $spaces = ( $types_to_go[ $ibeg_next - 1 ] eq 'b' ) ? 1 : 0;
25434
25435             $vt_type         = 2;
25436             $vt_closing_flag = $spaces;
25437             $vt_seqno        = $type_sequence_to_go[$ibeg_next];
25438             $vt_valid_flag   = 1;
25439         }
25440
25441         #--------------------------------------------------------------
25442         # Vertical Tightness Flags Section 1d:
25443         # Stacking of opening and closing tokens (Type 2)
25444         #--------------------------------------------------------------
25445         my $stackable;
25446         my $token_beg_next = $tokens_to_go[$ibeg_next];
25447
25448         # patch to make something like 'qw(' behave like an opening paren
25449         # (aran.t)
25450         if ( $types_to_go[$ibeg_next] eq 'q' ) {
25451             if ( $token_beg_next =~ /^qw\s*([\[\(\{])$/ ) {
25452                 $token_beg_next = $1;
25453             }
25454         }
25455
25456         if (   $is_closing_token{$token_end}
25457             && $is_closing_token{$token_beg_next} )
25458         {
25459
25460             # avoid instability of combo -bom and -sct; b1179
25461             my $seq_next = $type_sequence_to_go[$ibeg_next];
25462             $stackable = $stack_closing_token{$token_beg_next}
25463               unless ( $block_type_to_go[$ibeg_next]
25464                 || $seq_next && $self->[_rwant_container_open_]->{$seq_next} );
25465         }
25466         elsif ($is_opening_token{$token_end}
25467             && $is_opening_token{$token_beg_next} )
25468         {
25469             $stackable = $stack_opening_token{$token_beg_next}
25470               unless ( $block_type_to_go[$ibeg_next] )
25471               ;    # shouldn't happen; just checking
25472         }
25473
25474         if ($stackable) {
25475
25476             my $is_semicolon_terminated;
25477             if ( $n + 1 == $n_last_line ) {
25478                 my ( $terminal_type, $i_terminal ) =
25479                   terminal_type_i( $ibeg_next, $iend_next );
25480                 $is_semicolon_terminated = $terminal_type eq ';'
25481                   && $nesting_depth_to_go[$iend_next] <
25482                   $nesting_depth_to_go[$ibeg_next];
25483             }
25484
25485             # this must be a line with just an opening token
25486             # or end in a semicolon
25487             if (
25488                 $is_semicolon_terminated
25489                 || (   $iend_next == $ibeg_next
25490                     || $iend_next == $ibeg_next + 2
25491                     && $types_to_go[$iend_next] eq '#' )
25492               )
25493             {
25494                 my $spaces = ( $types_to_go[ $ibeg_next - 1 ] eq 'b' ) ? 1 : 0;
25495
25496                 $vt_type         = 2;
25497                 $vt_closing_flag = $spaces;
25498                 $vt_seqno        = $type_sequence_to_go[$ibeg_next];
25499                 $vt_valid_flag   = 1;
25500
25501             }
25502         }
25503     }
25504
25505     #--------------------------------------------------------------
25506     # Vertical Tightness Flags Section 2:
25507     # Handle type 3, opening block braces on last line of the batch
25508     # Check for a last line with isolated opening BLOCK curly
25509     #--------------------------------------------------------------
25510     elsif ($rOpts_block_brace_vertical_tightness
25511         && $ibeg eq $iend
25512         && $types_to_go[$iend] eq '{'
25513         && $block_type_to_go[$iend] =~
25514         /$block_brace_vertical_tightness_pattern/ )
25515     {
25516         $vt_type         = 3;
25517         $vt_opening_flag = $rOpts_block_brace_vertical_tightness;
25518         $vt_seqno        = 0;
25519         $vt_valid_flag   = 1;
25520     }
25521
25522     #--------------------------------------------------------------
25523     # Vertical Tightness Flags Section 3:
25524     # Handle type 4, a closing block brace on the last line of the batch Check
25525     # for a last line with isolated closing BLOCK curly
25526     # Patch: added a check for any new closing side comment which the
25527     # -csc option may generate. If it exists, there will be a side comment
25528     # so we cannot combine with a brace on the next line.  This issue
25529     # occurs for the combination -scbb and -csc is used.
25530     #--------------------------------------------------------------
25531     elsif ($rOpts_stack_closing_block_brace
25532         && $ibeg eq $iend
25533         && $block_type_to_go[$iend]
25534         && $types_to_go[$iend] eq '}'
25535         && ( !$closing_side_comment || $n < $n_last_line ) )
25536     {
25537         my $spaces = $rOpts_block_brace_tightness == 2 ? 0 : 1;
25538
25539         $vt_type         = 4;
25540         $vt_closing_flag = $spaces;
25541         $vt_seqno        = $type_sequence_to_go[$iend];
25542         $vt_valid_flag   = 1;
25543
25544     }
25545
25546     # get the sequence numbers of the ends of this line
25547     $vt_seqno_beg = $type_sequence_to_go[$ibeg];
25548     if ( !$vt_seqno_beg && $types_to_go[$ibeg] eq 'q' ) {
25549         $vt_seqno_beg = $self->get_seqno( $ibeg, $ending_in_quote );
25550     }
25551
25552     $vt_seqno_end = $type_sequence_to_go[$iend];
25553     if ( !$vt_seqno_end && $types_to_go[$iend] eq 'q' ) {
25554         $vt_seqno_end = $self->get_seqno( $iend, $ending_in_quote );
25555     }
25556
25557   RETURN:
25558
25559     my $rvertical_tightness_flags = {
25560         _vt_type         => $vt_type,
25561         _vt_opening_flag => $vt_opening_flag,
25562         _vt_closing_flag => $vt_closing_flag,
25563         _vt_seqno        => $vt_seqno,
25564         _vt_valid_flag   => $vt_valid_flag,
25565         _vt_seqno_beg    => $vt_seqno_beg,
25566         _vt_seqno_end    => $vt_seqno_end,
25567         _vt_min_lines    => $vt_min_lines,
25568         _vt_max_lines    => $vt_max_lines,
25569     };
25570
25571     return ($rvertical_tightness_flags);
25572 }
25573
25574 ##########################################################
25575 # CODE SECTION 14: Code for creating closing side comments
25576 ##########################################################
25577
25578 {    ## begin closure accumulate_csc_text
25579
25580 # These routines are called once per batch when the --closing-side-comments flag
25581 # has been set.
25582
25583     my %block_leading_text;
25584     my %block_opening_line_number;
25585     my $csc_new_statement_ok;
25586     my $csc_last_label;
25587     my %csc_block_label;
25588     my $accumulating_text_for_block;
25589     my $leading_block_text;
25590     my $rleading_block_if_elsif_text;
25591     my $leading_block_text_level;
25592     my $leading_block_text_length_exceeded;
25593     my $leading_block_text_line_length;
25594     my $leading_block_text_line_number;
25595
25596     sub initialize_csc_vars {
25597         %block_leading_text           = ();
25598         %block_opening_line_number    = ();
25599         $csc_new_statement_ok         = 1;
25600         $csc_last_label               = "";
25601         %csc_block_label              = ();
25602         $rleading_block_if_elsif_text = [];
25603         $accumulating_text_for_block  = "";
25604         reset_block_text_accumulator();
25605         return;
25606     }
25607
25608     sub reset_block_text_accumulator {
25609
25610         # save text after 'if' and 'elsif' to append after 'else'
25611         if ($accumulating_text_for_block) {
25612
25613             if ( $accumulating_text_for_block =~ /^(if|elsif)$/ ) {
25614                 push @{$rleading_block_if_elsif_text}, $leading_block_text;
25615             }
25616         }
25617         $accumulating_text_for_block        = "";
25618         $leading_block_text                 = "";
25619         $leading_block_text_level           = 0;
25620         $leading_block_text_length_exceeded = 0;
25621         $leading_block_text_line_number     = 0;
25622         $leading_block_text_line_length     = 0;
25623         return;
25624     }
25625
25626     sub set_block_text_accumulator {
25627         my ( $self, $i ) = @_;
25628         $accumulating_text_for_block = $tokens_to_go[$i];
25629         if ( $accumulating_text_for_block !~ /^els/ ) {
25630             $rleading_block_if_elsif_text = [];
25631         }
25632         $leading_block_text                 = "";
25633         $leading_block_text_level           = $levels_to_go[$i];
25634         $leading_block_text_line_number     = $self->get_output_line_number();
25635         $leading_block_text_length_exceeded = 0;
25636
25637         # this will contain the column number of the last character
25638         # of the closing side comment
25639         $leading_block_text_line_length =
25640           length($csc_last_label) +
25641           length($accumulating_text_for_block) +
25642           length( $rOpts->{'closing-side-comment-prefix'} ) +
25643           $leading_block_text_level * $rOpts_indent_columns + 3;
25644         return;
25645     }
25646
25647     sub accumulate_block_text {
25648         my ( $self, $i ) = @_;
25649
25650         # accumulate leading text for -csc, ignoring any side comments
25651         if (   $accumulating_text_for_block
25652             && !$leading_block_text_length_exceeded
25653             && $types_to_go[$i] ne '#' )
25654         {
25655
25656             my $added_length = $token_lengths_to_go[$i];
25657             $added_length += 1 if $i == 0;
25658             my $new_line_length =
25659               $leading_block_text_line_length + $added_length;
25660
25661             # we can add this text if we don't exceed some limits..
25662             if (
25663
25664                 # we must not have already exceeded the text length limit
25665                 length($leading_block_text) <
25666                 $rOpts_closing_side_comment_maximum_text
25667
25668                 # and either:
25669                 # the new total line length must be below the line length limit
25670                 # or the new length must be below the text length limit
25671                 # (ie, we may allow one token to exceed the text length limit)
25672                 && (
25673                     $new_line_length <
25674                     $maximum_line_length_at_level[$leading_block_text_level]
25675
25676                     || length($leading_block_text) + $added_length <
25677                     $rOpts_closing_side_comment_maximum_text
25678                 )
25679
25680                # UNLESS: we are adding a closing paren before the brace we seek.
25681                # This is an attempt to avoid situations where the ... to be
25682                # added are longer than the omitted right paren, as in:
25683
25684              #   foreach my $item (@a_rather_long_variable_name_here) {
25685              #      &whatever;
25686              #   } ## end foreach my $item (@a_rather_long_variable_name_here...
25687
25688                 || (
25689                     $tokens_to_go[$i] eq ')'
25690                     && (
25691                         (
25692                                $i + 1 <= $max_index_to_go
25693                             && $block_type_to_go[ $i + 1 ] eq
25694                             $accumulating_text_for_block
25695                         )
25696                         || (   $i + 2 <= $max_index_to_go
25697                             && $block_type_to_go[ $i + 2 ] eq
25698                             $accumulating_text_for_block )
25699                     )
25700                 )
25701               )
25702             {
25703
25704                 # add an extra space at each newline
25705                 if ( $i == 0 && $types_to_go[$i] ne 'b' ) {
25706                     $leading_block_text .= ' ';
25707                 }
25708
25709                 # add the token text
25710                 $leading_block_text .= $tokens_to_go[$i];
25711                 $leading_block_text_line_length = $new_line_length;
25712             }
25713
25714             # show that text was truncated if necessary
25715             elsif ( $types_to_go[$i] ne 'b' ) {
25716                 $leading_block_text_length_exceeded = 1;
25717                 $leading_block_text .= '...';
25718             }
25719         }
25720         return;
25721     }
25722
25723     sub accumulate_csc_text {
25724
25725         my ($self) = @_;
25726
25727         # called once per output buffer when -csc is used. Accumulates
25728         # the text placed after certain closing block braces.
25729         # Defines and returns the following for this buffer:
25730
25731         my $block_leading_text = "";    # the leading text of the last '}'
25732         my $rblock_leading_if_elsif_text;
25733         my $i_block_leading_text =
25734           -1;    # index of token owning block_leading_text
25735         my $block_line_count    = 100;    # how many lines the block spans
25736         my $terminal_type       = 'b';    # type of last nonblank token
25737         my $i_terminal          = 0;      # index of last nonblank token
25738         my $terminal_block_type = "";
25739
25740         # update most recent statement label
25741         $csc_last_label = "" unless ($csc_last_label);
25742         if ( $types_to_go[0] eq 'J' ) { $csc_last_label = $tokens_to_go[0] }
25743         my $block_label = $csc_last_label;
25744
25745         # Loop over all tokens of this batch
25746         for my $i ( 0 .. $max_index_to_go ) {
25747             my $type       = $types_to_go[$i];
25748             my $block_type = $block_type_to_go[$i];
25749             my $token      = $tokens_to_go[$i];
25750
25751             # remember last nonblank token type
25752             if ( $type ne '#' && $type ne 'b' ) {
25753                 $terminal_type       = $type;
25754                 $terminal_block_type = $block_type;
25755                 $i_terminal          = $i;
25756             }
25757
25758             my $type_sequence = $type_sequence_to_go[$i];
25759             if ( $block_type && $type_sequence ) {
25760
25761                 if ( $token eq '}' ) {
25762
25763                     # restore any leading text saved when we entered this block
25764                     if ( defined( $block_leading_text{$type_sequence} ) ) {
25765                         ( $block_leading_text, $rblock_leading_if_elsif_text )
25766                           = @{ $block_leading_text{$type_sequence} };
25767                         $i_block_leading_text = $i;
25768                         delete $block_leading_text{$type_sequence};
25769                         $rleading_block_if_elsif_text =
25770                           $rblock_leading_if_elsif_text;
25771                     }
25772
25773                     if ( defined( $csc_block_label{$type_sequence} ) ) {
25774                         $block_label = $csc_block_label{$type_sequence};
25775                         delete $csc_block_label{$type_sequence};
25776                     }
25777
25778                     # if we run into a '}' then we probably started accumulating
25779                     # at something like a trailing 'if' clause..no harm done.
25780                     if (   $accumulating_text_for_block
25781                         && $levels_to_go[$i] <= $leading_block_text_level )
25782                     {
25783                         my $lev = $levels_to_go[$i];
25784                         reset_block_text_accumulator();
25785                     }
25786
25787                     if ( defined( $block_opening_line_number{$type_sequence} ) )
25788                     {
25789                         my $output_line_number =
25790                           $self->get_output_line_number();
25791                         $block_line_count =
25792                           $output_line_number -
25793                           $block_opening_line_number{$type_sequence} + 1;
25794                         delete $block_opening_line_number{$type_sequence};
25795                     }
25796                     else {
25797
25798                         # Error: block opening line undefined for this line..
25799                         # This shouldn't be possible, but it is not a
25800                         # significant problem.
25801                     }
25802                 }
25803
25804                 elsif ( $token eq '{' ) {
25805
25806                     my $line_number = $self->get_output_line_number();
25807                     $block_opening_line_number{$type_sequence} = $line_number;
25808
25809                     # set a label for this block, except for
25810                     # a bare block which already has the label
25811                     # A label can only be used on the next {
25812                     if ( $block_type =~ /:$/ ) { $csc_last_label = "" }
25813                     $csc_block_label{$type_sequence} = $csc_last_label;
25814                     $csc_last_label = "";
25815
25816                     if (   $accumulating_text_for_block
25817                         && $levels_to_go[$i] == $leading_block_text_level )
25818                     {
25819
25820                         if ( $accumulating_text_for_block eq $block_type ) {
25821
25822                             # save any leading text before we enter this block
25823                             $block_leading_text{$type_sequence} = [
25824                                 $leading_block_text,
25825                                 $rleading_block_if_elsif_text
25826                             ];
25827                             $block_opening_line_number{$type_sequence} =
25828                               $leading_block_text_line_number;
25829                             reset_block_text_accumulator();
25830                         }
25831                         else {
25832
25833                             # shouldn't happen, but not a serious error.
25834                             # We were accumulating -csc text for block type
25835                             # $accumulating_text_for_block and unexpectedly
25836                             # encountered a '{' for block type $block_type.
25837                         }
25838                     }
25839                 }
25840             }
25841
25842             if (   $type eq 'k'
25843                 && $csc_new_statement_ok
25844                 && $is_if_elsif_else_unless_while_until_for_foreach{$token}
25845                 && $token =~ /$closing_side_comment_list_pattern/ )
25846             {
25847                 $self->set_block_text_accumulator($i);
25848             }
25849             else {
25850
25851                 # note: ignoring type 'q' because of tricks being played
25852                 # with 'q' for hanging side comments
25853                 if ( $type ne 'b' && $type ne '#' && $type ne 'q' ) {
25854                     $csc_new_statement_ok =
25855                       ( $block_type || $type eq 'J' || $type eq ';' );
25856                 }
25857                 if (   $type eq ';'
25858                     && $accumulating_text_for_block
25859                     && $levels_to_go[$i] == $leading_block_text_level )
25860                 {
25861                     reset_block_text_accumulator();
25862                 }
25863                 else {
25864                     $self->accumulate_block_text($i);
25865                 }
25866             }
25867         }
25868
25869         # Treat an 'else' block specially by adding preceding 'if' and
25870         # 'elsif' text.  Otherwise, the 'end else' is not helpful,
25871         # especially for cuddled-else formatting.
25872         if ( $terminal_block_type =~ /^els/ && $rblock_leading_if_elsif_text ) {
25873             $block_leading_text =
25874               $self->make_else_csc_text( $i_terminal, $terminal_block_type,
25875                 $block_leading_text, $rblock_leading_if_elsif_text );
25876         }
25877
25878         # if this line ends in a label then remember it for the next pass
25879         $csc_last_label = "";
25880         if ( $terminal_type eq 'J' ) {
25881             $csc_last_label = $tokens_to_go[$i_terminal];
25882         }
25883
25884         return ( $terminal_type, $i_terminal, $i_block_leading_text,
25885             $block_leading_text, $block_line_count, $block_label );
25886     }
25887
25888     sub make_else_csc_text {
25889
25890         # create additional -csc text for an 'else' and optionally 'elsif',
25891         # depending on the value of switch
25892         #
25893         #  = 0 add 'if' text to trailing else
25894         #  = 1 same as 0 plus:
25895         #      add 'if' to 'elsif's if can fit in line length
25896         #      add last 'elsif' to trailing else if can fit in one line
25897         #  = 2 same as 1 but do not check if exceed line length
25898         #
25899         # $rif_elsif_text = a reference to a list of all previous closing
25900         # side comments created for this if block
25901         #
25902         my ( $self, $i_terminal, $block_type, $block_leading_text,
25903             $rif_elsif_text )
25904           = @_;
25905         my $csc_text = $block_leading_text;
25906
25907         if (   $block_type eq 'elsif'
25908             && $rOpts_closing_side_comment_else_flag == 0 )
25909         {
25910             return $csc_text;
25911         }
25912
25913         my $count = @{$rif_elsif_text};
25914         return $csc_text unless ($count);
25915
25916         my $if_text = '[ if' . $rif_elsif_text->[0];
25917
25918         # always show the leading 'if' text on 'else'
25919         if ( $block_type eq 'else' ) {
25920             $csc_text .= $if_text;
25921         }
25922
25923         # see if that's all
25924         if ( $rOpts_closing_side_comment_else_flag == 0 ) {
25925             return $csc_text;
25926         }
25927
25928         my $last_elsif_text = "";
25929         if ( $count > 1 ) {
25930             $last_elsif_text = ' [elsif' . $rif_elsif_text->[ $count - 1 ];
25931             if ( $count > 2 ) { $last_elsif_text = ' [...' . $last_elsif_text; }
25932         }
25933
25934         # tentatively append one more item
25935         my $saved_text = $csc_text;
25936         if ( $block_type eq 'else' ) {
25937             $csc_text .= $last_elsif_text;
25938         }
25939         else {
25940             $csc_text .= ' ' . $if_text;
25941         }
25942
25943         # all done if no length checks requested
25944         if ( $rOpts_closing_side_comment_else_flag == 2 ) {
25945             return $csc_text;
25946         }
25947
25948         # undo it if line length exceeded
25949         my $length =
25950           length($csc_text) +
25951           length($block_type) +
25952           length( $rOpts->{'closing-side-comment-prefix'} ) +
25953           $levels_to_go[$i_terminal] * $rOpts_indent_columns + 3;
25954         if (
25955             $length > $maximum_line_length_at_level[$leading_block_text_level] )
25956         {
25957             $csc_text = $saved_text;
25958         }
25959         return $csc_text;
25960     }
25961 } ## end closure accumulate_csc_text
25962
25963 {    ## begin closure balance_csc_text
25964
25965     # Some additional routines for handling the --closing-side-comments option
25966
25967     my %matching_char;
25968
25969     BEGIN {
25970         %matching_char = (
25971             '{' => '}',
25972             '(' => ')',
25973             '[' => ']',
25974             '}' => '{',
25975             ')' => '(',
25976             ']' => '[',
25977         );
25978     }
25979
25980     sub balance_csc_text {
25981
25982         # Append characters to balance a closing side comment so that editors
25983         # such as vim can correctly jump through code.
25984         # Simple Example:
25985         #  input  = ## end foreach my $foo ( sort { $b  ...
25986         #  output = ## end foreach my $foo ( sort { $b  ...})
25987
25988         # NOTE: This routine does not currently filter out structures within
25989         # quoted text because the bounce algorithms in text editors do not
25990         # necessarily do this either (a version of vim was checked and
25991         # did not do this).
25992
25993         # Some complex examples which will cause trouble for some editors:
25994         #  while ( $mask_string =~ /\{[^{]*?\}/g ) {
25995         #  if ( $mask_str =~ /\}\s*els[^\{\}]+\{$/ ) {
25996         #  if ( $1 eq '{' ) {
25997         # test file test1/braces.pl has many such examples.
25998
25999         my ($csc) = @_;
26000
26001         # loop to examine characters one-by-one, RIGHT to LEFT and
26002         # build a balancing ending, LEFT to RIGHT.
26003         for ( my $pos = length($csc) - 1 ; $pos >= 0 ; $pos-- ) {
26004
26005             my $char = substr( $csc, $pos, 1 );
26006
26007             # ignore everything except structural characters
26008             next unless ( $matching_char{$char} );
26009
26010             # pop most recently appended character
26011             my $top = chop($csc);
26012
26013             # push it back plus the mate to the newest character
26014             # unless they balance each other.
26015             $csc = $csc . $top . $matching_char{$char} unless $top eq $char;
26016         }
26017
26018         # return the balanced string
26019         return $csc;
26020     }
26021 } ## end closure balance_csc_text
26022
26023 sub add_closing_side_comment {
26024
26025     my ( $self, $ri_first, $ri_last ) = @_;
26026     my $rLL = $self->[_rLL_];
26027
26028     # add closing side comments after closing block braces if -csc used
26029     my ( $closing_side_comment, $cscw_block_comment );
26030
26031     #---------------------------------------------------------------
26032     # Step 1: loop through all tokens of this line to accumulate
26033     # the text needed to create the closing side comments. Also see
26034     # how the line ends.
26035     #---------------------------------------------------------------
26036
26037     my ( $terminal_type, $i_terminal, $i_block_leading_text,
26038         $block_leading_text, $block_line_count, $block_label )
26039       = $self->accumulate_csc_text();
26040
26041     #---------------------------------------------------------------
26042     # Step 2: make the closing side comment if this ends a block
26043     #---------------------------------------------------------------
26044     my $have_side_comment = $types_to_go[$max_index_to_go] eq '#';
26045
26046     # if this line might end in a block closure..
26047     if (
26048         $terminal_type eq '}'
26049
26050         # Fix 1 for c091, this is only for blocks
26051         && $block_type_to_go[$i_terminal]
26052
26053         # ..and either
26054         && (
26055
26056             # the block is long enough
26057             ( $block_line_count >= $rOpts->{'closing-side-comment-interval'} )
26058
26059             # or there is an existing comment to check
26060             || (   $have_side_comment
26061                 && $rOpts->{'closing-side-comment-warnings'} )
26062         )
26063
26064         # .. and if this is one of the types of interest
26065         && $block_type_to_go[$i_terminal] =~
26066         /$closing_side_comment_list_pattern/
26067
26068         # .. but not an anonymous sub
26069         # These are not normally of interest, and their closing braces are
26070         # often followed by commas or semicolons anyway.  This also avoids
26071         # possible erratic output due to line numbering inconsistencies
26072         # in the cases where their closing braces terminate a line.
26073         && $block_type_to_go[$i_terminal] ne 'sub'
26074
26075         # ..and the corresponding opening brace must is not in this batch
26076         # (because we do not need to tag one-line blocks, although this
26077         # should also be caught with a positive -csci value)
26078         && $mate_index_to_go[$i_terminal] < 0
26079
26080         # ..and either
26081         && (
26082
26083             # this is the last token (line doesn't have a side comment)
26084             !$have_side_comment
26085
26086             # or the old side comment is a closing side comment
26087             || $tokens_to_go[$max_index_to_go] =~
26088             /$closing_side_comment_prefix_pattern/
26089         )
26090       )
26091     {
26092
26093         # then make the closing side comment text
26094         if ($block_label) { $block_label .= " " }
26095         my $token =
26096 "$rOpts->{'closing-side-comment-prefix'} $block_label$block_type_to_go[$i_terminal]";
26097
26098         # append any extra descriptive text collected above
26099         if ( $i_block_leading_text == $i_terminal ) {
26100             $token .= $block_leading_text;
26101         }
26102
26103         $token = balance_csc_text($token)
26104           if $rOpts->{'closing-side-comments-balanced'};
26105
26106         $token =~ s/\s*$//;    # trim any trailing whitespace
26107
26108         # handle case of existing closing side comment
26109         if ($have_side_comment) {
26110
26111             # warn if requested and tokens differ significantly
26112             if ( $rOpts->{'closing-side-comment-warnings'} ) {
26113                 my $old_csc = $tokens_to_go[$max_index_to_go];
26114                 my $new_csc = $token;
26115                 $new_csc =~ s/\s+//g;            # trim all whitespace
26116                 $old_csc =~ s/\s+//g;            # trim all whitespace
26117                 $new_csc =~ s/[\]\)\}\s]*$//;    # trim trailing structures
26118                 $old_csc =~ s/[\]\)\}\s]*$//;    # trim trailing structures
26119                 $new_csc =~ s/(\.\.\.)$//;       # trim trailing '...'
26120                 my $new_trailing_dots = $1;
26121                 $old_csc =~ s/(\.\.\.)\s*$//;    # trim trailing '...'
26122
26123                 # Patch to handle multiple closing side comments at
26124                 # else and elsif's.  These have become too complicated
26125                 # to check, so if we see an indication of
26126                 # '[ if' or '[ # elsif', then assume they were made
26127                 # by perltidy.
26128                 if ( $block_type_to_go[$i_terminal] eq 'else' ) {
26129                     if ( $old_csc =~ /\[\s*elsif/ ) { $old_csc = $new_csc }
26130                 }
26131                 elsif ( $block_type_to_go[$i_terminal] eq 'elsif' ) {
26132                     if ( $old_csc =~ /\[\s*if/ ) { $old_csc = $new_csc }
26133                 }
26134
26135                 # if old comment is contained in new comment,
26136                 # only compare the common part.
26137                 if ( length($new_csc) > length($old_csc) ) {
26138                     $new_csc = substr( $new_csc, 0, length($old_csc) );
26139                 }
26140
26141                 # if the new comment is shorter and has been limited,
26142                 # only compare the common part.
26143                 if ( length($new_csc) < length($old_csc)
26144                     && $new_trailing_dots )
26145                 {
26146                     $old_csc = substr( $old_csc, 0, length($new_csc) );
26147                 }
26148
26149                 # any remaining difference?
26150                 if ( $new_csc ne $old_csc ) {
26151
26152                     # just leave the old comment if we are below the threshold
26153                     # for creating side comments
26154                     if ( $block_line_count <
26155                         $rOpts->{'closing-side-comment-interval'} )
26156                     {
26157                         $token = undef;
26158                     }
26159
26160                     # otherwise we'll make a note of it
26161                     else {
26162
26163                         warning(
26164 "perltidy -cscw replaced: $tokens_to_go[$max_index_to_go]\n"
26165                         );
26166
26167                         # save the old side comment in a new trailing block
26168                         # comment
26169                         my $timestamp = "";
26170                         if ( $rOpts->{'timestamp'} ) {
26171                             my ( $day, $month, $year ) = (localtime)[ 3, 4, 5 ];
26172                             $year  += 1900;
26173                             $month += 1;
26174                             $timestamp = "$year-$month-$day";
26175                         }
26176                         $cscw_block_comment =
26177 "## perltidy -cscw $timestamp: $tokens_to_go[$max_index_to_go]";
26178 ## "## perltidy -cscw $year-$month-$day: $tokens_to_go[$max_index_to_go]";
26179                     }
26180                 }
26181                 else {
26182
26183                     # No differences.. we can safely delete old comment if we
26184                     # are below the threshold
26185                     if ( $block_line_count <
26186                         $rOpts->{'closing-side-comment-interval'} )
26187                     {
26188                         # Since the line breaks have already been set, we have
26189                         # to remove the token from the _to_go array and also
26190                         # from the line range (this fixes issue c081).
26191                         # Note that we can only get here if -cscw has been set
26192                         # because otherwise the old comment is already deleted.
26193                         $token = undef;
26194                         my $ibeg = $ri_first->[-1];
26195                         my $iend = $ri_last->[-1];
26196                         if (   $iend > $ibeg
26197                             && $iend == $max_index_to_go
26198                             && $types_to_go[$max_index_to_go] eq '#' )
26199                         {
26200                             $iend--;
26201                             $max_index_to_go--;
26202                             if (   $iend > $ibeg
26203                                 && $types_to_go[$max_index_to_go] eq 'b' )
26204                             {
26205                                 $iend--;
26206                                 $max_index_to_go--;
26207                             }
26208                             $ri_last->[-1] = $iend;
26209                         }
26210                     }
26211                 }
26212             }
26213
26214             # switch to the new csc (unless we deleted it!)
26215             if ($token) {
26216
26217                 my $len_tok = length($token); # NOTE: length no longer important
26218                 my $added_len =
26219                   $len_tok - $token_lengths_to_go[$max_index_to_go];
26220
26221                 $tokens_to_go[$max_index_to_go]        = $token;
26222                 $token_lengths_to_go[$max_index_to_go] = $len_tok;
26223                 my $K = $K_to_go[$max_index_to_go];
26224                 $rLL->[$K]->[_TOKEN_]        = $token;
26225                 $rLL->[$K]->[_TOKEN_LENGTH_] = $len_tok;
26226                 $summed_lengths_to_go[ $max_index_to_go + 1 ] += $added_len;
26227             }
26228         }
26229
26230         # handle case of NO existing closing side comment
26231         else {
26232
26233             # To avoid inserting a new token in the token arrays, we
26234             # will just return the new side comment so that it can be
26235             # inserted just before it is needed in the call to the
26236             # vertical aligner.
26237             $closing_side_comment = $token;
26238         }
26239     }
26240     return ( $closing_side_comment, $cscw_block_comment );
26241 }
26242
26243 ############################
26244 # CODE SECTION 15: Summarize
26245 ############################
26246
26247 sub wrapup {
26248
26249     # This is the last routine called when a file is formatted.
26250     # Flush buffer and write any informative messages
26251     my $self = shift;
26252
26253     $self->flush();
26254     my $file_writer_object = $self->[_file_writer_object_];
26255     $file_writer_object->decrement_output_line_number()
26256       ;    # fix up line number since it was incremented
26257     we_are_at_the_last_line();
26258
26259     my $max_depth = $self->[_maximum_BLOCK_level_];
26260     my $at_line   = $self->[_maximum_BLOCK_level_at_line_];
26261     write_logfile_entry(
26262 "Maximum leading structural depth is $max_depth in input at line $at_line\n"
26263     );
26264
26265     my $added_semicolon_count    = $self->[_added_semicolon_count_];
26266     my $first_added_semicolon_at = $self->[_first_added_semicolon_at_];
26267     my $last_added_semicolon_at  = $self->[_last_added_semicolon_at_];
26268
26269     if ( $added_semicolon_count > 0 ) {
26270         my $first = ( $added_semicolon_count > 1 ) ? "First" : "";
26271         my $what =
26272           ( $added_semicolon_count > 1 ) ? "semicolons were" : "semicolon was";
26273         write_logfile_entry("$added_semicolon_count $what added:\n");
26274         write_logfile_entry(
26275             "  $first at input line $first_added_semicolon_at\n");
26276
26277         if ( $added_semicolon_count > 1 ) {
26278             write_logfile_entry(
26279                 "   Last at input line $last_added_semicolon_at\n");
26280         }
26281         write_logfile_entry("  (Use -nasc to prevent semicolon addition)\n");
26282         write_logfile_entry("\n");
26283     }
26284
26285     my $deleted_semicolon_count    = $self->[_deleted_semicolon_count_];
26286     my $first_deleted_semicolon_at = $self->[_first_deleted_semicolon_at_];
26287     my $last_deleted_semicolon_at  = $self->[_last_deleted_semicolon_at_];
26288     if ( $deleted_semicolon_count > 0 ) {
26289         my $first = ( $deleted_semicolon_count > 1 ) ? "First" : "";
26290         my $what =
26291           ( $deleted_semicolon_count > 1 )
26292           ? "semicolons were"
26293           : "semicolon was";
26294         write_logfile_entry(
26295             "$deleted_semicolon_count unnecessary $what deleted:\n");
26296         write_logfile_entry(
26297             "  $first at input line $first_deleted_semicolon_at\n");
26298
26299         if ( $deleted_semicolon_count > 1 ) {
26300             write_logfile_entry(
26301                 "   Last at input line $last_deleted_semicolon_at\n");
26302         }
26303         write_logfile_entry("  (Use -ndsm to prevent semicolon deletion)\n");
26304         write_logfile_entry("\n");
26305     }
26306
26307     my $embedded_tab_count    = $self->[_embedded_tab_count_];
26308     my $first_embedded_tab_at = $self->[_first_embedded_tab_at_];
26309     my $last_embedded_tab_at  = $self->[_last_embedded_tab_at_];
26310     if ( $embedded_tab_count > 0 ) {
26311         my $first = ( $embedded_tab_count > 1 ) ? "First" : "";
26312         my $what =
26313           ( $embedded_tab_count > 1 )
26314           ? "quotes or patterns"
26315           : "quote or pattern";
26316         write_logfile_entry("$embedded_tab_count $what had embedded tabs:\n");
26317         write_logfile_entry(
26318 "This means the display of this script could vary with device or software\n"
26319         );
26320         write_logfile_entry("  $first at input line $first_embedded_tab_at\n");
26321
26322         if ( $embedded_tab_count > 1 ) {
26323             write_logfile_entry(
26324                 "   Last at input line $last_embedded_tab_at\n");
26325         }
26326         write_logfile_entry("\n");
26327     }
26328
26329     my $first_tabbing_disagreement = $self->[_first_tabbing_disagreement_];
26330     my $last_tabbing_disagreement  = $self->[_last_tabbing_disagreement_];
26331     my $tabbing_disagreement_count = $self->[_tabbing_disagreement_count_];
26332     my $in_tabbing_disagreement    = $self->[_in_tabbing_disagreement_];
26333
26334     if ($first_tabbing_disagreement) {
26335         write_logfile_entry(
26336 "First indentation disagreement seen at input line $first_tabbing_disagreement\n"
26337         );
26338     }
26339
26340     my $first_btd = $self->[_first_brace_tabbing_disagreement_];
26341     if ($first_btd) {
26342         my $msg =
26343 "First closing brace indentation disagreement started at input line $first_btd\n";
26344         write_logfile_entry($msg);
26345
26346         # leave a hint in the .ERR file if there was a brace error
26347         if ( get_saw_brace_error() ) { warning("NOTE: $msg") }
26348     }
26349
26350     my $in_btd = $self->[_in_brace_tabbing_disagreement_];
26351     if ($in_btd) {
26352         my $msg =
26353 "Ending with brace indentation disagreement which started at input line $in_btd\n";
26354         write_logfile_entry($msg);
26355
26356         # leave a hint in the .ERR file if there was a brace error
26357         if ( get_saw_brace_error() ) { warning("NOTE: $msg") }
26358     }
26359
26360     if ($in_tabbing_disagreement) {
26361         my $msg =
26362 "Ending with indentation disagreement which started at input line $in_tabbing_disagreement\n";
26363         write_logfile_entry($msg);
26364     }
26365     else {
26366
26367         if ($last_tabbing_disagreement) {
26368
26369             write_logfile_entry(
26370 "Last indentation disagreement seen at input line $last_tabbing_disagreement\n"
26371             );
26372         }
26373         else {
26374             write_logfile_entry("No indentation disagreement seen\n");
26375         }
26376     }
26377
26378     if ($first_tabbing_disagreement) {
26379         write_logfile_entry(
26380 "Note: Indentation disagreement detection is not accurate for outdenting and -lp.\n"
26381         );
26382     }
26383     write_logfile_entry("\n");
26384
26385     my $vao = $self->[_vertical_aligner_object_];
26386     $vao->report_anything_unusual();
26387
26388     $file_writer_object->report_line_length_errors();
26389
26390     $self->[_converged_] = $file_writer_object->get_convergence_check()
26391       || $rOpts->{'indent-only'};
26392
26393     return;
26394 }
26395
26396 } ## end package Perl::Tidy::Formatter
26397 1;