1 #####################################################################
3 # The Perl::Tidy::Formatter package adds indentation, whitespace, and
4 # line breaks to the token stream
6 #####################################################################
9 # CODE SECTION 1: Preliminary code, global definitions and sub new
11 # CODE SECTION 2: Some Basic Utilities
12 # CODE SECTION 3: Check and process options
14 # CODE SECTION 4: Receive lines from the tokenizer
16 # CODE SECTION 5: Pre-process the entire file
17 # sub finish_formatting
18 # CODE SECTION 6: Process line-by-line
19 # sub process_all_lines
20 # CODE SECTION 7: Process lines of code
21 # process_line_of_CODE
22 # CODE SECTION 8: Utilities for setting breakpoints
23 # sub set_forced_breakpoint
24 # CODE SECTION 9: Process batches of code
25 # sub grind_batch_of_CODE
26 # CODE SECTION 10: Code to break long statements
27 # sub break_long_lines
28 # CODE SECTION 11: Code to break long lists
30 # CODE SECTION 12: Code for setting indentation
31 # CODE SECTION 13: Preparing batch of lines for vertical alignment
32 # sub convey_batch_to_vertical_aligner
33 # CODE SECTION 14: Code for creating closing side comments
34 # sub add_closing_side_comment
35 # CODE SECTION 15: Summarize
38 #######################################################################
39 # CODE SECTION 1: Preliminary code and global definitions up to sub new
40 #######################################################################
42 package Perl::Tidy::Formatter;
46 # DEVEL_MODE gets switched on during automated testing for extra checking
47 use constant DEVEL_MODE => 0;
48 use constant EMPTY_STRING => q{};
49 use constant SPACE => q{ };
51 { #<<< A non-indenting brace to contain all lexical variables
54 use English qw( -no_match_vars );
55 our $VERSION = '20220613';
57 # The Tokenizer will be loaded with the Formatter
58 ##use Perl::Tidy::Tokenizer; # for is_keyword()
62 # Catch any undefined sub calls so that we are sure to get
63 # some diagnostic information. This sub should never be called
64 # except for a programming error.
66 return if ( $AUTOLOAD =~ /\bDESTROY$/ );
67 my ( $pkg, $fname, $lno ) = caller();
68 my $my_package = __PACKAGE__;
70 ======================================================================
71 Error detected in package '$my_package', version $VERSION
72 Received unexpected AUTOLOAD call for sub '$AUTOLOAD'
73 Called from package: '$pkg'
74 Called from File '$fname' at line '$lno'
75 This error is probably due to a recent programming change
76 ======================================================================
83 $self->_decrement_count();
89 Perl::Tidy::Die($msg);
90 croak "unexpected return from Perl::Tidy::Die";
95 Perl::Tidy::Warn($msg);
102 # This routine is called for errors that really should not occur
103 # except if there has been a bug introduced by a recent program change.
104 # Please add comments at calls to Fault to explain why the call
105 # should not occur, and where to look to fix it.
106 my ( $package0, $filename0, $line0, $subroutine0 ) = caller(0);
107 my ( $package1, $filename1, $line1, $subroutine1 ) = caller(1);
108 my ( $package2, $filename2, $line2, $subroutine2 ) = caller(2);
109 my $input_stream_name = get_input_stream_name();
112 ==============================================================================
113 While operating on input stream with name: '$input_stream_name'
114 A fault was detected at line $line0 of sub '$subroutine1'
116 which was called from line $line1 of sub '$subroutine2'
118 This is probably an error introduced by a recent programming change.
119 Perl::Tidy::Formatter.pm reports VERSION='$VERSION'.
120 ==============================================================================
123 # We shouldn't get here, but this return is to keep Perl-Critic from
130 Perl::Tidy::Exit($msg);
131 croak "unexpected return from Perl::Tidy::Exit";
134 # Global variables ...
137 #-----------------------------------------------------------------
138 # Section 1: Global variables which are either always constant or
139 # are constant after being configured by user-supplied
140 # parameters. They remain constant as a file is being processed.
141 #-----------------------------------------------------------------
143 # user parameters and shortcuts
146 $rOpts_add_whitespace,
147 $rOpts_blank_lines_after_opening_block,
148 $rOpts_block_brace_tightness,
149 $rOpts_block_brace_vertical_tightness,
150 $rOpts_break_after_labels,
151 $rOpts_break_at_old_attribute_breakpoints,
152 $rOpts_break_at_old_comma_breakpoints,
153 $rOpts_break_at_old_keyword_breakpoints,
154 $rOpts_break_at_old_logical_breakpoints,
155 $rOpts_break_at_old_semicolon_breakpoints,
156 $rOpts_break_at_old_ternary_breakpoints,
157 $rOpts_break_open_compact_parens,
158 $rOpts_closing_side_comments,
159 $rOpts_closing_side_comment_else_flag,
160 $rOpts_closing_side_comment_maximum_text,
161 $rOpts_comma_arrow_breakpoints,
162 $rOpts_continuation_indentation,
163 $rOpts_delete_closing_side_comments,
164 $rOpts_delete_old_whitespace,
165 $rOpts_delete_side_comments,
166 $rOpts_extended_continuation_indentation,
167 $rOpts_format_skipping,
168 $rOpts_freeze_whitespace,
169 $rOpts_function_paren_vertical_alignment,
170 $rOpts_fuzzy_line_length,
171 $rOpts_ignore_old_breakpoints,
172 $rOpts_ignore_side_comment_lengths,
173 $rOpts_indent_closing_brace,
174 $rOpts_indent_columns,
176 $rOpts_keep_interior_semicolons,
177 $rOpts_line_up_parentheses,
178 $rOpts_logical_padding,
179 $rOpts_maximum_consecutive_blank_lines,
180 $rOpts_maximum_fields_per_table,
181 $rOpts_maximum_line_length,
182 $rOpts_one_line_block_semicolons,
183 $rOpts_opening_brace_always_on_right,
184 $rOpts_outdent_keywords,
185 $rOpts_outdent_labels,
186 $rOpts_outdent_long_comments,
187 $rOpts_outdent_long_quotes,
188 $rOpts_outdent_static_block_comments,
190 $rOpts_short_concatenation_item_length,
191 $rOpts_stack_closing_block_brace,
192 $rOpts_static_block_comments,
193 $rOpts_sub_alias_list,
194 $rOpts_tee_block_comments,
196 $rOpts_tee_side_comments,
197 $rOpts_variable_maximum_line_length,
200 $rOpts_valign_side_comments,
201 $rOpts_whitespace_cycle,
202 $rOpts_extended_line_up_parentheses,
204 # Static hashes initialized in a BEGIN block
206 %is_if_unless_and_or_last_next_redo_return,
207 %is_if_elsif_else_unless_while_until_for_foreach,
208 %is_if_unless_while_until_for_foreach,
209 %is_last_next_redo_return,
213 %is_if_unless_elsif_else,
217 %is_block_without_semicolon,
218 %ok_to_add_semicolon_for_block_type,
224 %is_equal_or_fat_comma,
226 %is_opening_sequence_token,
227 %is_closing_sequence_token,
228 %is_container_label_type,
229 %is_die_confess_croak_warn,
234 # Initialized in check_options. These are constants and could
235 # just as well be initialized in a BEGIN block.
237 %is_anon_sub_brace_follower,
238 %is_anon_sub_1_brace_follower,
239 %is_other_brace_follower,
241 # Initialized and re-initialized in sub initialize_grep_and_friends;
242 # These can be modified by grep-alias-list
244 %is_sort_map_grep_eval,
245 %is_sort_map_grep_eval_do,
247 %is_keyword_returning_list,
250 # Initialized in sub initialize_whitespace_hashes;
251 # Some can be modified according to user parameters.
256 # Configured in sub initialize_bond_strength_hashes
257 %right_bond_strength,
260 # Hashes for -kbb=s and -kba=s
261 %keep_break_before_type,
262 %keep_break_after_type,
264 # Initialized in check_options, modified by prepare_cuddled_block_types:
265 %want_one_line_block,
267 # Initialized in sub prepare_cuddled_block_types
268 $rcuddled_block_types,
270 # Initialized and configured in check_options
272 %keyword_paren_inner_tightness,
276 %break_before_container_types,
277 %container_indentation_options,
279 %space_after_keyword,
284 %opening_vertical_tightness,
285 %closing_vertical_tightness,
286 %closing_token_indentation,
287 $some_closing_token_indentation,
289 %opening_token_right,
290 %stack_opening_token,
291 %stack_closing_token,
293 %weld_nested_exclusion_rules,
294 %line_up_parentheses_control_hash,
295 $line_up_parentheses_control_is_lxpl,
297 # regex patterns for text identification.
298 # Most are initialized in a sub make_**_pattern during configuration.
299 # Most can be configured by user parameters.
302 $static_block_comment_pattern,
303 $static_side_comment_pattern,
304 $format_skipping_pattern_begin,
305 $format_skipping_pattern_end,
306 $non_indenting_brace_pattern,
307 $bl_exclusion_pattern,
309 $bli_exclusion_pattern,
311 $block_brace_vertical_tightness_pattern,
312 $blank_lines_after_opening_block_pattern,
313 $blank_lines_before_closing_block_pattern,
314 $keyword_group_list_pattern,
315 $keyword_group_list_comment_pattern,
316 $closing_side_comment_prefix_pattern,
317 $closing_side_comment_list_pattern,
319 # Table to efficiently find indentation and max line length
321 @maximum_line_length_at_level,
322 @maximum_text_length_at_level,
326 # Total number of sequence items in a weld, for quick checks
329 #--------------------------------------------------------
330 # Section 2: Work arrays for the current batch of tokens.
331 #--------------------------------------------------------
333 # These are re-initialized for each batch of code
334 # in sub initialize_batch_variables.
337 @type_sequence_to_go,
338 @forced_breakpoint_to_go,
339 @token_lengths_to_go,
340 @summed_lengths_to_go,
342 @leading_spaces_to_go,
343 @reduced_spaces_to_go,
346 @nesting_depth_to_go,
348 @old_breakpoint_to_go,
356 # forced breakpoint variables associated with each batch of code
357 $forced_breakpoint_count,
358 $forced_breakpoint_undo_count,
359 $index_max_forced_break,
364 # Index names for token variables.
365 # Do not combine with other BEGIN blocks (c101).
369 _CUMULATIVE_LENGTH_ => $i++,
370 _LINE_INDEX_ => $i++,
371 _KNEXT_SEQ_ITEM_ => $i++,
374 _TOKEN_LENGTH_ => $i++,
376 _TYPE_SEQUENCE_ => $i++,
378 # Number of token variables; must be last in list:
385 # Index names for $self variables.
386 # Do not combine with other BEGIN blocks (c101).
390 _rlines_new_ => $i++,
393 _rdepth_of_opening_seqno_ => $i++,
395 _Iss_opening_ => $i++,
396 _Iss_closing_ => $i++,
397 _rblock_type_of_seqno_ => $i++,
398 _ris_asub_block_ => $i++,
399 _ris_sub_block_ => $i++,
400 _K_opening_container_ => $i++,
401 _K_closing_container_ => $i++,
402 _K_opening_ternary_ => $i++,
403 _K_closing_ternary_ => $i++,
404 _K_first_seq_item_ => $i++,
405 _rK_phantom_semicolons_ => $i++,
406 _rtype_count_by_seqno_ => $i++,
407 _ris_function_call_paren_ => $i++,
408 _rlec_count_by_seqno_ => $i++,
409 _ris_broken_container_ => $i++,
410 _ris_permanently_broken_ => $i++,
412 _rhas_broken_list_ => $i++,
413 _rhas_broken_list_with_lec_ => $i++,
414 _rhas_code_block_ => $i++,
415 _rhas_broken_code_block_ => $i++,
416 _rhas_ternary_ => $i++,
417 _ris_excluded_lp_container_ => $i++,
418 _rlp_object_by_seqno_ => $i++,
419 _rwant_reduced_ci_ => $i++,
420 _rno_xci_by_seqno_ => $i++,
421 _rbrace_left_ => $i++,
422 _ris_bli_container_ => $i++,
423 _rparent_of_seqno_ => $i++,
424 _rchildren_of_seqno_ => $i++,
425 _ris_list_by_seqno_ => $i++,
426 _rbreak_container_ => $i++,
427 _rshort_nested_ => $i++,
428 _length_function_ => $i++,
429 _is_encoded_data_ => $i++,
431 _sink_object_ => $i++,
432 _file_writer_object_ => $i++,
433 _vertical_aligner_object_ => $i++,
434 _logger_object_ => $i++,
435 _radjusted_levels_ => $i++,
436 _this_batch_ => $i++,
438 _last_output_short_opening_token_ => $i++,
440 _last_line_leading_type_ => $i++,
441 _last_line_leading_level_ => $i++,
442 _last_last_line_leading_level_ => $i++,
444 _added_semicolon_count_ => $i++,
445 _first_added_semicolon_at_ => $i++,
446 _last_added_semicolon_at_ => $i++,
448 _deleted_semicolon_count_ => $i++,
449 _first_deleted_semicolon_at_ => $i++,
450 _last_deleted_semicolon_at_ => $i++,
452 _embedded_tab_count_ => $i++,
453 _first_embedded_tab_at_ => $i++,
454 _last_embedded_tab_at_ => $i++,
456 _first_tabbing_disagreement_ => $i++,
457 _last_tabbing_disagreement_ => $i++,
458 _tabbing_disagreement_count_ => $i++,
459 _in_tabbing_disagreement_ => $i++,
460 _first_brace_tabbing_disagreement_ => $i++,
461 _in_brace_tabbing_disagreement_ => $i++,
463 _saw_VERSION_in_this_file_ => $i++,
464 _saw_END_or_DATA_ => $i++,
466 _rK_weld_left_ => $i++,
467 _rK_weld_right_ => $i++,
468 _rweld_len_right_at_K_ => $i++,
470 _rspecial_side_comment_type_ => $i++,
472 _rseqno_controlling_my_ci_ => $i++,
473 _ris_seqno_controlling_ci_ => $i++,
474 _save_logfile_ => $i++,
475 _maximum_level_ => $i++,
476 _maximum_level_at_line_ => $i++,
477 _maximum_BLOCK_level_ => $i++,
478 _maximum_BLOCK_level_at_line_ => $i++,
480 _rKrange_code_without_comments_ => $i++,
481 _rbreak_before_Kfirst_ => $i++,
482 _rbreak_after_Klast_ => $i++,
483 _rwant_container_open_ => $i++,
486 _rstarting_multiline_qw_seqno_by_K_ => $i++,
487 _rending_multiline_qw_seqno_by_K_ => $i++,
488 _rKrange_multiline_qw_by_seqno_ => $i++,
489 _rmultiline_qw_has_extra_level_ => $i++,
491 _rcollapsed_length_by_seqno_ => $i++,
492 _rbreak_before_container_by_seqno_ => $i++,
493 _ris_essential_old_breakpoint_ => $i++,
494 _roverride_cab3_ => $i++,
495 _ris_assigned_structure_ => $i++,
497 _rseqno_non_indenting_brace_by_ix_ => $i++,
498 _rreduce_vertical_tightness_by_seqno_ => $i++,
500 _LAST_SELF_INDEX_ => $i - 1,
506 # Index names for batch variables.
507 # Do not combine with other BEGIN blocks (c101).
508 # These are stored in _this_batch_, which is a sub-array of $self.
511 _starting_in_quote_ => $i++,
512 _ending_in_quote_ => $i++,
513 _is_static_block_comment_ => $i++,
516 _do_not_pad_ => $i++,
517 _peak_batch_size_ => $i++,
518 _batch_count_ => $i++,
519 _rix_seqno_controlling_ci_ => $i++,
520 _batch_CODE_type_ => $i++,
521 _ri_starting_one_line_block_ => $i++,
527 # Sequence number assigned to the root of sequence tree.
528 # The minimum of the actual sequences numbers is 4, so we can use 1
529 use constant SEQ_ROOT => 1;
531 # Codes for insertion and deletion of blanks
532 use constant DELETE => 0;
533 use constant STABLE => 1;
534 use constant INSERT => 2;
537 use constant WS_YES => 1;
538 use constant WS_OPTIONAL => 0;
539 use constant WS_NO => -1;
541 # Token bond strengths.
542 use constant NO_BREAK => 10_000;
543 use constant VERY_STRONG => 100;
544 use constant STRONG => 2.1;
545 use constant NOMINAL => 1.1;
546 use constant WEAK => 0.8;
547 use constant VERY_WEAK => 0.55;
549 # values for testing indexes in output array
550 use constant UNDEFINED_INDEX => -1;
552 # Maximum number of little messages; probably need not be changed.
553 use constant MAX_NAG_MESSAGES => 6;
555 # This is the decimal range of printable characters in ASCII. It is used to
556 # make quick preliminary checks before resorting to using a regex.
557 use constant ORD_PRINTABLE_MIN => 33;
558 use constant ORD_PRINTABLE_MAX => 126;
560 # Initialize constant hashes ...
564 = **= += *= &= <<= &&=
569 @is_assignment{@q} = (1) x scalar(@q);
571 @q = qw(is if unless and or err last next redo return);
572 @is_if_unless_and_or_last_next_redo_return{@q} = (1) x scalar(@q);
574 # These block types may have text between the keyword and opening
575 # curly. Note: 'else' does not, but must be included to allow trailing
576 # if/elsif text to be appended.
577 # patch for SWITCH/CASE: added 'case' and 'when'
578 @q = qw(if elsif else unless while until for foreach case when catch);
579 @is_if_elsif_else_unless_while_until_for_foreach{@q} =
582 @q = qw(if unless while until for foreach);
583 @is_if_unless_while_until_for_foreach{@q} =
586 @q = qw(last next redo return);
587 @is_last_next_redo_return{@q} = (1) x scalar(@q);
589 # Map related block names into a common name to allow vertical alignment
590 # used by sub make_alignment_patterns. Note: this is normally unchanged,
591 # but it contains 'grep' and can be re-initialized in
592 # sub initialize_grep_and_friends in a testing mode.
605 @is_if_unless{@q} = (1) x scalar(@q);
608 @is_if_elsif{@q} = (1) x scalar(@q);
610 @q = qw(if unless elsif);
611 @is_if_unless_elsif{@q} = (1) x scalar(@q);
613 @q = qw(if unless elsif else);
614 @is_if_unless_elsif_else{@q} = (1) x scalar(@q);
617 @is_elsif_else{@q} = (1) x scalar(@q);
620 @is_and_or{@q} = (1) x scalar(@q);
622 # Identify certain operators which often occur in chains.
623 # Note: the minus (-) causes a side effect of padding of the first line in
624 # something like this (by sub set_logical_padding):
625 # Checkbutton => 'Transmission checked',
626 # -variable => \$TRANS
627 # This usually improves appearance so it seems ok.
628 @q = qw(&& || and or : ? . + - * /);
629 @is_chain_operator{@q} = (1) x scalar(@q);
631 # Operators that the user can request break before or after.
632 # Note that some are keywords
633 @all_operators = qw(% + - * / x != == >= <= =~ !~ < > | &
634 = **= += *= &= <<= &&= -= /= |= >>= ||= //= .= %= ^= x=
635 . : ? && || and or err xor
638 # We can remove semicolons after blocks preceded by these keywords
640 qw(BEGIN END CHECK INIT AUTOLOAD DESTROY UNITCHECK continue if elsif else
641 unless while until for foreach given when default);
642 @is_block_without_semicolon{@q} = (1) x scalar(@q);
644 # We will allow semicolons to be added within these block types
645 # as well as sub and package blocks.
647 # 1. Note that these keywords are omitted:
648 # switch case given when default sort map grep
649 # 2. It is also ok to add for sub and package blocks and a labeled block
650 # 3. But not okay for other perltidy types including:
652 # 4. Test files: blktype.t, blktype1.t, semicolon.t
654 qw( BEGIN END CHECK INIT AUTOLOAD DESTROY UNITCHECK continue if elsif else
655 unless do while until eval for foreach );
656 @ok_to_add_semicolon_for_block_type{@q} = (1) x scalar(@q);
658 # 'L' is token for opening { at hash key
660 @is_opening_type{@q} = (1) x scalar(@q);
662 # 'R' is token for closing } at hash key
664 @is_closing_type{@q} = (1) x scalar(@q);
667 @is_opening_token{@q} = (1) x scalar(@q);
670 @is_closing_token{@q} = (1) x scalar(@q);
673 @is_ternary{@q} = (1) x scalar(@q);
676 @is_opening_sequence_token{@q} = (1) x scalar(@q);
679 @is_closing_sequence_token{@q} = (1) x scalar(@q);
681 # a hash needed by sub break_lists for labeling containers
682 @q = qw( k => && || ? : . );
683 @is_container_label_type{@q} = (1) x scalar(@q);
685 @q = qw( die confess croak warn );
686 @is_die_confess_croak_warn{@q} = (1) x scalar(@q);
688 @q = qw( my our local );
689 @is_my_our_local{@q} = (1) x scalar(@q);
691 # Braces -bbht etc must follow these. Note: experimentation with
692 # including a simple comma shows that it adds little and can lead
693 # to poor formatting in complex lists.
695 @is_equal_or_fat_comma{@q} = (1) x scalar(@q);
699 @is_counted_type{@q} = (1) x scalar(@q);
703 { ## begin closure to count instances
705 # methods to count instances
707 sub get_count { return $_count; }
708 sub _increment_count { return ++$_count }
709 sub _decrement_count { return --$_count }
710 } ## end closure to count instances
714 my ( $class, @args ) = @_;
716 # we are given an object with a write_line() method to take lines
718 sink_object => undef,
719 diagnostics_object => undef,
720 logger_object => undef,
721 length_function => sub { return length( $_[0] ) },
722 is_encoded_data => EMPTY_STRING,
725 my %args = ( %defaults, @args );
727 my $length_function = $args{length_function};
728 my $is_encoded_data = $args{is_encoded_data};
729 my $fh_tee = $args{fh_tee};
730 my $logger_object = $args{logger_object};
731 my $diagnostics_object = $args{diagnostics_object};
733 # we create another object with a get_line() and peek_ahead() method
734 my $sink_object = $args{sink_object};
735 my $file_writer_object =
736 Perl::Tidy::FileWriter->new( $sink_object, $rOpts, $logger_object );
738 # initialize closure variables...
739 set_logger_object($logger_object);
740 set_diagnostics_object($diagnostics_object);
741 initialize_lp_vars();
742 initialize_csc_vars();
743 initialize_break_lists();
744 initialize_undo_ci();
745 initialize_process_line_of_CODE();
746 initialize_grind_batch_of_CODE();
747 initialize_final_indentation_adjustment();
748 initialize_postponed_breakpoint();
749 initialize_batch_variables();
750 initialize_write_line();
752 my $vertical_aligner_object = Perl::Tidy::VerticalAligner->new(
754 file_writer_object => $file_writer_object,
755 logger_object => $logger_object,
756 diagnostics_object => $diagnostics_object,
757 length_function => $length_function
760 write_logfile_entry("\nStarting tokenization pass...\n");
762 if ( $rOpts->{'entab-leading-whitespace'} ) {
764 "Leading whitespace will be entabbed with $rOpts->{'entab-leading-whitespace'} spaces per tab\n"
767 elsif ( $rOpts->{'tabs'} ) {
768 write_logfile_entry("Indentation will be with a tab character\n");
772 "Indentation will be with $rOpts->{'indent-columns'} spaces\n");
775 # Initialize the $self array reference.
776 # To add an item, first add a constant index in the BEGIN block above.
779 # Basic data structures...
780 $self->[_rlines_] = []; # = ref to array of lines of the file
781 $self->[_rlines_new_] = []; # = ref to array of output lines
783 # 'rLL' = reference to the continuous liner array of all tokens in a file.
784 # 'LL' stands for 'Linked List'. Using a linked list was a disaster, but
785 # 'LL' stuck because it is easy to type. The 'rLL' array is updated
786 # by sub 'respace_tokens' during reformatting. The indexes in 'rLL' begin
787 # with '$K' by convention.
789 $self->[_Klimit_] = undef; # = maximum K index for rLL.
791 # Indexes into the rLL list
792 $self->[_K_opening_container_] = {};
793 $self->[_K_closing_container_] = {};
794 $self->[_K_opening_ternary_] = {};
795 $self->[_K_closing_ternary_] = {};
796 $self->[_K_first_seq_item_] = undef; # K of first token with a sequence #
798 # Array of phantom semicolons, in case we ever need to undo them
799 $self->[_rK_phantom_semicolons_] = undef;
801 # 'rSS' is the 'Signed Sequence' list, a continuous list of all sequence
802 # numbers with + or - indicating opening or closing. This list represents
803 # the entire container tree and is invariant under reformatting. It can be
804 # used to quickly travel through the tree. Indexes in the rSS array begin
805 # with '$I' by convention. The 'Iss' arrays give the indexes in this list
806 # of opening and closing sequence numbers.
808 $self->[_Iss_opening_] = [];
809 $self->[_Iss_closing_] = [];
811 # Arrays to help traverse the tree
812 $self->[_rdepth_of_opening_seqno_] = [];
813 $self->[_rblock_type_of_seqno_] = {};
814 $self->[_ris_asub_block_] = {};
815 $self->[_ris_sub_block_] = {};
817 # Mostly list characteristics and processing flags
818 $self->[_rtype_count_by_seqno_] = {};
819 $self->[_ris_function_call_paren_] = {};
820 $self->[_rlec_count_by_seqno_] = {};
821 $self->[_ris_broken_container_] = {};
822 $self->[_ris_permanently_broken_] = {};
823 $self->[_rhas_list_] = {};
824 $self->[_rhas_broken_list_] = {};
825 $self->[_rhas_broken_list_with_lec_] = {};
826 $self->[_rhas_code_block_] = {};
827 $self->[_rhas_broken_code_block_] = {};
828 $self->[_rhas_ternary_] = {};
829 $self->[_ris_excluded_lp_container_] = {};
830 $self->[_rlp_object_by_seqno_] = {};
831 $self->[_rwant_reduced_ci_] = {};
832 $self->[_rno_xci_by_seqno_] = {};
833 $self->[_rbrace_left_] = {};
834 $self->[_ris_bli_container_] = {};
835 $self->[_rparent_of_seqno_] = {};
836 $self->[_rchildren_of_seqno_] = {};
837 $self->[_ris_list_by_seqno_] = {};
839 $self->[_rbreak_container_] = {}; # prevent one-line blocks
840 $self->[_rshort_nested_] = {}; # blocks not forced open
841 $self->[_length_function_] = $length_function;
842 $self->[_is_encoded_data_] = $is_encoded_data;
845 $self->[_fh_tee_] = $fh_tee;
846 $self->[_sink_object_] = $sink_object;
847 $self->[_file_writer_object_] = $file_writer_object;
848 $self->[_vertical_aligner_object_] = $vertical_aligner_object;
849 $self->[_logger_object_] = $logger_object;
851 # Reference to the batch being processed
852 $self->[_this_batch_] = [];
854 # Memory of processed text...
855 $self->[_last_last_line_leading_level_] = 0;
856 $self->[_last_line_leading_level_] = 0;
857 $self->[_last_line_leading_type_] = '#';
858 $self->[_last_output_short_opening_token_] = 0;
859 $self->[_added_semicolon_count_] = 0;
860 $self->[_first_added_semicolon_at_] = 0;
861 $self->[_last_added_semicolon_at_] = 0;
862 $self->[_deleted_semicolon_count_] = 0;
863 $self->[_first_deleted_semicolon_at_] = 0;
864 $self->[_last_deleted_semicolon_at_] = 0;
865 $self->[_embedded_tab_count_] = 0;
866 $self->[_first_embedded_tab_at_] = 0;
867 $self->[_last_embedded_tab_at_] = 0;
868 $self->[_first_tabbing_disagreement_] = 0;
869 $self->[_last_tabbing_disagreement_] = 0;
870 $self->[_tabbing_disagreement_count_] = 0;
871 $self->[_in_tabbing_disagreement_] = 0;
872 $self->[_saw_VERSION_in_this_file_] = !$rOpts->{'pass-version-line'};
873 $self->[_saw_END_or_DATA_] = 0;
874 $self->[_first_brace_tabbing_disagreement_] = undef;
875 $self->[_in_brace_tabbing_disagreement_] = undef;
877 # Hashes related to container welding...
878 $self->[_radjusted_levels_] = [];
880 # Weld data structures
881 $self->[_rK_weld_left_] = {};
882 $self->[_rK_weld_right_] = {};
883 $self->[_rweld_len_right_at_K_] = {};
886 $self->[_rseqno_controlling_my_ci_] = {};
887 $self->[_ris_seqno_controlling_ci_] = {};
889 $self->[_rspecial_side_comment_type_] = {};
890 $self->[_maximum_level_] = 0;
891 $self->[_maximum_level_at_line_] = 0;
892 $self->[_maximum_BLOCK_level_] = 0;
893 $self->[_maximum_BLOCK_level_at_line_] = 0;
895 $self->[_rKrange_code_without_comments_] = [];
896 $self->[_rbreak_before_Kfirst_] = {};
897 $self->[_rbreak_after_Klast_] = {};
898 $self->[_rwant_container_open_] = {};
899 $self->[_converged_] = 0;
902 $self->[_rstarting_multiline_qw_seqno_by_K_] = {};
903 $self->[_rending_multiline_qw_seqno_by_K_] = {};
904 $self->[_rKrange_multiline_qw_by_seqno_] = {};
905 $self->[_rmultiline_qw_has_extra_level_] = {};
907 $self->[_rcollapsed_length_by_seqno_] = {};
908 $self->[_rbreak_before_container_by_seqno_] = {};
909 $self->[_ris_essential_old_breakpoint_] = {};
910 $self->[_roverride_cab3_] = {};
911 $self->[_ris_assigned_structure_] = {};
913 $self->[_rseqno_non_indenting_brace_by_ix_] = {};
914 $self->[_rreduce_vertical_tightness_by_seqno_] = {};
916 # This flag will be updated later by a call to get_save_logfile()
917 $self->[_save_logfile_] = defined($logger_object);
919 # Be sure all variables in $self have been initialized above. To find the
920 # correspondence of index numbers and array names, copy a list to a file
921 # and use the unix 'nl' command to number lines 1..
924 foreach ( 0 .. _LAST_SELF_INDEX_ ) {
925 if ( !exists( $self->[$_] ) ) {
926 push @non_existant, $_;
930 Fault("These indexes in self not initialized: (@non_existant)\n");
936 # Safety check..this is not a class yet
937 if ( _increment_count() > 1 ) {
939 "Attempt to create more than 1 object in $class, which is not a true class yet\n";
944 ######################################
945 # CODE SECTION 2: Some Basic Utilities
946 ######################################
950 # Verify that the rLL array has not been auto-vivified
951 my ( $self, $msg ) = @_;
952 my $rLL = $self->[_rLL_];
953 my $Klimit = $self->[_Klimit_];
955 if ( ( defined($Klimit) && $Klimit != $num - 1 )
956 || ( !defined($Klimit) && $num > 0 ) )
959 # This fault can occur if the array has been accessed for an index
960 # greater than $Klimit, which is the last token index. Just accessing
961 # the array above index $Klimit, not setting a value, can cause @rLL to
962 # increase beyond $Klimit. If this occurs, the problem can be located
963 # by making calls to this routine at different locations in
964 # sub 'finish_formatting'.
965 $Klimit = 'undef' if ( !defined($Klimit) );
966 $msg = EMPTY_STRING unless $msg;
967 Fault("$msg ERROR: rLL has num=$num but Klimit='$Klimit'\n");
970 } ## end sub check_rLL
973 my ( $rtest, $rvalid, $msg, $exact_match ) = @_;
975 # Check the keys of a hash:
976 # $rtest = ref to hash to test
977 # $rvalid = ref to hash with valid keys
979 # $msg = a message to write in case of error
980 # $exact_match defines the type of check:
981 # = false: test hash must not have unknown key
982 # = true: test hash must have exactly same keys as known hash
984 grep { !exists $rvalid->{$_} } keys %{$rtest};
986 grep { !exists $rtest->{$_} } keys %{$rvalid};
987 my $error = @unknown_keys;
988 if ($exact_match) { $error ||= @missing_keys }
990 local $LIST_SEPARATOR = ')(';
991 my @expected_keys = sort keys %{$rvalid};
992 @unknown_keys = sort @unknown_keys;
994 ------------------------------------------------------------------------
995 Program error detected checking hash keys
997 Expected keys: (@expected_keys)
998 Unknown key(s): (@unknown_keys)
999 Missing key(s): (@missing_keys)
1000 ------------------------------------------------------------------------
1004 } ## end sub check_keys
1006 sub check_token_array {
1009 # Check for errors in the array of tokens. This is only called
1010 # when the DEVEL_MODE flag is set, so this Fault will only occur
1011 # during code development.
1012 my $rLL = $self->[_rLL_];
1013 foreach my $KK ( 0 .. @{$rLL} - 1 ) {
1014 my $nvars = @{ $rLL->[$KK] };
1015 if ( $nvars != _NVARS ) {
1017 my $type = $rLL->[$KK]->[_TYPE_];
1018 $type = '*' unless defined($type);
1020 # The number of variables per token node is _NVARS and was set when
1021 # the array indexes were generated. So if the number of variables
1022 # is different we have done something wrong, like not store all of
1023 # them in sub 'write_line' when they were received from the
1026 "number of vars for node $KK, type '$type', is $nvars but should be $NVARS"
1029 foreach my $var ( _TOKEN_, _TYPE_ ) {
1030 if ( !defined( $rLL->[$KK]->[$var] ) ) {
1031 my $iline = $rLL->[$KK]->[_LINE_INDEX_];
1033 # This is a simple check that each token has some basic
1034 # variables. In other words, that there are no holes in the
1035 # array of tokens. Sub 'write_line' pushes tokens into the
1036 # $rLL array, so this should guarantee no gaps.
1037 Fault("Undefined variable $var for K=$KK, line=$iline\n");
1042 } ## end sub check_token_array
1044 { ## begin closure check_line_hashes
1046 # This code checks that no autovivification occurs in the 'line' hash
1048 my %valid_line_hash;
1052 # These keys are defined for each line in the formatter
1053 # Each line must have exactly these quantities
1054 my @valid_line_keys = qw(
1057 _guessed_indentation_level
1064 _square_bracket_depth
1066 _ended_in_blank_token
1075 @valid_line_hash{@valid_line_keys} = (1) x scalar(@valid_line_keys);
1078 sub check_line_hashes {
1080 my $rlines = $self->[_rlines_];
1081 foreach my $rline ( @{$rlines} ) {
1082 my $iline = $rline->{_line_number};
1083 my $line_type = $rline->{_line_type};
1084 check_keys( $rline, \%valid_line_hash,
1085 "Checkpoint: line number =$iline, line_type=$line_type", 1 );
1088 } ## end sub check_line_hashes
1089 } ## end closure check_line_hashes
1091 { ## begin closure for logger routines
1094 # Called once per file to initialize the logger object
1095 sub set_logger_object {
1096 $logger_object = shift;
1100 sub get_logger_object {
1101 return $logger_object;
1104 sub get_input_stream_name {
1105 my $input_stream_name = EMPTY_STRING;
1106 if ($logger_object) {
1107 $input_stream_name = $logger_object->get_input_stream_name();
1109 return $input_stream_name;
1112 # interface to Perl::Tidy::Logger routines
1115 if ($logger_object) { $logger_object->warning($msg); }
1121 if ($logger_object) {
1122 $logger_object->complain($msg);
1127 sub write_logfile_entry {
1129 if ($logger_object) {
1130 $logger_object->write_logfile_entry(@msg);
1135 sub get_saw_brace_error {
1136 if ($logger_object) {
1137 return $logger_object->get_saw_brace_error();
1142 sub we_are_at_the_last_line {
1143 if ($logger_object) {
1144 $logger_object->we_are_at_the_last_line();
1149 } ## end closure for logger routines
1151 { ## begin closure for diagnostics routines
1152 my $diagnostics_object;
1154 # Called once per file to initialize the diagnostics object
1155 sub set_diagnostics_object {
1156 $diagnostics_object = shift;
1160 sub write_diagnostics {
1162 if ($diagnostics_object) {
1163 $diagnostics_object->write_diagnostics($msg);
1167 } ## end closure for diagnostics routines
1169 sub get_convergence_check {
1171 return $self->[_converged_];
1174 sub get_added_semicolon_count {
1176 return $self->[_added_semicolon_count_];
1179 sub get_output_line_number {
1181 my $vao = $self->[_vertical_aligner_object_];
1182 return $vao->get_output_line_number();
1185 sub want_blank_line {
1188 my $file_writer_object = $self->[_file_writer_object_];
1189 $file_writer_object->want_blank_line();
1193 sub write_unindented_line {
1194 my ( $self, $line ) = @_;
1196 my $file_writer_object = $self->[_file_writer_object_];
1197 $file_writer_object->write_line($line);
1201 sub consecutive_nonblank_lines {
1203 my $file_writer_object = $self->[_file_writer_object_];
1204 my $vao = $self->[_vertical_aligner_object_];
1205 return $file_writer_object->get_consecutive_nonblank_lines() +
1206 $vao->get_cached_line_count();
1211 my $max = shift @vals;
1212 for (@vals) { $max = $_ > $max ? $_ : $max }
1218 my $min = shift @vals;
1219 for (@vals) { $min = $_ < $min ? $_ : $min }
1225 # given a string containing words separated by whitespace,
1226 # return the list of words
1231 return split( /\s+/, $str );
1232 } ## end sub split_words
1234 ###########################################
1235 # CODE SECTION 3: Check and process options
1236 ###########################################
1240 # This routine is called to check the user-supplied run parameters
1241 # and to configure the control hashes to them.
1244 initialize_whitespace_hashes();
1245 initialize_bond_strength_hashes();
1247 # This function must be called early to get hashes with grep initialized
1248 initialize_grep_and_friends( $rOpts->{'grep-alias-list'} );
1250 # Make needed regex patterns for matching text.
1251 # NOTE: sub_matching_patterns must be made first because later patterns use
1252 # them; see RT #133130.
1253 make_sub_matching_pattern();
1254 make_static_block_comment_pattern();
1255 make_static_side_comment_pattern();
1256 make_closing_side_comment_prefix();
1257 make_closing_side_comment_list_pattern();
1258 $format_skipping_pattern_begin =
1259 make_format_skipping_pattern( 'format-skipping-begin', '#<<<' );
1260 $format_skipping_pattern_end =
1261 make_format_skipping_pattern( 'format-skipping-end', '#>>>' );
1262 make_non_indenting_brace_pattern();
1264 # If closing side comments ARE selected, then we can safely
1265 # delete old closing side comments unless closing side comment
1266 # warnings are requested. This is a good idea because it will
1267 # eliminate any old csc's which fall below the line count threshold.
1268 # We cannot do this if warnings are turned on, though, because we
1269 # might delete some text which has been added. So that must
1270 # be handled when comments are created. And we cannot do this
1271 # with -io because -csc will be skipped altogether.
1272 if ( $rOpts->{'closing-side-comments'} ) {
1273 if ( !$rOpts->{'closing-side-comment-warnings'}
1274 && !$rOpts->{'indent-only'} )
1276 $rOpts->{'delete-closing-side-comments'} = 1;
1280 # If closing side comments ARE NOT selected, but warnings ARE
1281 # selected and we ARE DELETING csc's, then we will pretend to be
1282 # adding with a huge interval. This will force the comments to be
1283 # generated for comparison with the old comments, but not added.
1284 elsif ( $rOpts->{'closing-side-comment-warnings'} ) {
1285 if ( $rOpts->{'delete-closing-side-comments'} ) {
1286 $rOpts->{'delete-closing-side-comments'} = 0;
1287 $rOpts->{'closing-side-comments'} = 1;
1288 $rOpts->{'closing-side-comment-interval'} = 100_000_000;
1294 make_block_brace_vertical_tightness_pattern();
1295 make_blank_line_pattern();
1296 make_keyword_group_list_pattern();
1298 # Make initial list of desired one line block types
1299 # They will be modified by 'prepare_cuddled_block_types'
1300 # NOTE: this line must come after is_sort_map_grep_eval is
1301 # initialized in sub 'initialize_grep_and_friends'
1302 %want_one_line_block = %is_sort_map_grep_eval;
1304 prepare_cuddled_block_types();
1305 if ( $rOpts->{'dump-cuddled-block-list'} ) {
1306 dump_cuddled_block_list(*STDOUT);
1311 if ( $rOpts->{'extended-line-up-parentheses'} ) {
1312 $rOpts->{'line-up-parentheses'} ||= 1;
1315 if ( $rOpts->{'line-up-parentheses'} ) {
1317 if ( $rOpts->{'indent-only'}
1318 || !$rOpts->{'add-newlines'}
1319 || !$rOpts->{'delete-old-newlines'} )
1322 -----------------------------------------------------------------------
1323 Conflict: -lp conflicts with -io, -fnl, -nanl, or -ndnl; ignoring -lp
1325 The -lp indentation logic requires that perltidy be able to coordinate
1326 arbitrarily large numbers of line breakpoints. This isn't possible
1328 -----------------------------------------------------------------------
1330 $rOpts->{'line-up-parentheses'} = 0;
1331 $rOpts->{'extended-line-up-parentheses'} = 0;
1334 if ( $rOpts->{'whitespace-cycle'} ) {
1336 Conflict: -wc cannot currently be used with the -lp option; ignoring -wc
1338 $rOpts->{'whitespace-cycle'} = 0;
1342 # At present, tabs are not compatible with the line-up-parentheses style
1343 # (it would be possible to entab the total leading whitespace
1344 # just prior to writing the line, if desired).
1345 if ( $rOpts->{'line-up-parentheses'} && $rOpts->{'tabs'} ) {
1347 Conflict: -t (tabs) cannot be used with the -lp option; ignoring -t; see -et.
1349 $rOpts->{'tabs'} = 0;
1352 # Likewise, tabs are not compatible with outdenting..
1353 if ( $rOpts->{'outdent-keywords'} && $rOpts->{'tabs'} ) {
1355 Conflict: -t (tabs) cannot be used with the -okw options; ignoring -t; see -et.
1357 $rOpts->{'tabs'} = 0;
1360 if ( $rOpts->{'outdent-labels'} && $rOpts->{'tabs'} ) {
1362 Conflict: -t (tabs) cannot be used with the -ola option; ignoring -t; see -et.
1364 $rOpts->{'tabs'} = 0;
1367 if ( !$rOpts->{'space-for-semicolon'} ) {
1368 $want_left_space{'f'} = -1;
1371 if ( $rOpts->{'space-terminal-semicolon'} ) {
1372 $want_left_space{';'} = 1;
1375 # We should put an upper bound on any -sil=n value. Otherwise enormous
1376 # files could be created by mistake.
1377 for ( $rOpts->{'starting-indentation-level'} ) {
1378 if ( $_ && $_ > 100 ) {
1380 The value --starting-indentation-level=$_ is very large; a mistake? resetting to 0;
1386 # Require -msp > 0 to avoid future parsing problems (issue c147)
1387 for ( $rOpts->{'minimum-space-to-comment'} ) {
1388 if ( !$_ || $_ <= 0 ) { $_ = 1 }
1391 # implement outdenting preferences for keywords
1392 %outdent_keyword = ();
1393 my @okw = split_words( $rOpts->{'outdent-keyword-list'} );
1395 @okw = qw(next last redo goto return); # defaults
1398 # FUTURE: if not a keyword, assume that it is an identifier
1400 if ( $Perl::Tidy::Tokenizer::is_keyword{$_} ) {
1401 $outdent_keyword{$_} = 1;
1404 Warn("ignoring '$_' in -okwl list; not a perl keyword");
1408 # setup hash for -kpit option
1409 %keyword_paren_inner_tightness = ();
1410 my $kpit_value = $rOpts->{'keyword-paren-inner-tightness'};
1411 if ( defined($kpit_value) && $kpit_value != 1 ) {
1413 split_words( $rOpts->{'keyword-paren-inner-tightness-list'} );
1415 @kpit = qw(if elsif unless while until for foreach); # defaults
1418 # we will allow keywords and user-defined identifiers
1420 $keyword_paren_inner_tightness{$_} = $kpit_value;
1424 # implement user whitespace preferences
1425 if ( my @q = split_words( $rOpts->{'want-left-space'} ) ) {
1426 @want_left_space{@q} = (1) x scalar(@q);
1429 if ( my @q = split_words( $rOpts->{'want-right-space'} ) ) {
1430 @want_right_space{@q} = (1) x scalar(@q);
1433 if ( my @q = split_words( $rOpts->{'nowant-left-space'} ) ) {
1434 @want_left_space{@q} = (-1) x scalar(@q);
1437 if ( my @q = split_words( $rOpts->{'nowant-right-space'} ) ) {
1438 @want_right_space{@q} = (-1) x scalar(@q);
1440 if ( $rOpts->{'dump-want-left-space'} ) {
1441 dump_want_left_space(*STDOUT);
1445 if ( $rOpts->{'dump-want-right-space'} ) {
1446 dump_want_right_space(*STDOUT);
1450 # default keywords for which space is introduced before an opening paren
1451 # (at present, including them messes up vertical alignment)
1452 my @sak = qw(my local our and or xor err eq ne if else elsif until
1453 unless while for foreach return switch case given when catch);
1454 %space_after_keyword = map { $_ => 1 } @sak;
1456 # first remove any or all of these if desired
1457 if ( my @q = split_words( $rOpts->{'nospace-after-keyword'} ) ) {
1459 # -nsak='*' selects all the above keywords
1460 if ( @q == 1 && $q[0] eq '*' ) { @q = keys(%space_after_keyword) }
1461 @space_after_keyword{@q} = (0) x scalar(@q);
1464 # then allow user to add to these defaults
1465 if ( my @q = split_words( $rOpts->{'space-after-keyword'} ) ) {
1466 @space_after_keyword{@q} = (1) x scalar(@q);
1469 # implement user break preferences
1470 my $break_after = sub {
1472 foreach my $tok (@toks) {
1473 if ( $tok eq '?' ) { $tok = ':' } # patch to coordinate ?/:
1474 my $lbs = $left_bond_strength{$tok};
1475 my $rbs = $right_bond_strength{$tok};
1476 if ( defined($lbs) && defined($rbs) && $lbs < $rbs ) {
1477 ( $right_bond_strength{$tok}, $left_bond_strength{$tok} ) =
1484 my $break_before = sub {
1486 foreach my $tok (@toks) {
1487 my $lbs = $left_bond_strength{$tok};
1488 my $rbs = $right_bond_strength{$tok};
1489 if ( defined($lbs) && defined($rbs) && $rbs < $lbs ) {
1490 ( $right_bond_strength{$tok}, $left_bond_strength{$tok} ) =
1497 $break_after->(@all_operators) if ( $rOpts->{'break-after-all-operators'} );
1498 $break_before->(@all_operators)
1499 if ( $rOpts->{'break-before-all-operators'} );
1501 $break_after->( split_words( $rOpts->{'want-break-after'} ) );
1502 $break_before->( split_words( $rOpts->{'want-break-before'} ) );
1504 # make note if breaks are before certain key types
1505 %want_break_before = ();
1506 foreach my $tok ( @all_operators, ',' ) {
1507 $want_break_before{$tok} =
1508 $left_bond_strength{$tok} < $right_bond_strength{$tok};
1511 # Coordinate ?/: breaks, which must be similar
1512 # The small strength 0.01 which is added is 1% of the strength of one
1513 # indentation level and seems to work okay.
1514 if ( !$want_break_before{':'} ) {
1515 $want_break_before{'?'} = $want_break_before{':'};
1516 $right_bond_strength{'?'} = $right_bond_strength{':'} + 0.01;
1517 $left_bond_strength{'?'} = NO_BREAK;
1520 # Only make a hash entry for the next parameters if values are defined.
1521 # That allows a quick check to be made later.
1522 %break_before_container_types = ();
1523 for ( $rOpts->{'break-before-hash-brace'} ) {
1524 $break_before_container_types{'{'} = $_ if $_ && $_ > 0;
1526 for ( $rOpts->{'break-before-square-bracket'} ) {
1527 $break_before_container_types{'['} = $_ if $_ && $_ > 0;
1529 for ( $rOpts->{'break-before-paren'} ) {
1530 $break_before_container_types{'('} = $_ if $_ && $_ > 0;
1533 #--------------------------------------------------------------
1534 # The combination -lp -iob -vmll -bbx=2 can be unstable (b1266)
1535 #--------------------------------------------------------------
1536 # The -vmll and -lp parameters do not really work well together.
1537 # To avoid instabilities, we will change any -bbx=2 to -bbx=1 (stable).
1538 # NOTE: we could make this more precise by looking at any exclusion
1539 # flags for -lp, and allowing -bbx=2 for excluded types.
1540 if ( $rOpts->{'variable-maximum-line-length'}
1541 && $rOpts->{'ignore-old-breakpoints'}
1542 && $rOpts->{'line-up-parentheses'} )
1545 foreach my $key ( keys %break_before_container_types ) {
1546 if ( $break_before_container_types{$key} == 2 ) {
1547 $break_before_container_types{$key} = 1;
1548 push @changed, $key;
1553 # we could write a warning here
1557 #-----------------------------------------------------------
1558 # The combination -lp -vmll can be unstable if -ci<2 (b1267)
1559 #-----------------------------------------------------------
1560 # The -vmll and -lp parameters do not really work well together.
1561 # This is a very crude fix for an unusual parameter combination.
1562 if ( $rOpts->{'variable-maximum-line-length'}
1563 && $rOpts->{'line-up-parentheses'}
1564 && $rOpts->{'continuation-indentation'} < 2 )
1566 $rOpts->{'continuation-indentation'} = 2;
1567 ##Warn("Increased -ci=n to n=2 for stability with -lp and -vmll\n");
1570 %container_indentation_options = ();
1572 [ 'break-before-hash-brace-and-indent', '{' ],
1573 [ 'break-before-square-bracket-and-indent', '[' ],
1574 [ 'break-before-paren-and-indent', '(' ],
1577 my ( $key, $tok ) = @{$pair};
1578 my $opt = $rOpts->{$key};
1579 if ( defined($opt) && $opt > 0 && $break_before_container_types{$tok} )
1582 # (1) -lp is not compatible with opt=2, silently set to opt=0
1583 # (2) opt=0 and 2 give same result if -i=-ci; but opt=0 is faster
1585 if ( $rOpts->{'line-up-parentheses'}
1586 || $rOpts->{'indent-columns'} ==
1587 $rOpts->{'continuation-indentation'} )
1592 $container_indentation_options{$tok} = $opt;
1596 # Define here tokens which may follow the closing brace of a do statement
1597 # on the same line, as in:
1598 # } while ( $something);
1599 my @dof = qw(until while unless if ; : );
1601 @is_do_follower{@dof} = (1) x scalar(@dof);
1603 # what can follow a multi-line anonymous sub definition closing curly:
1604 my @asf = qw# ; : => or and && || ~~ !~~ ) #;
1606 @is_anon_sub_brace_follower{@asf} = (1) x scalar(@asf);
1608 # what can follow a one-line anonymous sub closing curly:
1609 # one-line anonymous subs also have ']' here...
1610 # see tk3.t and PP.pm
1611 my @asf1 = qw# ; : => or and && || ) ] ~~ !~~ #;
1613 @is_anon_sub_1_brace_follower{@asf1} = (1) x scalar(@asf1);
1615 # What can follow a closing curly of a block
1616 # which is not an if/elsif/else/do/sort/map/grep/eval/sub
1617 # Testfiles: 'Toolbar.pm', 'Menubar.pm', bless.t, '3rules.pl'
1618 my @obf = qw# ; : => or and && || ) #;
1620 @is_other_brace_follower{@obf} = (1) x scalar(@obf);
1622 $right_bond_strength{'{'} = WEAK;
1623 $left_bond_strength{'{'} = VERY_STRONG;
1625 # make -l=0 equal to -l=infinite
1626 if ( !$rOpts->{'maximum-line-length'} ) {
1627 $rOpts->{'maximum-line-length'} = 1_000_000;
1630 # make -lbl=0 equal to -lbl=infinite
1631 if ( !$rOpts->{'long-block-line-count'} ) {
1632 $rOpts->{'long-block-line-count'} = 1_000_000;
1635 my $ole = $rOpts->{'output-line-ending'};
1644 # Patch for RT #99514, a memoization issue.
1645 # Normally, the user enters one of 'dos', 'win', etc, and we change the
1646 # value in the options parameter to be the corresponding line ending
1647 # character. But, if we are using memoization, on later passes through
1648 # here the option parameter will already have the desired ending
1649 # character rather than the keyword 'dos', 'win', etc. So
1650 # we must check to see if conversion has already been done and, if so,
1651 # bypass the conversion step.
1652 my %endings_inverted = (
1653 "\015\012" => 'dos',
1654 "\015\012" => 'win',
1659 if ( defined( $endings_inverted{$ole} ) ) {
1661 # we already have valid line ending, nothing more to do
1665 unless ( $rOpts->{'output-line-ending'} = $endings{$ole} ) {
1666 my $str = join SPACE, keys %endings;
1668 Unrecognized line ending '$ole'; expecting one of: $str
1671 if ( $rOpts->{'preserve-line-endings'} ) {
1672 Warn("Ignoring -ple; conflicts with -ole\n");
1673 $rOpts->{'preserve-line-endings'} = undef;
1678 # hashes used to simplify setting whitespace
1680 '{' => $rOpts->{'brace-tightness'},
1681 '}' => $rOpts->{'brace-tightness'},
1682 '(' => $rOpts->{'paren-tightness'},
1683 ')' => $rOpts->{'paren-tightness'},
1684 '[' => $rOpts->{'square-bracket-tightness'},
1685 ']' => $rOpts->{'square-bracket-tightness'},
1694 if ( $rOpts->{'ignore-old-breakpoints'} ) {
1697 if ( $rOpts->{'break-at-old-method-breakpoints'} ) {
1698 $rOpts->{'break-at-old-method-breakpoints'} = 0;
1699 push @conflicts, '--break-at-old-method-breakpoints (-bom)';
1701 if ( $rOpts->{'break-at-old-comma-breakpoints'} ) {
1702 $rOpts->{'break-at-old-comma-breakpoints'} = 0;
1703 push @conflicts, '--break-at-old-comma-breakpoints (-boc)';
1705 if ( $rOpts->{'break-at-old-semicolon-breakpoints'} ) {
1706 $rOpts->{'break-at-old-semicolon-breakpoints'} = 0;
1707 push @conflicts, '--break-at-old-semicolon-breakpoints (-bos)';
1709 if ( $rOpts->{'keep-old-breakpoints-before'} ) {
1710 $rOpts->{'keep-old-breakpoints-before'} = EMPTY_STRING;
1711 push @conflicts, '--keep-old-breakpoints-before (-kbb)';
1713 if ( $rOpts->{'keep-old-breakpoints-after'} ) {
1714 $rOpts->{'keep-old-breakpoints-after'} = EMPTY_STRING;
1715 push @conflicts, '--keep-old-breakpoints-after (-kba)';
1719 my $msg = join( "\n ",
1720 " Conflict: These conflicts with --ignore-old-breakponts (-iob) will be turned off:",
1726 # Note: These additional parameters are made inactive by -iob.
1727 # They are silently turned off here because they are on by default.
1728 # We would generate unexpected warnings if we issued a warning.
1729 $rOpts->{'break-at-old-keyword-breakpoints'} = 0;
1730 $rOpts->{'break-at-old-logical-breakpoints'} = 0;
1731 $rOpts->{'break-at-old-ternary-breakpoints'} = 0;
1732 $rOpts->{'break-at-old-attribute-breakpoints'} = 0;
1735 %keep_break_before_type = ();
1736 initialize_keep_old_breakpoints( $rOpts->{'keep-old-breakpoints-before'},
1737 'kbb', \%keep_break_before_type );
1739 %keep_break_after_type = ();
1740 initialize_keep_old_breakpoints( $rOpts->{'keep-old-breakpoints-after'},
1741 'kba', \%keep_break_after_type );
1743 #------------------------------------------------------------
1744 # Make global vars for frequently used options for efficiency
1745 #------------------------------------------------------------
1747 $rOpts_add_newlines = $rOpts->{'add-newlines'};
1748 $rOpts_add_whitespace = $rOpts->{'add-whitespace'};
1749 $rOpts_blank_lines_after_opening_block =
1750 $rOpts->{'blank-lines-after-opening-block'};
1751 $rOpts_block_brace_tightness = $rOpts->{'block-brace-tightness'};
1752 $rOpts_block_brace_vertical_tightness =
1753 $rOpts->{'block-brace-vertical-tightness'};
1754 $rOpts_break_after_labels = $rOpts->{'break-after-labels'};
1755 $rOpts_break_at_old_attribute_breakpoints =
1756 $rOpts->{'break-at-old-attribute-breakpoints'};
1757 $rOpts_break_at_old_comma_breakpoints =
1758 $rOpts->{'break-at-old-comma-breakpoints'};
1759 $rOpts_break_at_old_keyword_breakpoints =
1760 $rOpts->{'break-at-old-keyword-breakpoints'};
1761 $rOpts_break_at_old_logical_breakpoints =
1762 $rOpts->{'break-at-old-logical-breakpoints'};
1763 $rOpts_break_at_old_semicolon_breakpoints =
1764 $rOpts->{'break-at-old-semicolon-breakpoints'};
1765 $rOpts_break_at_old_ternary_breakpoints =
1766 $rOpts->{'break-at-old-ternary-breakpoints'};
1767 $rOpts_break_open_compact_parens = $rOpts->{'break-open-compact-parens'};
1768 $rOpts_closing_side_comments = $rOpts->{'closing-side-comments'};
1769 $rOpts_closing_side_comment_else_flag =
1770 $rOpts->{'closing-side-comment-else-flag'};
1771 $rOpts_closing_side_comment_maximum_text =
1772 $rOpts->{'closing-side-comment-maximum-text'};
1773 $rOpts_comma_arrow_breakpoints = $rOpts->{'comma-arrow-breakpoints'};
1774 $rOpts_continuation_indentation = $rOpts->{'continuation-indentation'};
1775 $rOpts_delete_closing_side_comments =
1776 $rOpts->{'delete-closing-side-comments'};
1777 $rOpts_delete_old_whitespace = $rOpts->{'delete-old-whitespace'};
1778 $rOpts_extended_continuation_indentation =
1779 $rOpts->{'extended-continuation-indentation'};
1780 $rOpts_delete_side_comments = $rOpts->{'delete-side-comments'};
1781 $rOpts_format_skipping = $rOpts->{'format-skipping'};
1782 $rOpts_freeze_whitespace = $rOpts->{'freeze-whitespace'};
1783 $rOpts_function_paren_vertical_alignment =
1784 $rOpts->{'function-paren-vertical-alignment'};
1785 $rOpts_fuzzy_line_length = $rOpts->{'fuzzy-line-length'};
1786 $rOpts_ignore_old_breakpoints = $rOpts->{'ignore-old-breakpoints'};
1787 $rOpts_ignore_side_comment_lengths =
1788 $rOpts->{'ignore-side-comment-lengths'};
1789 $rOpts_indent_closing_brace = $rOpts->{'indent-closing-brace'};
1790 $rOpts_indent_columns = $rOpts->{'indent-columns'};
1791 $rOpts_indent_only = $rOpts->{'indent-only'};
1792 $rOpts_keep_interior_semicolons = $rOpts->{'keep-interior-semicolons'};
1793 $rOpts_line_up_parentheses = $rOpts->{'line-up-parentheses'};
1794 $rOpts_extended_line_up_parentheses =
1795 $rOpts->{'extended-line-up-parentheses'};
1796 $rOpts_logical_padding = $rOpts->{'logical-padding'};
1797 $rOpts_maximum_consecutive_blank_lines =
1798 $rOpts->{'maximum-consecutive-blank-lines'};
1799 $rOpts_maximum_fields_per_table = $rOpts->{'maximum-fields-per-table'};
1800 $rOpts_maximum_line_length = $rOpts->{'maximum-line-length'};
1801 $rOpts_one_line_block_semicolons = $rOpts->{'one-line-block-semicolons'};
1802 $rOpts_opening_brace_always_on_right =
1803 $rOpts->{'opening-brace-always-on-right'};
1804 $rOpts_outdent_keywords = $rOpts->{'outdent-keywords'};
1805 $rOpts_outdent_labels = $rOpts->{'outdent-labels'};
1806 $rOpts_outdent_long_comments = $rOpts->{'outdent-long-comments'};
1807 $rOpts_outdent_long_quotes = $rOpts->{'outdent-long-quotes'};
1808 $rOpts_outdent_static_block_comments =
1809 $rOpts->{'outdent-static-block-comments'};
1810 $rOpts_recombine = $rOpts->{'recombine'};
1811 $rOpts_short_concatenation_item_length =
1812 $rOpts->{'short-concatenation-item-length'};
1813 $rOpts_stack_closing_block_brace = $rOpts->{'stack-closing-block-brace'};
1814 $rOpts_static_block_comments = $rOpts->{'static-block-comments'};
1815 $rOpts_sub_alias_list = $rOpts->{'sub-alias-list'};
1816 $rOpts_tee_block_comments = $rOpts->{'tee-block-comments'};
1817 $rOpts_tee_pod = $rOpts->{'tee-pod'};
1818 $rOpts_tee_side_comments = $rOpts->{'tee-side-comments'};
1819 $rOpts_valign = $rOpts->{'valign'};
1820 $rOpts_valign_code = $rOpts->{'valign-code'};
1821 $rOpts_valign_side_comments = $rOpts->{'valign-side-comments'};
1822 $rOpts_variable_maximum_line_length =
1823 $rOpts->{'variable-maximum-line-length'};
1825 # Note that both opening and closing tokens can access the opening
1826 # and closing flags of their container types.
1827 %opening_vertical_tightness = (
1828 '(' => $rOpts->{'paren-vertical-tightness'},
1829 '{' => $rOpts->{'brace-vertical-tightness'},
1830 '[' => $rOpts->{'square-bracket-vertical-tightness'},
1831 ')' => $rOpts->{'paren-vertical-tightness'},
1832 '}' => $rOpts->{'brace-vertical-tightness'},
1833 ']' => $rOpts->{'square-bracket-vertical-tightness'},
1836 %closing_vertical_tightness = (
1837 '(' => $rOpts->{'paren-vertical-tightness-closing'},
1838 '{' => $rOpts->{'brace-vertical-tightness-closing'},
1839 '[' => $rOpts->{'square-bracket-vertical-tightness-closing'},
1840 ')' => $rOpts->{'paren-vertical-tightness-closing'},
1841 '}' => $rOpts->{'brace-vertical-tightness-closing'},
1842 ']' => $rOpts->{'square-bracket-vertical-tightness-closing'},
1845 # assume flag for '>' same as ')' for closing qw quotes
1846 %closing_token_indentation = (
1847 ')' => $rOpts->{'closing-paren-indentation'},
1848 '}' => $rOpts->{'closing-brace-indentation'},
1849 ']' => $rOpts->{'closing-square-bracket-indentation'},
1850 '>' => $rOpts->{'closing-paren-indentation'},
1853 # flag indicating if any closing tokens are indented
1854 $some_closing_token_indentation =
1855 $rOpts->{'closing-paren-indentation'}
1856 || $rOpts->{'closing-brace-indentation'}
1857 || $rOpts->{'closing-square-bracket-indentation'}
1858 || $rOpts->{'indent-closing-brace'};
1860 %opening_token_right = (
1861 '(' => $rOpts->{'opening-paren-right'},
1862 '{' => $rOpts->{'opening-hash-brace-right'},
1863 '[' => $rOpts->{'opening-square-bracket-right'},
1866 %stack_opening_token = (
1867 '(' => $rOpts->{'stack-opening-paren'},
1868 '{' => $rOpts->{'stack-opening-hash-brace'},
1869 '[' => $rOpts->{'stack-opening-square-bracket'},
1872 %stack_closing_token = (
1873 ')' => $rOpts->{'stack-closing-paren'},
1874 '}' => $rOpts->{'stack-closing-hash-brace'},
1875 ']' => $rOpts->{'stack-closing-square-bracket'},
1878 # Create a table of maximum line length vs level for later efficient use.
1879 # We will make the tables very long to be sure it will not be exceeded.
1880 # But we have to choose a fixed length. A check will be made at the start
1881 # of sub 'finish_formatting' to be sure it is not exceeded. Note, some of
1882 # my standard test problems have indentation levels of about 150, so this
1883 # should be fairly large. If the choice of a maximum level ever becomes
1884 # an issue then these table values could be returned in a sub with a simple
1885 # memoization scheme.
1887 # Also create a table of the maximum spaces available for text due to the
1888 # level only. If a line has continuation indentation, then that space must
1889 # be subtracted from the table value. This table is used for preliminary
1890 # estimates in welding, extended_ci, BBX, and marking short blocks.
1891 use constant LEVEL_TABLE_MAX => 1000;
1894 foreach my $level ( 0 .. LEVEL_TABLE_MAX ) {
1895 my $indent = $level * $rOpts_indent_columns;
1896 $maximum_line_length_at_level[$level] = $rOpts_maximum_line_length;
1897 $maximum_text_length_at_level[$level] =
1898 $rOpts_maximum_line_length - $indent;
1901 # Correct the maximum_text_length table if the -wc=n flag is used
1902 $rOpts_whitespace_cycle = $rOpts->{'whitespace-cycle'};
1903 if ($rOpts_whitespace_cycle) {
1904 if ( $rOpts_whitespace_cycle > 0 ) {
1905 foreach my $level ( 0 .. LEVEL_TABLE_MAX ) {
1906 my $level_mod = $level % $rOpts_whitespace_cycle;
1907 my $indent = $level_mod * $rOpts_indent_columns;
1908 $maximum_text_length_at_level[$level] =
1909 $rOpts_maximum_line_length - $indent;
1913 $rOpts_whitespace_cycle = $rOpts->{'whitespace-cycle'} = 0;
1917 # Correct the tables if the -vmll flag is used. These values override the
1919 if ($rOpts_variable_maximum_line_length) {
1920 foreach my $level ( 0 .. LEVEL_TABLE_MAX ) {
1921 $maximum_text_length_at_level[$level] = $rOpts_maximum_line_length;
1922 $maximum_line_length_at_level[$level] =
1923 $rOpts_maximum_line_length + $level * $rOpts_indent_columns;
1927 # Define two measures of indentation level, alpha and beta, at which some
1928 # formatting features come under stress and need to start shutting down.
1929 # Some combination of the two will be used to shut down different
1930 # formatting features.
1931 # Put a reasonable upper limit on stress level (say 100) in case the
1932 # whitespace-cycle variable is used.
1933 my $stress_level_limit = min( 100, LEVEL_TABLE_MAX );
1935 # Find stress_level_alpha, targeted at very short maximum line lengths.
1936 $stress_level_alpha = $stress_level_limit + 1;
1937 foreach my $level_test ( 0 .. $stress_level_limit ) {
1938 my $max_len = $maximum_text_length_at_level[ $level_test + 1 ];
1939 my $excess_inside_space =
1941 $rOpts_continuation_indentation -
1942 $rOpts_indent_columns - 8;
1943 if ( $excess_inside_space <= 0 ) {
1944 $stress_level_alpha = $level_test;
1949 # Find stress level beta, a stress level targeted at formatting
1950 # at deep levels near the maximum line length. We start increasing
1951 # from zero and stop at the first level which shows no more space.
1953 # 'const' is a fixed number of spaces for a typical variable.
1954 # Cases b1197-b1204 work ok with const=12 but not with const=8
1956 my $denom = max( 1, $rOpts_indent_columns );
1957 $stress_level_beta = 0;
1958 foreach my $level ( 0 .. $stress_level_limit ) {
1959 my $remaining_cycles = max(
1962 $maximum_text_length_at_level[$level] -
1963 $rOpts_continuation_indentation - $const
1966 last if ( $remaining_cycles <= 3 ); # 2 does not work
1967 $stress_level_beta = $level;
1970 initialize_weld_nested_exclusion_rules();
1972 %line_up_parentheses_control_hash = ();
1973 $line_up_parentheses_control_is_lxpl = 1;
1974 my $lpxl = $rOpts->{'line-up-parentheses-exclusion-list'};
1975 my $lpil = $rOpts->{'line-up-parentheses-inclusion-list'};
1976 if ( $lpxl && $lpil ) {
1978 You entered values for both -lpxl=s and -lpil=s; the -lpil list will be ignored
1982 $line_up_parentheses_control_is_lxpl = 1;
1983 initialize_line_up_parentheses_control_hash(
1984 $rOpts->{'line-up-parentheses-exclusion-list'}, 'lpxl' );
1987 $line_up_parentheses_control_is_lxpl = 0;
1988 initialize_line_up_parentheses_control_hash(
1989 $rOpts->{'line-up-parentheses-inclusion-list'}, 'lpil' );
1993 } ## end sub check_options
1995 use constant ALIGN_GREP_ALIASES => 0;
1997 sub initialize_grep_and_friends {
2000 # Initialize or re-initialize hashes with 'grep' and grep aliases. This
2001 # must be done after each set of options because new grep aliases may be
2004 # re-initialize the hash ... this is critical!
2005 %is_sort_map_grep = ();
2007 my @q = qw(sort map grep);
2008 @is_sort_map_grep{@q} = (1) x scalar(@q);
2010 # Note that any 'grep-alias-list' string has been preprocessed to be a
2011 # trimmed, space-separated list.
2012 my @grep_aliases = split /\s+/, $str;
2013 @{is_sort_map_grep}{@grep_aliases} = (1) x scalar(@grep_aliases);
2015 ##@q = qw(sort map grep eval);
2016 %is_sort_map_grep_eval = %is_sort_map_grep;
2017 $is_sort_map_grep_eval{'eval'} = 1;
2019 ##@q = qw(sort map grep eval do);
2020 %is_sort_map_grep_eval_do = %is_sort_map_grep_eval;
2021 $is_sort_map_grep_eval_do{'do'} = 1;
2023 # These block types can take ci. This is used by the -xci option.
2024 # Note that the 'sub' in this list is an anonymous sub. To be more correct
2025 # we could remove sub and use ASUB pattern to also handle a
2026 # prototype/signature. But that would slow things down and would probably
2028 ##@q = qw( do sub eval sort map grep );
2029 %is_block_with_ci = %is_sort_map_grep_eval_do;
2030 $is_block_with_ci{'sub'} = 1;
2032 %is_keyword_returning_list = ();
2041 push @q, @grep_aliases;
2042 @is_keyword_returning_list{@q} = (1) x scalar(@q);
2044 # This code enables vertical alignment of grep aliases for testing. It has
2045 # not been found to be beneficial, so it is off by default. But it is
2046 # useful for precise testing of the grep alias coding.
2047 if (ALIGN_GREP_ALIASES) {
2059 $block_type_map{$_} = 'map' unless ( $_ eq 'map' );
2063 } ## end sub initialize_grep_and_friends
2065 sub initialize_weld_nested_exclusion_rules {
2066 %weld_nested_exclusion_rules = ();
2068 my $opt_name = 'weld-nested-exclusion-list';
2069 my $str = $rOpts->{$opt_name};
2070 return unless ($str);
2073 return unless ($str);
2075 # There are four container tokens.
2083 # We are parsing an exclusion list for nested welds. The list is a string
2084 # with spaces separating any number of items. Each item consists of three
2085 # pieces of information:
2086 # <optional position> <optional type> <type of container>
2087 # < ^ or . > < k or K > < ( [ { >
2089 # The last character is the required container type and must be one of:
2091 # [ = square bracket
2094 # An optional leading position indicator:
2095 # ^ means the leading token position in the weld
2096 # . means a secondary token position in the weld
2097 # no position indicator means all positions match
2099 # An optional alphanumeric character between the position and container
2100 # token selects to which the rule applies:
2102 # K = any non-keyword
2104 # F = not a function call
2105 # w = function or keyword
2106 # W = not a function or keyword
2107 # no letter means any preceding type matches
2110 # ^( - the weld must not start with a paren
2111 # .( - the second and later tokens may not be parens
2112 # ( - no parens in weld
2113 # ^K( - exclude a leading paren not preceded by a keyword
2114 # .k( - exclude a secondary paren preceded by a keyword
2115 # [ { - exclude all brackets and braces
2117 my @items = split /\s+/, $str;
2120 foreach my $item (@items) {
2121 my $item_save = $item;
2122 my $tok = chop($item);
2123 my $key = $token_keys{$tok};
2124 if ( !defined($key) ) {
2125 $msg1 .= " '$item_save'";
2128 if ( !defined( $weld_nested_exclusion_rules{$key} ) ) {
2129 $weld_nested_exclusion_rules{$key} = [];
2131 my $rflags = $weld_nested_exclusion_rules{$key};
2133 # A 'q' means do not weld quotes
2134 if ( $tok eq 'q' ) {
2143 if ( $item =~ /^([\^\.])?([kKfFwW])?$/ ) {
2145 $select = $2 if ($2);
2148 $msg1 .= " '$item_save'";
2154 if ( $pos eq '^' || $pos eq '*' ) {
2155 if ( defined( $rflags->[0] ) && $rflags->[0] ne $select ) {
2158 $rflags->[0] = $select;
2160 if ( $pos eq '.' || $pos eq '*' ) {
2161 if ( defined( $rflags->[1] ) && $rflags->[1] ne $select ) {
2164 $rflags->[1] = $select;
2166 if ($err) { $msg2 .= " '$item_save'"; }
2170 Unexpecting symbol(s) encountered in --$opt_name will be ignored:
2176 Multiple specifications were encountered in the --weld-nested-exclusion-list for:
2178 Only the last will be used.
2182 } ## end sub initialize_weld_nested_exclusion_rules
2184 sub initialize_line_up_parentheses_control_hash {
2185 my ( $str, $opt_name ) = @_;
2186 return unless ($str);
2189 return unless ($str);
2191 # The format is space separated items, where each item must consist of a
2192 # string with a token type preceded by an optional text token and followed
2196 # = (flag1)(key)(flag2), where
2201 my @items = split /\s+/, $str;
2204 foreach my $item (@items) {
2205 my $item_save = $item;
2206 my ( $flag1, $key, $flag2 );
2207 if ( $item =~ /^([^\(\]\{]*)?([\(\{\[])(\d)?$/ ) {
2213 $msg1 .= " '$item_save'";
2217 if ( !defined($key) ) {
2218 $msg1 .= " '$item_save'";
2222 # Check for valid flag1
2223 if ( !defined($flag1) ) { $flag1 = '*' }
2224 elsif ( $flag1 !~ /^[kKfFwW\*]$/ ) {
2225 $msg1 .= " '$item_save'";
2229 # Check for valid flag2
2230 # 0 or blank: ignore container contents
2231 # 1 all containers with sublists match
2232 # 2 all containers with sublists, code blocks or ternary operators match
2233 # ... this could be extended in the future
2234 if ( !defined($flag2) ) { $flag2 = 0 }
2235 elsif ( $flag2 !~ /^[012]$/ ) {
2236 $msg1 .= " '$item_save'";
2240 if ( !defined( $line_up_parentheses_control_hash{$key} ) ) {
2241 $line_up_parentheses_control_hash{$key} = [ $flag1, $flag2 ];
2245 # check for multiple conflicting specifications
2246 my $rflags = $line_up_parentheses_control_hash{$key};
2248 if ( defined( $rflags->[0] ) && $rflags->[0] ne $flag1 ) {
2250 $rflags->[0] = $flag1;
2252 if ( defined( $rflags->[1] ) && $rflags->[1] ne $flag2 ) {
2254 $rflags->[1] = $flag2;
2256 $msg2 .= " '$item_save'" if ($err);
2261 Unexpecting symbol(s) encountered in --$opt_name will be ignored:
2267 Multiple specifications were encountered in the $opt_name at:
2269 Only the last will be used.
2273 # Speedup: we can turn off -lp if it is not actually used
2274 if ($line_up_parentheses_control_is_lxpl) {
2276 foreach my $key (qw# ( { [ #) {
2277 my $rflags = $line_up_parentheses_control_hash{$key};
2278 if ( defined($rflags) ) {
2279 my ( $flag1, $flag2 ) = @{$rflags};
2280 if ( $flag1 && $flag1 ne '*' ) { $all_off = 0; last }
2281 if ($flag2) { $all_off = 0; last }
2285 $rOpts->{'line-up-parentheses'} = EMPTY_STRING;
2290 } ## end sub initialize_line_up_parentheses_control_hash
2292 use constant DEBUG_KB => 0;
2294 sub initialize_keep_old_breakpoints {
2295 my ( $str, $short_name, $rkeep_break_hash ) = @_;
2299 my @list = split_words($str);
2300 if ( DEBUG_KB && @list ) {
2301 local $LIST_SEPARATOR = SPACE;
2303 DEBUG_KB entering for '$short_name' with str=$str\n";
2308 # Ignore kbb='(' and '[' and '{': can cause unstable math formatting
2309 # (issues b1346, b1347, b1348) and likewise ignore kba=')' and ']' and '}'
2310 if ( $short_name eq 'kbb' ) {
2311 @list = grep { !m/[\(\[\{]/ } @list;
2313 elsif ( $short_name eq 'kba' ) {
2314 @list = grep { !m/[\)\]\}]/ } @list;
2317 # pull out any any leading container code, like f( or *{
2318 # For example: 'f(' becomes flags hash entry '(' => 'f'
2319 foreach my $item (@list) {
2320 if ( $item =~ /^( [ \w\* ] )( [ \{\(\[\}\)\] ] )$/x ) {
2327 foreach my $type (@list) {
2328 if ( !Perl::Tidy::Tokenizer::is_valid_token_type($type) ) {
2329 push @unknown_types, $type;
2333 if (@unknown_types) {
2334 my $num = @unknown_types;
2335 local $LIST_SEPARATOR = SPACE;
2337 $num unrecognized token types were input with --$short_name :
2342 @{$rkeep_break_hash}{@list} = (1) x scalar(@list);
2344 foreach my $key ( keys %flags ) {
2345 my $flag = $flags{$key};
2347 if ( length($flag) != 1 ) {
2349 Multiple entries given for '$key' in '$short_name'
2352 elsif ( ( $key eq '(' || $key eq ')' ) && $flag !~ /^[kKfFwW\*]$/ ) {
2354 Unknown flag '$flag' given for '$key' in '$short_name'
2357 elsif ( ( $key eq '}' || $key eq '}' ) && $flag !~ /^[bB\*]$/ ) {
2359 Unknown flag '$flag' given for '$key' in '$short_name'
2363 $rkeep_break_hash->{$key} = $flag;
2366 if ( DEBUG_KB && @list ) {
2368 local $LIST_SEPARATOR = SPACE;
2371 DEBUG_KB -$short_name flag: $str
2380 } ## end sub initialize_keep_old_breakpoints
2382 sub initialize_whitespace_hashes {
2384 # This is called once before formatting begins to initialize these global
2385 # hashes, which control the use of whitespace around tokens:
2390 # %space_after_keyword
2392 # Many token types are identical to the tokens themselves.
2393 # See the tokenizer for a complete list. Here are some special types:
2395 # f = semicolon in for statement
2398 # Note that :: is excluded since it should be contained in an identifier
2399 # Note that '->' is excluded because it never gets space
2400 # parentheses and brackets are excluded since they are handled specially
2401 # curly braces are included but may be overridden by logic, such as
2404 # NEW_TOKENS: create a whitespace rule here. This can be as
2405 # simple as adding your new letter to @spaces_both_sides, for
2408 my @opening_type = qw< L { ( [ >;
2409 @is_opening_type{@opening_type} = (1) x scalar(@opening_type);
2411 my @closing_type = qw< R } ) ] >;
2412 @is_closing_type{@closing_type} = (1) x scalar(@closing_type);
2414 my @spaces_both_sides = qw#
2415 + - * / % ? = . : x < > | & ^ .. << >> ** && .. || // => += -=
2416 .= %= x= &= |= ^= *= <> <= >= == =~ !~ /= != ... <<= >>= ~~ !~~
2417 &&= ||= //= <=> A k f w F n C Y U G v
2420 my @spaces_left_side = qw<
2421 t ! ~ m p { \ h pp mm Z j
2423 push( @spaces_left_side, '#' ); # avoids warning message
2425 my @spaces_right_side = qw<
2426 ; } ) ] R J ++ -- **=
2428 push( @spaces_right_side, ',' ); # avoids warning message
2430 %want_left_space = ();
2431 %want_right_space = ();
2432 %binary_ws_rules = ();
2434 # Note that we setting defaults here. Later in processing
2435 # the values of %want_left_space and %want_right_space
2436 # may be overridden by any user settings specified by the
2437 # -wls and -wrs parameters. However the binary_whitespace_rules
2438 # are hardwired and have priority.
2439 @want_left_space{@spaces_both_sides} =
2440 (1) x scalar(@spaces_both_sides);
2441 @want_right_space{@spaces_both_sides} =
2442 (1) x scalar(@spaces_both_sides);
2443 @want_left_space{@spaces_left_side} =
2444 (1) x scalar(@spaces_left_side);
2445 @want_right_space{@spaces_left_side} =
2446 (-1) x scalar(@spaces_left_side);
2447 @want_left_space{@spaces_right_side} =
2448 (-1) x scalar(@spaces_right_side);
2449 @want_right_space{@spaces_right_side} =
2450 (1) x scalar(@spaces_right_side);
2451 $want_left_space{'->'} = WS_NO;
2452 $want_right_space{'->'} = WS_NO;
2453 $want_left_space{'**'} = WS_NO;
2454 $want_right_space{'**'} = WS_NO;
2455 $want_right_space{'CORE::'} = WS_NO;
2457 # These binary_ws_rules are hardwired and have priority over the above
2458 # settings. It would be nice to allow adjustment by the user,
2459 # but it would be complicated to specify.
2461 # hash type information must stay tightly bound
2463 $binary_ws_rules{'i'}{'L'} = WS_NO;
2464 $binary_ws_rules{'i'}{'{'} = WS_YES;
2465 $binary_ws_rules{'k'}{'{'} = WS_YES;
2466 $binary_ws_rules{'U'}{'{'} = WS_YES;
2467 $binary_ws_rules{'i'}{'['} = WS_NO;
2468 $binary_ws_rules{'R'}{'L'} = WS_NO;
2469 $binary_ws_rules{'R'}{'{'} = WS_NO;
2470 $binary_ws_rules{'t'}{'L'} = WS_NO;
2471 $binary_ws_rules{'t'}{'{'} = WS_NO;
2472 $binary_ws_rules{'t'}{'='} = WS_OPTIONAL; # for signatures; fixes b1123
2473 $binary_ws_rules{'}'}{'L'} = WS_NO;
2474 $binary_ws_rules{'}'}{'{'} = WS_OPTIONAL; # RT#129850; was WS_NO
2475 $binary_ws_rules{'$'}{'L'} = WS_NO;
2476 $binary_ws_rules{'$'}{'{'} = WS_NO;
2477 $binary_ws_rules{'@'}{'L'} = WS_NO;
2478 $binary_ws_rules{'@'}{'{'} = WS_NO;
2479 $binary_ws_rules{'='}{'L'} = WS_YES;
2480 $binary_ws_rules{'J'}{'J'} = WS_YES;
2482 # the following includes ') {'
2483 # as in : if ( xxx ) { yyy }
2484 $binary_ws_rules{']'}{'L'} = WS_NO;
2485 $binary_ws_rules{']'}{'{'} = WS_NO;
2486 $binary_ws_rules{')'}{'{'} = WS_YES;
2487 $binary_ws_rules{')'}{'['} = WS_NO;
2488 $binary_ws_rules{']'}{'['} = WS_NO;
2489 $binary_ws_rules{']'}{'{'} = WS_NO;
2490 $binary_ws_rules{'}'}{'['} = WS_NO;
2491 $binary_ws_rules{'R'}{'['} = WS_NO;
2493 $binary_ws_rules{']'}{'++'} = WS_NO;
2494 $binary_ws_rules{']'}{'--'} = WS_NO;
2495 $binary_ws_rules{')'}{'++'} = WS_NO;
2496 $binary_ws_rules{')'}{'--'} = WS_NO;
2498 $binary_ws_rules{'R'}{'++'} = WS_NO;
2499 $binary_ws_rules{'R'}{'--'} = WS_NO;
2501 $binary_ws_rules{'i'}{'Q'} = WS_YES;
2502 $binary_ws_rules{'n'}{'('} = WS_YES; # occurs in 'use package n ()'
2504 $binary_ws_rules{'i'}{'('} = WS_NO;
2506 $binary_ws_rules{'w'}{'('} = WS_NO;
2507 $binary_ws_rules{'w'}{'{'} = WS_YES;
2510 } ## end sub initialize_whitespace_hashes
2512 my %is_special_ws_type;
2518 # The following hash is used to skip over needless if tests.
2519 # Be sure to update it when adding new checks in its block.
2520 my @q = qw(k w i C m - Q);
2522 @is_special_ws_type{@q} = (1) x scalar(@q);
2524 # These hashes replace slower regex tests
2526 @is_wCUG{@q} = (1) x scalar(@q);
2529 @is_wi{@q} = (1) x scalar(@q);
2532 use constant DEBUG_WHITE => 0;
2534 sub set_whitespace_flags {
2536 # This routine is called once per file to set whitespace flags for that
2537 # file. This routine examines each pair of nonblank tokens and sets a flag
2538 # indicating if white space is needed.
2540 # $rwhitespace_flags->[$j] is a flag indicating whether a white space
2541 # BEFORE token $j is needed, with the following values:
2543 # WS_NO = -1 do not want a space BEFORE token $j
2544 # WS_OPTIONAL= 0 optional space or $j is a whitespace
2545 # WS_YES = 1 want a space BEFORE token $j
2550 my $rLL = $self->[_rLL_];
2551 my $rblock_type_of_seqno = $self->[_rblock_type_of_seqno_];
2552 my $jmax = @{$rLL} - 1;
2554 my $rOpts_space_keyword_paren = $rOpts->{'space-keyword-paren'};
2555 my $rOpts_space_backslash_quote = $rOpts->{'space-backslash-quote'};
2556 my $rOpts_space_function_paren = $rOpts->{'space-function-paren'};
2558 my $rwhitespace_flags = [];
2559 my $ris_function_call_paren = {};
2561 return $rwhitespace_flags if ( $jmax < 0 );
2563 my %is_for_foreach = ( 'for' => 1, 'foreach' => 1 );
2565 my ( $rtokh, $token, $type );
2566 my $rtokh_last = $rLL->[0];
2567 my $rtokh_last_last = $rtokh_last;
2569 my $last_type = EMPTY_STRING;
2570 my $last_token = EMPTY_STRING;
2572 my $j_tight_closing_paren = -1;
2574 $rtokh = [ @{ $rLL->[0] } ];
2578 $rtokh->[_TOKEN_] = $token;
2579 $rtokh->[_TYPE_] = $type;
2580 $rtokh->[_TYPE_SEQUENCE_] = EMPTY_STRING;
2581 $rtokh->[_LINE_INDEX_] = 0;
2583 # This is some logic moved to a sub to avoid deep nesting of if stmts
2584 my $ws_in_container = sub {
2588 if ( $j + 1 > $jmax ) { return (WS_NO) }
2590 # Patch to count '-foo' as single token so that
2591 # each of $a{-foo} and $a{foo} and $a{'foo'} do
2592 # not get spaces with default formatting.
2596 && $last_token eq '{'
2597 && $rLL->[ $j + 1 ]->[_TYPE_] eq 'w' );
2599 # Patch to count a sign separated from a number as a single token, as
2600 # in the following line. Otherwise, it takes two steps to converge:
2602 if ( ( $type eq 'm' || $type eq 'p' )
2604 && $rLL->[ $j + 1 ]->[_TYPE_] eq 'b'
2605 && $rLL->[ $j + 2 ]->[_TYPE_] eq 'n'
2606 && $rLL->[ $j + 2 ]->[_TOKEN_] =~ /^\d/ )
2611 # $j_next is where a closing token should be if
2612 # the container has a single token
2613 if ( $j_here + 1 > $jmax ) { return (WS_NO) }
2615 ( $rLL->[ $j_here + 1 ]->[_TYPE_] eq 'b' )
2619 if ( $j_next > $jmax ) { return WS_NO }
2620 my $tok_next = $rLL->[$j_next]->[_TOKEN_];
2621 my $type_next = $rLL->[$j_next]->[_TYPE_];
2623 # for tightness = 1, if there is just one token
2624 # within the matching pair, we will keep it tight
2626 $tok_next eq $matching_token{$last_token}
2628 # but watch out for this: [ [ ] (misc.t)
2629 && $last_token ne $token
2631 # double diamond is usually spaced
2637 # remember where to put the space for the closing paren
2638 $j_tight_closing_paren = $j_next;
2644 # Local hashes to set spaces around container tokens according to their
2645 # sequence numbers. These are set as keywords are examined.
2646 # They are controlled by the -kpit and -kpitl flags.
2647 my %opening_container_inside_ws;
2648 my %closing_container_inside_ws;
2649 my $set_container_ws_by_keyword = sub {
2651 return unless (%keyword_paren_inner_tightness);
2653 my ( $word, $sequence_number ) = @_;
2655 # We just saw a keyword (or other function name) followed by an opening
2656 # paren. Now check to see if the following paren should have special
2657 # treatment for its inside space. If so we set a hash value using the
2658 # sequence number as key.
2659 if ( $word && $sequence_number ) {
2660 my $tightness = $keyword_paren_inner_tightness{$word};
2661 if ( defined($tightness) && $tightness != 1 ) {
2662 my $ws_flag = $tightness == 0 ? WS_YES : WS_NO;
2663 $opening_container_inside_ws{$sequence_number} = $ws_flag;
2664 $closing_container_inside_ws{$sequence_number} = $ws_flag;
2670 my ( $ws_1, $ws_2, $ws_3, $ws_4 );
2672 # main loop over all tokens to define the whitespace flags
2673 foreach my $j ( 0 .. $jmax ) {
2675 if ( $rLL->[$j]->[_TYPE_] eq 'b' ) {
2676 $rwhitespace_flags->[$j] = WS_OPTIONAL;
2680 $rtokh_last_last = $rtokh_last;
2682 $rtokh_last = $rtokh;
2683 $last_token = $token;
2686 $rtokh = $rLL->[$j];
2687 $token = $rtokh->[_TOKEN_];
2688 $type = $rtokh->[_TYPE_];
2692 #---------------------------------------------------------------
2693 # Whitespace Rules Section 1:
2694 # Handle space on the inside of opening braces.
2695 #---------------------------------------------------------------
2698 if ( $is_opening_type{$last_type} ) {
2700 my $seqno = $rtokh->[_TYPE_SEQUENCE_];
2701 my $block_type = $rblock_type_of_seqno->{$seqno};
2702 my $last_seqno = $rtokh_last->[_TYPE_SEQUENCE_];
2703 my $last_block_type = $rblock_type_of_seqno->{$last_seqno};
2705 $j_tight_closing_paren = -1;
2707 # let us keep empty matched braces together: () {} []
2709 if ( $token eq $matching_token{$last_token} ) {
2719 # we're considering the right of an opening brace
2720 # tightness = 0 means always pad inside with space
2721 # tightness = 1 means pad inside if "complex"
2722 # tightness = 2 means never pad inside with space
2725 if ( $last_type eq '{'
2726 && $last_token eq '{'
2727 && $last_block_type )
2729 $tightness = $rOpts_block_brace_tightness;
2731 else { $tightness = $tightness{$last_token} }
2733 #=============================================================
2734 # Patch for test problem <<snippets/fabrice_bug.in>>
2735 # We must always avoid spaces around a bare word beginning
2737 # my $before = ${^PREMATCH};
2738 # Because all of the following cause an error in perl:
2739 # my $before = ${ ^PREMATCH };
2740 # my $before = ${ ^PREMATCH};
2741 # my $before = ${^PREMATCH };
2742 # So if brace tightness flag is -bt=0 we must temporarily reset
2743 # to bt=1. Note that here we must set tightness=1 and not 2 so
2744 # that the closing space is also avoided
2745 # (via the $j_tight_closing_paren flag in coding)
2746 if ( $type eq 'w' && $token =~ /^\^/ ) { $tightness = 1 }
2748 #=============================================================
2750 if ( $tightness <= 0 ) {
2753 elsif ( $tightness > 1 ) {
2757 $ws = $ws_in_container->($j);
2761 # check for special cases which override the above rules
2762 if ( %opening_container_inside_ws && $last_seqno ) {
2763 my $ws_override = $opening_container_inside_ws{$last_seqno};
2764 if ($ws_override) { $ws = $ws_override }
2767 $ws_4 = $ws_3 = $ws_2 = $ws_1 = $ws
2770 } ## end setting space flag inside opening tokens
2772 #---------------------------------------------------------------
2773 # Whitespace Rules Section 2:
2774 # Special checks for certain types ...
2775 #---------------------------------------------------------------
2776 # The hash '%is_special_ws_type' significantly speeds up this routine,
2777 # but be sure to update it if a new check is added.
2778 # Currently has types: qw(k w i C m - Q #)
2779 if ( $is_special_ws_type{$type} ) {
2780 if ( $type eq 'i' ) {
2782 # never a space before ->
2783 if ( substr( $token, 0, 2 ) eq '->' ) {
2788 elsif ( $type eq 'k' ) {
2790 # Keywords 'for', 'foreach' are special cases for -kpit since
2791 # the opening paren does not always immediately follow the
2792 # keyword. So we have to search forward for the paren in this
2793 # case. I have limited the search to 10 tokens ahead, just in
2794 # case somebody has a big file and no opening paren. This
2795 # should be enough for all normal code. Added the level check
2797 if ( $is_for_foreach{$token}
2798 && %keyword_paren_inner_tightness
2799 && defined( $keyword_paren_inner_tightness{$token} )
2802 my $level = $rLL->[$j]->[_LEVEL_];
2804 ## NOTE: we might use the KNEXT variable to avoid this loop
2805 ## but profiling shows that little would be saved
2806 foreach my $inc ( 1 .. 9 ) {
2808 last if ( $jp > $jmax );
2809 last if ( $rLL->[$jp]->[_LEVEL_] != $level ); # b1236
2810 next unless ( $rLL->[$jp]->[_TOKEN_] eq '(' );
2811 my $seqno_p = $rLL->[$jp]->[_TYPE_SEQUENCE_];
2812 $set_container_ws_by_keyword->( $token, $seqno_p );
2818 # retain any space between '-' and bare word
2819 elsif ( $type eq 'w' || $type eq 'C' ) {
2820 $ws = WS_OPTIONAL if $last_type eq '-';
2822 # never a space before ->
2823 if ( substr( $token, 0, 2 ) eq '->' ) {
2828 # retain any space between '-' and bare word; for example
2829 # avoid space between 'USER' and '-' here: <<snippets/space2.in>>
2830 # $myhash{USER-NAME}='steve';
2831 elsif ( $type eq 'm' || $type eq '-' ) {
2832 $ws = WS_OPTIONAL if ( $last_type eq 'w' );
2835 # always space before side comment
2836 elsif ( $type eq '#' ) { $ws = WS_YES if $j > 0 }
2838 # space_backslash_quote; RT #123774 <<snippets/rt123774.in>>
2839 # allow a space between a backslash and single or double quote
2840 # to avoid fooling html formatters
2841 elsif ( $last_type eq '\\' && $type eq 'Q' && $token =~ /^[\"\']/ )
2843 if ($rOpts_space_backslash_quote) {
2844 if ( $rOpts_space_backslash_quote == 1 ) {
2847 elsif ( $rOpts_space_backslash_quote == 2 ) { $ws = WS_YES }
2848 else { } # shouldnt happen
2854 } ## end elsif ( $is_special_ws_type{$type} ...
2856 #---------------------------------------------------------------
2857 # Whitespace Rules Section 3:
2858 # Handle space on inside of closing brace pairs.
2859 #---------------------------------------------------------------
2862 elsif ( $is_closing_type{$type} ) {
2864 my $seqno = $rtokh->[_TYPE_SEQUENCE_];
2865 if ( $j == $j_tight_closing_paren ) {
2867 $j_tight_closing_paren = -1;
2872 if ( !defined($ws) ) {
2875 my $block_type = $rblock_type_of_seqno->{$seqno};
2876 if ( $type eq '}' && $token eq '}' && $block_type ) {
2877 $tightness = $rOpts_block_brace_tightness;
2879 else { $tightness = $tightness{$token} }
2881 $ws = ( $tightness > 1 ) ? WS_NO : WS_YES;
2885 # check for special cases which override the above rules
2886 if ( %closing_container_inside_ws && $seqno ) {
2887 my $ws_override = $closing_container_inside_ws{$seqno};
2888 if ($ws_override) { $ws = $ws_override }
2891 $ws_4 = $ws_3 = $ws_2 = $ws
2893 } ## end setting space flag inside closing tokens
2895 #---------------------------------------------------------------
2896 # Whitespace Rules Section 4:
2897 #---------------------------------------------------------------
2899 elsif ( $is_opening_type{$type} ) {
2901 if ( $token eq '(' ) {
2903 my $seqno = $rtokh->[_TYPE_SEQUENCE_];
2905 # This will have to be tweaked as tokenization changes.
2906 # We usually want a space at '} (', for example:
2907 # <<snippets/space1.in>>
2908 # map { 1 * $_; } ( $y, $M, $w, $d, $h, $m, $s );
2911 # &{ $_->[1] }( delete $_[$#_]{ $_->[0] } );
2912 # At present, the above & block is marked as type L/R so this
2913 # case won't go through here.
2914 if ( $last_type eq '}' && $last_token ne ')' ) { $ws = WS_YES }
2916 # NOTE: some older versions of Perl had occasional problems if
2917 # spaces are introduced between keywords or functions and
2918 # opening parens. So the default is not to do this except is
2919 # certain cases. The current Perl seems to tolerate spaces.
2921 # Space between keyword and '('
2922 elsif ( $last_type eq 'k' ) {
2924 unless ( $rOpts_space_keyword_paren
2925 || $space_after_keyword{$last_token} );
2927 # Set inside space flag if requested
2928 $set_container_ws_by_keyword->( $last_token, $seqno );
2931 # Space between function and '('
2932 # -----------------------------------------------------
2933 # 'w' and 'i' checks for something like:
2934 # myfun( &myfun( ->myfun(
2935 # -----------------------------------------------------
2937 # Note that at this point an identifier may still have a
2938 # leading arrow, but the arrow will be split off during token
2939 # respacing. After that, the token may become a bare word
2940 # without leading arrow. The point is, it is best to mark
2941 # function call parens right here before that happens.
2942 # Patch: added 'C' to prevent blinker, case b934, i.e. 'pi()'
2943 # NOTE: this would be the place to allow spaces between
2944 # repeated parens, like () () (), as in case c017, but I
2945 # decided that would not be a good idea.
2947 ##$last_type =~ /^[wCUG]$/
2948 $is_wCUG{$last_type}
2950 ##$last_type =~ /^[wi]$/
2954 $last_token =~ /^([\&]|->)/
2956 # or -> or & split from bareword by newline (b1337)
2958 $last_token =~ /^\w/
2960 $rtokh_last_last->[_TYPE_] eq '->'
2961 || ( $rtokh_last_last->[_TYPE_] eq 't'
2962 && $rtokh_last_last->[_TOKEN_] =~
2970 $ws = $rOpts_space_function_paren ? WS_YES : WS_NO;
2971 $set_container_ws_by_keyword->( $last_token, $seqno );
2972 $ris_function_call_paren->{$seqno} = 1;
2975 # space between something like $i and ( in 'snippets/space2.in'
2976 # for $i ( 0 .. 20 ) {
2977 # FIXME: eventually, type 'i' could be split into multiple
2978 # token types so this can be a hardwired rule.
2979 elsif ( $last_type eq 'i' && $last_token =~ /^[\$\%\@]/ ) {
2983 # allow constant function followed by '()' to retain no space
2984 elsif ($last_type eq 'C'
2985 && $rLL->[ $j + 1 ]->[_TOKEN_] eq ')' )
2991 # patch for SWITCH/CASE: make space at ']{' optional
2992 # since the '{' might begin a case or when block
2993 elsif ( ( $token eq '{' && $type ne 'L' ) && $last_token eq ']' ) {
2997 # keep space between 'sub' and '{' for anonymous sub definition
2998 if ( $type eq '{' ) {
2999 if ( $last_token eq 'sub' ) {
3003 # this is needed to avoid no space in '){'
3004 if ( $last_token eq ')' && $token eq '{' ) { $ws = WS_YES }
3006 # avoid any space before the brace or bracket in something like
3007 # @opts{'a','b',...}
3008 if ( $last_type eq 'i' && $last_token =~ /^\@/ ) {
3012 } ## end if ( $is_opening_type{$type} ) {
3014 # always preserver whatever space was used after a possible
3015 # filehandle (except _) or here doc operator
3018 && ( ( $last_type eq 'Z' && $last_token ne '_' )
3019 || $last_type eq 'h' )
3028 if ( !defined($ws) ) {
3030 #---------------------------------------------------------------
3031 # Whitespace Rules Section 4:
3032 # Use the binary rule table.
3033 #---------------------------------------------------------------
3034 $ws = $binary_ws_rules{$last_type}{$type};
3035 $ws_4 = $ws if DEBUG_WHITE;
3037 #---------------------------------------------------------------
3038 # Whitespace Rules Section 5:
3039 # Apply default rules not covered above.
3040 #---------------------------------------------------------------
3042 # If we fall through to here, look at the pre-defined hash tables
3043 # for the two tokens, and:
3044 # if (they are equal) use the common value
3045 # if (either is zero or undef) use the other
3046 # if (either is -1) use it
3060 if ( !defined($ws) ) {
3061 my $wl = $want_left_space{$type};
3062 my $wr = $want_right_space{$last_type};
3063 if ( !defined($wl) ) {
3064 $ws = defined($wr) ? $wr : 0;
3066 elsif ( !defined($wr) ) {
3071 ( ( $wl == $wr ) || ( $wl == -1 ) || !$wr ) ? $wl : $wr;
3076 # Treat newline as a whitespace. Otherwise, we might combine
3077 # 'Send' and '-recipients' here according to the above rules:
3078 # <<snippets/space3.in>>
3079 # my $msg = new Fax::Send
3080 # -recipients => $to,
3083 && $rtokh->[_LINE_INDEX_] != $rtokh_last->[_LINE_INDEX_] )
3088 $rwhitespace_flags->[$j] = $ws;
3091 my $str = substr( $last_token, 0, 15 );
3092 $str .= SPACE x ( 16 - length($str) );
3093 if ( !defined($ws_1) ) { $ws_1 = "*" }
3094 if ( !defined($ws_2) ) { $ws_2 = "*" }
3095 if ( !defined($ws_3) ) { $ws_3 = "*" }
3096 if ( !defined($ws_4) ) { $ws_4 = "*" }
3098 "NEW WHITE: i=$j $str $last_type $type $ws_1 : $ws_2 : $ws_3 : $ws_4 : $ws \n";
3100 # reset for next pass
3101 $ws_1 = $ws_2 = $ws_3 = $ws_4 = undef;
3105 if ( $rOpts->{'tight-secret-operators'} ) {
3106 new_secret_operator_whitespace( $rLL, $rwhitespace_flags );
3108 $self->[_ris_function_call_paren_] = $ris_function_call_paren;
3109 return $rwhitespace_flags;
3111 } ## end sub set_whitespace_flags
3113 sub dump_want_left_space {
3115 local $LIST_SEPARATOR = "\n";
3117 These values are the main control of whitespace to the left of a token type;
3118 They may be altered with the -wls parameter.
3119 For a list of token types, use perltidy --dump-token-types (-dtt)
3120 1 means the token wants a space to its left
3121 -1 means the token does not want a space to its left
3122 ------------------------------------------------------------------------
3124 foreach my $key ( sort keys %want_left_space ) {
3125 $fh->print("$key\t$want_left_space{$key}\n");
3128 } ## end sub dump_want_left_space
3130 sub dump_want_right_space {
3132 local $LIST_SEPARATOR = "\n";
3134 These values are the main control of whitespace to the right of a token type;
3135 They may be altered with the -wrs parameter.
3136 For a list of token types, use perltidy --dump-token-types (-dtt)
3137 1 means the token wants a space to its right
3138 -1 means the token does not want a space to its right
3139 ------------------------------------------------------------------------
3141 foreach my $key ( sort keys %want_right_space ) {
3142 $fh->print("$key\t$want_right_space{$key}\n");
3145 } ## end sub dump_want_right_space
3147 { ## begin closure is_essential_whitespace
3149 my %is_sort_grep_map;
3153 my %essential_whitespace_filter_l1;
3154 my %essential_whitespace_filter_r1;
3155 my %essential_whitespace_filter_l2;
3156 my %essential_whitespace_filter_r2;
3157 my %is_type_with_space_before_bareword;
3158 my %is_special_variable_char;
3164 # NOTE: This hash is like the global %is_sort_map_grep, but it ignores
3165 # grep aliases on purpose, since here we are looking parens, not braces
3166 @q = qw(sort grep map);
3167 @is_sort_grep_map{@q} = (1) x scalar(@q);
3169 @q = qw(for foreach);
3170 @is_for_foreach{@q} = (1) x scalar(@q);
3173 .. :: << >> ** && || // -> => += -= .= %= &= |= ^= *= <>
3174 <= >= == =~ !~ != ++ -- /= x= ~~ ~. |. &. ^.
3176 @is_digraph{@q} = (1) x scalar(@q);
3178 @q = qw( ... **= <<= >>= &&= ||= //= <=> !~~ &.= |.= ^.= <<~);
3179 @is_trigraph{@q} = (1) x scalar(@q);
3181 # These are used as a speedup filters for sub is_essential_whitespace.
3184 # These left side token types USUALLY do not require a space:
3185 @q = qw( ; { } [ ] L R );
3189 @essential_whitespace_filter_l1{@q} = (1) x scalar(@q);
3191 # BUT some might if followed by these right token types
3192 @q = qw( pp mm << <<= h );
3193 @essential_whitespace_filter_r1{@q} = (1) x scalar(@q);
3196 # These right side filters usually do not require a space
3200 @essential_whitespace_filter_r2{@q} = (1) x scalar(@q);
3202 # BUT some might if followed by these left token types
3204 @essential_whitespace_filter_l2{@q} = (1) x scalar(@q);
3206 # Keep a space between certain types and any bareword:
3207 # Q: keep a space between a quote and a bareword to prevent the
3208 # bareword from becoming a quote modifier.
3209 # &: do not remove space between an '&' and a bare word because
3210 # it may turn into a function evaluation, like here
3211 # between '&' and 'O_ACCMODE', producing a syntax error [File.pm]
3212 # $opts{rdonly} = (($opts{mode} & O_ACCMODE) == O_RDONLY);
3214 @is_type_with_space_before_bareword{@q} = (1) x scalar(@q);
3216 # These are the only characters which can (currently) form special
3217 # variables, like $^W: (issue c066, c068).
3219 qw{ ? A B C D E F G H I J K L M N O P Q R S T U V W X Y Z [ \ ] ^ _ };
3220 @{is_special_variable_char}{@q} = (1) x scalar(@q);
3224 sub is_essential_whitespace {
3226 # Essential whitespace means whitespace which cannot be safely deleted
3227 # without risking the introduction of a syntax error.
3228 # We are given three tokens and their types:
3229 # ($tokenl, $typel) is the token to the left of the space in question
3230 # ($tokenr, $typer) is the token to the right of the space in question
3231 # ($tokenll, $typell) is previous nonblank token to the left of $tokenl
3233 # Note1: This routine should almost never need to be changed. It is
3234 # for avoiding syntax problems rather than for formatting.
3236 # Note2: The -mangle option causes large numbers of calls to this
3237 # routine and therefore is a good test. So if a change is made, be sure
3238 # to use nytprof to profile with both old and reviesed coding using the
3239 # -mangle option and check differences.
3241 my ( $tokenll, $typell, $tokenl, $typel, $tokenr, $typer ) = @_;
3243 # This is potentially a very slow routine but the following quick
3244 # filters typically catch and handle over 90% of the calls.
3246 # Filter 1: usually no space required after common types ; , [ ] { } ( )
3248 if ( $essential_whitespace_filter_l1{$typel}
3249 && !$essential_whitespace_filter_r1{$typer} );
3251 # Filter 2: usually no space before common types ; ,
3253 if ( $essential_whitespace_filter_r2{$typer}
3254 && !$essential_whitespace_filter_l2{$typel} );
3256 # Filter 3: Handle side comments: a space is only essential if the left
3257 # token ends in '$' For example, we do not want to create $#foo below:
3266 # Also, I prefer not to put a ? and # together because ? used to be
3267 # a pattern delimiter and spacing was used if guessing was needed.
3269 if ( $typer eq '#' ) {
3273 && ( $typel eq '?' || substr( $tokenl, -1 ) eq '$' ) );
3277 my $tokenr_is_bareword = $tokenr =~ /^\w/ && $tokenr !~ /^\d/;
3278 my $tokenr_is_open_paren = $tokenr eq '(';
3279 my $token_joined = $tokenl . $tokenr;
3280 my $tokenl_is_dash = $tokenl eq '-';
3284 # never combine two bare words or numbers
3285 # examples: and ::ok(1)
3287 # for bla::bla:: abc
3288 # example is "%overload:: and" in files Dumpvalue.pm or colonbug.pl
3289 # $input eq"quit" to make $inputeq"quit"
3290 # my $size=-s::SINK if $file; <==OK but we won't do it
3291 # don't join something like: for bla::bla:: abc
3292 # example is "%overload:: and" in files Dumpvalue.pm or colonbug.pl
3293 ( ( $tokenl =~ /([\'\w]|\:\:)$/ && $typel ne 'CORE::' )
3294 && ( $tokenr =~ /^([\'\w]|\:\:)/ ) )
3296 # do not combine a number with a concatenation dot
3297 # example: pom.caputo:
3298 # $vt100_compatible ? "\e[0;0H" : ('-' x 78 . "\n");
3299 || $typel eq 'n' && $tokenr eq '.'
3300 || $typer eq 'n' && $tokenl eq '.'
3302 # cases of a space before a bareword...
3304 $tokenr_is_bareword && (
3306 # do not join a minus with a bare word, because you might form
3307 # a file test operator. Example from Complex.pm:
3308 # if (CORE::abs($z - i) < $eps);
3309 # "z-i" would be taken as a file test.
3310 $tokenl_is_dash && length($tokenr) == 1
3312 # and something like this could become ambiguous without space
3314 # use constant III=>1;
3318 || $tokenl_is_dash && $typer =~ /^[wC]$/
3320 # keep space between types Q & and a bareword
3321 || $is_type_with_space_before_bareword{$typel}
3323 # +-: binary plus and minus before a bareword could get
3324 # converted into unary plus and minus on next pass through the
3325 # tokenizer. This can lead to blinkers: cases b660 b670 b780
3326 # b781 b787 b788 b790 So we keep a space unless the +/- clearly
3327 # follows an operator
3328 || ( ( $typel eq '+' || $typel eq '-' )
3329 && $typell !~ /^[niC\)\}\]R]$/ )
3331 # keep a space between a token ending in '$' and any word;
3332 # this caused trouble: "die @$ if $@"
3333 || $typel eq 'i' && substr( $tokenl, -1, 1 ) eq '$'
3335 # don't combine $$ or $# with any alphanumeric
3336 # (testfile mangle.t with --mangle)
3341 ) ## end $tokenr_is_bareword
3344 # '= -' should not become =- or you will get a warning
3346 # || ($tokenr eq '-')
3348 # do not join a bare word with a minus, like between 'Send' and
3349 # '-recipients' here <<snippets/space3.in>>
3350 # my $msg = new Fax::Send
3351 # -recipients => $to,
3353 # This is the safest thing to do. If we had the token to the right of
3354 # the minus we could do a better check.
3356 # And do not combine a bareword and a quote, like this:
3357 # oops "Your login, $Bad_Login, is not valid";
3358 # It can cause a syntax error if oops is a sub
3359 || $typel eq 'w' && ( $tokenr eq '-' || $typer eq 'Q' )
3361 # perl is very fussy about spaces before <<
3362 || substr( $tokenr, 0, 2 ) eq '<<'
3364 # avoid combining tokens to create new meanings. Example:
3365 # $a+ +$b must not become $a++$b
3366 || ( $is_digraph{$token_joined} )
3367 || $is_trigraph{$token_joined}
3369 # another example: do not combine these two &'s:
3370 # allow_options & &OPT_EXECCGI
3371 || $is_digraph{ $tokenl . substr( $tokenr, 0, 1 ) }
3373 # retain any space after possible filehandle
3374 # (testfiles prnterr1.t with --extrude and mangle.t with --mangle)
3377 # Added 'Y' here 16 Jan 2021 to prevent -mangle option from removing
3378 # space after type Y. Otherwise, it will get parsed as type 'Z' later
3379 # and any space would have to be added back manually if desired.
3382 # Perl is sensitive to whitespace after the + here:
3383 # $b = xvals $a + 0.1 * yvals $a;
3384 || $typell eq 'Z' && $typel =~ /^[\/\?\+\-\*]$/
3387 $tokenr_is_open_paren && (
3389 # keep paren separate in 'use Foo::Bar ()'
3390 ( $typel eq 'w' && $typell eq 'k' && $tokenll eq 'use' )
3392 # OLD: keep any space between filehandle and paren:
3393 # file mangle.t with --mangle:
3394 # NEW: this test is no longer necessary here (moved above)
3397 # must have space between grep and left paren; "grep(" will fail
3398 || $is_sort_grep_map{$tokenl}
3400 # don't stick numbers next to left parens, as in:
3401 #use Mail::Internet 1.28 (); (see Entity.pm, Head.pm, Test.pm)
3404 ) ## end $tokenr_is_open_paren
3406 # retain any space after here doc operator ( hereerr.t)
3409 # be careful with a space around ++ and --, to avoid ambiguity as to
3410 # which token it applies
3411 || ( $typer eq 'pp' || $typer eq 'mm' ) && $tokenl !~ /^[\;\{\(\[]/
3412 || ( $typel eq '++' || $typel eq '--' )
3413 && $tokenr !~ /^[\;\}\)\]]/
3415 # need space after foreach my; for example, this will fail in
3416 # older versions of Perl:
3417 # foreach my$ft(@filetypes)...
3421 && substr( $tokenr, 0, 1 ) eq '$'
3424 && $is_for_foreach{$tokenll}
3427 # Keep space after like $^ if needed to avoid forming a different
3428 # special variable (issue c068). For example:
3429 # my $aa = $^ ? "none" : "ok";
3431 && length($tokenl) == 2
3432 && substr( $tokenl, 1, 1 ) eq '^'
3433 && $is_special_variable_char{ substr( $tokenr, 0, 1 ) } )
3435 # We must be sure that a space between a ? and a quoted string
3436 # remains if the space before the ? remains. [Loca.pm, lockarea]
3438 # $b=join $comma ? ',' : ':', @_; # ok
3439 # $b=join $comma?',' : ':', @_; # ok!
3440 # $b=join $comma ?',' : ':', @_; # error!
3441 # Not really required:
3442 ## || ( ( $typel eq '?' ) && ( $typer eq 'Q' ) )
3444 # Space stacked labels...
3445 # Not really required: Perl seems to accept non-spaced labels.
3446 ## || $typel eq 'J' && $typer eq 'J'
3448 ; # the value of this long logic sequence is the result we want
3450 } ## end sub is_essential_whitespace
3451 } ## end closure is_essential_whitespace
3453 { ## begin closure new_secret_operator_whitespace
3455 my %secret_operators;
3456 my %is_leading_secret_token;
3460 # token lists for perl secret operators as compiled by Philippe Bruhat
3461 # at: https://metacpan.org/module/perlsecret
3462 %secret_operators = (
3463 'Goatse' => [qw#= ( ) =#], #=( )=
3464 'Venus1' => [qw#0 +#], # 0+
3465 'Venus2' => [qw#+ 0#], # +0
3466 'Enterprise' => [qw#) x ! !#], # ()x!!
3467 'Kite1' => [qw#~ ~ <>#], # ~~<>
3468 'Kite2' => [qw#~~ <>#], # ~~<>
3469 'Winking Fat Comma' => [ ( ',', '=>' ) ], # ,=>
3470 'Bang bang ' => [qw#! !#], # !!
3473 # The following operators and constants are not included because they
3474 # are normally kept tight by perltidy:
3478 # Make a lookup table indexed by the first token of each operator:
3479 # first token => [list, list, ...]
3480 foreach my $value ( values(%secret_operators) ) {
3481 my $tok = $value->[0];
3482 push @{ $is_leading_secret_token{$tok} }, $value;
3486 sub new_secret_operator_whitespace {
3488 my ( $rlong_array, $rwhitespace_flags ) = @_;
3490 # Loop over all tokens in this line
3491 my ( $token, $type );
3492 my $jmax = @{$rlong_array} - 1;
3493 foreach my $j ( 0 .. $jmax ) {
3495 $token = $rlong_array->[$j]->[_TOKEN_];
3496 $type = $rlong_array->[$j]->[_TYPE_];
3498 # Skip unless this token might start a secret operator
3499 next if ( $type eq 'b' );
3500 next unless ( $is_leading_secret_token{$token} );
3502 # Loop over all secret operators with this leading token
3503 foreach my $rpattern ( @{ $is_leading_secret_token{$token} } ) {
3505 foreach my $tok ( @{$rpattern} ) {
3510 && $rlong_array->[$jend]->[_TYPE_] eq 'b' );
3512 || $tok ne $rlong_array->[$jend]->[_TOKEN_] )
3521 # set flags to prevent spaces within this operator
3522 foreach my $jj ( $j + 1 .. $jend ) {
3523 $rwhitespace_flags->[$jj] = WS_NO;
3528 } ## End Loop over all operators
3529 } ## End loop over all tokens
3532 } ## end closure new_secret_operator_whitespace
3534 { ## begin closure set_bond_strengths
3536 # These routines and variables are involved in deciding where to break very
3539 my %is_good_keyword_breakpoint;
3541 my %is_container_token;
3543 my %binary_bond_strength_nospace;
3544 my %binary_bond_strength;
3553 sub initialize_bond_strength_hashes {
3556 @q = qw(if unless while until for foreach);
3557 @is_good_keyword_breakpoint{@q} = (1) x scalar(@q);
3559 @q = qw(lt gt le ge);
3560 @is_lt_gt_le_ge{@q} = (1) x scalar(@q);
3562 @q = qw/ ( [ { } ] ) /;
3563 @is_container_token{@q} = (1) x scalar(@q);
3565 # The decision about where to break a line depends upon a "bond
3566 # strength" between tokens. The LOWER the bond strength, the MORE
3567 # likely a break. A bond strength may be any value but to simplify
3568 # things there are several pre-defined strength levels:
3570 # NO_BREAK => 10000;
3571 # VERY_STRONG => 100;
3575 # VERY_WEAK => 0.55;
3577 # The strength values are based on trial-and-error, and need to be
3578 # tweaked occasionally to get desired results. Some comments:
3580 # 1. Only relative strengths are important. small differences
3581 # in strengths can make big formatting differences.
3582 # 2. Each indentation level adds one unit of bond strength.
3583 # 3. A value of NO_BREAK makes an unbreakable bond
3584 # 4. A value of VERY_WEAK is the strength of a ','
3585 # 5. Values below NOMINAL are considered ok break points.
3586 # 6. Values above NOMINAL are considered poor break points.
3588 # The bond strengths should roughly follow precedence order where
3589 # possible. If you make changes, please check the results very
3590 # carefully on a variety of scripts. Testing with the -extrude
3591 # options is particularly helpful in exercising all of the rules.
3593 # Wherever possible, bond strengths are defined in the following
3594 # tables. There are two main stages to setting bond strengths and
3595 # two types of tables:
3597 # The first stage involves looking at each token individually and
3598 # defining left and right bond strengths, according to if we want
3599 # to break to the left or right side, and how good a break point it
3600 # is. For example tokens like =, ||, && make good break points and
3601 # will have low strengths, but one might want to break on either
3602 # side to put them at the end of one line or beginning of the next.
3604 # The second stage involves looking at certain pairs of tokens and
3605 # defining a bond strength for that particular pair. This second
3606 # stage has priority.
3608 #---------------------------------------------------------------
3609 # Bond Strength BEGIN Section 1.
3610 # Set left and right bond strengths of individual tokens.
3611 #---------------------------------------------------------------
3613 # NOTE: NO_BREAK's set in this section first are HINTS which will
3614 # probably not be honored. Essential NO_BREAKS's should be set in
3615 # BEGIN Section 2 or hardwired in the NO_BREAK coding near the end
3616 # of this subroutine.
3618 # Note that we are setting defaults in this section. The user
3619 # cannot change bond strengths but can cause the left and right
3620 # bond strengths of any token type to be swapped through the use of
3621 # the -wba and -wbb flags. In this way the user can determine if a
3622 # breakpoint token should appear at the end of one line or the
3623 # beginning of the next line.
3625 %right_bond_strength = ();
3626 %left_bond_strength = ();
3627 %binary_bond_strength_nospace = ();
3628 %binary_bond_strength = ();
3632 # The hash keys in this section are token types, plus the text of
3633 # certain keywords like 'or', 'and'.
3635 # no break around possible filehandle
3636 $left_bond_strength{'Z'} = NO_BREAK;
3637 $right_bond_strength{'Z'} = NO_BREAK;
3639 # never put a bare word on a new line:
3640 # example print (STDERR, "bla"); will fail with break after (
3641 $left_bond_strength{'w'} = NO_BREAK;
3643 # blanks always have infinite strength to force breaks after
3645 $right_bond_strength{'b'} = NO_BREAK;
3647 # try not to break on exponentiation
3648 @q = qw# ** .. ... <=> #;
3649 @left_bond_strength{@q} = (STRONG) x scalar(@q);
3650 @right_bond_strength{@q} = (STRONG) x scalar(@q);
3652 # The comma-arrow has very low precedence but not a good break point
3653 $left_bond_strength{'=>'} = NO_BREAK;
3654 $right_bond_strength{'=>'} = NOMINAL;
3656 # ok to break after label
3657 $left_bond_strength{'J'} = NO_BREAK;
3658 $right_bond_strength{'J'} = NOMINAL;
3659 $left_bond_strength{'j'} = STRONG;
3660 $right_bond_strength{'j'} = STRONG;
3661 $left_bond_strength{'A'} = STRONG;
3662 $right_bond_strength{'A'} = STRONG;
3664 $left_bond_strength{'->'} = STRONG;
3665 $right_bond_strength{'->'} = VERY_STRONG;
3667 $left_bond_strength{'CORE::'} = NOMINAL;
3668 $right_bond_strength{'CORE::'} = NO_BREAK;
3670 # breaking AFTER modulus operator is ok:
3672 @left_bond_strength{@q} = (STRONG) x scalar(@q);
3673 @right_bond_strength{@q} =
3674 ( 0.1 * NOMINAL + 0.9 * STRONG ) x scalar(@q);
3676 # Break AFTER math operators * and /
3678 @left_bond_strength{@q} = (STRONG) x scalar(@q);
3679 @right_bond_strength{@q} = (NOMINAL) x scalar(@q);
3681 # Break AFTER weakest math operators + and -
3682 # Make them weaker than * but a bit stronger than '.'
3684 @left_bond_strength{@q} = (STRONG) x scalar(@q);
3685 @right_bond_strength{@q} =
3686 ( 0.91 * NOMINAL + 0.09 * WEAK ) x scalar(@q);
3688 # Define left strength of unary plus and minus (fixes case b511)
3689 $left_bond_strength{p} = $left_bond_strength{'+'};
3690 $left_bond_strength{m} = $left_bond_strength{'-'};
3692 # And make right strength of unary plus and minus very high.
3693 # Fixes cases b670 b790
3694 $right_bond_strength{p} = NO_BREAK;
3695 $right_bond_strength{m} = NO_BREAK;
3697 # breaking BEFORE these is just ok:
3699 @right_bond_strength{@q} = (STRONG) x scalar(@q);
3700 @left_bond_strength{@q} = (NOMINAL) x scalar(@q);
3702 # breaking before the string concatenation operator seems best
3703 # because it can be hard to see at the end of a line
3704 $right_bond_strength{'.'} = STRONG;
3705 $left_bond_strength{'.'} = 0.9 * NOMINAL + 0.1 * WEAK;
3708 @left_bond_strength{@q} = (STRONG) x scalar(@q);
3709 @right_bond_strength{@q} = (NOMINAL) x scalar(@q);
3711 # make these a little weaker than nominal so that they get
3712 # favored for end-of-line characters
3713 @q = qw< != == =~ !~ ~~ !~~ >;
3714 @left_bond_strength{@q} = (STRONG) x scalar(@q);
3715 @right_bond_strength{@q} =
3716 ( 0.9 * NOMINAL + 0.1 * WEAK ) x scalar(@q);
3719 @q = qw# < > | & >= <= #;
3720 @left_bond_strength{@q} = (VERY_STRONG) x scalar(@q);
3721 @right_bond_strength{@q} =
3722 ( 0.8 * NOMINAL + 0.2 * WEAK ) x scalar(@q);
3724 # breaking either before or after a quote is ok
3725 # but bias for breaking before a quote
3726 $left_bond_strength{'Q'} = NOMINAL;
3727 $right_bond_strength{'Q'} = NOMINAL + 0.02;
3728 $left_bond_strength{'q'} = NOMINAL;
3729 $right_bond_strength{'q'} = NOMINAL;
3731 # starting a line with a keyword is usually ok
3732 $left_bond_strength{'k'} = NOMINAL;
3734 # we usually want to bond a keyword strongly to what immediately
3735 # follows, rather than leaving it stranded at the end of a line
3736 $right_bond_strength{'k'} = STRONG;
3738 $left_bond_strength{'G'} = NOMINAL;
3739 $right_bond_strength{'G'} = STRONG;
3741 # assignment operators
3743 = **= += *= &= <<= &&=
3744 -= /= |= >>= ||= //=
3749 # Default is to break AFTER various assignment operators
3750 @left_bond_strength{@q} = (STRONG) x scalar(@q);
3751 @right_bond_strength{@q} =
3752 ( 0.4 * WEAK + 0.6 * VERY_WEAK ) x scalar(@q);
3754 # Default is to break BEFORE '&&' and '||' and '//'
3755 # set strength of '||' to same as '=' so that chains like
3756 # $a = $b || $c || $d will break before the first '||'
3757 $right_bond_strength{'||'} = NOMINAL;
3758 $left_bond_strength{'||'} = $right_bond_strength{'='};
3760 # same thing for '//'
3761 $right_bond_strength{'//'} = NOMINAL;
3762 $left_bond_strength{'//'} = $right_bond_strength{'='};
3764 # set strength of && a little higher than ||
3765 $right_bond_strength{'&&'} = NOMINAL;
3766 $left_bond_strength{'&&'} = $left_bond_strength{'||'} + 0.1;
3768 $left_bond_strength{';'} = VERY_STRONG;
3769 $right_bond_strength{';'} = VERY_WEAK;
3770 $left_bond_strength{'f'} = VERY_STRONG;
3772 # make right strength of for ';' a little less than '='
3773 # to make for contents break after the ';' to avoid this:
3774 # for ( $j = $number_of_fields - 1 ; $j < $item_count ; $j +=
3775 # $number_of_fields )
3776 # and make it weaker than ',' and 'and' too
3777 $right_bond_strength{'f'} = VERY_WEAK - 0.03;
3779 # The strengths of ?/: should be somewhere between
3780 # an '=' and a quote (NOMINAL),
3781 # make strength of ':' slightly less than '?' to help
3782 # break long chains of ? : after the colons
3783 $left_bond_strength{':'} = 0.4 * WEAK + 0.6 * NOMINAL;
3784 $right_bond_strength{':'} = NO_BREAK;
3785 $left_bond_strength{'?'} = $left_bond_strength{':'} + 0.01;
3786 $right_bond_strength{'?'} = NO_BREAK;
3788 $left_bond_strength{','} = VERY_STRONG;
3789 $right_bond_strength{','} = VERY_WEAK;
3791 # remaining digraphs and trigraphs not defined above
3792 @q = qw( :: <> ++ --);
3793 @left_bond_strength{@q} = (WEAK) x scalar(@q);
3794 @right_bond_strength{@q} = (STRONG) x scalar(@q);
3796 # Set bond strengths of certain keywords
3797 # make 'or', 'err', 'and' slightly weaker than a ','
3798 $left_bond_strength{'and'} = VERY_WEAK - 0.01;
3799 $left_bond_strength{'or'} = VERY_WEAK - 0.02;
3800 $left_bond_strength{'err'} = VERY_WEAK - 0.02;
3801 $left_bond_strength{'xor'} = VERY_WEAK - 0.01;
3802 $right_bond_strength{'and'} = NOMINAL;
3803 $right_bond_strength{'or'} = NOMINAL;
3804 $right_bond_strength{'err'} = NOMINAL;
3805 $right_bond_strength{'xor'} = NOMINAL;
3807 #---------------------------------------------------------------
3808 # Bond Strength BEGIN Section 2.
3809 # Set binary rules for bond strengths between certain token types.
3810 #---------------------------------------------------------------
3812 # We have a little problem making tables which apply to the
3813 # container tokens. Here is a list of container tokens and
3816 # type tokens // meaning
3817 # { {, [, ( // indent
3818 # } }, ], ) // outdent
3819 # [ [ // left non-structural [ (enclosing an array index)
3820 # ] ] // right non-structural square bracket
3821 # ( ( // left non-structural paren
3822 # ) ) // right non-structural paren
3823 # L { // left non-structural curly brace (enclosing a key)
3824 # R } // right non-structural curly brace
3826 # Some rules apply to token types and some to just the token
3827 # itself. We solve the problem by combining type and token into a
3828 # new hash key for the container types.
3830 # If a rule applies to a token 'type' then we need to make rules
3831 # for each of these 'type.token' combinations:
3842 # If a rule applies to a token then we need to make rules for
3843 # these 'type.token' combinations:
3852 # allow long lines before final { in an if statement, as in:
3857 # Otherwise, the line before the { tends to be too short.
3859 $binary_bond_strength{'))'}{'{{'} = VERY_WEAK + 0.03;
3860 $binary_bond_strength{'(('}{'{{'} = NOMINAL;
3862 # break on something like '} (', but keep this stronger than a ','
3863 # example is in 'howe.pl'
3864 $binary_bond_strength{'R}'}{'(('} = 0.8 * VERY_WEAK + 0.2 * WEAK;
3865 $binary_bond_strength{'}}'}{'(('} = 0.8 * VERY_WEAK + 0.2 * WEAK;
3867 # keep matrix and hash indices together
3868 # but make them a little below STRONG to allow breaking open
3869 # something like {'some-word'}{'some-very-long-word'} at the }{
3871 $binary_bond_strength{']]'}{'[['} = 0.9 * STRONG + 0.1 * NOMINAL;
3872 $binary_bond_strength{']]'}{'L{'} = 0.9 * STRONG + 0.1 * NOMINAL;
3873 $binary_bond_strength{'R}'}{'[['} = 0.9 * STRONG + 0.1 * NOMINAL;
3874 $binary_bond_strength{'R}'}{'L{'} = 0.9 * STRONG + 0.1 * NOMINAL;
3876 # increase strength to the point where a break in the following
3877 # will be after the opening paren rather than at the arrow:
3879 $binary_bond_strength{'i'}{'->'} = 1.45 * STRONG;
3881 # Note that the following alternative strength would make the break at the
3882 # '->' rather than opening the '('. Both have advantages and disadvantages.
3883 # $binary_bond_strength{'i'}{'->'} = 0.5*STRONG + 0.5 * NOMINAL; #
3885 $binary_bond_strength{'))'}{'->'} = 0.1 * STRONG + 0.9 * NOMINAL;
3886 $binary_bond_strength{']]'}{'->'} = 0.1 * STRONG + 0.9 * NOMINAL;
3887 $binary_bond_strength{'})'}{'->'} = 0.1 * STRONG + 0.9 * NOMINAL;
3888 $binary_bond_strength{'}]'}{'->'} = 0.1 * STRONG + 0.9 * NOMINAL;
3889 $binary_bond_strength{'}}'}{'->'} = 0.1 * STRONG + 0.9 * NOMINAL;
3890 $binary_bond_strength{'R}'}{'->'} = 0.1 * STRONG + 0.9 * NOMINAL;
3892 $binary_bond_strength{'))'}{'[['} = 0.2 * STRONG + 0.8 * NOMINAL;
3893 $binary_bond_strength{'})'}{'[['} = 0.2 * STRONG + 0.8 * NOMINAL;
3894 $binary_bond_strength{'))'}{'{['} = 0.2 * STRONG + 0.8 * NOMINAL;
3895 $binary_bond_strength{'})'}{'{['} = 0.2 * STRONG + 0.8 * NOMINAL;
3897 #---------------------------------------------------------------
3898 # Binary NO_BREAK rules
3899 #---------------------------------------------------------------
3901 # use strict requires that bare word and => not be separated
3902 $binary_bond_strength{'C'}{'=>'} = NO_BREAK;
3903 $binary_bond_strength{'U'}{'=>'} = NO_BREAK;
3905 # Never break between a bareword and a following paren because
3906 # perl may give an error. For example, if a break is placed
3907 # between 'to_filehandle' and its '(' the following line will
3908 # give a syntax error [Carp.pm]: my( $no) =fileno(
3909 # to_filehandle( $in)) ;
3910 $binary_bond_strength{'C'}{'(('} = NO_BREAK;
3911 $binary_bond_strength{'C'}{'{('} = NO_BREAK;
3912 $binary_bond_strength{'U'}{'(('} = NO_BREAK;
3913 $binary_bond_strength{'U'}{'{('} = NO_BREAK;
3915 # use strict requires that bare word within braces not start new
3917 $binary_bond_strength{'L{'}{'w'} = NO_BREAK;
3919 $binary_bond_strength{'w'}{'R}'} = NO_BREAK;
3921 # The following two rules prevent a syntax error caused by breaking up
3922 # a construction like '{-y}'. The '-' quotes the 'y' and prevents
3923 # it from being taken as a transliteration. We have to keep
3924 # token types 'L m w' together to prevent this error.
3925 $binary_bond_strength{'L{'}{'m'} = NO_BREAK;
3926 $binary_bond_strength_nospace{'m'}{'w'} = NO_BREAK;
3928 # keep 'bareword-' together, but only if there is no space between
3929 # the word and dash. Do not keep together if there is a space.
3930 # example 'use perl6-alpha'
3931 $binary_bond_strength_nospace{'w'}{'m'} = NO_BREAK;
3933 # use strict requires that bare word and => not be separated
3934 $binary_bond_strength{'w'}{'=>'} = NO_BREAK;
3936 # use strict does not allow separating type info from trailing { }
3937 # testfile is readmail.pl
3938 $binary_bond_strength{'t'}{'L{'} = NO_BREAK;
3939 $binary_bond_strength{'i'}{'L{'} = NO_BREAK;
3941 # As a defensive measure, do not break between a '(' and a
3942 # filehandle. In some cases, this can cause an error. For
3943 # example, the following program works:
3950 # But this program fails:
3958 # This is normally only a problem with the 'extrude' option
3959 $binary_bond_strength{'(('}{'Y'} = NO_BREAK;
3960 $binary_bond_strength{'{('}{'Y'} = NO_BREAK;
3962 # never break between sub name and opening paren
3963 $binary_bond_strength{'w'}{'(('} = NO_BREAK;
3964 $binary_bond_strength{'w'}{'{('} = NO_BREAK;
3966 # keep '}' together with ';'
3967 $binary_bond_strength{'}}'}{';'} = NO_BREAK;
3969 # Breaking before a ++ can cause perl to guess wrong. For
3970 # example the following line will cause a syntax error
3971 # with -extrude if we break between '$i' and '++' [fixstyle2]
3972 # print( ( $i++ & 1 ) ? $_ : ( $change{$_} || $_ ) );
3973 $nobreak_lhs{'++'} = NO_BREAK;
3975 # Do not break before a possible file handle
3976 $nobreak_lhs{'Z'} = NO_BREAK;
3978 # use strict hates bare words on any new line. For
3979 # example, a break before the underscore here provokes the
3980 # wrath of use strict:
3981 # if ( -r $fn && ( -s _ || $AllowZeroFilesize)) {
3982 $nobreak_rhs{'F'} = NO_BREAK;
3983 $nobreak_rhs{'CORE::'} = NO_BREAK;
3985 # To prevent the tokenizer from switching between types 'w' and 'G' we
3986 # need to avoid breaking between type 'G' and the following code block
3987 # brace. Fixes case b929.
3988 $nobreak_rhs{G} = NO_BREAK;
3990 #---------------------------------------------------------------
3991 # Bond Strength BEGIN Section 3.
3992 # Define tables and values for applying a small bias to the above
3994 #---------------------------------------------------------------
3995 # Adding a small 'bias' to strengths is a simple way to make a line
3996 # break at the first of a sequence of identical terms. For
3997 # example, to force long string of conditional operators to break
3998 # with each line ending in a ':', we can add a small number to the
3999 # bond strength of each ':' (colon.t)
4000 @bias_tokens = qw( : && || f and or . ); # tokens which get bias
4001 %bias_hash = map { $_ => 0 } @bias_tokens;
4002 $delta_bias = 0.0001; # a very small strength level
4005 } ## end sub initialize_bond_strength_hashes
4007 use constant DEBUG_BOND => 0;
4009 sub set_bond_strengths {
4013 my $rbond_strength_to_go = [];
4015 my $rLL = $self->[_rLL_];
4016 my $rK_weld_right = $self->[_rK_weld_right_];
4017 my $rK_weld_left = $self->[_rK_weld_left_];
4018 my $ris_list_by_seqno = $self->[_ris_list_by_seqno_];
4020 # patch-its always ok to break at end of line
4021 $nobreak_to_go[$max_index_to_go] = 0;
4023 # we start a new set of bias values for each line
4026 my $code_bias = -.01; # bias for closing block braces
4030 my $token_length = 1;
4032 my $last_nonblank_type = $type;
4033 my $last_nonblank_token = $token;
4034 my $list_str = $left_bond_strength{'?'};
4036 my ( $bond_str_1, $bond_str_2, $bond_str_3, $bond_str_4 );
4038 my ( $block_type, $i_next, $i_next_nonblank, $next_nonblank_token,
4039 $next_nonblank_type, $next_token, $next_type,
4040 $total_nesting_depth, );
4042 # main loop to compute bond strengths between each pair of tokens
4043 foreach my $i ( 0 .. $max_index_to_go ) {
4045 if ( $type ne 'b' ) {
4046 $last_nonblank_type = $type;
4047 $last_nonblank_token = $token;
4049 $type = $types_to_go[$i];
4051 # strength on both sides of a blank is the same
4052 if ( $type eq 'b' && $last_type ne 'b' ) {
4053 $rbond_strength_to_go->[$i] = $rbond_strength_to_go->[ $i - 1 ];
4054 $nobreak_to_go[$i] ||= $nobreak_to_go[ $i - 1 ]; # fix for b1257
4058 $token = $tokens_to_go[$i];
4059 $token_length = $token_lengths_to_go[$i];
4060 $block_type = $block_type_to_go[$i];
4062 $next_type = $types_to_go[$i_next];
4063 $next_token = $tokens_to_go[$i_next];
4064 $total_nesting_depth = $nesting_depth_to_go[$i_next];
4065 $i_next_nonblank = ( ( $next_type eq 'b' ) ? $i + 2 : $i + 1 );
4066 $next_nonblank_type = $types_to_go[$i_next_nonblank];
4067 $next_nonblank_token = $tokens_to_go[$i_next_nonblank];
4069 my $seqno = $type_sequence_to_go[$i];
4070 my $next_nonblank_seqno = $type_sequence_to_go[$i_next_nonblank];
4072 # We are computing the strength of the bond between the current
4073 # token and the NEXT token.
4075 #---------------------------------------------------------------
4076 # Bond Strength Section 1:
4077 # First Approximation.
4078 # Use minimum of individual left and right tabulated bond
4080 #---------------------------------------------------------------
4081 my $bsr = $right_bond_strength{$type};
4082 my $bsl = $left_bond_strength{$next_nonblank_type};
4084 # define right bond strengths of certain keywords
4085 if ( $type eq 'k' && defined( $right_bond_strength{$token} ) ) {
4086 $bsr = $right_bond_strength{$token};
4088 elsif ( $token eq 'ne' or $token eq 'eq' ) {
4092 # set terminal bond strength to the nominal value
4093 # this will cause good preceding breaks to be retained
4094 if ( $i_next_nonblank > $max_index_to_go ) {
4097 # But weaken the bond at a 'missing terminal comma'. If an
4098 # optional comma is missing at the end of a broken list, use
4099 # the strength of a comma anyway to make formatting the same as
4100 # if it were there. Fixes issue c133.
4101 if ( !defined($bsr) || $bsr > VERY_WEAK ) {
4102 my $seqno_px = $parent_seqno_to_go[$max_index_to_go];
4103 if ( $ris_list_by_seqno->{$seqno_px} ) {
4104 my $KK = $K_to_go[$max_index_to_go];
4105 my $Kn = $self->K_next_nonblank($KK);
4106 my $seqno_n = $rLL->[$Kn]->[_TYPE_SEQUENCE_];
4107 if ( $seqno_n && $seqno_n eq $seqno_px ) {
4114 # define right bond strengths of certain keywords
4115 if ( $next_nonblank_type eq 'k'
4116 && defined( $left_bond_strength{$next_nonblank_token} ) )
4118 $bsl = $left_bond_strength{$next_nonblank_token};
4120 elsif ($next_nonblank_token eq 'ne'
4121 or $next_nonblank_token eq 'eq' )
4125 elsif ( $is_lt_gt_le_ge{$next_nonblank_token} ) {
4126 $bsl = 0.9 * NOMINAL + 0.1 * STRONG;
4129 # Use the minimum of the left and right strengths. Note: it might
4130 # seem that we would want to keep a NO_BREAK if either token has
4131 # this value. This didn't work, for example because in an arrow
4132 # list, it prevents the comma from separating from the following
4133 # bare word (which is probably quoted by its arrow). So necessary
4134 # NO_BREAK's have to be handled as special cases in the final
4136 if ( !defined($bsr) ) { $bsr = VERY_STRONG }
4137 if ( !defined($bsl) ) { $bsl = VERY_STRONG }
4138 my $bond_str = ( $bsr < $bsl ) ? $bsr : $bsl;
4139 $bond_str_1 = $bond_str if (DEBUG_BOND);
4141 #---------------------------------------------------------------
4142 # Bond Strength Section 2:
4143 # Apply hardwired rules..
4144 #---------------------------------------------------------------
4146 # Patch to put terminal or clauses on a new line: Weaken the bond
4147 # at an || followed by die or similar keyword to make the terminal
4148 # or clause fall on a new line, like this:
4151 # || die "Cannot add broadcast: No class identifier found";
4153 # Otherwise the break will be at the previous '=' since the || and
4154 # = have the same starting strength and the or is biased, like
4158 # shift || die "Cannot add broadcast: No class identifier found";
4160 # In any case if the user places a break at either the = or the ||
4161 # it should remain there.
4162 if ( $type eq '||' || $type eq 'k' && $token eq 'or' ) {
4164 # /^(die|confess|croak|warn)$/
4165 if ( $is_die_confess_croak_warn{$next_nonblank_token} ) {
4166 if ( $want_break_before{$token} && $i > 0 ) {
4167 $rbond_strength_to_go->[ $i - 1 ] -= $delta_bias;
4169 # keep bond strength of a token and its following blank
4171 if ( $types_to_go[ $i - 1 ] eq 'b' && $i > 2 ) {
4172 $rbond_strength_to_go->[ $i - 2 ] -= $delta_bias;
4176 $bond_str -= $delta_bias;
4181 # good to break after end of code blocks
4182 if ( $type eq '}' && $block_type && $next_nonblank_type ne ';' ) {
4184 $bond_str = 0.5 * WEAK + 0.5 * VERY_WEAK + $code_bias;
4185 $code_bias += $delta_bias;
4188 if ( $type eq 'k' ) {
4190 # allow certain control keywords to stand out
4191 if ( $next_nonblank_type eq 'k'
4192 && $is_last_next_redo_return{$token} )
4194 $bond_str = 0.45 * WEAK + 0.55 * VERY_WEAK;
4197 # Don't break after keyword my. This is a quick fix for a
4198 # rare problem with perl. An example is this line from file
4201 # foreach my $question( Debian::DebConf::ConfigDb::gettree(
4202 # $this->{'question'} ) )
4204 if ( $token eq 'my' ) {
4205 $bond_str = NO_BREAK;
4210 if ( $next_nonblank_type eq 'k' && $type ne 'CORE::' ) {
4212 if ( $is_keyword_returning_list{$next_nonblank_token} ) {
4213 $bond_str = $list_str if ( $bond_str > $list_str );
4216 # keywords like 'unless', 'if', etc, within statements
4218 if ( $is_good_keyword_breakpoint{$next_nonblank_token} ) {
4219 $bond_str = VERY_WEAK / 1.05;
4223 # try not to break before a comma-arrow
4224 elsif ( $next_nonblank_type eq '=>' ) {
4225 if ( $bond_str < STRONG ) { $bond_str = STRONG }
4228 #---------------------------------------------------------------
4229 # Additional hardwired NOBREAK rules
4230 #---------------------------------------------------------------
4232 # map1.t -- correct for a quirk in perl
4234 && $next_nonblank_type eq 'i'
4235 && $last_nonblank_type eq 'k'
4236 && $is_sort_map_grep{$last_nonblank_token} )
4238 # /^(sort|map|grep)$/ )
4240 $bond_str = NO_BREAK;
4243 # extrude.t: do not break before paren at:
4245 if ( $last_nonblank_type eq 'F' && $next_nonblank_token eq '(' ) {
4246 $bond_str = NO_BREAK;
4249 # OLD COMMENT: In older version of perl, use strict can cause
4250 # problems with breaks before bare words following opening parens.
4251 # For example, this will fail under older versions if a break is
4252 # made between '(' and 'MAIL':
4254 # use strict; open( MAIL, "a long filename or command"); close MAIL;
4256 # NEW COMMENT: Third fix for b1213:
4257 # This option does not seem to be needed any longer, and it can
4258 # cause instabilities. It can be turned off, but to minimize
4259 # changes to existing formatting it is retained only in the case
4260 # where the previous token was 'open' and there was no line break.
4261 # Even this could eventually be removed if it causes instability.
4262 if ( $type eq '{' ) {
4265 && $next_nonblank_type eq 'w'
4266 && $last_nonblank_type eq 'k'
4267 && $last_nonblank_token eq 'open'
4268 && !$old_breakpoint_to_go[$i] )
4270 $bond_str = NO_BREAK;
4274 # Do not break between a possible filehandle and a ? or / and do
4275 # not introduce a break after it if there is no blank
4277 elsif ( $type eq 'Z' ) {
4282 # if there is no blank and we do not want one. Examples:
4283 # print $x++ # do not break after $x
4284 # print HTML"HELLO" # break ok after HTML
4287 && defined( $want_left_space{$next_type} )
4288 && $want_left_space{$next_type} == WS_NO
4291 # or we might be followed by the start of a quote,
4292 # and this is not an existing breakpoint; fixes c039.
4293 || !$old_breakpoint_to_go[$i]
4294 && substr( $next_nonblank_token, 0, 1 ) eq '/'
4298 $bond_str = NO_BREAK;
4302 # Breaking before a ? before a quote can cause trouble if
4303 # they are not separated by a blank.
4304 # Example: a syntax error occurs if you break before the ? here
4305 # my$logic=join$all?' && ':' || ',@regexps;
4306 # From: Professional_Perl_Programming_Code/multifind.pl
4307 if ( $next_nonblank_type eq '?' ) {
4308 $bond_str = NO_BREAK
4309 if ( $types_to_go[ $i_next_nonblank + 1 ] eq 'Q' );
4312 # Breaking before a . followed by a number
4313 # can cause trouble if there is no intervening space
4314 # Example: a syntax error occurs if you break before the .2 here
4315 # $str .= pack($endian.2, ensurrogate($ord));
4316 # From: perl58/Unicode.pm
4317 elsif ( $next_nonblank_type eq '.' ) {
4318 $bond_str = NO_BREAK
4319 if ( $types_to_go[ $i_next_nonblank + 1 ] eq 'n' );
4323 elsif ( $type eq 'w' ) {
4324 $bond_str = NO_BREAK
4325 if ( !$old_breakpoint_to_go[$i]
4326 && substr( $next_nonblank_token, 0, 1 ) eq '/' );
4329 $bond_str_2 = $bond_str if (DEBUG_BOND);
4331 #---------------------------------------------------------------
4332 # End of hardwired rules
4333 #---------------------------------------------------------------
4335 #---------------------------------------------------------------
4336 # Bond Strength Section 3:
4337 # Apply table rules. These have priority over the above
4339 #---------------------------------------------------------------
4341 my $tabulated_bond_str;
4343 my $rtype = $next_nonblank_type;
4344 if ( $seqno && $is_container_token{$token} ) {
4345 $ltype = $type . $token;
4348 if ( $next_nonblank_seqno
4349 && $is_container_token{$next_nonblank_token} )
4351 $rtype = $next_nonblank_type . $next_nonblank_token;
4353 # Alternate Fix #1 for issue b1299. This version makes the
4354 # decision as soon as possible. See Alternate Fix #2 also.
4355 # Do not separate a bareword identifier from its paren: b1299
4356 # This is currently needed for stability because if the bareword
4357 # gets separated from a preceding '->' and following '(' then
4358 # the tokenizer may switch from type 'i' to type 'w'. This
4359 # patch will prevent this by keeping it adjacent to its '('.
4360 ## if ( $next_nonblank_token eq '('
4362 ## && substr( $token, 0, 1 ) =~ /^\w$/ )
4368 # apply binary rules which apply regardless of space between tokens
4369 if ( $binary_bond_strength{$ltype}{$rtype} ) {
4370 $bond_str = $binary_bond_strength{$ltype}{$rtype};
4371 $tabulated_bond_str = $bond_str;
4374 # apply binary rules which apply only if no space between tokens
4375 if ( $binary_bond_strength_nospace{$ltype}{$next_type} ) {
4376 $bond_str = $binary_bond_strength{$ltype}{$next_type};
4377 $tabulated_bond_str = $bond_str;
4380 if ( $nobreak_rhs{$ltype} || $nobreak_lhs{$rtype} ) {
4381 $bond_str = NO_BREAK;
4382 $tabulated_bond_str = $bond_str;
4385 $bond_str_3 = $bond_str if (DEBUG_BOND);
4387 # If the hardwired rules conflict with the tabulated bond
4388 # strength then there is an inconsistency that should be fixed
4390 && $tabulated_bond_str
4392 && $bond_str_1 != $bond_str_2
4393 && $bond_str_2 != $tabulated_bond_str
4396 "BOND_TABLES: ltype=$ltype rtype=$rtype $bond_str_1->$bond_str_2->$bond_str_3\n";
4399 #-----------------------------------------------------------------
4400 # Bond Strength Section 4:
4401 # Modify strengths of certain tokens which often occur in sequence
4402 # by adding a small bias to each one in turn so that the breaks
4403 # occur from left to right.
4405 # Note that we only changing strengths by small amounts here,
4406 # and usually increasing, so we should not be altering any NO_BREAKs.
4407 # Other routines which check for NO_BREAKs will use a tolerance
4408 # of one to avoid any problem.
4409 #-----------------------------------------------------------------
4411 # The bias tables use special keys:
4412 # $type - if not keyword
4413 # $token - if keyword, but map some keywords together
4415 $type eq 'k' ? $token eq 'err' ? 'or' : $token : $type;
4417 $next_nonblank_type eq 'k'
4418 ? $next_nonblank_token eq 'err'
4420 : $next_nonblank_token
4421 : $next_nonblank_type;
4424 if ( defined( $bias{$left_key} ) ) {
4425 if ( !$want_break_before{$left_key} ) {
4426 $bias{$left_key} += $delta_bias;
4427 $bond_str += $bias{$left_key};
4432 if ( defined( $bias{$right_key} ) ) {
4433 if ( $want_break_before{$right_key} ) {
4435 # for leading '.' align all but 'short' quotes; the idea
4436 # is to not place something like "\n" on a single line.
4437 if ( $right_key eq '.' ) {
4439 $last_nonblank_type eq '.'
4440 && ( $token_length <=
4441 $rOpts_short_concatenation_item_length )
4442 && ( !$is_closing_token{$token} )
4445 $bias{$right_key} += $delta_bias;
4449 $bias{$right_key} += $delta_bias;
4451 $bond_str += $bias{$right_key};
4455 $bond_str_4 = $bond_str if (DEBUG_BOND);
4457 #---------------------------------------------------------------
4458 # Bond Strength Section 5:
4459 # Fifth Approximation.
4460 # Take nesting depth into account by adding the nesting depth
4461 # to the bond strength.
4462 #---------------------------------------------------------------
4465 if ( defined($bond_str) && !$nobreak_to_go[$i] ) {
4466 if ( $total_nesting_depth > 0 ) {
4467 $strength = $bond_str + $total_nesting_depth;
4470 $strength = $bond_str;
4474 $strength = NO_BREAK;
4476 # For critical code such as lines with here targets we must
4477 # be absolutely sure that we do not allow a break. So for
4478 # these the nobreak flag exceeds 1 as a signal. Otherwise we
4479 # can run into trouble when small tolerances are added.
4480 $strength += 1 if ( $nobreak_to_go[$i] > 1 );
4483 #---------------------------------------------------------------
4484 # Bond Strength Section 6:
4485 # Sixth Approximation. Welds.
4486 #---------------------------------------------------------------
4488 # Do not allow a break within welds
4489 if ( $total_weld_count && $seqno ) {
4490 my $KK = $K_to_go[$i];
4491 if ( $rK_weld_right->{$KK} ) {
4492 $strength = NO_BREAK;
4495 # But encourage breaking after opening welded tokens
4496 elsif ($rK_weld_left->{$KK}
4497 && $is_opening_token{$token} )
4503 # always break after side comment
4504 if ( $type eq '#' ) { $strength = 0 }
4506 $rbond_strength_to_go->[$i] = $strength;
4508 # Fix for case c001: be sure NO_BREAK's are enforced by later
4509 # routines, except at a '?' because '?' as quote delimiter is
4511 if ( $strength >= NO_BREAK && $next_nonblank_type ne '?' ) {
4512 $nobreak_to_go[$i] ||= 1;
4516 my $str = substr( $token, 0, 15 );
4517 $str .= SPACE x ( 16 - length($str) );
4519 "BOND: i=$i $str $type $next_nonblank_type depth=$total_nesting_depth strength=$bond_str_1 -> $bond_str_2 -> $bond_str_3 -> $bond_str_4 $bond_str -> $strength \n";
4521 # reset for next pass
4522 $bond_str_1 = $bond_str_2 = $bond_str_3 = $bond_str_4 = undef;
4526 return $rbond_strength_to_go;
4527 } ## end sub set_bond_strengths
4528 } ## end closure set_bond_strengths
4532 # See if a pattern will compile. We have to use a string eval here,
4533 # but it should be safe because the pattern has been constructed
4536 eval "'##'=~/$pattern/";
4540 { ## begin closure prepare_cuddled_block_types
4544 # Add keywords here which really should not be cuddled
4546 my @q = qw(if unless for foreach while);
4547 @no_cuddle{@q} = (1) x scalar(@q);
4550 sub prepare_cuddled_block_types {
4552 # the cuddled-else style, if used, is controlled by a hash that
4555 # Include keywords here which should not be cuddled
4557 my $cuddled_string = EMPTY_STRING;
4558 if ( $rOpts->{'cuddled-else'} ) {
4561 $cuddled_string = 'elsif else continue catch finally'
4562 unless ( $rOpts->{'cuddled-block-list-exclusive'} );
4564 # This is the old equivalent but more complex version
4565 # $cuddled_string = 'if-elsif-else unless-elsif-else -continue ';
4567 # Add users other blocks to be cuddled
4568 my $cuddled_block_list = $rOpts->{'cuddled-block-list'};
4569 if ($cuddled_block_list) {
4570 $cuddled_string .= SPACE . $cuddled_block_list;
4575 # If we have a cuddled string of the form
4576 # 'try-catch-finally'
4578 # we want to prepare a hash of the form
4580 # $rcuddled_block_types = {
4587 # use -dcbl to dump this hash
4589 # Multiple such strings are input as a space or comma separated list
4591 # If we get two lists with the same leading type, such as
4592 # -cbl = "-try-catch-finally -try-catch-otherwise"
4593 # then they will get merged as follows:
4594 # $rcuddled_block_types = {
4601 # This will allow either type of chain to be followed.
4603 $cuddled_string =~ s/,/ /g; # allow space or comma separated lists
4604 my @cuddled_strings = split /\s+/, $cuddled_string;
4606 $rcuddled_block_types = {};
4608 # process each dash-separated string...
4609 my $string_count = 0;
4610 foreach my $string (@cuddled_strings) {
4611 next unless $string;
4612 my @words = split /-+/, $string; # allow multiple dashes
4614 # we could look for and report possible errors here...
4615 next unless ( @words > 0 );
4617 # allow either '-continue' or *-continue' for arbitrary starting type
4620 # a single word without dashes is a secondary block type
4622 $start = shift @words;
4625 # always make an entry for the leading word. If none follow, this
4626 # will still prevent a wildcard from matching this word.
4627 if ( !defined( $rcuddled_block_types->{$start} ) ) {
4628 $rcuddled_block_types->{$start} = {};
4631 # The count gives the original word order in case we ever want it.
4634 foreach my $word (@words) {
4636 if ( $no_cuddle{$word} ) {
4638 "## Ignoring keyword '$word' in -cbl; does not seem right\n"
4643 $rcuddled_block_types->{$start}->{$word} =
4644 1; #"$string_count.$word_count";
4646 # git#9: Remove this word from the list of desired one-line
4648 $want_one_line_block{$word} = 0;
4652 } ## end sub prepare_cuddled_block_types
4653 } ## end closure prepare_cuddled_block_types
4655 sub dump_cuddled_block_list {
4658 # ORIGINAL METHOD: Here is the format of the cuddled block type hash
4659 # which controls this routine
4660 # my $rcuddled_block_types = {
4671 # SIMPLIFIED METHOD: the simplified method uses a wildcard for
4672 # the starting block type and puts all cuddled blocks together:
4673 # my $rcuddled_block_types = {
4682 # Both methods work, but the simplified method has proven to be adequate and
4685 my $cuddled_string = $rOpts->{'cuddled-block-list'};
4686 $cuddled_string = EMPTY_STRING unless $cuddled_string;
4688 my $flags = EMPTY_STRING;
4689 $flags .= "-ce" if ( $rOpts->{'cuddled-else'} );
4690 $flags .= " -cbl='$cuddled_string'";
4692 unless ( $rOpts->{'cuddled-else'} ) {
4693 $flags .= "\nNote: You must specify -ce to generate a cuddled hash";
4697 ------------------------------------------------------------------------
4698 Hash of cuddled block types prepared for a run with these parameters:
4700 ------------------------------------------------------------------------
4704 $fh->print( Dumper($rcuddled_block_types) );
4707 ------------------------------------------------------------------------
4710 } ## end sub dump_cuddled_block_list
4712 sub make_static_block_comment_pattern {
4714 # create the pattern used to identify static block comments
4715 $static_block_comment_pattern = '^\s*##';
4717 # allow the user to change it
4718 if ( $rOpts->{'static-block-comment-prefix'} ) {
4719 my $prefix = $rOpts->{'static-block-comment-prefix'};
4720 $prefix =~ s/^\s*//;
4721 my $pattern = $prefix;
4723 # user may give leading caret to force matching left comments only
4724 if ( $prefix !~ /^\^#/ ) {
4725 if ( $prefix !~ /^#/ ) {
4727 "ERROR: the -sbcp prefix is '$prefix' but must begin with '#' or '^#'\n"
4730 $pattern = '^\s*' . $prefix;
4732 if ( bad_pattern($pattern) ) {
4734 "ERROR: the -sbc prefix '$prefix' causes the invalid regex '$pattern'\n"
4737 $static_block_comment_pattern = $pattern;
4740 } ## end sub make_static_block_comment_pattern
4742 sub make_format_skipping_pattern {
4743 my ( $opt_name, $default ) = @_;
4744 my $param = $rOpts->{$opt_name};
4745 unless ($param) { $param = $default }
4747 if ( $param !~ /^#/ ) {
4748 Die("ERROR: the $opt_name parameter '$param' must begin with '#'\n");
4750 my $pattern = '^' . $param . '\s';
4751 if ( bad_pattern($pattern) ) {
4753 "ERROR: the $opt_name parameter '$param' causes the invalid regex '$pattern'\n"
4757 } ## end sub make_format_skipping_pattern
4759 sub make_non_indenting_brace_pattern {
4761 # Create the pattern used to identify static side comments.
4762 # Note that we are ending the pattern in a \s. This will allow
4763 # the pattern to be followed by a space and some text, or a newline.
4764 # The pattern is used in sub 'non_indenting_braces'
4765 $non_indenting_brace_pattern = '^#<<<\s';
4767 # allow the user to change it
4768 if ( $rOpts->{'non-indenting-brace-prefix'} ) {
4769 my $prefix = $rOpts->{'non-indenting-brace-prefix'};
4770 $prefix =~ s/^\s*//;
4771 if ( $prefix !~ /^#/ ) {
4772 Die("ERROR: the -nibp parameter '$prefix' must begin with '#'\n");
4774 my $pattern = '^' . $prefix . '\s';
4775 if ( bad_pattern($pattern) ) {
4777 "ERROR: the -nibp prefix '$prefix' causes the invalid regex '$pattern'\n"
4780 $non_indenting_brace_pattern = $pattern;
4783 } ## end sub make_non_indenting_brace_pattern
4785 sub make_closing_side_comment_list_pattern {
4787 # turn any input list into a regex for recognizing selected block types
4788 $closing_side_comment_list_pattern = '^\w+';
4789 if ( defined( $rOpts->{'closing-side-comment-list'} )
4790 && $rOpts->{'closing-side-comment-list'} )
4792 $closing_side_comment_list_pattern =
4793 make_block_pattern( '-cscl', $rOpts->{'closing-side-comment-list'} );
4796 } ## end sub make_closing_side_comment_list_pattern
4798 sub make_sub_matching_pattern {
4800 # Patterns for standardizing matches to block types for regular subs and
4801 # anonymous subs. Examples
4802 # 'sub process' is a named sub
4803 # 'sub ::m' is a named sub
4804 # 'sub' is an anonymous sub
4805 # 'sub:' is a label, not a sub
4806 # 'sub :' is a label, not a sub ( block type will be <sub:> )
4807 # sub'_ is a named sub ( block type will be <sub '_> )
4808 # 'substr' is a keyword
4809 # So note that named subs always have a space after 'sub'
4810 $SUB_PATTERN = '^sub\s'; # match normal sub
4811 $ASUB_PATTERN = '^sub$'; # match anonymous sub
4813 # Note (see also RT #133130): These patterns are used by
4814 # sub make_block_pattern, which is used for making most patterns.
4815 # So this sub needs to be called before other pattern-making routines.
4817 if ( $rOpts->{'sub-alias-list'} ) {
4819 # Note that any 'sub-alias-list' has been preprocessed to
4820 # be a trimmed, space-separated list which includes 'sub'
4821 # for example, it might be 'sub method fun'
4822 my $sub_alias_list = $rOpts->{'sub-alias-list'};
4823 $sub_alias_list =~ s/\s+/\|/g;
4824 $SUB_PATTERN =~ s/sub/\($sub_alias_list\)/;
4825 $ASUB_PATTERN =~ s/sub/\($sub_alias_list\)/;
4828 } ## end sub make_sub_matching_pattern
4830 sub make_bl_pattern {
4832 # Set defaults lists to retain historical default behavior for -bl:
4833 my $bl_list_string = '*';
4834 my $bl_exclusion_list_string = 'sort map grep eval asub';
4836 if ( defined( $rOpts->{'brace-left-list'} )
4837 && $rOpts->{'brace-left-list'} )
4839 $bl_list_string = $rOpts->{'brace-left-list'};
4841 if ( $bl_list_string =~ /\bsub\b/ ) {
4842 $rOpts->{'opening-sub-brace-on-new-line'} ||=
4843 $rOpts->{'opening-brace-on-new-line'};
4845 if ( $bl_list_string =~ /\basub\b/ ) {
4846 $rOpts->{'opening-anonymous-sub-brace-on-new-line'} ||=
4847 $rOpts->{'opening-brace-on-new-line'};
4850 $bl_pattern = make_block_pattern( '-bll', $bl_list_string );
4852 # for -bl, a list with '*' turns on -sbl and -asbl
4853 if ( $bl_pattern =~ /\.\*/ ) {
4854 $rOpts->{'opening-sub-brace-on-new-line'} ||=
4855 $rOpts->{'opening-brace-on-new-line'};
4856 $rOpts->{'opening-anonymous-sub-brace-on-new-line'} ||=
4857 $rOpts->{'opening-anonymous-brace-on-new-line'};
4860 if ( defined( $rOpts->{'brace-left-exclusion-list'} )
4861 && $rOpts->{'brace-left-exclusion-list'} )
4863 $bl_exclusion_list_string = $rOpts->{'brace-left-exclusion-list'};
4864 if ( $bl_exclusion_list_string =~ /\bsub\b/ ) {
4865 $rOpts->{'opening-sub-brace-on-new-line'} = 0;
4867 if ( $bl_exclusion_list_string =~ /\basub\b/ ) {
4868 $rOpts->{'opening-anonymous-sub-brace-on-new-line'} = 0;
4872 $bl_exclusion_pattern =
4873 make_block_pattern( '-blxl', $bl_exclusion_list_string );
4875 } ## end sub make_bl_pattern
4877 sub make_bli_pattern {
4879 # default list of block types for which -bli would apply
4880 my $bli_list_string = 'if else elsif unless while for foreach do : sub';
4881 my $bli_exclusion_list_string = SPACE;
4883 if ( defined( $rOpts->{'brace-left-and-indent-list'} )
4884 && $rOpts->{'brace-left-and-indent-list'} )
4886 $bli_list_string = $rOpts->{'brace-left-and-indent-list'};
4889 $bli_pattern = make_block_pattern( '-blil', $bli_list_string );
4891 if ( defined( $rOpts->{'brace-left-and-indent-exclusion-list'} )
4892 && $rOpts->{'brace-left-and-indent-exclusion-list'} )
4894 $bli_exclusion_list_string =
4895 $rOpts->{'brace-left-and-indent-exclusion-list'};
4897 $bli_exclusion_pattern =
4898 make_block_pattern( '-blixl', $bli_exclusion_list_string );
4900 } ## end sub make_bli_pattern
4902 sub make_keyword_group_list_pattern {
4904 # turn any input list into a regex for recognizing selected block types.
4905 # Here are the defaults:
4906 $keyword_group_list_pattern = '^(our|local|my|use|require|)$';
4907 $keyword_group_list_comment_pattern = EMPTY_STRING;
4908 if ( defined( $rOpts->{'keyword-group-blanks-list'} )
4909 && $rOpts->{'keyword-group-blanks-list'} )
4911 my @words = split /\s+/, $rOpts->{'keyword-group-blanks-list'};
4914 foreach my $word (@words) {
4915 if ( $word eq 'BC' || $word eq 'SBC' ) {
4916 push @comment_list, $word;
4917 if ( $word eq 'SBC' ) { push @comment_list, 'SBCX' }
4920 push @keyword_list, $word;
4923 $keyword_group_list_pattern =
4924 make_block_pattern( '-kgbl', $rOpts->{'keyword-group-blanks-list'} );
4925 $keyword_group_list_comment_pattern =
4926 make_block_pattern( '-kgbl', join( SPACE, @comment_list ) );
4929 } ## end sub make_keyword_group_list_pattern
4931 sub make_block_brace_vertical_tightness_pattern {
4933 # turn any input list into a regex for recognizing selected block types
4934 $block_brace_vertical_tightness_pattern =
4935 '^((if|else|elsif|unless|while|for|foreach|do|\w+:)$|sub)';
4936 if ( defined( $rOpts->{'block-brace-vertical-tightness-list'} )
4937 && $rOpts->{'block-brace-vertical-tightness-list'} )
4939 $block_brace_vertical_tightness_pattern =
4940 make_block_pattern( '-bbvtl',
4941 $rOpts->{'block-brace-vertical-tightness-list'} );
4944 } ## end sub make_block_brace_vertical_tightness_pattern
4946 sub make_blank_line_pattern {
4948 $blank_lines_before_closing_block_pattern = $SUB_PATTERN;
4949 my $key = 'blank-lines-before-closing-block-list';
4950 if ( defined( $rOpts->{$key} ) && $rOpts->{$key} ) {
4951 $blank_lines_before_closing_block_pattern =
4952 make_block_pattern( '-blbcl', $rOpts->{$key} );
4955 $blank_lines_after_opening_block_pattern = $SUB_PATTERN;
4956 $key = 'blank-lines-after-opening-block-list';
4957 if ( defined( $rOpts->{$key} ) && $rOpts->{$key} ) {
4958 $blank_lines_after_opening_block_pattern =
4959 make_block_pattern( '-blaol', $rOpts->{$key} );
4962 } ## end sub make_blank_line_pattern
4964 sub make_block_pattern {
4966 # given a string of block-type keywords, return a regex to match them
4967 # The only tricky part is that labels are indicated with a single ':'
4968 # and the 'sub' token text may have additional text after it (name of
4973 # input string: "if else elsif unless while for foreach do : sub";
4974 # pattern: '^((if|else|elsif|unless|while|for|foreach|do|\w+:)$|sub)';
4978 # To distinguish between anonymous subs and named subs, use 'sub' to
4979 # indicate a named sub, and 'asub' to indicate an anonymous sub
4981 my ( $abbrev, $string ) = @_;
4982 my @list = split_words($string);
4986 if ( $i eq '*' ) { my $pattern = '^.*'; return $pattern }
4989 if ( $i eq 'sub' ) {
4991 elsif ( $i eq 'asub' ) {
4993 elsif ( $i eq ';' ) {
4996 elsif ( $i eq '{' ) {
4999 elsif ( $i eq ':' ) {
5000 push @words, '\w+:';
5002 elsif ( $i =~ /^\w/ ) {
5006 Warn("unrecognized block type $i after $abbrev, ignoring\n");
5010 # Fix 2 for c091, prevent the pattern from matching an empty string
5011 # '1 ' is an impossible block name.
5012 if ( !@words ) { push @words, "1 " }
5014 my $pattern = '(' . join( '|', @words ) . ')$';
5015 my $sub_patterns = EMPTY_STRING;
5016 if ( $seen{'sub'} ) {
5017 $sub_patterns .= '|' . $SUB_PATTERN;
5019 if ( $seen{'asub'} ) {
5020 $sub_patterns .= '|' . $ASUB_PATTERN;
5022 if ($sub_patterns) {
5023 $pattern = '(' . $pattern . $sub_patterns . ')';
5025 $pattern = '^' . $pattern;
5027 } ## end sub make_block_pattern
5029 sub make_static_side_comment_pattern {
5031 # create the pattern used to identify static side comments
5032 $static_side_comment_pattern = '^##';
5034 # allow the user to change it
5035 if ( $rOpts->{'static-side-comment-prefix'} ) {
5036 my $prefix = $rOpts->{'static-side-comment-prefix'};
5037 $prefix =~ s/^\s*//;
5038 my $pattern = '^' . $prefix;
5039 if ( bad_pattern($pattern) ) {
5041 "ERROR: the -sscp prefix '$prefix' causes the invalid regex '$pattern'\n"
5044 $static_side_comment_pattern = $pattern;
5047 } ## end sub make_static_side_comment_pattern
5049 sub make_closing_side_comment_prefix {
5051 # Be sure we have a valid closing side comment prefix
5052 my $csc_prefix = $rOpts->{'closing-side-comment-prefix'};
5053 my $csc_prefix_pattern;
5054 if ( !defined($csc_prefix) ) {
5055 $csc_prefix = '## end';
5056 $csc_prefix_pattern = '^##\s+end';
5059 my $test_csc_prefix = $csc_prefix;
5060 if ( $test_csc_prefix !~ /^#/ ) {
5061 $test_csc_prefix = '#' . $test_csc_prefix;
5064 # make a regex to recognize the prefix
5065 my $test_csc_prefix_pattern = $test_csc_prefix;
5067 # escape any special characters
5068 $test_csc_prefix_pattern =~ s/([^#\s\w])/\\$1/g;
5070 $test_csc_prefix_pattern = '^' . $test_csc_prefix_pattern;
5072 # allow exact number of intermediate spaces to vary
5073 $test_csc_prefix_pattern =~ s/\s+/\\s\+/g;
5075 # make sure we have a good pattern
5076 # if we fail this we probably have an error in escaping
5079 if ( bad_pattern($test_csc_prefix_pattern) ) {
5081 # shouldn't happen..must have screwed up escaping, above
5084 Program Error: the -cscp prefix '$csc_prefix' caused the invalid regex '$csc_prefix_pattern'
5088 # just warn and keep going with defaults
5090 "Error: the -cscp prefix '$csc_prefix' caused the invalid regex '$csc_prefix_pattern'\n"
5092 Warn("Please consider using a simpler -cscp prefix\n");
5093 Warn("Using default -cscp instead; please check output\n");
5096 $csc_prefix = $test_csc_prefix;
5097 $csc_prefix_pattern = $test_csc_prefix_pattern;
5100 $rOpts->{'closing-side-comment-prefix'} = $csc_prefix;
5101 $closing_side_comment_prefix_pattern = $csc_prefix_pattern;
5103 } ## end sub make_closing_side_comment_prefix
5105 ##################################################
5106 # CODE SECTION 4: receive lines from the tokenizer
5107 ##################################################
5109 { ## begin closure write_line
5113 # Variables used by sub check_sequence_numbers:
5115 my %saw_opening_seqno;
5116 my %saw_closing_seqno;
5119 sub initialize_write_line {
5121 $nesting_depth = undef;
5123 $last_seqno = SEQ_ROOT;
5124 %saw_opening_seqno = ();
5125 %saw_closing_seqno = ();
5128 } ## end sub initialize_write_line
5130 sub check_sequence_numbers {
5132 # Routine for checking sequence numbers. This only needs to be
5133 # done occasionally in DEVEL_MODE to be sure everything is working
5135 my ( $rtokens, $rtoken_type, $rtype_sequence, $input_line_no ) = @_;
5136 my $jmax = @{$rtokens} - 1;
5137 return unless ( $jmax >= 0 );
5138 foreach my $j ( 0 .. $jmax ) {
5139 my $seqno = $rtype_sequence->[$j];
5140 my $token = $rtokens->[$j];
5141 my $type = $rtoken_type->[$j];
5142 $seqno = EMPTY_STRING unless ( defined($seqno) );
5144 "Error at j=$j, line number $input_line_no, seqno='$seqno', type='$type', tok='$token':\n";
5148 # Sequence numbers are generated for opening tokens, so every opening
5149 # token should be sequenced. Closing tokens will be unsequenced
5150 # if they do not have a matching opening token.
5151 if ( $is_opening_sequence_token{$token}
5157 $err_msg Unexpected opening token without sequence number
5164 # Save starting seqno to identify sequence method:
5165 # New method starts with 2 and has continuous numbering
5166 # Old method starts with >2 and may have gaps
5167 if ( !defined($initial_seqno) ) { $initial_seqno = $seqno }
5169 if ( $is_opening_sequence_token{$token} ) {
5171 # New method should have continuous numbering
5172 if ( $initial_seqno == 2 && $seqno != $last_seqno + 1 ) {
5175 $err_msg Unexpected opening sequence number: previous seqno=$last_seqno, but seqno= $seqno
5179 $last_seqno = $seqno;
5181 # Numbers must be unique
5182 if ( $saw_opening_seqno{$seqno} ) {
5183 my $lno = $saw_opening_seqno{$seqno};
5186 $err_msg Already saw an opening tokens at line $lno with this sequence number
5190 $saw_opening_seqno{$seqno} = $input_line_no;
5193 # only one closing item per seqno
5194 elsif ( $is_closing_sequence_token{$token} ) {
5195 if ( $saw_closing_seqno{$seqno} ) {
5196 my $lno = $saw_closing_seqno{$seqno};
5199 $err_msg Already saw a closing token with this seqno at line $lno
5203 $saw_closing_seqno{$seqno} = $input_line_no;
5205 # Every closing seqno must have an opening seqno
5206 if ( !$saw_opening_seqno{$seqno} ) {
5209 $err_msg Saw a closing token but no opening token with this seqno
5215 # Sequenced items must be opening or closing
5219 $err_msg Unexpected token type with a sequence number
5226 } ## end sub check_sequence_numbers
5230 # This routine receives lines one-by-one from the tokenizer and stores
5231 # them in a format suitable for further processing. After the last
5232 # line has been sent, the tokenizer will call sub 'finish_formatting'
5233 # to do the actual formatting.
5235 my ( $self, $line_of_tokens_old ) = @_;
5236 my $rLL = $self->[_rLL_];
5237 my $Klimit = $self->[_Klimit_];
5238 my $rlines_new = $self->[_rlines_];
5240 my $K_opening_container = $self->[_K_opening_container_];
5241 my $K_closing_container = $self->[_K_closing_container_];
5242 my $rdepth_of_opening_seqno = $self->[_rdepth_of_opening_seqno_];
5243 my $rblock_type_of_seqno = $self->[_rblock_type_of_seqno_];
5244 my $rSS = $self->[_rSS_];
5245 my $Iss_opening = $self->[_Iss_opening_];
5246 my $Iss_closing = $self->[_Iss_closing_];
5249 my $line_of_tokens = {};
5254 _guessed_indentation_level
5260 _square_bracket_depth
5265 $line_of_tokens->{$_} = $line_of_tokens_old->{$_};
5268 # Data needed by Logger
5269 $line_of_tokens->{_level_0} = 0;
5270 $line_of_tokens->{_ci_level_0} = 0;
5271 $line_of_tokens->{_nesting_blocks_0} = EMPTY_STRING;
5272 $line_of_tokens->{_nesting_tokens_0} = EMPTY_STRING;
5274 # Needed to avoid trimming quotes
5275 $line_of_tokens->{_ended_in_blank_token} = undef;
5277 my $line_type = $line_of_tokens_old->{_line_type};
5278 my $line_number = $line_of_tokens_old->{_line_number};
5279 my $CODE_type = EMPTY_STRING;
5282 # Handle line of non-code
5283 if ( $line_type ne 'CODE' ) {
5284 $tee_output ||= $rOpts_tee_pod
5285 && substr( $line_type, 0, 3 ) eq 'POD';
5288 # Handle line of code
5291 my $rtokens = $line_of_tokens_old->{_rtokens};
5292 my $rtoken_type = $line_of_tokens_old->{_rtoken_type};
5293 my $rblock_type = $line_of_tokens_old->{_rblock_type};
5294 my $rtype_sequence = $line_of_tokens_old->{_rtype_sequence};
5295 my $rlevels = $line_of_tokens_old->{_rlevels};
5296 my $rci_levels = $line_of_tokens_old->{_rci_levels};
5298 my $jmax = @{$rtokens} - 1;
5300 $Kfirst = defined($Klimit) ? $Klimit + 1 : 0;
5303 && check_sequence_numbers( $rtokens, $rtoken_type,
5304 $rtype_sequence, $line_number );
5306 # Find the starting nesting depth ...
5307 # It must be the value of variable 'level' of the first token
5308 # because the nesting depth is used as a token tag in the
5309 # vertical aligner and is compared to actual levels.
5310 # So vertical alignment problems will occur with any other
5312 if ( !defined($nesting_depth) ) {
5313 $nesting_depth = $rlevels->[0];
5314 $nesting_depth = 0 if ( $nesting_depth < 0 );
5315 $rdepth_of_opening_seqno->[SEQ_ROOT] = $nesting_depth - 1;
5318 foreach my $j ( 0 .. $jmax ) {
5320 # Do not clip the 'level' variable yet. We will do this
5321 # later, in sub 'store_token_to_go'. The reason is that in
5322 # files with level errors, the logic in 'weld_cuddled_else'
5323 # uses a stack logic that will give bad welds if we clip
5325 ## if ( $rlevels->[$j] < 0 ) { $rlevels->[$j] = 0 }
5327 # Handle tokens with sequence numbers ...
5328 my $seqno = $rtype_sequence->[$j];
5330 my $token = $rtokens->[$j];
5332 if ( $is_opening_token{$token} ) {
5333 $K_opening_container->{$seqno} = @{$rLL};
5334 $rdepth_of_opening_seqno->[$seqno] = $nesting_depth;
5337 # Save a sequenced block type at its opening token.
5338 # Note that unsequenced block types can occur in
5339 # unbalanced code with errors but are ignored here.
5340 if ( $rblock_type->[$j] ) {
5341 my $block_type = $rblock_type->[$j];
5342 $rblock_type_of_seqno->{$seqno} = $block_type;
5343 if ( substr( $block_type, 0, 3 ) eq 'sub'
5344 || $rOpts_sub_alias_list )
5346 if ( $block_type =~ /$ASUB_PATTERN/ ) {
5347 $self->[_ris_asub_block_]->{$seqno} = 1;
5349 elsif ( $block_type =~ /$SUB_PATTERN/ ) {
5350 $self->[_ris_sub_block_]->{$seqno} = 1;
5355 elsif ( $is_closing_token{$token} ) {
5357 # The opening depth should always be defined, and
5358 # it should equal $nesting_depth-1. To protect
5359 # against unforseen error conditions, however, we
5360 # will check this and fix things if necessary. For
5361 # a test case see issue c055.
5363 $rdepth_of_opening_seqno->[$seqno];
5364 if ( !defined($opening_depth) ) {
5365 $opening_depth = $nesting_depth - 1;
5366 $opening_depth = 0 if ( $opening_depth < 0 );
5367 $rdepth_of_opening_seqno->[$seqno] =
5370 # This is not fatal but should not happen. The
5371 # tokenizer generates sequence numbers
5372 # incrementally upon encountering each new
5373 # opening token, so every positive sequence
5374 # number should correspond to an opening token.
5377 No opening token seen for closing token = '$token' at seq=$seqno at depth=$opening_depth
5381 $K_closing_container->{$seqno} = @{$rLL};
5382 $nesting_depth = $opening_depth;
5385 elsif ( $token eq '?' ) {
5387 elsif ( $token eq ':' ) {
5391 # The only sequenced types output by the tokenizer are
5392 # the opening & closing containers and the ternary
5393 # types. So we would only get here if the tokenizer has
5394 # been changed to mark some other tokens with sequence
5395 # numbers, or if an error has been introduced in a
5396 # hash such as %is_opening_container
5400 Unexpected sequenced token '$token' of type '$rtoken_type->[$j]', sequence=$seqno arrived from tokenizer.
5401 Expecting only opening or closing container tokens or ternary tokens with sequence numbers.
5407 $Iss_opening->[$seqno] = @{$rSS};
5409 # For efficiency, we find the maximum level of
5410 # opening tokens of any type. The actual maximum
5411 # level will be that of their contents which is 1
5412 # greater. That will be fixed in sub
5413 # 'finish_formatting'.
5414 my $level = $rlevels->[$j];
5415 if ( $level > $self->[_maximum_level_] ) {
5416 $self->[_maximum_level_] = $level;
5417 $self->[_maximum_level_at_line_] = $line_number;
5420 else { $Iss_closing->[$seqno] = @{$rSS} }
5421 push @{$rSS}, $sign * $seqno;
5425 $seqno = EMPTY_STRING unless ( defined($seqno) );
5430 _TOKEN_, _TYPE_, _TYPE_SEQUENCE_,
5431 _LEVEL_, _CI_LEVEL_, _LINE_INDEX_,
5434 $rtokens->[$j], $rtoken_type->[$j],
5435 $seqno, $rlevels->[$j],
5436 $rci_levels->[$j], $line_number - 1,
5438 push @{$rLL}, \@tokary;
5439 } ## end foreach my $j ( 0 .. $jmax )
5441 $Klimit = @{$rLL} - 1;
5443 # Need to remember if we can trim the input line
5444 $line_of_tokens->{_ended_in_blank_token} =
5445 $rtoken_type->[$jmax] eq 'b';
5447 $line_of_tokens->{_level_0} = $rlevels->[0];
5448 $line_of_tokens->{_ci_level_0} = $rci_levels->[0];
5449 $line_of_tokens->{_nesting_blocks_0} =
5450 $line_of_tokens_old->{_nesting_blocks_0};
5451 $line_of_tokens->{_nesting_tokens_0} =
5452 $line_of_tokens_old->{_nesting_tokens_0};
5454 } ## end if ( $jmax >= 0 )
5457 $rOpts_tee_block_comments
5459 && $rLL->[$Kfirst]->[_TYPE_] eq '#';
5462 $rOpts_tee_side_comments
5464 && $Klimit > $Kfirst
5465 && $rLL->[$Klimit]->[_TYPE_] eq '#';
5467 } ## end if ( $line_type eq 'CODE')
5469 # Finish storing line variables
5471 my $fh_tee = $self->[_fh_tee_];
5472 my $line_text = $line_of_tokens_old->{_line_text};
5473 $fh_tee->print($line_text) if ($fh_tee);
5476 $line_of_tokens->{_rK_range} = [ $Kfirst, $Klimit ];
5477 $line_of_tokens->{_code_type} = $CODE_type;
5478 $self->[_Klimit_] = $Klimit;
5480 push @{$rlines_new}, $line_of_tokens;
5482 } ## end sub write_line
5483 } ## end closure write_line
5485 #############################################
5486 # CODE SECTION 5: Pre-process the entire file
5487 #############################################
5489 sub finish_formatting {
5491 my ( $self, $severe_error ) = @_;
5493 # The file has been tokenized and is ready to be formatted.
5494 # All of the relevant data is stored in $self, ready to go.
5496 # Check the maximum level. If it is extremely large we will give up and
5497 # output the file verbatim. Note that the actual maximum level is 1
5498 # greater than the saved value, so we fix that here.
5499 $self->[_maximum_level_] += 1;
5500 my $maximum_level = $self->[_maximum_level_];
5501 my $maximum_table_index = $#maximum_line_length_at_level;
5502 if ( !$severe_error && $maximum_level >= $maximum_table_index ) {
5503 $severe_error ||= 1;
5505 The maximum indentation level, $maximum_level, exceeds the builtin limit of $maximum_table_index.
5506 Something may be wrong; formatting will be skipped.
5510 # output file verbatim if severe error or no formatting requested
5511 if ( $severe_error || $rOpts->{notidy} ) {
5512 $self->dump_verbatim();
5517 # Update the 'save_logfile' flag based to include any tokenization errors.
5518 # We can save time by skipping logfile calls if it is not going to be saved.
5519 my $logger_object = $self->[_logger_object_];
5520 if ($logger_object) {
5521 $self->[_save_logfile_] = $logger_object->get_save_logfile();
5524 my $rix_side_comments = $self->set_CODE_type();
5526 $self->find_non_indenting_braces($rix_side_comments);
5528 # Handle any requested side comment deletions. It is easier to get
5529 # this done here rather than farther down the pipeline because IO
5530 # lines take a different route, and because lines with deleted HSC
5531 # become BL lines. We have already handled any tee requests in sub
5532 # getline, so it is safe to delete side comments now.
5533 $self->delete_side_comments($rix_side_comments)
5534 if ( $rOpts_delete_side_comments
5535 || $rOpts_delete_closing_side_comments );
5537 # Verify that the line hash does not have any unknown keys.
5538 $self->check_line_hashes() if (DEVEL_MODE);
5540 # Make a pass through all tokens, adding or deleting any whitespace as
5541 # required. Also make any other changes, such as adding semicolons.
5542 # All token changes must be made here so that the token data structure
5543 # remains fixed for the rest of this iteration.
5544 $self->respace_tokens();
5546 $self->set_excluded_lp_containers();
5548 $self->find_multiline_qw();
5550 $self->keep_old_line_breaks();
5552 # Implement any welding needed for the -wn or -cb options
5553 $self->weld_containers();
5555 $self->collapsed_lengths()
5556 if ( $rOpts_line_up_parentheses && $rOpts_extended_line_up_parentheses );
5558 # Locate small nested blocks which should not be broken
5559 $self->mark_short_nested_blocks();
5561 $self->adjust_indentation_levels();
5563 # Verify that the main token array looks OK. If this ever causes a fault
5564 # then place similar checks before the sub calls above to localize the
5566 $self->check_rLL("Before 'process_all_lines'") if (DEVEL_MODE);
5568 # Finishes formatting and write the result to the line sink.
5569 # Eventually this call should just change the 'rlines' data according to the
5570 # new line breaks and then return so that we can do an internal iteration
5571 # before continuing with the next stages of formatting.
5572 $self->process_all_lines();
5574 # A final routine to tie up any loose ends
5577 } ## end sub finish_formatting
5582 # Examine each line of code and set a flag '$CODE_type' to describe it.
5583 # Also return a list of lines with side comments.
5585 my $rLL = $self->[_rLL_];
5586 my $Klimit = $self->[_Klimit_];
5587 my $rlines = $self->[_rlines_];
5588 my $rblock_type_of_seqno = $self->[_rblock_type_of_seqno_];
5590 my $rOpts_format_skipping_begin = $rOpts->{'format-skipping-begin'};
5591 my $rOpts_format_skipping_end = $rOpts->{'format-skipping-end'};
5592 my $rOpts_static_block_comment_prefix =
5593 $rOpts->{'static-block-comment-prefix'};
5595 # Remember indexes of lines with side comments
5596 my @ix_side_comments;
5598 my $In_format_skipping_section = 0;
5599 my $Saw_VERSION_in_this_file = 0;
5600 my $has_side_comment = 0;
5601 my ( $Kfirst, $Klast );
5604 # Loop to set CODE_type
5606 # Possible CODE_types
5607 # 'VB' = Verbatim - line goes out verbatim (a quote)
5608 # 'FS' = Format Skipping - line goes out verbatim
5610 # 'HSC' = Hanging Side Comment - fix this hanging side comment
5611 # 'SBCX'= Static Block Comment Without Leading Space
5612 # 'SBC' = Static Block Comment
5613 # 'BC' = Block Comment - an ordinary full line comment
5614 # 'IO' = Indent Only - line goes out unchanged except for indentation
5615 # 'NIN' = No Internal Newlines - line does not get broken
5616 # 'VER' = VERSION statement
5617 # '' = ordinary line of code with no restrictions
5620 foreach my $line_of_tokens ( @{$rlines} ) {
5622 my $input_line_no = $line_of_tokens->{_line_number};
5623 my $line_type = $line_of_tokens->{_line_type};
5625 my $Last_line_had_side_comment = $has_side_comment;
5626 if ($has_side_comment) {
5627 push @ix_side_comments, $ix_line - 1;
5629 $has_side_comment = 0;
5631 next unless ( $line_type eq 'CODE' );
5633 my $Klast_prev = $Klast;
5635 my $rK_range = $line_of_tokens->{_rK_range};
5636 ( $Kfirst, $Klast ) = @{$rK_range};
5638 my $last_CODE_type = $CODE_type;
5639 $CODE_type = EMPTY_STRING;
5641 my $input_line = $line_of_tokens->{_line_text};
5642 my $jmax = defined($Kfirst) ? $Klast - $Kfirst : -1;
5644 my $is_block_comment = 0;
5645 if ( $jmax >= 0 && $rLL->[$Klast]->[_TYPE_] eq '#' ) {
5646 if ( $jmax == 0 ) { $is_block_comment = 1; }
5647 else { $has_side_comment = 1 }
5650 # Write line verbatim if we are in a formatting skip section
5651 if ($In_format_skipping_section) {
5653 # Note: extra space appended to comment simplifies pattern matching
5657 # optional fast pre-check
5658 && ( substr( $rLL->[$Kfirst]->[_TOKEN_], 0, 4 ) eq '#>>>'
5659 || $rOpts_format_skipping_end )
5661 && ( $rLL->[$Kfirst]->[_TOKEN_] . SPACE ) =~
5662 /$format_skipping_pattern_end/
5665 $In_format_skipping_section = 0;
5666 write_logfile_entry(
5667 "Line $input_line_no: Exiting format-skipping section\n");
5673 # Check for a continued quote..
5674 if ( $line_of_tokens->{_starting_in_quote} ) {
5676 # A line which is entirely a quote or pattern must go out
5677 # verbatim. Note: the \n is contained in $input_line.
5679 if ( ( $input_line =~ "\t" ) ) {
5680 my $input_line_number = $line_of_tokens->{_line_number};
5681 $self->note_embedded_tab($input_line_number);
5688 # See if we are entering a formatting skip section
5692 # optional fast pre-check
5693 && ( substr( $rLL->[$Kfirst]->[_TOKEN_], 0, 4 ) eq '#<<<'
5694 || $rOpts_format_skipping_begin )
5696 && $rOpts_format_skipping
5697 && ( $rLL->[$Kfirst]->[_TOKEN_] . SPACE ) =~
5698 /$format_skipping_pattern_begin/
5701 $In_format_skipping_section = 1;
5702 write_logfile_entry(
5703 "Line $input_line_no: Entering format-skipping section\n");
5708 # ignore trailing blank tokens (they will get deleted later)
5709 if ( $jmax > 0 && $rLL->[$Klast]->[_TYPE_] eq 'b' ) {
5720 if ($is_block_comment) {
5722 # see if this is a static block comment (starts with ## by default)
5723 my $is_static_block_comment = 0;
5724 my $no_leading_space = substr( $input_line, 0, 1 ) eq '#';
5727 # optional fast pre-check
5729 substr( $rLL->[$Kfirst]->[_TOKEN_], 0, 2 ) eq '##'
5730 || $rOpts_static_block_comment_prefix
5733 && $rOpts_static_block_comments
5734 && $input_line =~ /$static_block_comment_pattern/
5737 $is_static_block_comment = 1;
5740 # Check for comments which are line directives
5741 # Treat exactly as static block comments without leading space
5742 # reference: perlsyn, near end, section Plain Old Comments (Not!)
5743 # example: '# line 42 "new_filename.plx"'
5746 && $input_line =~ /^\# \s*
5748 (?:\s("?)([^"]+)\2)? \s*
5752 $is_static_block_comment = 1;
5755 # look for hanging side comment ...
5757 $Last_line_had_side_comment # last line had side comment
5758 && !$no_leading_space # there is some leading space
5760 $is_static_block_comment # do not make static comment hanging
5764 # continuing an existing HSC chain?
5765 if ( $last_CODE_type eq 'HSC' ) {
5766 $has_side_comment = 1;
5771 # starting a new HSC chain?
5774 $rOpts->{'hanging-side-comments'} # user is allowing
5775 # hanging side comments
5778 && ( defined($Klast_prev) && $Klast_prev > 1 )
5780 # and the previous side comment was not static (issue c070)
5782 $rOpts->{'static-side-comments'}
5783 && $rLL->[$Klast_prev]->[_TOKEN_] =~
5784 /$static_side_comment_pattern/
5790 # and it is not a closing side comment (issue c070).
5791 my $K_penult = $Klast_prev - 1;
5792 $K_penult -= 1 if ( $rLL->[$K_penult]->[_TYPE_] eq 'b' );
5794 ( $rLL->[$K_penult]->[_TOKEN_] eq '}'
5795 && $rLL->[$K_penult]->[_TYPE_] eq '}'
5796 && $rLL->[$Klast_prev]->[_TOKEN_] =~
5797 /$closing_side_comment_prefix_pattern/ );
5799 if ( !$follows_csc ) {
5800 $has_side_comment = 1;
5807 if ($is_static_block_comment) {
5808 $CODE_type = $no_leading_space ? 'SBCX' : 'SBC';
5811 elsif ($Last_line_had_side_comment
5812 && !$rOpts_maximum_consecutive_blank_lines
5813 && $rLL->[$Kfirst]->[_LEVEL_] > 0 )
5815 # Emergency fix to keep a block comment from becoming a hanging
5816 # side comment. This fix is for the case that blank lines
5817 # cannot be inserted. There is related code in sub
5818 # 'process_line_of_CODE'
5819 $CODE_type = 'SBCX';
5828 # End of comments. Handle a line of normal code:
5830 if ($rOpts_indent_only) {
5835 if ( !$rOpts_add_newlines ) {
5840 # Patch needed for MakeMaker. Do not break a statement
5841 # in which $VERSION may be calculated. See MakeMaker.pm;
5842 # this is based on the coding in it.
5843 # The first line of a file that matches this will be eval'd:
5844 # /([\$*])(([\w\:\']*)\bVERSION)\b.*\=/
5846 # *VERSION = \'1.01';
5847 # ( $VERSION ) = '$Revision: 1.74 $ ' =~ /\$Revision:\s+([^\s]+)/;
5848 # We will pass such a line straight through without breaking
5849 # it unless -npvl is used.
5851 # Patch for problem reported in RT #81866, where files
5852 # had been flattened into a single line and couldn't be
5853 # tidied without -npvl. There are two parts to this patch:
5854 # First, it is not done for a really long line (80 tokens for now).
5855 # Second, we will only allow up to one semicolon
5856 # before the VERSION. We need to allow at least one semicolon
5857 # for statements like this:
5858 # require Exporter; our $VERSION = $Exporter::VERSION;
5859 # where both statements must be on a single line for MakeMaker
5861 if ( !$Saw_VERSION_in_this_file
5864 /^[^;]*;?[^;]*([\$*])(([\w\:\']*)\bVERSION)\b.*\=/ )
5866 $Saw_VERSION_in_this_file = 1;
5867 write_logfile_entry("passing VERSION line; -npvl deactivates\n");
5869 # This code type has lower priority than others
5875 $line_of_tokens->{_code_type} = $CODE_type;
5878 if ($has_side_comment) {
5879 push @ix_side_comments, $ix_line;
5882 return \@ix_side_comments;
5883 } ## end sub set_CODE_type
5885 sub find_non_indenting_braces {
5887 my ( $self, $rix_side_comments ) = @_;
5888 return unless ( $rOpts->{'non-indenting-braces'} );
5889 my $rLL = $self->[_rLL_];
5890 my $Klimit = $self->[_Klimit_];
5891 return unless ( defined($rLL) && @{$rLL} );
5892 my $rlines = $self->[_rlines_];
5893 my $rblock_type_of_seqno = $self->[_rblock_type_of_seqno_];
5894 my $rseqno_non_indenting_brace_by_ix =
5895 $self->[_rseqno_non_indenting_brace_by_ix_];
5897 foreach my $ix ( @{$rix_side_comments} ) {
5898 my $line_of_tokens = $rlines->[$ix];
5899 my $line_type = $line_of_tokens->{_line_type};
5900 if ( $line_type ne 'CODE' ) {
5905 my $CODE_type = $line_of_tokens->{_code_type};
5906 my $rK_range = $line_of_tokens->{_rK_range};
5907 my ( $Kfirst, $Klast ) = @{$rK_range};
5908 unless ( defined($Kfirst) && $rLL->[$Klast]->[_TYPE_] eq '#' ) {
5913 next unless ( $Klast > $Kfirst ); # maybe HSC
5914 my $token_sc = $rLL->[$Klast]->[_TOKEN_];
5915 my $K_m = $Klast - 1;
5916 my $type_m = $rLL->[$K_m]->[_TYPE_];
5917 if ( $type_m eq 'b' && $K_m > $Kfirst ) {
5919 $type_m = $rLL->[$K_m]->[_TYPE_];
5921 my $seqno_m = $rLL->[$K_m]->[_TYPE_SEQUENCE_];
5923 my $block_type_m = $rblock_type_of_seqno->{$seqno_m};
5925 # The pattern ends in \s but we have removed the newline, so
5926 # we added it back for the match. That way we require an exact
5927 # match to the special string and also allow additional text.
5930 && $is_opening_type{$type_m}
5931 && $token_sc =~ /$non_indenting_brace_pattern/ )
5933 $rseqno_non_indenting_brace_by_ix->{$ix} = $seqno_m;
5938 } ## end sub find_non_indenting_braces
5940 sub delete_side_comments {
5941 my ( $self, $rix_side_comments ) = @_;
5943 # Given a list of indexes of lines with side comments, handle any
5944 # requested side comment deletions.
5946 my $rLL = $self->[_rLL_];
5947 my $rlines = $self->[_rlines_];
5948 my $rblock_type_of_seqno = $self->[_rblock_type_of_seqno_];
5949 my $rseqno_non_indenting_brace_by_ix =
5950 $self->[_rseqno_non_indenting_brace_by_ix_];
5952 foreach my $ix ( @{$rix_side_comments} ) {
5953 my $line_of_tokens = $rlines->[$ix];
5954 my $line_type = $line_of_tokens->{_line_type};
5956 # This fault shouldn't happen because we only saved CODE lines with
5957 # side comments in the TASK 1 loop above.
5958 if ( $line_type ne 'CODE' ) {
5962 Hit unexpected line_type = '$line_type' near line $lno while deleting side comments, should be 'CODE'
5968 my $CODE_type = $line_of_tokens->{_code_type};
5969 my $rK_range = $line_of_tokens->{_rK_range};
5970 my ( $Kfirst, $Klast ) = @{$rK_range};
5972 unless ( defined($Kfirst) && $rLL->[$Klast]->[_TYPE_] eq '#' ) {
5976 Did not find side comment near line $lno while deleting side comments
5982 my $delete_side_comment =
5983 $rOpts_delete_side_comments
5984 && ( $Klast > $Kfirst || $CODE_type eq 'HSC' )
5986 || $CODE_type eq 'HSC'
5987 || $CODE_type eq 'IO'
5988 || $CODE_type eq 'NIN' );
5990 # Do not delete special control side comments
5991 if ( $rseqno_non_indenting_brace_by_ix->{$ix} ) {
5992 $delete_side_comment = 0;
5996 $rOpts_delete_closing_side_comments
5997 && !$delete_side_comment
6000 || $CODE_type eq 'HSC'
6001 || $CODE_type eq 'IO'
6002 || $CODE_type eq 'NIN' )
6005 my $token = $rLL->[$Klast]->[_TOKEN_];
6006 my $K_m = $Klast - 1;
6007 my $type_m = $rLL->[$K_m]->[_TYPE_];
6008 if ( $type_m eq 'b' && $K_m > $Kfirst ) { $K_m-- }
6009 my $seqno_m = $rLL->[$K_m]->[_TYPE_SEQUENCE_];
6011 my $block_type_m = $rblock_type_of_seqno->{$seqno_m};
6013 && $token =~ /$closing_side_comment_prefix_pattern/
6014 && $block_type_m =~ /$closing_side_comment_list_pattern/ )
6016 $delete_side_comment = 1;
6019 } ## end if ( $rOpts_delete_closing_side_comments...)
6021 if ($delete_side_comment) {
6023 # We are actually just changing the side comment to a blank.
6024 # This may produce multiple blanks in a row, but sub respace_tokens
6025 # will check for this and fix it.
6026 $rLL->[$Klast]->[_TYPE_] = 'b';
6027 $rLL->[$Klast]->[_TOKEN_] = SPACE;
6029 # The -io option outputs the line text, so we have to update
6030 # the line text so that the comment does not reappear.
6031 if ( $CODE_type eq 'IO' ) {
6032 my $line = EMPTY_STRING;
6033 foreach my $KK ( $Kfirst .. $Klast - 1 ) {
6034 $line .= $rLL->[$KK]->[_TOKEN_];
6037 $line_of_tokens->{_line_text} = $line . "\n";
6040 # If we delete a hanging side comment the line becomes blank.
6041 if ( $CODE_type eq 'HSC' ) { $line_of_tokens->{_code_type} = 'BL' }
6045 } ## end sub delete_side_comments
6049 my $rlines = $self->[_rlines_];
6050 foreach my $line ( @{$rlines} ) {
6051 my $input_line = $line->{_line_text};
6052 $self->write_unindented_line($input_line);
6061 my %is_nonlist_keyword;
6062 my %is_nonlist_type;
6064 my %is_unexpected_equals;
6068 # added 'U' to fix cases b1125 b1126 b1127
6070 @{wU}{@q} = (1) x scalar(@q);
6072 @q = qw(w i q Q G C Z);
6073 @{wiq}{@q} = (1) x scalar(@q);
6076 @{is_wit}{@q} = (1) x scalar(@q);
6079 @{is_sigil}{@q} = (1) x scalar(@q);
6081 # Parens following these keywords will not be marked as lists. Note that
6082 # 'for' is not included and is handled separately, by including 'f' in the
6083 # hash %is_counted_type, since it may or may not be a c-style for loop.
6084 @q = qw( if elsif unless and or );
6085 @is_nonlist_keyword{@q} = (1) x scalar(@q);
6087 # Parens following these types will not be marked as lists
6089 @is_nonlist_type{@q} = (1) x scalar(@q);
6092 @is_s_y_m_slash{@q} = (1) x scalar(@q);
6095 @is_unexpected_equals{@q} = (1) x scalar(@q);
6099 sub respace_tokens {
6102 return if $rOpts->{'indent-only'};
6104 # This routine is called once per file to do as much formatting as possible
6105 # before new line breaks are set.
6107 # This routine makes all necessary and possible changes to the tokenization
6108 # after the initial tokenization of the file. This is a tedious routine,
6109 # but basically it consists of inserting and deleting whitespace between
6110 # nonblank tokens according to the selected parameters. In a few cases
6111 # non-space characters are added, deleted or modified.
6113 # The goal of this routine is to create a new token array which only needs
6114 # the definition of new line breaks and padding to complete formatting. In
6115 # a few cases we have to cheat a little to achieve this goal. In
6116 # particular, we may not know if a semicolon will be needed, because it
6117 # depends on how the line breaks go. To handle this, we include the
6118 # semicolon as a 'phantom' which can be displayed as normal or as an empty
6121 # Method: The old tokens are copied one-by-one, with changes, from the old
6122 # linear storage array $rLL to a new array $rLL_new.
6124 my $rLL = $self->[_rLL_];
6125 my $Klimit_old = $self->[_Klimit_];
6126 my $rlines = $self->[_rlines_];
6127 my $length_function = $self->[_length_function_];
6128 my $is_encoded_data = $self->[_is_encoded_data_];
6130 my $rLL_new = []; # This is the new array
6132 my $Ktoken_vars; # the old K value of $rtoken_vars
6133 my ( $Kfirst_old, $Klast_old ); # Range of old line
6134 my $Klast_old_code; # K of last token if side comment
6135 my $Kmax = @{$rLL} - 1;
6137 my $CODE_type = EMPTY_STRING;
6138 my $line_type = EMPTY_STRING;
6140 # Set the whitespace flags, which indicate the token spacing preference.
6141 my $rwhitespace_flags = $self->set_whitespace_flags();
6143 # we will be setting token lengths as we go
6144 my $cumulative_length = 0;
6147 my %K_old_opening_by_seqno = (); # Note: old K index
6149 my $depth_next_max = 0;
6151 # Note that $K_opening_container and $K_closing_container have values
6152 # defined in sub get_line() for the previous K indexes. They were needed
6153 # in case option 'indent-only' was set, and we didn't get here. We no longer
6154 # need those and will eliminate them now to avoid any possible mixing of
6155 # old and new values.
6156 my $K_opening_container = $self->[_K_opening_container_] = {};
6157 my $K_closing_container = $self->[_K_closing_container_] = {};
6159 my $K_closing_ternary = $self->[_K_closing_ternary_];
6160 my $K_opening_ternary = $self->[_K_opening_ternary_];
6161 my $rK_phantom_semicolons = $self->[_rK_phantom_semicolons_];
6162 my $rchildren_of_seqno = $self->[_rchildren_of_seqno_];
6163 my $rhas_broken_code_block = $self->[_rhas_broken_code_block_];
6164 my $rhas_broken_list = $self->[_rhas_broken_list_];
6165 my $rhas_broken_list_with_lec = $self->[_rhas_broken_list_with_lec_];
6166 my $rhas_code_block = $self->[_rhas_code_block_];
6167 my $rhas_list = $self->[_rhas_list_];
6168 my $rhas_ternary = $self->[_rhas_ternary_];
6169 my $ris_assigned_structure = $self->[_ris_assigned_structure_];
6170 my $ris_broken_container = $self->[_ris_broken_container_];
6171 my $ris_excluded_lp_container = $self->[_ris_excluded_lp_container_];
6172 my $ris_list_by_seqno = $self->[_ris_list_by_seqno_];
6173 my $ris_permanently_broken = $self->[_ris_permanently_broken_];
6174 my $rlec_count_by_seqno = $self->[_rlec_count_by_seqno_];
6175 my $roverride_cab3 = $self->[_roverride_cab3_];
6176 my $rparent_of_seqno = $self->[_rparent_of_seqno_];
6177 my $rtype_count_by_seqno = $self->[_rtype_count_by_seqno_];
6178 my $rblock_type_of_seqno = $self->[_rblock_type_of_seqno_];
6180 my $last_nonblank_code_type = ';';
6181 my $last_nonblank_code_token = ';';
6182 my $last_nonblank_block_type = EMPTY_STRING;
6183 my $last_last_nonblank_code_type = ';';
6184 my $last_last_nonblank_code_token = ';';
6186 my %K_first_here_doc_by_seqno;
6188 my $set_permanently_broken = sub {
6190 while ( defined($seqno) ) {
6191 $ris_permanently_broken->{$seqno} = 1;
6192 $seqno = $rparent_of_seqno->{$seqno};
6196 my $store_token = sub {
6199 # This will be the index of this item in the new array
6200 my $KK_new = @{$rLL_new};
6202 #------------------------------------------------------------------
6203 # NOTE: called once per token so coding efficiency is critical here
6204 #------------------------------------------------------------------
6206 my $type = $item->[_TYPE_];
6207 my $is_blank = $type eq 'b';
6208 my $block_type = EMPTY_STRING;
6210 # Do not output consecutive blanks. This situation should have been
6211 # prevented earlier, but it is worth checking because later routines
6212 # make this assumption.
6213 if ( $is_blank && $KK_new && $rLL_new->[-1]->[_TYPE_] eq 'b' ) {
6217 # check for a sequenced item (i.e., container or ?/:)
6218 my $type_sequence = $item->[_TYPE_SEQUENCE_];
6219 my $token = $item->[_TOKEN_];
6220 if ($type_sequence) {
6222 if ( $is_opening_token{$token} ) {
6224 $K_opening_container->{$type_sequence} = $KK_new;
6225 $block_type = $rblock_type_of_seqno->{$type_sequence};
6227 # Fix for case b1100: Count a line ending in ', [' as having
6228 # a line-ending comma. Otherwise, these commas can be hidden
6229 # with something like --opening-square-bracket-right
6230 if ( $last_nonblank_code_type eq ','
6231 && $Ktoken_vars == $Klast_old_code
6232 && $Ktoken_vars > $Kfirst_old )
6234 $rlec_count_by_seqno->{$type_sequence}++;
6237 if ( $last_nonblank_code_type eq '='
6238 || $last_nonblank_code_type eq '=>' )
6240 $ris_assigned_structure->{$type_sequence} =
6241 $last_nonblank_code_type;
6244 my $seqno_parent = $seqno_stack{ $depth_next - 1 };
6245 $seqno_parent = SEQ_ROOT unless defined($seqno_parent);
6246 push @{ $rchildren_of_seqno->{$seqno_parent} }, $type_sequence;
6247 $rparent_of_seqno->{$type_sequence} = $seqno_parent;
6248 $seqno_stack{$depth_next} = $type_sequence;
6249 $K_old_opening_by_seqno{$type_sequence} = $Ktoken_vars;
6252 if ( $depth_next > $depth_next_max ) {
6253 $depth_next_max = $depth_next;
6256 elsif ( $is_closing_token{$token} ) {
6258 $K_closing_container->{$type_sequence} = $KK_new;
6259 $block_type = $rblock_type_of_seqno->{$type_sequence};
6261 # Do not include terminal commas in counts
6262 if ( $last_nonblank_code_type eq ','
6263 || $last_nonblank_code_type eq '=>' )
6265 my $seqno = $seqno_stack{ $depth_next - 1 };
6267 $rtype_count_by_seqno->{$seqno}
6268 ->{$last_nonblank_code_type}--;
6270 if ( $Ktoken_vars == $Kfirst_old
6271 && $last_nonblank_code_type eq ','
6272 && $rlec_count_by_seqno->{$seqno} )
6274 $rlec_count_by_seqno->{$seqno}--;
6279 # Update the stack...
6284 # For ternary, note parent but do not include as child
6285 my $seqno_parent = $seqno_stack{ $depth_next - 1 };
6286 $seqno_parent = SEQ_ROOT unless defined($seqno_parent);
6287 $rparent_of_seqno->{$type_sequence} = $seqno_parent;
6289 # These are not yet used but could be useful
6290 if ( $token eq '?' ) {
6291 $K_opening_ternary->{$type_sequence} = $KK_new;
6293 elsif ( $token eq ':' ) {
6294 $K_closing_ternary->{$type_sequence} = $KK_new;
6298 # We really shouldn't arrive here, just being cautious:
6299 # The only sequenced types output by the tokenizer are the
6300 # opening & closing containers and the ternary types. Each
6301 # of those was checked above. So we would only get here
6302 # if the tokenizer has been changed to mark some other
6303 # tokens with sequence numbers.
6306 "Unexpected token type with sequence number: type='$type', seqno='$type_sequence'"
6313 # Find the length of this token. Later it may be adjusted if phantom
6314 # or ignoring side comment lengths.
6317 ? $length_function->($token)
6321 my $is_comment = $type eq '#';
6324 # trim comments if necessary
6325 my $ord = ord( substr( $token, -1, 1 ) );
6328 && ( $ord < ORD_PRINTABLE_MIN
6329 || $ord > ORD_PRINTABLE_MAX )
6330 && $token =~ s/\s+$//
6333 $token_length = $length_function->($token);
6334 $item->[_TOKEN_] = $token;
6337 # Mark length of side comments as just 1 if sc lengths are ignored
6338 if ( $rOpts_ignore_side_comment_lengths
6339 && ( !$CODE_type || $CODE_type eq 'HSC' ) )
6343 my $seqno = $seqno_stack{ $depth_next - 1 };
6344 if ( defined($seqno)
6345 && !$ris_permanently_broken->{$seqno} )
6347 $set_permanently_broken->($seqno);
6351 $item->[_TOKEN_LENGTH_] = $token_length;
6353 # and update the cumulative length
6354 $cumulative_length += $token_length;
6356 # Save the length sum to just AFTER this token
6357 $item->[_CUMULATIVE_LENGTH_] = $cumulative_length;
6359 if ( !$is_blank && !$is_comment ) {
6361 # Remember the most recent two non-blank, non-comment tokens.
6362 # NOTE: the phantom semicolon code may change the output stack
6363 # without updating these values. Phantom semicolons are considered
6364 # the same as blanks for now, but future needs might change that.
6365 # See the related note in sub '$add_phantom_semicolon'.
6366 $last_last_nonblank_code_type = $last_nonblank_code_type;
6367 $last_last_nonblank_code_token = $last_nonblank_code_token;
6369 $last_nonblank_code_type = $type;
6370 $last_nonblank_code_token = $token;
6371 $last_nonblank_block_type = $block_type;
6373 # count selected types
6374 if ( $is_counted_type{$type} ) {
6375 my $seqno = $seqno_stack{ $depth_next - 1 };
6376 if ( defined($seqno) ) {
6377 $rtype_count_by_seqno->{$seqno}->{$type}++;
6379 # Count line-ending commas for -bbx
6380 if ( $type eq ',' && $Ktoken_vars == $Klast_old_code ) {
6381 $rlec_count_by_seqno->{$seqno}++;
6384 # Remember index of first here doc target
6385 if ( $type eq 'h' && !$K_first_here_doc_by_seqno{$seqno} ) {
6386 $K_first_here_doc_by_seqno{$seqno} = $KK_new;
6392 # For reference, here is how to get the parent sequence number.
6393 # This is not used because it is slower than finding it on the fly
6394 # in sub parent_seqno_by_K:
6396 # my $seqno_parent =
6397 # $type_sequence && $is_opening_token{$token}
6398 # ? $seqno_stack{ $depth_next - 2 }
6399 # : $seqno_stack{ $depth_next - 1 };
6400 # my $KK = @{$rLL_new};
6401 # $rseqno_of_parent_by_K->{$KK} = $seqno_parent;
6403 # and finally, add this item to the new array
6404 push @{$rLL_new}, $item;
6408 my $store_token_and_space = sub {
6409 my ( $item, $want_space ) = @_;
6411 # store a token with preceding space if requested and needed
6413 # First store the space
6416 && $rLL_new->[-1]->[_TYPE_] ne 'b'
6417 && $rOpts_add_whitespace )
6419 my $rcopy = [ @{$item} ];
6420 $rcopy->[_TYPE_] = 'b';
6421 $rcopy->[_TOKEN_] = SPACE;
6422 $rcopy->[_TYPE_SEQUENCE_] = EMPTY_STRING;
6424 $rcopy->[_LINE_INDEX_] =
6425 $rLL_new->[-1]->[_LINE_INDEX_];
6427 # Patch 23-Jan-2021 to fix -lp blinkers:
6428 # The level and ci_level of newly created spaces should be the same
6429 # as the previous token. Otherwise the coding for the -lp option
6430 # can create a blinking state in some rare cases.
6432 $rLL_new->[-1]->[_LEVEL_];
6433 $rcopy->[_CI_LEVEL_] =
6434 $rLL_new->[-1]->[_CI_LEVEL_];
6436 $store_token->($rcopy);
6440 $store_token->($item);
6444 my $add_phantom_semicolon = sub {
6448 my $Kp = $self->K_previous_nonblank( undef, $rLL_new );
6449 return unless ( defined($Kp) );
6451 # we are only adding semicolons for certain block types
6452 my $type_sequence = $rLL->[$KK]->[_TYPE_SEQUENCE_];
6453 return unless ($type_sequence);
6454 my $block_type = $rblock_type_of_seqno->{$type_sequence};
6455 return unless ($block_type);
6457 unless ( $ok_to_add_semicolon_for_block_type{$block_type}
6458 || $block_type =~ /^(sub|package)/
6459 || $block_type =~ /^\w+\:$/ );
6461 my $type_p = $rLL_new->[$Kp]->[_TYPE_];
6462 my $token_p = $rLL_new->[$Kp]->[_TOKEN_];
6463 my $type_sequence_p = $rLL_new->[$Kp]->[_TYPE_SEQUENCE_];
6465 # Do not add a semicolon if...
6469 # it would follow a comment (and be isolated)
6472 # it follows a code block ( because they are not always wanted
6473 # there and may add clutter)
6474 || $type_sequence_p && $rblock_type_of_seqno->{$type_sequence_p}
6476 # it would follow a label
6479 # it would be inside a 'format' statement (and cause syntax error)
6481 && $token_p =~ /format/ )
6485 # Do not add a semicolon if it would impede a weld with an immediately
6486 # following closing token...like this
6488 # ^--No semicolon can go here
6490 # look at the previous token... note use of the _NEW rLL array here,
6491 # but sequence numbers are invariant.
6492 my $seqno_inner = $rLL_new->[$Kp]->[_TYPE_SEQUENCE_];
6494 # If it is also a CLOSING token we have to look closer...
6497 && $is_closing_token{$token_p}
6499 # we only need to look if there is just one inner container..
6500 && defined( $rchildren_of_seqno->{$type_sequence} )
6501 && @{ $rchildren_of_seqno->{$type_sequence} } == 1
6505 # Go back and see if the corresponding two OPENING tokens are also
6506 # together. Note that we are using the OLD K indexing here:
6507 my $K_outer_opening = $K_old_opening_by_seqno{$type_sequence};
6508 if ( defined($K_outer_opening) ) {
6509 my $K_nxt = $self->K_next_nonblank($K_outer_opening);
6510 if ( defined($K_nxt) ) {
6511 my $seqno_nxt = $rLL->[$K_nxt]->[_TYPE_SEQUENCE_];
6513 # Is the next token after the outer opening the same as
6514 # our inner closing (i.e. same sequence number)?
6515 # If so, do not insert a semicolon here.
6516 return if ( $seqno_nxt && $seqno_nxt == $seqno_inner );
6521 # We will insert an empty semicolon here as a placeholder. Later, if
6522 # it becomes the last token on a line, we will bring it to life. The
6523 # advantage of doing this is that (1) we just have to check line
6524 # endings, and (2) the phantom semicolon has zero width and therefore
6525 # won't cause needless breaks of one-line blocks.
6527 if ( $rLL_new->[$Ktop]->[_TYPE_] eq 'b'
6528 && $want_left_space{';'} == WS_NO )
6531 # convert the blank into a semicolon..
6532 # be careful: we are working on the new stack top
6533 # on a token which has been stored.
6534 my $rcopy = copy_token_as_type( $rLL_new->[$Ktop], 'b', SPACE );
6536 # Convert the existing blank to:
6537 # a phantom semicolon for one_line_block option = 0 or 1
6538 # a real semicolon for one_line_block option = 2
6539 my $tok = EMPTY_STRING;
6541 if ( $rOpts_one_line_block_semicolons == 2 ) {
6546 $rLL_new->[$Ktop]->[_TOKEN_] = $tok;
6547 $rLL_new->[$Ktop]->[_TOKEN_LENGTH_] = $len_tok;
6548 $rLL_new->[$Ktop]->[_TYPE_] = ';';
6550 # NOTE: we are changing the output stack without updating variables
6551 # $last_nonblank_code_type, etc. Future needs might require that
6552 # those variables be updated here. For now, it seems ok to skip
6555 # Save list of new K indexes of phantom semicolons.
6556 # This will be needed if we want to undo them for iterations in
6558 push @{$rK_phantom_semicolons}, @{$rLL_new} - 1;
6560 # Then store a new blank
6561 $store_token->($rcopy);
6565 # Patch for issue c078: keep line indexes in order. If the top
6566 # token is a space that we are keeping (due to '-wls=';') then
6567 # we have to check that old line indexes stay in order.
6569 # instances in which side comments have been deleted and converted
6570 # into blanks, we may have filtered down multiple blanks into just
6571 # one. In that case the top blank may have a higher line number
6572 # than the previous nonblank token. Although the line indexes of
6573 # blanks are not really significant, we need to keep them in order
6574 # in order to pass error checks.
6575 if ( $rLL_new->[$Ktop]->[_TYPE_] eq 'b' ) {
6576 my $old_top_ix = $rLL_new->[$Ktop]->[_LINE_INDEX_];
6577 my $new_top_ix = $rLL_new->[$Kp]->[_LINE_INDEX_];
6578 if ( $new_top_ix < $old_top_ix ) {
6579 $rLL_new->[$Ktop]->[_LINE_INDEX_] = $new_top_ix;
6584 copy_token_as_type( $rLL_new->[$Kp], ';', EMPTY_STRING );
6585 $store_token->($rcopy);
6586 push @{$rK_phantom_semicolons}, @{$rLL_new} - 1;
6593 # Check that a quote looks okay
6594 # This sub works but needs to by sync'd with the log file output
6595 # before it can be used.
6596 my ( $KK, $Kfirst, $line_number ) = @_;
6597 my $token = $rLL->[$KK]->[_TOKEN_];
6598 $self->note_embedded_tab($line_number) if ( $token =~ "\t" );
6600 # The remainder of this routine looks for something like
6601 # '$var = s/xxx/yyy/;'
6602 # in case it should have been '$var =~ s/xxx/yyy/;'
6604 # Start by looking for a token beginning with one of: s y m / tr
6606 unless ( $is_s_y_m_slash{ substr( $token, 0, 1 ) }
6607 || substr( $token, 0, 2 ) eq 'tr' );
6609 # ... and preceded by one of: = == !=
6610 my $Kp = $self->K_previous_nonblank( undef, $rLL_new );
6611 return unless ( defined($Kp) );
6612 my $previous_nonblank_type = $rLL_new->[$Kp]->[_TYPE_];
6613 return unless ( $is_unexpected_equals{$previous_nonblank_type} );
6614 my $previous_nonblank_token = $rLL_new->[$Kp]->[_TOKEN_];
6616 my $previous_nonblank_type_2 = 'b';
6617 my $previous_nonblank_token_2 = EMPTY_STRING;
6618 my $Kpp = $self->K_previous_nonblank( $Kp, $rLL_new );
6619 if ( defined($Kpp) ) {
6620 $previous_nonblank_type_2 = $rLL_new->[$Kpp]->[_TYPE_];
6621 $previous_nonblank_token_2 = $rLL_new->[$Kpp]->[_TOKEN_];
6624 my $next_nonblank_token = EMPTY_STRING;
6626 if ( $Kn <= $Kmax && $rLL->[$Kn]->[_TYPE_] eq 'b' ) { $Kn += 1 }
6627 if ( $Kn <= $Kmax ) {
6628 $next_nonblank_token = $rLL->[$Kn]->[_TOKEN_];
6631 my $token_0 = $rLL->[$Kfirst]->[_TOKEN_];
6632 my $type_0 = $rLL->[$Kfirst]->[_TYPE_];
6635 ##$token =~ /^(s|tr|y|m|\/)/
6636 ##&& $previous_nonblank_token =~ /^(=|==|!=)$/
6639 # preceded by simple scalar
6640 && $previous_nonblank_type_2 eq 'i'
6641 && $previous_nonblank_token_2 =~ /^\$/
6643 # followed by some kind of termination
6644 # (but give complaint if we can not see far enough ahead)
6645 && $next_nonblank_token =~ /^[; \)\}]$/
6647 # scalar is not declared
6648 ## =~ /^(my|our|local)$/
6649 && !( $type_0 eq 'k' && $is_my_our_local{$token_0} )
6652 my $lno = 1 + $rLL_new->[$Kp]->[_LINE_INDEX_];
6653 my $guess = substr( $previous_nonblank_token, 0, 1 ) . '~';
6655 "Line $lno: Note: be sure you want '$previous_nonblank_token' instead of '$guess' here\n"
6661 #-------------------------------------------
6662 # Main loop to respace all lines of the file
6663 #-------------------------------------------
6666 foreach my $line_of_tokens ( @{$rlines} ) {
6668 my $input_line_number = $line_of_tokens->{_line_number};
6669 my $last_line_type = $line_type;
6670 $line_type = $line_of_tokens->{_line_type};
6671 next unless ( $line_type eq 'CODE' );
6672 my $last_CODE_type = $CODE_type;
6673 $CODE_type = $line_of_tokens->{_code_type};
6674 my $rK_range = $line_of_tokens->{_rK_range};
6675 my ( $Kfirst, $Klast ) = @{$rK_range};
6676 next unless defined($Kfirst);
6677 ( $Kfirst_old, $Klast_old ) = ( $Kfirst, $Klast );
6678 $Klast_old_code = $Klast_old;
6680 # Be sure an old K value is defined for sub $store_token
6681 $Ktoken_vars = $Kfirst;
6683 # Check for correct sequence of token indexes...
6684 # An error here means that sub write_line() did not correctly
6685 # package the tokenized lines as it received them. If we
6686 # get a fault here it has not output a continuous sequence
6687 # of K values. Or a line of CODE may have been mis-marked as
6688 # something else. There is no good way to continue after such an
6690 # FIXME: Calling Fault will produce zero output; it would be best to
6691 # find a way to dump the input file.
6692 if ( defined($last_K_out) ) {
6693 if ( $Kfirst != $last_K_out + 1 ) {
6695 "Program Bug: last K out was $last_K_out but Kfirst=$Kfirst"
6701 # The first token should always have been given index 0 by sub
6703 if ( $Kfirst != 0 ) {
6704 Fault("Program Bug: first K is $Kfirst but should be 0");
6707 $last_K_out = $Klast;
6709 # Handle special lines of code
6710 if ( $CODE_type && $CODE_type ne 'NIN' && $CODE_type ne 'VER' ) {
6712 # CODE_types are as follows.
6714 # 'VB' = Verbatim - line goes out verbatim
6715 # 'FS' = Format Skipping - line goes out verbatim, no blanks
6716 # 'IO' = Indent Only - only indentation may be changed
6717 # 'NIN' = No Internal Newlines - line does not get broken
6718 # 'HSC'=Hanging Side Comment - fix this hanging side comment
6719 # 'BC'=Block Comment - an ordinary full line comment
6720 # 'SBC'=Static Block Comment - a block comment which does not get
6722 # 'SBCX'=Static Block Comment Without Leading Space
6723 # 'VER'=VERSION statement
6724 # '' or (undefined) - no restructions
6726 # For a hanging side comment we insert an empty quote before
6727 # the comment so that it becomes a normal side comment and
6728 # will be aligned by the vertical aligner
6729 if ( $CODE_type eq 'HSC' ) {
6731 # Safety Check: This must be a line with one token (a comment)
6732 my $rvars_Kfirst = $rLL->[$Kfirst];
6733 if ( $Kfirst == $Klast && $rvars_Kfirst->[_TYPE_] eq '#' ) {
6735 # Note that even if the flag 'noadd-whitespace' is set, we
6736 # will make an exception here and allow a blank to be
6737 # inserted to push the comment to the right. We can think
6738 # of this as an adjustment of indentation rather than
6739 # whitespace between tokens. This will also prevent the
6740 # hanging side comment from getting converted to a block
6741 # comment if whitespace gets deleted, as for example with
6742 # the -extrude and -mangle options.
6744 copy_token_as_type( $rvars_Kfirst, 'q', EMPTY_STRING );
6745 $store_token->($rcopy);
6746 $rcopy = copy_token_as_type( $rvars_Kfirst, 'b', SPACE );
6747 $store_token->($rcopy);
6748 $store_token->($rvars_Kfirst);
6753 # This line was mis-marked by sub scan_comment. Catch in
6754 # DEVEL_MODE, otherwise try to repair and keep going.
6756 "Program bug. A hanging side comment has been mismarked"
6759 $CODE_type = EMPTY_STRING;
6760 $line_of_tokens->{_code_type} = $CODE_type;
6764 if ( $CODE_type eq 'BL' ) {
6765 my $seqno = $seqno_stack{ $depth_next - 1 };
6766 if ( defined($seqno)
6767 && !$ris_permanently_broken->{$seqno}
6768 && $rOpts_maximum_consecutive_blank_lines )
6770 $set_permanently_broken->($seqno);
6774 # Copy tokens unchanged
6775 foreach my $KK ( $Kfirst .. $Klast ) {
6777 $store_token->( $rLL->[$KK] );
6782 # Handle normal line..
6784 # Define index of last token before any side comment for comma counts
6785 my $type_end = $rLL->[$Klast_old_code]->[_TYPE_];
6786 if ( ( $type_end eq '#' || $type_end eq 'b' )
6787 && $Klast_old_code > $Kfirst_old )
6790 if ( $rLL->[$Klast_old_code]->[_TYPE_] eq 'b'
6791 && $Klast_old_code > $Kfirst_old )
6797 # Insert any essential whitespace between lines
6798 # if last line was normal CODE.
6799 # Patch for rt #125012: use K_previous_code rather than '_nonblank'
6800 # because comments may disappear.
6801 if ( $last_line_type eq 'CODE' ) {
6802 my $type_next = $rLL->[$Kfirst]->[_TYPE_];
6803 my $token_next = $rLL->[$Kfirst]->[_TOKEN_];
6805 is_essential_whitespace(
6806 $last_last_nonblank_code_token,
6807 $last_last_nonblank_code_type,
6808 $last_nonblank_code_token,
6809 $last_nonblank_code_type,
6816 # Copy this first token as blank, but use previous line number
6817 my $rcopy = copy_token_as_type( $rLL->[$Kfirst], 'b', SPACE );
6818 $rcopy->[_LINE_INDEX_] =
6819 $rLL_new->[-1]->[_LINE_INDEX_];
6821 # The level and ci_level of newly created spaces should be the
6822 # same as the previous token. Otherwise blinking states can
6823 # be created if the -lp mode is used. See similar coding in
6824 # sub 'store_token_and_space'. Fixes cases b1109 b1110.
6826 $rLL_new->[-1]->[_LEVEL_];
6827 $rcopy->[_CI_LEVEL_] =
6828 $rLL_new->[-1]->[_CI_LEVEL_];
6830 $store_token->($rcopy);
6834 #-------------------------------------------------------
6835 # Loop to copy all tokens on this line, with any changes
6836 #-------------------------------------------------------
6838 foreach my $KK ( $Kfirst .. $Klast ) {
6840 $rtoken_vars = $rLL->[$KK];
6841 my $token = $rtoken_vars->[_TOKEN_];
6842 my $type = $rtoken_vars->[_TYPE_];
6843 my $last_type_sequence = $type_sequence;
6844 $type_sequence = $rtoken_vars->[_TYPE_SEQUENCE_];
6846 # Handle a blank space ...
6847 if ( $type eq 'b' ) {
6849 # Delete it if not wanted by whitespace rules
6850 # or we are deleting all whitespace
6851 # Note that whitespace flag is a flag indicating whether a
6852 # white space BEFORE the token is needed
6853 next if ( $KK >= $Klast ); # skip terminal blank
6854 my $Knext = $KK + 1;
6856 if ($rOpts_freeze_whitespace) {
6857 $store_token->($rtoken_vars);
6861 my $ws = $rwhitespace_flags->[$Knext];
6863 || $rOpts_delete_old_whitespace )
6866 my $token_next = $rLL->[$Knext]->[_TOKEN_];
6867 my $type_next = $rLL->[$Knext]->[_TYPE_];
6869 my $do_not_delete = is_essential_whitespace(
6870 $last_last_nonblank_code_token,
6871 $last_last_nonblank_code_type,
6872 $last_nonblank_code_token,
6873 $last_nonblank_code_type,
6878 # Note that repeated blanks will get filtered out here
6879 next unless ($do_not_delete);
6882 # make it just one character
6883 $rtoken_vars->[_TOKEN_] = SPACE;
6884 $store_token->($rtoken_vars);
6888 # Handle a nonblank token...
6890 if ($type_sequence) {
6892 # Insert a tentative missing semicolon if the next token is
6893 # a closing block brace
6898 # not preceded by a ';'
6899 && $last_nonblank_code_type ne ';'
6901 # and this is not a VERSION stmt (is all one line, we
6902 # are not inserting semicolons on one-line blocks)
6903 && $CODE_type ne 'VER'
6905 # and we are allowed to add semicolons
6906 && $rOpts->{'add-semicolons'}
6909 $add_phantom_semicolon->($KK);
6913 # Modify certain tokens here for whitespace
6914 # The following is not yet done, but could be:
6916 # ( $type =~ /^[wit]$/ )
6917 elsif ( $is_wit{$type} ) {
6919 # change '$ var' to '$var' etc
6920 # change '@ ' to '@'
6921 # Examples: <<snippets/space1.in>>
6922 my $ord = ord( substr( $token, 1, 1 ) );
6925 # quick test for possible blank at second char
6926 $ord > 0 && ( $ord < ORD_PRINTABLE_MIN
6927 || $ord > ORD_PRINTABLE_MAX )
6930 my ( $sigil, $word ) = split /\s+/, $token, 2;
6932 # $sigil =~ /^[\$\&\%\*\@]$/ )
6933 if ( $is_sigil{$sigil} ) {
6935 $token .= $word if ( defined($word) ); # fix c104
6936 $rtoken_vars->[_TOKEN_] = $token;
6940 # Split identifiers with leading arrows, inserting blanks
6941 # if necessary. It is easier and safer here than in the
6942 # tokenizer. For example '->new' becomes two tokens, '->'
6943 # and 'new' with a possible blank between.
6945 # Note: there is a related patch in sub set_whitespace_flags
6946 elsif (length($token) > 2
6947 && substr( $token, 0, 2 ) eq '->'
6948 && $token =~ /^\-\>(.*)$/
6952 my $token_save = $1;
6953 my $type_save = $type;
6955 # Change '-> new' to '->new'
6956 $token_save =~ s/^\s+//g;
6958 # store a blank to left of arrow if necessary
6959 my $Kprev = $self->K_previous_nonblank($KK);
6960 if ( defined($Kprev)
6961 && $rLL->[$Kprev]->[_TYPE_] ne 'b'
6962 && $rOpts_add_whitespace
6963 && $want_left_space{'->'} == WS_YES )
6966 copy_token_as_type( $rtoken_vars, 'b', SPACE );
6967 $store_token->($rcopy);
6970 # then store the arrow
6971 my $rcopy = copy_token_as_type( $rtoken_vars, '->', '->' );
6972 $store_token->($rcopy);
6974 # store a blank after the arrow if requested
6975 # added for issue git #33
6976 if ( $want_right_space{'->'} == WS_YES ) {
6978 copy_token_as_type( $rtoken_vars, 'b', SPACE );
6979 $store_token->($rcopy_b);
6982 # then reset the current token to be the remainder,
6983 # and reset the whitespace flag according to the arrow
6984 $token = $rtoken_vars->[_TOKEN_] = $token_save;
6985 $type = $rtoken_vars->[_TYPE_] = $type_save;
6986 $store_token->($rtoken_vars);
6990 # Trim certain spaces in identifiers
6991 if ( $type eq 'i' ) {
6995 substr( $token, 0, 3 ) eq 'sub'
6996 || $rOpts_sub_alias_list
6998 && $token =~ /$SUB_PATTERN/
7002 # -spp = 0 : no space before opening prototype paren
7003 # -spp = 1 : stable (follow input spacing)
7004 # -spp = 2 : always space before opening prototype paren
7005 my $spp = $rOpts->{'space-prototype-paren'};
7006 if ( defined($spp) ) {
7007 if ( $spp == 0 ) { $token =~ s/\s+\(/\(/; }
7008 elsif ( $spp == 2 ) { $token =~ s/\(/ (/; }
7011 # one space max, and no tabs
7012 $token =~ s/\s+/ /g;
7013 $rtoken_vars->[_TOKEN_] = $token;
7016 # clean up spaces in package identifiers, like
7017 # "package Bob::Dog;"
7018 elsif ( substr( $token, 0, 7 ) eq 'package'
7019 && $token =~ /^package\s/ )
7021 $token =~ s/\s+/ /g;
7022 $rtoken_vars->[_TOKEN_] = $token;
7025 # trim identifiers of trailing blanks which can occur
7026 # under some unusual circumstances, such as if the
7027 # identifier 'witch' has trailing blanks on input here:
7031 # () # prototype may be on new line ...
7033 my $ord_ch = ord( substr( $token, -1, 1 ) );
7036 # quick check for possible ending space
7037 $ord_ch > 0 && ( $ord_ch < ORD_PRINTABLE_MIN
7038 || $ord_ch > ORD_PRINTABLE_MAX )
7041 $token =~ s/\s+$//g;
7042 $rtoken_vars->[_TOKEN_] = $token;
7048 elsif ( $type eq ';' ) {
7050 # Remove unnecessary semicolons, but not after bare
7051 # blocks, where it could be unsafe if the brace is
7054 $rOpts->{'delete-semicolons'}
7057 $last_nonblank_block_type
7058 && $last_nonblank_code_type eq '}'
7060 $is_block_without_semicolon{
7061 $last_nonblank_block_type}
7062 || $last_nonblank_block_type =~ /$SUB_PATTERN/
7063 || $last_nonblank_block_type =~ /^\w+:$/
7066 || $last_nonblank_code_type eq ';'
7071 # This looks like a deletable semicolon, but even if a
7072 # semicolon can be deleted it is not necessarily best to do
7073 # so. We apply these additional rules for deletion:
7074 # - Always ok to delete a ';' at the end of a line
7075 # - Never delete a ';' before a '#' because it would
7076 # promote it to a block comment.
7077 # - If a semicolon is not at the end of line, then only
7078 # delete if it is followed by another semicolon or closing
7079 # token. This includes the comment rule. It may take
7080 # two passes to get to a final state, but it is a little
7081 # safer. For example, keep the first semicolon here:
7082 # eval { sub bubba { ok(0) }; ok(0) } || ok(1);
7083 # It is not required but adds some clarity.
7084 my $ok_to_delete = 1;
7085 if ( $KK < $Klast ) {
7086 my $Kn = $self->K_next_nonblank($KK);
7087 if ( defined($Kn) && $Kn <= $Klast ) {
7088 my $next_nonblank_token_type =
7089 $rLL->[$Kn]->[_TYPE_];
7090 $ok_to_delete = $next_nonblank_token_type eq ';'
7091 || $next_nonblank_token_type eq '}';
7095 # do not delete only nonblank token in a file
7097 my $Kp = $self->K_previous_code( undef, $rLL_new );
7098 my $Kn = $self->K_next_nonblank($KK);
7099 $ok_to_delete = defined($Kn) || defined($Kp);
7102 if ($ok_to_delete) {
7103 $self->note_deleted_semicolon($input_line_number);
7107 write_logfile_entry("Extra ';'\n");
7112 # Old patch to add space to something like "x10".
7113 # Note: This is now done in the Tokenizer, but this code remains
7115 elsif ( $type eq 'n' ) {
7116 if ( substr( $token, 0, 1 ) eq 'x' && $token =~ /^x\d+/ ) {
7118 $rtoken_vars->[_TOKEN_] = $token;
7121 Near line $input_line_number, Unexpected need to split a token '$token' - this should now be done by the Tokenizer
7127 # check for a qw quote
7128 elsif ( $type eq 'q' ) {
7130 # trim blanks from right of qw quotes
7131 # (To avoid trimming qw quotes use -ntqw; the tokenizer handles
7134 $rtoken_vars->[_TOKEN_] = $token;
7135 $self->note_embedded_tab($input_line_number)
7136 if ( $token =~ "\t" );
7137 $store_token_and_space->(
7138 $rtoken_vars, $rwhitespace_flags->[$KK] == WS_YES
7141 } ## end if ( $type eq 'q' )
7143 # change 'LABEL :' to 'LABEL:'
7144 elsif ( $type eq 'J' ) {
7146 $rtoken_vars->[_TOKEN_] = $token;
7149 # check a quote for problems
7150 elsif ( $type eq 'Q' ) {
7151 $check_Q->( $KK, $Kfirst, $input_line_number );
7154 # Store this token with possible previous blank
7155 if ( $rwhitespace_flags->[$KK] == WS_YES ) {
7156 $store_token_and_space->( $rtoken_vars, 1 );
7159 $store_token->($rtoken_vars);
7165 # Walk backwards through the tokens, making forward links to sequence items.
7166 if ( @{$rLL_new} ) {
7168 foreach my $KK ( reverse( 0 .. @{$rLL_new} - 1 ) ) {
7169 $rLL_new->[$KK]->[_KNEXT_SEQ_ITEM_] = $KNEXT;
7170 if ( $rLL_new->[$KK]->[_TYPE_SEQUENCE_] ) { $KNEXT = $KK }
7172 $self->[_K_first_seq_item_] = $KNEXT;
7175 # Find and remember lists by sequence number
7176 foreach my $seqno ( keys %{$K_opening_container} ) {
7177 my $K_opening = $K_opening_container->{$seqno};
7178 next unless defined($K_opening);
7180 # code errors may leave undefined closing tokens
7181 my $K_closing = $K_closing_container->{$seqno};
7182 next unless defined($K_closing);
7184 my $lx_open = $rLL_new->[$K_opening]->[_LINE_INDEX_];
7185 my $lx_close = $rLL_new->[$K_closing]->[_LINE_INDEX_];
7186 my $line_diff = $lx_close - $lx_open;
7187 $ris_broken_container->{$seqno} = $line_diff;
7189 # See if this is a list
7191 my $rtype_count = $rtype_count_by_seqno->{$seqno};
7193 my $comma_count = $rtype_count->{','};
7194 my $fat_comma_count = $rtype_count->{'=>'};
7195 my $semicolon_count = $rtype_count->{';'} || $rtype_count->{'f'};
7197 # We will define a list to be a container with one or more commas
7198 # and no semicolons. Note that we have included the semicolons
7199 # in a 'for' container in the semicolon count to keep c-style for
7200 # statements from being formatted as lists.
7201 if ( ( $comma_count || $fat_comma_count ) && !$semicolon_count ) {
7204 # We need to do one more check for a parenthesized list:
7205 # At an opening paren following certain tokens, such as 'if',
7206 # we do not want to format the contents as a list.
7207 if ( $rLL_new->[$K_opening]->[_TOKEN_] eq '(' ) {
7208 my $Kp = $self->K_previous_code( $K_opening, $rLL_new );
7209 if ( defined($Kp) ) {
7210 my $type_p = $rLL_new->[$Kp]->[_TYPE_];
7211 if ( $type_p eq 'k' ) {
7212 my $token_p = $rLL_new->[$Kp]->[_TOKEN_];
7213 $is_list = 0 if ( $is_nonlist_keyword{$token_p} );
7216 $is_list = 0 if ( $is_nonlist_type{$type_p} );
7223 # Look for a block brace marked as uncertain. If the tokenizer thinks
7224 # its guess is uncertain for the type of a brace following an unknown
7225 # bareword then it adds a trailing space as a signal. We can fix the
7226 # type here now that we have had a better look at the contents of the
7227 # container. This fixes case b1085. To find the corresponding code in
7228 # Tokenizer.pm search for 'b1085' with an editor.
7229 my $block_type = $rblock_type_of_seqno->{$seqno};
7230 if ( $block_type && substr( $block_type, -1, 1 ) eq SPACE ) {
7232 # Always remove the trailing space
7233 $block_type =~ s/\s+$//;
7235 # Try to filter out parenless sub calls
7236 my $Knn1 = $self->K_next_nonblank( $K_opening, $rLL_new );
7238 if ( defined($Knn1) ) {
7239 $Knn2 = $self->K_next_nonblank( $Knn1, $rLL_new );
7241 my $type_nn1 = defined($Knn1) ? $rLL_new->[$Knn1]->[_TYPE_] : 'b';
7242 my $type_nn2 = defined($Knn2) ? $rLL_new->[$Knn2]->[_TYPE_] : 'b';
7244 # if ( $type_nn1 =~ /^[wU]$/ && $type_nn2 =~ /^[wiqQGCZ]$/ ) {
7245 if ( $wU{$type_nn1} && $wiq{$type_nn2} ) {
7249 # Convert to a hash brace if it looks like it holds a list
7252 $block_type = EMPTY_STRING;
7254 $rLL_new->[$K_opening]->[_CI_LEVEL_] = 1;
7255 $rLL_new->[$K_closing]->[_CI_LEVEL_] = 1;
7258 $rblock_type_of_seqno->{$seqno} = $block_type;
7261 # Handle a list container
7262 if ( $is_list && !$block_type ) {
7263 $ris_list_by_seqno->{$seqno} = $seqno;
7264 my $seqno_parent = $rparent_of_seqno->{$seqno};
7266 while ( defined($seqno_parent) && $seqno_parent ne SEQ_ROOT ) {
7269 # for $rhas_list we need to save the minimum depth
7270 if ( !$rhas_list->{$seqno_parent}
7271 || $rhas_list->{$seqno_parent} > $depth )
7273 $rhas_list->{$seqno_parent} = $depth;
7277 $rhas_broken_list->{$seqno_parent} = 1;
7279 # Patch1: We need to mark broken lists with non-terminal
7280 # line-ending commas for the -bbx=2 parameter. This insures
7281 # that the list will stay broken. Otherwise the flag
7282 # -bbx=2 can be unstable. This fixes case b789 and b938.
7284 # Patch2: Updated to also require either one fat comma or
7285 # one more line-ending comma. Fixes cases b1069 b1070
7288 $rlec_count_by_seqno->{$seqno}
7289 && ( $rlec_count_by_seqno->{$seqno} > 1
7290 || $rtype_count_by_seqno->{$seqno}->{'=>'} )
7293 $rhas_broken_list_with_lec->{$seqno_parent} = 1;
7296 $seqno_parent = $rparent_of_seqno->{$seqno_parent};
7300 # Handle code blocks ...
7301 # The -lp option needs to know if a container holds a code block
7302 elsif ( $block_type && $rOpts_line_up_parentheses ) {
7303 my $seqno_parent = $rparent_of_seqno->{$seqno};
7304 while ( defined($seqno_parent) && $seqno_parent ne SEQ_ROOT ) {
7305 $rhas_code_block->{$seqno_parent} = 1;
7306 $rhas_broken_code_block->{$seqno_parent} = $line_diff;
7307 $seqno_parent = $rparent_of_seqno->{$seqno_parent};
7312 # Find containers with ternaries, needed for -lp formatting.
7313 foreach my $seqno ( keys %{$K_opening_ternary} ) {
7314 my $seqno_parent = $rparent_of_seqno->{$seqno};
7315 while ( defined($seqno_parent) && $seqno_parent ne SEQ_ROOT ) {
7316 $rhas_ternary->{$seqno_parent} = 1;
7317 $seqno_parent = $rparent_of_seqno->{$seqno_parent};
7321 # Turn off -lp for containers with here-docs with text within a container,
7322 # since they have their own fixed indentation. Fixes case b1081.
7323 if ($rOpts_line_up_parentheses) {
7324 foreach my $seqno ( keys %K_first_here_doc_by_seqno ) {
7325 my $Kh = $K_first_here_doc_by_seqno{$seqno};
7326 my $Kc = $K_closing_container->{$seqno};
7327 my $line_Kh = $rLL_new->[$Kh]->[_LINE_INDEX_];
7328 my $line_Kc = $rLL_new->[$Kc]->[_LINE_INDEX_];
7329 next if ( $line_Kh == $line_Kc );
7330 $ris_excluded_lp_container->{$seqno} = 1;
7334 # Set a flag to turn off -cab=3 in complex structures. Otherwise,
7335 # instability can occur. When it is overridden the behavior of the closest
7336 # match, -cab=2, will be used instead. This fixes cases b1096 b1113.
7337 if ( $rOpts_comma_arrow_breakpoints == 3 ) {
7338 foreach my $seqno ( keys %{$K_opening_container} ) {
7340 my $rtype_count = $rtype_count_by_seqno->{$seqno};
7341 next unless ( $rtype_count && $rtype_count->{'=>'} );
7343 # override -cab=3 if this contains a sub-list
7344 if ( $rhas_list->{$seqno} ) {
7345 $roverride_cab3->{$seqno} = 1;
7348 # or if this is a sub-list of its parent container
7350 my $seqno_parent = $rparent_of_seqno->{$seqno};
7351 if ( defined($seqno_parent)
7352 && $ris_list_by_seqno->{$seqno_parent} )
7354 $roverride_cab3->{$seqno} = 1;
7360 # Reset memory to be the new array
7361 $self->[_rLL_] = $rLL_new;
7363 if ( @{$rLL_new} ) { $Klimit = @{$rLL_new} - 1 }
7364 $self->[_Klimit_] = $Klimit;
7366 # During development, verify that the new array still looks okay.
7367 DEVEL_MODE && $self->check_token_array();
7369 # reset the token limits of each line
7370 $self->resync_lines_and_tokens();
7373 } ## end sub respace_tokens
7375 sub copy_token_as_type {
7377 # This provides a quick way to create a new token by
7378 # slightly modifying an existing token.
7379 my ( $rold_token, $type, $token ) = @_;
7380 if ( $type eq 'b' ) {
7381 $token = SPACE unless defined($token);
7383 elsif ( $type eq 'q' ) {
7384 $token = EMPTY_STRING unless defined($token);
7386 elsif ( $type eq '->' ) {
7387 $token = '->' unless defined($token);
7389 elsif ( $type eq ';' ) {
7390 $token = ';' unless defined($token);
7394 # Unexpected type ... this sub will work as long as both $token and
7395 # $type are defined, but we should catch any unexpected types during
7399 sub 'copy_token_as_type' received token type '$type' but expects just one of: 'b' 'q' '->' or ';'
7407 my @rnew_token = @{$rold_token};
7408 $rnew_token[_TYPE_] = $type;
7409 $rnew_token[_TOKEN_] = $token;
7410 $rnew_token[_TYPE_SEQUENCE_] = EMPTY_STRING;
7411 return \@rnew_token;
7412 } ## end sub copy_token_as_type
7414 sub Debug_dump_tokens {
7416 # a debug routine, not normally used
7417 my ( $self, $msg ) = @_;
7418 my $rLL = $self->[_rLL_];
7419 my $nvars = @{$rLL};
7420 print STDERR "$msg\n";
7421 print STDERR "ntokens=$nvars\n";
7422 print STDERR "K\t_TOKEN_\t_TYPE_\n";
7425 foreach my $item ( @{$rLL} ) {
7426 print STDERR "$K\t$item->[_TOKEN_]\t$item->[_TYPE_]\n";
7430 } ## end sub Debug_dump_tokens
7433 my ( $self, $KK, $rLL ) = @_;
7435 # return the index K of the next nonblank, non-comment token
7436 return unless ( defined($KK) && $KK >= 0 );
7438 # use the standard array unless given otherwise
7439 $rLL = $self->[_rLL_] unless ( defined($rLL) );
7442 while ( $Knnb < $Num ) {
7443 if ( !defined( $rLL->[$Knnb] ) ) {
7445 # We seem to have encountered a gap in our array.
7446 # This shouldn't happen because sub write_line() pushed
7447 # items into the $rLL array.
7448 Fault("Undefined entry for k=$Knnb") if (DEVEL_MODE);
7451 if ( $rLL->[$Knnb]->[_TYPE_] ne 'b'
7452 && $rLL->[$Knnb]->[_TYPE_] ne '#' )
7459 } ## end sub K_next_code
7461 sub K_next_nonblank {
7462 my ( $self, $KK, $rLL ) = @_;
7464 # return the index K of the next nonblank token, or
7465 # return undef if none
7466 return unless ( defined($KK) && $KK >= 0 );
7468 # The third arg allows this routine to be used on any array. This is
7469 # useful in sub respace_tokens when we are copying tokens from an old $rLL
7470 # to a new $rLL array. But usually the third arg will not be given and we
7471 # will just use the $rLL array in $self.
7472 $rLL = $self->[_rLL_] unless ( defined($rLL) );
7475 return unless ( $Knnb < $Num );
7476 return $Knnb if ( $rLL->[$Knnb]->[_TYPE_] ne 'b' );
7477 return unless ( ++$Knnb < $Num );
7478 return $Knnb if ( $rLL->[$Knnb]->[_TYPE_] ne 'b' );
7480 # Backup loop. Very unlikely to get here; it means we have neighboring
7481 # blanks in the token stream.
7483 while ( $Knnb < $Num ) {
7485 # Safety check, this fault shouldn't happen: The $rLL array is the
7486 # main array of tokens, so all entries should be used. It is
7487 # initialized in sub write_line, and then re-initialized by sub
7488 # $store_token() within sub respace_tokens. Tokens are pushed on
7489 # so there shouldn't be any gaps.
7490 if ( !defined( $rLL->[$Knnb] ) ) {
7491 Fault("Undefined entry for k=$Knnb") if (DEVEL_MODE);
7494 if ( $rLL->[$Knnb]->[_TYPE_] ne 'b' ) { return $Knnb }
7498 } ## end sub K_next_nonblank
7500 sub K_previous_code {
7502 # return the index K of the previous nonblank, non-comment token
7503 # Call with $KK=undef to start search at the top of the array
7504 my ( $self, $KK, $rLL ) = @_;
7506 # use the standard array unless given otherwise
7507 $rLL = $self->[_rLL_] unless ( defined($rLL) );
7509 if ( !defined($KK) ) { $KK = $Num }
7510 elsif ( $KK > $Num ) {
7512 # This fault can be caused by a programming error in which a bad $KK is
7513 # given. The caller should make the first call with KK_new=undef to
7516 "Program Bug: K_previous_nonblank_new called with K=$KK which exceeds $Num"
7521 while ( $Kpnb >= 0 ) {
7522 if ( $rLL->[$Kpnb]->[_TYPE_] ne 'b'
7523 && $rLL->[$Kpnb]->[_TYPE_] ne '#' )
7530 } ## end sub K_previous_code
7532 sub K_previous_nonblank {
7534 # return index of previous nonblank token before item K;
7535 # Call with $KK=undef to start search at the top of the array
7536 my ( $self, $KK, $rLL ) = @_;
7538 # use the standard array unless given otherwise
7539 $rLL = $self->[_rLL_] unless ( defined($rLL) );
7541 if ( !defined($KK) ) { $KK = $Num }
7542 elsif ( $KK > $Num ) {
7544 # This fault can be caused by a programming error in which a bad $KK is
7545 # given. The caller should make the first call with KK_new=undef to
7548 "Program Bug: K_previous_nonblank_new called with K=$KK which exceeds $Num"
7553 return unless ( $Kpnb >= 0 );
7554 return $Kpnb if ( $rLL->[$Kpnb]->[_TYPE_] ne 'b' );
7555 return unless ( --$Kpnb >= 0 );
7556 return $Kpnb if ( $rLL->[$Kpnb]->[_TYPE_] ne 'b' );
7558 # Backup loop. We should not get here unless some routine
7559 # slipped repeated blanks into the token stream.
7560 return unless ( --$Kpnb >= 0 );
7561 while ( $Kpnb >= 0 ) {
7562 if ( $rLL->[$Kpnb]->[_TYPE_] ne 'b' ) { return $Kpnb }
7566 } ## end sub K_previous_nonblank
7568 sub parent_seqno_by_K {
7570 # Return the sequence number of the parent container of token K, if any.
7572 my ( $self, $KK ) = @_;
7573 my $rLL = $self->[_rLL_];
7575 # The task is to jump forward to the next container token
7576 # and use the sequence number of either it or its parent.
7578 # For example, consider the following with seqno=5 of the '[' and ']'
7579 # being called with index K of the first token of each line:
7584 # sub { 99 }, 'do {&{%s} for 1,2}', # 5
7585 # '(&{})(&{})', undef, # 5
7586 # [ 2, 2, 0 ], 0 # 5
7589 # NOTE: The ending parent will be SEQ_ROOT for a balanced file. For
7590 # unbalanced files, last sequence number will either be undefined or it may
7591 # be at a deeper level. In either case we will just return SEQ_ROOT to
7592 # have a defined value and allow formatting to proceed.
7593 my $parent_seqno = SEQ_ROOT;
7594 my $type_sequence = $rLL->[$KK]->[_TYPE_SEQUENCE_];
7595 if ($type_sequence) {
7596 $parent_seqno = $self->[_rparent_of_seqno_]->{$type_sequence};
7599 my $Kt = $rLL->[$KK]->[_KNEXT_SEQ_ITEM_];
7600 if ( defined($Kt) ) {
7601 $type_sequence = $rLL->[$Kt]->[_TYPE_SEQUENCE_];
7602 my $type = $rLL->[$Kt]->[_TYPE_];
7604 # if next container token is closing, it is the parent seqno
7605 if ( $is_closing_type{$type} ) {
7606 $parent_seqno = $type_sequence;
7609 # otherwise we want its parent container
7611 $parent_seqno = $self->[_rparent_of_seqno_]->{$type_sequence};
7615 $parent_seqno = SEQ_ROOT unless ( defined($parent_seqno) );
7616 return $parent_seqno;
7617 } ## end sub parent_seqno_by_K
7619 sub is_in_block_by_i {
7620 my ( $self, $i ) = @_;
7623 # token at i is contained in a BLOCK
7624 # or is at root level
7625 # or there is some kind of error (i.e. unbalanced file)
7626 # returns false otherwise
7627 return 1 if ( $i < 0 ); # shouldn't happen, bad call
7628 my $seqno = $parent_seqno_to_go[$i];
7629 return 1 if ( !$seqno || $seqno eq SEQ_ROOT );
7630 return 1 if ( $self->[_rblock_type_of_seqno_]->{$seqno} );
7632 } ## end sub is_in_block_by_i
7634 sub is_in_list_by_i {
7635 my ( $self, $i ) = @_;
7637 # returns true if token at i is contained in a LIST
7638 # returns false otherwise
7639 my $seqno = $parent_seqno_to_go[$i];
7640 return unless ( $seqno && $seqno ne SEQ_ROOT );
7641 if ( $self->[_ris_list_by_seqno_]->{$seqno} ) {
7645 } ## end sub is_in_list_by_i
7649 # Return true if token K is in a list
7650 my ( $self, $KK ) = @_;
7652 my $parent_seqno = $self->parent_seqno_by_K($KK);
7653 return unless defined($parent_seqno);
7654 return $self->[_ris_list_by_seqno_]->{$parent_seqno};
7657 sub is_list_by_seqno {
7659 # Return true if the immediate contents of a container appears to be a
7661 my ( $self, $seqno ) = @_;
7662 return unless defined($seqno);
7663 return $self->[_ris_list_by_seqno_]->{$seqno};
7666 sub resync_lines_and_tokens {
7669 my $rLL = $self->[_rLL_];
7670 my $Klimit = $self->[_Klimit_];
7671 my $rlines = $self->[_rlines_];
7672 my @Krange_code_without_comments;
7673 my @Klast_valign_code;
7675 # Re-construct the arrays of tokens associated with the original input lines
7676 # since they have probably changed due to inserting and deleting blanks
7677 # and a few other tokens.
7679 # This is the next token and its line index:
7681 my $Kmax = defined($Klimit) ? $Klimit : -1;
7683 # Verify that old line indexes are in still order. If this error occurs,
7684 # check locations where sub 'respace_tokens' creates new tokens (like
7685 # blank spaces). It must have set a bad old line index.
7686 if ( DEVEL_MODE && defined($Klimit) ) {
7687 my $iline = $rLL->[0]->[_LINE_INDEX_];
7688 foreach my $KK ( 1 .. $Klimit ) {
7689 my $iline_last = $iline;
7690 $iline = $rLL->[$KK]->[_LINE_INDEX_];
7691 if ( $iline < $iline_last ) {
7693 my $token_m = $rLL->[$KK_m]->[_TOKEN_];
7694 my $token = $rLL->[$KK]->[_TOKEN_];
7695 my $type_m = $rLL->[$KK_m]->[_TYPE_];
7696 my $type = $rLL->[$KK]->[_TYPE_];
7698 Line indexes out of order at index K=$KK:
7699 at KK-1 =$KK_m: old line=$iline_last, type='$type_m', token='$token_m'
7700 at KK =$KK: old line=$iline, type='$type', token='$token',
7707 foreach my $line_of_tokens ( @{$rlines} ) {
7709 my $line_type = $line_of_tokens->{_line_type};
7710 if ( $line_type eq 'CODE' ) {
7712 # Get the old number of tokens on this line
7713 my $rK_range_old = $line_of_tokens->{_rK_range};
7714 my ( $Kfirst_old, $Klast_old ) = @{$rK_range_old};
7716 if ( defined($Kfirst_old) ) {
7717 $Kdiff_old = $Klast_old - $Kfirst_old;
7720 # Find the range of NEW K indexes for the line:
7721 # $Kfirst = index of first token on line
7722 # $Klast = index of last token on line
7723 my ( $Kfirst, $Klast );
7725 my $Knext_beg = $Knext; # this will be $Kfirst if we find tokens
7727 # Optimization: Although the actual K indexes may be completely
7728 # changed after respacing, the number of tokens on any given line
7729 # will often be nearly unchanged. So we will see if we can start
7730 # our search by guessing that the new line has the same number
7731 # of tokens as the old line.
7732 my $Knext_guess = $Knext + $Kdiff_old;
7733 if ( $Knext_guess > $Knext
7734 && $Knext_guess < $Kmax
7735 && $rLL->[$Knext_guess]->[_LINE_INDEX_] <= $iline )
7738 # the guess is good, so we can start our search here
7739 $Knext = $Knext_guess + 1;
7742 while ($Knext <= $Kmax
7743 && $rLL->[$Knext]->[_LINE_INDEX_] <= $iline )
7748 if ( $Knext > $Knext_beg ) {
7750 $Klast = $Knext - 1;
7752 # Delete any terminal blank token
7753 if ( $rLL->[$Klast]->[_TYPE_] eq 'b' ) { $Klast -= 1 }
7755 if ( $Klast < $Knext_beg ) {
7760 $Kfirst = $Knext_beg;
7762 # Save ranges of non-comment code. This will be used by
7763 # sub keep_old_line_breaks.
7764 if ( $rLL->[$Kfirst]->[_TYPE_] ne '#' ) {
7765 push @Krange_code_without_comments, [ $Kfirst, $Klast ];
7768 # Only save ending K indexes of code types which are blank
7769 # or 'VER'. These will be used for a convergence check.
7770 # See related code in sub 'convey_batch_to_vertical_aligner'
7771 my $CODE_type = $line_of_tokens->{_code_type};
7773 || $CODE_type eq 'VER' )
7775 push @Klast_valign_code, $Klast;
7780 # It is only safe to trim the actual line text if the input
7781 # line had a terminal blank token. Otherwise, we may be
7783 if ( $line_of_tokens->{_ended_in_blank_token} ) {
7784 $line_of_tokens->{_line_text} =~ s/\s+$//;
7786 $line_of_tokens->{_rK_range} = [ $Kfirst, $Klast ];
7788 # Deleting semicolons can create new empty code lines
7789 # which should be marked as blank
7790 if ( !defined($Kfirst) ) {
7791 my $CODE_type = $line_of_tokens->{_code_type};
7792 if ( !$CODE_type ) {
7793 $line_of_tokens->{_code_type} = 'BL';
7799 # There shouldn't be any nodes beyond the last one. This routine is
7800 # relinking lines and tokens after the tokens have been respaced. A fault
7801 # here indicates some kind of bug has been introduced into the above loops.
7802 # There is not good way to keep going; we better stop here.
7803 # FIXME: This will produce zero output. it would be best to find a way to
7804 # dump the input file.
7805 if ( $Knext <= $Kmax ) {
7807 Fault("unexpected tokens at end of file when reconstructing lines");
7809 $self->[_rKrange_code_without_comments_] = \@Krange_code_without_comments;
7811 # Setup the convergence test in the FileWriter based on line-ending indexes
7812 my $file_writer_object = $self->[_file_writer_object_];
7813 $file_writer_object->setup_convergence_test( \@Klast_valign_code );
7815 # Mark essential old breakpoints if combination -iob -lp is used. These
7816 # two options do not work well together, but we can avoid turning -iob off
7817 # by ignoring -iob at certain essential line breaks.
7818 # Fixes cases b1021 b1023 b1034 b1048 b1049 b1050 b1056 b1058
7819 if ( $rOpts_ignore_old_breakpoints && $rOpts_line_up_parentheses ) {
7820 my %is_assignment_or_fat_comma = %is_assignment;
7821 $is_assignment_or_fat_comma{'=>'} = 1;
7822 my $ris_essential_old_breakpoint =
7823 $self->[_ris_essential_old_breakpoint_];
7824 my ( $Kfirst, $Klast );
7825 foreach my $line_of_tokens ( @{$rlines} ) {
7826 my $line_type = $line_of_tokens->{_line_type};
7827 if ( $line_type ne 'CODE' ) {
7828 ( $Kfirst, $Klast ) = ( undef, undef );
7831 my ( $Kfirst_prev, $Klast_prev ) = ( $Kfirst, $Klast );
7832 ( $Kfirst, $Klast ) = @{ $line_of_tokens->{_rK_range} };
7834 next unless defined($Klast_prev);
7835 next unless defined($Kfirst);
7836 my $type_last = $rLL->[$Klast_prev]->[_TOKEN_];
7837 my $type_first = $rLL->[$Kfirst]->[_TOKEN_];
7839 unless ( $is_assignment_or_fat_comma{$type_last}
7840 || $is_assignment_or_fat_comma{$type_first} );
7841 $ris_essential_old_breakpoint->{$Klast_prev} = 1;
7845 } ## end sub resync_lines_and_tokens
7847 sub keep_old_line_breaks {
7849 # Called once per file to find and mark any old line breaks which
7850 # should be kept. We will be translating the input hashes into
7853 # A flag is set as follows:
7854 # = 1 make a hard break (flush the current batch)
7855 # best for something like leading commas (-kbb=',')
7856 # = 2 make a soft break (keep building current batch)
7857 # best for something like leading ->
7861 my $rLL = $self->[_rLL_];
7862 my $rKrange_code_without_comments =
7863 $self->[_rKrange_code_without_comments_];
7864 my $rbreak_before_Kfirst = $self->[_rbreak_before_Kfirst_];
7865 my $rbreak_after_Klast = $self->[_rbreak_after_Klast_];
7866 my $rwant_container_open = $self->[_rwant_container_open_];
7867 my $K_opening_container = $self->[_K_opening_container_];
7868 my $ris_broken_container = $self->[_ris_broken_container_];
7869 my $ris_list_by_seqno = $self->[_ris_list_by_seqno_];
7871 # This code moved here from sub break_lists to fix b1120
7872 if ( $rOpts->{'break-at-old-method-breakpoints'} ) {
7873 foreach my $item ( @{$rKrange_code_without_comments} ) {
7874 my ( $Kfirst, $Klast ) = @{$item};
7875 my $type = $rLL->[$Kfirst]->[_TYPE_];
7876 my $token = $rLL->[$Kfirst]->[_TOKEN_];
7878 # leading '->' use a value of 2 which causes a soft
7879 # break rather than a hard break
7880 if ( $type eq '->' ) {
7881 $rbreak_before_Kfirst->{$Kfirst} = 2;
7884 # leading ')->' use a special flag to insure that both
7885 # opening and closing parens get opened
7886 # Fix for b1120: only for parens, not braces
7887 elsif ( $token eq ')' ) {
7888 my $Kn = $self->K_next_nonblank($Kfirst);
7890 unless ( defined($Kn)
7892 && $rLL->[$Kn]->[_TYPE_] eq '->' );
7893 my $seqno = $rLL->[$Kfirst]->[_TYPE_SEQUENCE_];
7894 next unless ($seqno);
7896 # Note: in previous versions there was a fix here to avoid
7897 # instability between conflicting -bom and -pvt or -pvtc flags.
7898 # The fix skipped -bom for a small line difference. But this
7899 # was troublesome, and instead the fix has been moved to
7900 # sub set_vertical_tightness_flags where priority is given to
7901 # the -bom flag over -pvt and -pvtc flags. Both opening and
7902 # closing paren flags are involved because even though -bom only
7903 # requests breaking before the closing paren, automated logic
7904 # opens the opening paren when the closing paren opens.
7905 # Relevant cases are b977, b1215, b1270, b1303
7907 $rwant_container_open->{$seqno} = 1;
7912 return unless ( %keep_break_before_type || %keep_break_after_type );
7914 my $check_for_break = sub {
7915 my ( $KK, $rkeep_break_hash, $rbreak_hash ) = @_;
7916 my $seqno = $rLL->[$KK]->[_TYPE_SEQUENCE_];
7918 # non-container tokens use the type as the key
7920 my $type = $rLL->[$KK]->[_TYPE_];
7921 if ( $rkeep_break_hash->{$type} ) {
7922 $rbreak_hash->{$KK} = 1;
7926 # container tokens use the token as the key
7928 my $token = $rLL->[$KK]->[_TOKEN_];
7929 my $flag = $rkeep_break_hash->{$token};
7932 my $match = $flag eq '1' || $flag eq '*';
7934 # check for special matching codes
7936 if ( $token eq '(' || $token eq ')' ) {
7937 $match = $self->match_paren_flag( $KK, $flag );
7939 elsif ( $token eq '{' || $token eq '}' ) {
7941 # These tentative codes 'b' and 'B' for brace types are
7942 # placeholders for possible future brace types. They
7943 # are not documented and may be changed.
7945 $self->[_rblock_type_of_seqno_]->{$seqno};
7946 if ( $flag eq 'b' ) { $match = $block_type }
7947 elsif ( $flag eq 'B' ) { $match = !$block_type }
7949 # unknown code - no match
7953 $rbreak_hash->{$KK} = 1 if ($match);
7958 foreach my $item ( @{$rKrange_code_without_comments} ) {
7959 my ( $Kfirst, $Klast ) = @{$item};
7961 $Kfirst, \%keep_break_before_type, $rbreak_before_Kfirst
7964 $Klast, \%keep_break_after_type, $rbreak_after_Klast
7968 } ## end sub keep_old_line_breaks
7970 sub weld_containers {
7972 # Called once per file to do any welding operations requested by --weld*
7976 # This count is used to eliminate needless calls for weld checks elsewhere
7977 $total_weld_count = 0;
7979 return if ( $rOpts->{'indent-only'} );
7980 return unless ($rOpts_add_newlines);
7982 # Important: sub 'weld_cuddled_blocks' must be called before
7983 # sub 'weld_nested_containers'. This is because the cuddled option needs to
7984 # use the original _LEVEL_ values of containers, but the weld nested
7985 # containers changes _LEVEL_ of welded containers.
7987 # Here is a good test case to be sure that both cuddling and welding
7988 # are working and not interfering with each other: <<snippets/ce_wn1.in>>
7992 # if ($BOLD_MATH) { (
7993 # $labels, $comment,
7994 # join( '', '<B>', &make_math( $mode, '', '', $_ ), '</B>' )
7996 # &process_math_in_latex( $mode, $math_style, $slevel, "\\mbox{$text}" ),
8000 $self->weld_cuddled_blocks() if ( %{$rcuddled_block_types} );
8002 if ( $rOpts->{'weld-nested-containers'} ) {
8004 $self->weld_nested_containers();
8006 $self->weld_nested_quotes();
8009 #-------------------------------------------------------------
8010 # All welding is done. Finish setting up weld data structures.
8011 #-------------------------------------------------------------
8013 my $rLL = $self->[_rLL_];
8014 my $rK_weld_left = $self->[_rK_weld_left_];
8015 my $rK_weld_right = $self->[_rK_weld_right_];
8016 my $rweld_len_right_at_K = $self->[_rweld_len_right_at_K_];
8019 my @keys = keys %{$rK_weld_right};
8020 $total_weld_count = @keys;
8022 # First pass to process binary welds.
8023 # This loop is processed in unsorted order for efficiency.
8024 foreach my $Kstart (@keys) {
8025 my $Kend = $rK_weld_right->{$Kstart};
8027 # An error here would be due to an incorrect initialization introduced
8028 # in one of the above weld routines, like sub weld_nested.
8029 if ( $Kend <= $Kstart ) {
8030 Fault("Bad weld link: Kend=$Kend <= Kstart=$Kstart\n")
8035 # Set weld values for all tokens this welded pair
8036 foreach ( $Kstart + 1 .. $Kend ) {
8037 $rK_weld_left->{$_} = $Kstart;
8039 foreach my $Kx ( $Kstart .. $Kend - 1 ) {
8040 $rK_weld_right->{$Kx} = $Kend;
8041 $rweld_len_right_at_K->{$Kx} =
8042 $rLL->[$Kend]->[_CUMULATIVE_LENGTH_] -
8043 $rLL->[$Kx]->[_CUMULATIVE_LENGTH_];
8046 # Remember the leftmost index of welds which continue to the right
8047 if ( defined( $rK_weld_right->{$Kend} )
8048 && !defined( $rK_weld_left->{$Kstart} ) )
8050 push @K_multi_weld, $Kstart;
8054 # Second pass to process chains of welds (these are rare).
8055 # This has to be processed in sorted order.
8056 if (@K_multi_weld) {
8058 foreach my $Kstart ( sort { $a <=> $b } @K_multi_weld ) {
8060 # Skip any interior K which was originally missing a left link
8061 next if ( $Kstart <= $Kend );
8063 # Find the end of this chain
8064 $Kend = $rK_weld_right->{$Kstart};
8065 my $Knext = $rK_weld_right->{$Kend};
8066 while ( defined($Knext) ) {
8068 $Knext = $rK_weld_right->{$Kend};
8071 # Set weld values this chain
8072 foreach ( $Kstart + 1 .. $Kend ) {
8073 $rK_weld_left->{$_} = $Kstart;
8075 foreach my $Kx ( $Kstart .. $Kend - 1 ) {
8076 $rK_weld_right->{$Kx} = $Kend;
8077 $rweld_len_right_at_K->{$Kx} =
8078 $rLL->[$Kend]->[_CUMULATIVE_LENGTH_] -
8079 $rLL->[$Kx]->[_CUMULATIVE_LENGTH_];
8085 } ## end sub weld_containers
8087 sub cumulative_length_before_K {
8088 my ( $self, $KK ) = @_;
8089 my $rLL = $self->[_rLL_];
8090 return ( $KK <= 0 ) ? 0 : $rLL->[ $KK - 1 ]->[_CUMULATIVE_LENGTH_];
8093 sub weld_cuddled_blocks {
8096 # Called once per file to handle cuddled formatting
8098 my $rK_weld_left = $self->[_rK_weld_left_];
8099 my $rK_weld_right = $self->[_rK_weld_right_];
8100 my $rblock_type_of_seqno = $self->[_rblock_type_of_seqno_];
8102 # This routine implements the -cb flag by finding the appropriate
8103 # closing and opening block braces and welding them together.
8104 return unless ( %{$rcuddled_block_types} );
8106 my $rLL = $self->[_rLL_];
8107 return unless ( defined($rLL) && @{$rLL} );
8108 my $rbreak_container = $self->[_rbreak_container_];
8110 my $K_opening_container = $self->[_K_opening_container_];
8111 my $K_closing_container = $self->[_K_closing_container_];
8113 my $length_to_opening_seqno = sub {
8115 my $KK = $K_opening_container->{$seqno};
8116 my $lentot = $KK <= 0 ? 0 : $rLL->[ $KK - 1 ]->[_CUMULATIVE_LENGTH_];
8119 my $length_to_closing_seqno = sub {
8121 my $KK = $K_closing_container->{$seqno};
8122 my $lentot = $KK <= 0 ? 0 : $rLL->[ $KK - 1 ]->[_CUMULATIVE_LENGTH_];
8126 my $is_broken_block = sub {
8128 # a block is broken if the input line numbers of the braces differ
8129 # we can only cuddle between broken blocks
8131 my $K_opening = $K_opening_container->{$seqno};
8132 return unless ( defined($K_opening) );
8133 my $K_closing = $K_closing_container->{$seqno};
8134 return unless ( defined($K_closing) );
8135 return $rbreak_container->{$seqno}
8136 || $rLL->[$K_closing]->[_LINE_INDEX_] !=
8137 $rLL->[$K_opening]->[_LINE_INDEX_];
8140 # A stack to remember open chains at all levels: This is a hash rather than
8141 # an array for safety because negative levels can occur in files with
8142 # errors. This allows us to keep processing with negative levels.
8143 # $in_chain{$level} = [$chain_type, $type_sequence];
8145 my $CBO = $rOpts->{'cuddled-break-option'};
8147 # loop over structure items to find cuddled pairs
8149 my $KNEXT = $self->[_K_first_seq_item_];
8150 while ( defined($KNEXT) ) {
8152 $KNEXT = $rLL->[$KNEXT]->[_KNEXT_SEQ_ITEM_];
8153 my $rtoken_vars = $rLL->[$KK];
8154 my $type_sequence = $rtoken_vars->[_TYPE_SEQUENCE_];
8155 if ( !$type_sequence ) {
8156 next if ( $KK == 0 ); # first token in file may not be container
8158 # A fault here implies that an error was made in the little loop at
8159 # the bottom of sub 'respace_tokens' which set the values of
8160 # _KNEXT_SEQ_ITEM_. Or an error has been introduced in the
8161 # loop control lines above.
8162 Fault("sequence = $type_sequence not defined at K=$KK")
8167 # NOTE: we must use the original levels here. They can get changed
8168 # by sub 'weld_nested_containers', so this routine must be called
8169 # before sub 'weld_nested_containers'.
8170 my $last_level = $level;
8171 $level = $rtoken_vars->[_LEVEL_];
8173 if ( $level < $last_level ) { $in_chain{$last_level} = undef }
8174 elsif ( $level > $last_level ) { $in_chain{$level} = undef }
8176 # We are only looking at code blocks
8177 my $token = $rtoken_vars->[_TOKEN_];
8178 my $type = $rtoken_vars->[_TYPE_];
8179 next unless ( $type eq $token );
8181 if ( $token eq '{' ) {
8183 my $block_type = $rblock_type_of_seqno->{$type_sequence};
8184 if ( !$block_type ) {
8186 # patch for unrecognized block types which may not be labeled
8187 my $Kp = $self->K_previous_nonblank($KK);
8188 while ( $Kp && $rLL->[$Kp]->[_TYPE_] eq '#' ) {
8189 $Kp = $self->K_previous_nonblank($Kp);
8192 $block_type = $rLL->[$Kp]->[_TOKEN_];
8194 if ( $in_chain{$level} ) {
8196 # we are in a chain and are at an opening block brace.
8197 # See if we are welding this opening brace with the previous
8198 # block brace. Get their identification numbers:
8199 my $closing_seqno = $in_chain{$level}->[1];
8200 my $opening_seqno = $type_sequence;
8202 # The preceding block must be on multiple lines so that its
8203 # closing brace will start a new line.
8204 if ( !$is_broken_block->($closing_seqno) ) {
8205 next unless ( $CBO == 2 );
8206 $rbreak_container->{$closing_seqno} = 1;
8209 # we will let the trailing block be either broken or intact
8210 ## && $is_broken_block->($opening_seqno);
8212 # We can weld the closing brace to its following word ..
8213 my $Ko = $K_closing_container->{$closing_seqno};
8215 if ( defined($Ko) ) {
8216 $Kon = $self->K_next_nonblank($Ko);
8219 # ..unless it is a comment
8220 if ( defined($Kon) && $rLL->[$Kon]->[_TYPE_] ne '#' ) {
8222 # OK to weld these two tokens...
8223 $rK_weld_right->{$Ko} = $Kon;
8224 $rK_weld_left->{$Kon} = $Ko;
8226 # Set flag that we want to break the next container
8227 # so that the cuddled line is balanced.
8228 $rbreak_container->{$opening_seqno} = 1
8235 # We are not in a chain. Start a new chain if we see the
8236 # starting block type.
8237 if ( $rcuddled_block_types->{$block_type} ) {
8238 $in_chain{$level} = [ $block_type, $type_sequence ];
8242 $in_chain{$level} = [ $block_type, $type_sequence ];
8246 elsif ( $token eq '}' ) {
8247 if ( $in_chain{$level} ) {
8249 # We are in a chain at a closing brace. See if this chain
8251 my $Knn = $self->K_next_code($KK);
8254 my $chain_type = $in_chain{$level}->[0];
8255 my $next_nonblank_token = $rLL->[$Knn]->[_TOKEN_];
8257 $rcuddled_block_types->{$chain_type}->{$next_nonblank_token}
8261 # Note that we do not weld yet because we must wait until
8262 # we we are sure that an opening brace for this follows.
8263 $in_chain{$level}->[1] = $type_sequence;
8265 else { $in_chain{$level} = undef }
8270 } ## end sub weld_cuddled_blocks
8272 sub find_nested_pairs {
8275 # This routine is called once per file to do preliminary work needed for
8276 # the --weld-nested option. This information is also needed for adding
8279 my $rLL = $self->[_rLL_];
8280 return unless ( defined($rLL) && @{$rLL} );
8283 my $K_opening_container = $self->[_K_opening_container_];
8284 my $K_closing_container = $self->[_K_closing_container_];
8285 my $rblock_type_of_seqno = $self->[_rblock_type_of_seqno_];
8287 # We define an array of pairs of nested containers
8290 # Names of calling routines can either be marked as 'i' or 'w',
8291 # and they may invoke a sub call with an '->'. We will consider
8292 # any consecutive string of such types as a single unit when making
8293 # weld decisions. We also allow a leading !
8294 my $is_name_type = {
8302 # Loop over all closing container tokens
8303 foreach my $inner_seqno ( keys %{$K_closing_container} ) {
8304 my $K_inner_closing = $K_closing_container->{$inner_seqno};
8306 # See if it is immediately followed by another, outer closing token
8307 my $K_outer_closing = $K_inner_closing + 1;
8308 $K_outer_closing += 1
8309 if ( $K_outer_closing < $Num
8310 && $rLL->[$K_outer_closing]->[_TYPE_] eq 'b' );
8312 next unless ( $K_outer_closing < $Num );
8313 my $outer_seqno = $rLL->[$K_outer_closing]->[_TYPE_SEQUENCE_];
8314 next unless ($outer_seqno);
8315 my $token_outer_closing = $rLL->[$K_outer_closing]->[_TOKEN_];
8316 next unless ( $is_closing_token{$token_outer_closing} );
8318 # Now we have to check the opening tokens.
8319 my $K_outer_opening = $K_opening_container->{$outer_seqno};
8320 my $K_inner_opening = $K_opening_container->{$inner_seqno};
8321 next unless defined($K_outer_opening) && defined($K_inner_opening);
8323 my $inner_blocktype = $rblock_type_of_seqno->{$inner_seqno};
8324 my $outer_blocktype = $rblock_type_of_seqno->{$outer_seqno};
8326 # Verify that the inner opening token is the next container after the
8327 # outer opening token.
8328 my $K_io_check = $rLL->[$K_outer_opening]->[_KNEXT_SEQ_ITEM_];
8329 next unless defined($K_io_check);
8330 if ( $K_io_check != $K_inner_opening ) {
8332 # The inner opening container does not immediately follow the outer
8333 # opening container, but we may still allow a weld if they are
8334 # separated by a sub signature. For example, we may have something
8335 # like this, where $K_io_check may be at the first 'x' instead of
8336 # 'io'. So we need to hop over the signature and see if we arrive
8341 # $obj->then( sub ( $code ) {
8343 # return $c->render(text => '', status => $code);
8348 next if ( !$inner_blocktype || $inner_blocktype ne 'sub' );
8349 next if $rLL->[$K_io_check]->[_TOKEN_] ne '(';
8350 my $seqno_signature = $rLL->[$K_io_check]->[_TYPE_SEQUENCE_];
8351 next unless defined($seqno_signature);
8352 my $K_signature_closing = $K_closing_container->{$seqno_signature};
8353 next unless defined($K_signature_closing);
8354 my $K_test = $rLL->[$K_signature_closing]->[_KNEXT_SEQ_ITEM_];
8356 unless ( defined($K_test) && $K_test == $K_inner_opening );
8358 # OK, we have arrived at 'io' in the above diagram. We should put
8359 # a limit on the length or complexity of the signature here. There
8360 # is no perfect way to do this, one way is to put a limit on token
8361 # count. For consistency with older versions, we should allow a
8362 # signature with a single variable to weld, but not with
8363 # multiple variables. A single variable as in 'sub ($code) {' can
8364 # have a $Kdiff of 2 to 4, depending on spacing.
8366 # But two variables like 'sub ($v1,$v2) {' can have a diff of 4 to
8367 # 7, depending on spacing. So to keep formatting consistent with
8368 # previous versions, we will also avoid welding if there is a comma
8371 my $Kdiff = $K_signature_closing - $K_io_check;
8372 next if ( $Kdiff > 4 );
8375 foreach my $KK ( $K_io_check + 1 .. $K_signature_closing - 1 ) {
8376 if ( $rLL->[$KK]->[_TYPE_] eq ',' ) { $saw_comma = 1; last }
8378 next if ($saw_comma);
8381 # Yes .. this is a possible nesting pair.
8382 # They can be separated by a small amount.
8383 my $K_diff = $K_inner_opening - $K_outer_opening;
8385 # Count nonblank characters separating them.
8386 if ( $K_diff < 0 ) { next } # Shouldn't happen
8387 my $nonblank_count = 0;
8391 # Here is an example of a long identifier chain which counts as a
8392 # single nonblank here (this spans about 10 K indexes):
8393 # if ( !Boucherot::SetOfConnections->new->handler->execute(
8396 my $Kn_first = $K_outer_opening;
8397 my $Kn_last_nonblank;
8399 foreach my $Kn ( $K_outer_opening + 1 .. $K_inner_opening ) {
8400 next if ( $rLL->[$Kn]->[_TYPE_] eq 'b' );
8401 if ( !$nonblank_count ) { $Kn_first = $Kn }
8402 if ( $Kn eq $K_inner_opening ) { $nonblank_count++; last; }
8403 $Kn_last_nonblank = $Kn;
8405 # skip chain of identifier tokens
8406 my $last_type = $type;
8407 my $last_is_name = $is_name;
8408 $type = $rLL->[$Kn]->[_TYPE_];
8409 if ( $type eq '#' ) { $saw_comment = 1; last }
8410 $is_name = $is_name_type->{$type};
8411 next if ( $is_name && $last_is_name );
8414 last if ( $nonblank_count > 2 );
8417 # Do not weld across a comment .. fix for c058.
8418 next if ($saw_comment);
8420 # Patch for b1104: do not weld to a paren preceded by sort/map/grep
8421 # because the special line break rules may cause a blinking state
8422 if ( defined($Kn_last_nonblank)
8423 && $rLL->[$K_inner_opening]->[_TOKEN_] eq '('
8424 && $rLL->[$Kn_last_nonblank]->[_TYPE_] eq 'k' )
8426 my $token = $rLL->[$Kn_last_nonblank]->[_TOKEN_];
8428 # Turn off welding at sort/map/grep (
8429 if ( $is_sort_map_grep{$token} ) { $nonblank_count = 10 }
8434 # adjacent opening containers, like: do {{
8435 $nonblank_count == 1
8437 # short item following opening paren, like: fun( yyy (
8438 || ( $nonblank_count == 2
8439 && $rLL->[$K_outer_opening]->[_TOKEN_] eq '(' )
8441 # anonymous sub + prototype or sig: )->then( sub ($code) {
8442 # ... but it seems best not to stack two structural blocks, like
8444 # sub make_anon_with_my_sub { sub {
8445 # because it probably hides the structure a little too much.
8446 || ( $inner_blocktype
8447 && $inner_blocktype eq 'sub'
8448 && $rLL->[$Kn_first]->[_TOKEN_] eq 'sub'
8449 && !$outer_blocktype )
8453 [ $inner_seqno, $outer_seqno, $K_inner_closing ];
8458 # The weld routine expects the pairs in order in the form
8459 # [$seqno_inner, $seqno_outer]
8460 # And they must be in the same order as the inner closing tokens
8461 # (otherwise, welds of three or more adjacent tokens will not work). The K
8462 # value of this inner closing token has temporarily been stored for
8466 # Drop the K index after sorting (it would cause trouble downstream)
8467 map { [ $_->[0], $_->[1] ] }
8469 # Sort on the K values
8470 sort { $a->[2] <=> $b->[2] } @nested_pairs;
8472 return \@nested_pairs;
8473 } ## end sub find_nested_pairs
8475 sub match_paren_flag {
8477 # Decide if this paren is excluded by user request:
8478 # undef matches no parens
8479 # '*' matches all parens
8480 # 'k' matches only if the previous nonblank token is a perl builtin
8481 # keyword (such as 'if', 'while'),
8482 # 'K' matches if 'k' does not, meaning if the previous token is not a
8484 # 'f' matches if the previous token is a function other than a keyword.
8485 # 'F' matches if 'f' does not.
8486 # 'w' matches if either 'k' or 'f' match.
8487 # 'W' matches if 'w' does not.
8488 my ( $self, $KK, $flag ) = @_;
8490 return 0 unless ( defined($flag) );
8491 return 0 if $flag eq '0';
8492 return 1 if $flag eq '1';
8493 return 1 if $flag eq '*';
8494 return 0 unless ( defined($KK) );
8496 my $rLL = $self->[_rLL_];
8497 my $rtoken_vars = $rLL->[$KK];
8498 my $seqno = $rtoken_vars->[_TYPE_SEQUENCE_];
8499 return 0 unless ($seqno);
8500 my $token = $rtoken_vars->[_TOKEN_];
8501 my $K_opening = $KK;
8502 if ( !$is_opening_token{$token} ) {
8503 $K_opening = $self->[_K_opening_container_]->{$seqno};
8505 return unless ( defined($K_opening) );
8507 my ( $is_f, $is_k, $is_w );
8508 my $Kp = $self->K_previous_nonblank($K_opening);
8509 if ( defined($Kp) ) {
8510 my $type_p = $rLL->[$Kp]->[_TYPE_];
8513 $is_k = $type_p eq 'k';
8516 $is_f = $self->[_ris_function_call_paren_]->{$seqno};
8518 # either keyword or function call?
8519 $is_w = $is_k || $is_f;
8522 if ( $flag eq 'k' ) { $match = $is_k }
8523 elsif ( $flag eq 'K' ) { $match = !$is_k }
8524 elsif ( $flag eq 'f' ) { $match = $is_f }
8525 elsif ( $flag eq 'F' ) { $match = !$is_f }
8526 elsif ( $flag eq 'w' ) { $match = $is_w }
8527 elsif ( $flag eq 'W' ) { $match = !$is_w }
8529 } ## end sub match_paren_flag
8531 sub is_excluded_weld {
8533 # decide if this weld is excluded by user request
8534 my ( $self, $KK, $is_leading ) = @_;
8535 my $rLL = $self->[_rLL_];
8536 my $rtoken_vars = $rLL->[$KK];
8537 my $token = $rtoken_vars->[_TOKEN_];
8538 my $rflags = $weld_nested_exclusion_rules{$token};
8539 return 0 unless ( defined($rflags) );
8540 my $flag = $is_leading ? $rflags->[0] : $rflags->[1];
8541 return 0 unless ( defined($flag) );
8542 return 1 if $flag eq '*';
8543 return $self->match_paren_flag( $KK, $flag );
8544 } ## end sub is_excluded_weld
8546 # hashes to simplify welding logic
8547 my %type_ok_after_bareword;
8548 my %has_tight_paren;
8552 # types needed for welding RULE 6
8553 my @q = qw# => -> { ( [ #;
8554 @type_ok_after_bareword{@q} = (1) x scalar(@q);
8556 # these types do not 'like' to be separated from a following paren
8557 @q = qw(w i q Q G C Z U);
8558 @{has_tight_paren}{@q} = (1) x scalar(@q);
8561 use constant DEBUG_WELD => 0;
8563 sub setup_new_weld_measurements {
8565 # Define quantities to check for excess line lengths when welded.
8566 # Called by sub 'weld_nested_containers' and sub 'weld_nested_quotes'
8568 my ( $self, $Kouter_opening, $Kinner_opening ) = @_;
8570 # Given indexes of outer and inner opening containers to be welded:
8571 # $Kouter_opening, $Kinner_opening
8573 # Returns these variables:
8574 # $new_weld_ok = true (new weld ok) or false (do not start new weld)
8575 # $starting_indent = starting indentation
8576 # $starting_lentot = starting cumulative length
8577 # $msg = diagnostic message for debugging
8579 my $rLL = $self->[_rLL_];
8580 my $rlines = $self->[_rlines_];
8584 my $starting_lentot;
8585 my $maximum_text_length;
8586 my $msg = EMPTY_STRING;
8588 my $iline_oo = $rLL->[$Kouter_opening]->[_LINE_INDEX_];
8589 my $rK_range = $rlines->[$iline_oo]->{_rK_range};
8590 my ( $Kfirst, $Klast ) = @{$rK_range};
8592 #-------------------------------------------------------------------------
8593 # We now define a reference index, '$Kref', from which to start measuring
8594 # This choice turns out to be critical for keeping welds stable during
8595 # iterations, so we go through a number of STEPS...
8596 #-------------------------------------------------------------------------
8598 # STEP 1: Our starting guess is to use measure from the first token of the
8599 # current line. This is usually a good guess.
8602 # STEP 2: See if we should go back a little farther
8603 my $Kprev = $self->K_previous_nonblank($Kfirst);
8604 if ( defined($Kprev) ) {
8606 # Avoid measuring from between an opening paren and a previous token
8607 # which should stay close to it ... fixes b1185
8608 my $token_oo = $rLL->[$Kouter_opening]->[_TOKEN_];
8609 my $type_prev = $rLL->[$Kprev]->[_TYPE_];
8610 if ( $Kouter_opening == $Kfirst
8612 && $has_tight_paren{$type_prev} )
8617 # Back up and count length from a token like '=' or '=>' if -lp
8618 # is used (this fixes b520)
8619 # ...or if a break is wanted before there
8620 elsif ($rOpts_line_up_parentheses
8621 || $want_break_before{$type_prev} )
8624 # If there are other sequence items between the start of this line
8625 # and the opening token in question, then do not include tokens on
8626 # the previous line in length calculations. This check added to
8627 # fix case b1174 which had a '?' on the line
8628 my $no_previous_seq_item = $Kref == $Kouter_opening
8629 || $rLL->[$Kref]->[_KNEXT_SEQ_ITEM_] == $Kouter_opening;
8631 if ( $no_previous_seq_item
8632 && substr( $type_prev, 0, 1 ) eq '=' )
8636 # Fix for b1144 and b1112: backup to the first nonblank
8637 # character before the =>, or to the start of its line.
8638 if ( $type_prev eq '=>' ) {
8639 my $iline_prev = $rLL->[$Kprev]->[_LINE_INDEX_];
8640 my $rK_range_prev = $rlines->[$iline_prev]->{_rK_range};
8641 my ( $Kfirst_prev, $Klast_prev ) = @{$rK_range_prev};
8642 foreach my $KK ( reverse( $Kfirst_prev .. $Kref - 1 ) ) {
8643 next if ( $rLL->[$KK]->[_TYPE_] eq 'b' );
8652 # STEP 3: Now look ahead for a ternary and, if found, use it.
8653 # This fixes case b1182.
8654 # Also look for a ')' at the same level and, if found, use it.
8655 # This fixes case b1224.
8656 if ( $Kref < $Kouter_opening ) {
8657 my $Knext = $rLL->[$Kref]->[_KNEXT_SEQ_ITEM_];
8658 my $level_oo = $rLL->[$Kouter_opening]->[_LEVEL_];
8659 while ( $Knext < $Kouter_opening ) {
8660 if ( $rLL->[$Knext]->[_LEVEL_] == $level_oo ) {
8661 if ( $is_ternary{ $rLL->[$Knext]->[_TYPE_] }
8662 || $rLL->[$Knext]->[_TOKEN_] eq ')' )
8668 $Knext = $rLL->[$Knext]->[_KNEXT_SEQ_ITEM_];
8672 # Define the starting measurements we will need
8674 $Kref <= 0 ? 0 : $rLL->[ $Kref - 1 ]->[_CUMULATIVE_LENGTH_];
8675 $starting_level = $rLL->[$Kref]->[_LEVEL_];
8676 $starting_ci = $rLL->[$Kref]->[_CI_LEVEL_];
8678 $maximum_text_length = $maximum_text_length_at_level[$starting_level] -
8679 $starting_ci * $rOpts_continuation_indentation;
8681 # STEP 4: Switch to using the outer opening token as the reference
8682 # point if a line break before it would make a longer line.
8683 # Fixes case b1055 and is also an alternate fix for b1065.
8684 my $starting_level_oo = $rLL->[$Kouter_opening]->[_LEVEL_];
8685 if ( $Kref < $Kouter_opening ) {
8686 my $starting_ci_oo = $rLL->[$Kouter_opening]->[_CI_LEVEL_];
8687 my $lentot_oo = $rLL->[ $Kouter_opening - 1 ]->[_CUMULATIVE_LENGTH_];
8688 my $maximum_text_length_oo =
8689 $maximum_text_length_at_level[$starting_level_oo] -
8690 $starting_ci_oo * $rOpts_continuation_indentation;
8692 # The excess length to any cumulative length K = lenK is either
8693 # $excess = $lenk - ($lentot + $maximum_text_length), or
8694 # $excess = $lenk - ($lentot_oo + $maximum_text_length_oo),
8695 # so the worst case (maximum excess) corresponds to the configuration
8696 # with minimum value of the sum: $lentot + $maximum_text_length
8697 if ( $lentot_oo + $maximum_text_length_oo <
8698 $starting_lentot + $maximum_text_length )
8700 $Kref = $Kouter_opening;
8701 $starting_level = $starting_level_oo;
8702 $starting_ci = $starting_ci_oo;
8703 $starting_lentot = $lentot_oo;
8704 $maximum_text_length = $maximum_text_length_oo;
8708 my $new_weld_ok = 1;
8710 # STEP 5, fix b1020: Avoid problem areas with the -wn -lp combination. The
8711 # combination -wn -lp -dws -naws does not work well and can cause blinkers.
8712 # It will probably only occur in stress testing. For this situation we
8713 # will only start a new weld if we start at a 'good' location.
8714 # - Added 'if' to fix case b1032.
8715 # - Require blank before certain previous characters to fix b1111.
8716 # - Add ';' to fix case b1139
8717 # - Convert from '$ok_to_weld' to '$new_weld_ok' to fix b1162.
8718 # - relaxed constraints for b1227
8720 && $rOpts_line_up_parentheses
8721 && $rOpts_delete_old_whitespace
8722 && !$rOpts_add_whitespace
8723 && defined($Kprev) )
8725 my $type_first = $rLL->[$Kfirst]->[_TYPE_];
8726 my $token_first = $rLL->[$Kfirst]->[_TOKEN_];
8727 my $type_prev = $rLL->[$Kprev]->[_TYPE_];
8729 if ( $Kprev >= 0 ) { $type_pp = $rLL->[ $Kprev - 1 ]->[_TYPE_] }
8731 $type_prev =~ /^[\,\.\;]/
8732 || $type_prev =~ /^[=\{\[\(\L]/
8733 && ( $type_pp eq 'b' || $type_pp eq '}' || $type_first eq 'k' )
8734 || $type_first =~ /^[=\,\.\;\{\[\(\L]/
8735 || $type_first eq '||'
8738 && ( $token_first eq 'if'
8739 || $token_first eq 'or' )
8744 "Skipping weld: poor break with -lp and ci at type_first='$type_first' type_prev='$type_prev' type_pp=$type_pp\n";
8748 return ( $new_weld_ok, $maximum_text_length, $starting_lentot, $msg );
8749 } ## end sub setup_new_weld_measurements
8751 sub excess_line_length_for_Krange {
8752 my ( $self, $Kfirst, $Klast ) = @_;
8754 # returns $excess_length =
8755 # by how many characters a line composed of tokens $Kfirst .. $Klast will
8756 # exceed the allowed line length
8758 my $rLL = $self->[_rLL_];
8759 my $length_before_Kfirst =
8762 : $rLL->[ $Kfirst - 1 ]->[_CUMULATIVE_LENGTH_];
8764 # backup before a side comment if necessary
8766 if ( $rOpts_ignore_side_comment_lengths
8767 && $rLL->[$Klast]->[_TYPE_] eq '#' )
8769 my $Kprev = $self->K_previous_nonblank($Klast);
8770 if ( defined($Kprev) && $Kprev >= $Kfirst ) { $Kend = $Kprev }
8773 # get the length of the text
8774 my $length = $rLL->[$Kend]->[_CUMULATIVE_LENGTH_] - $length_before_Kfirst;
8776 # get the size of the text window
8777 my $level = $rLL->[$Kfirst]->[_LEVEL_];
8778 my $ci_level = $rLL->[$Kfirst]->[_CI_LEVEL_];
8779 my $max_text_length = $maximum_text_length_at_level[$level] -
8780 $ci_level * $rOpts_continuation_indentation;
8782 my $excess_length = $length - $max_text_length;
8786 "Kfirst=$Kfirst, Klast=$Klast, Kend=$Kend, level=$level, ci=$ci_level, max_text_length=$max_text_length, length=$length\n";
8787 return ($excess_length);
8788 } ## end sub excess_line_length_for_Krange
8790 sub weld_nested_containers {
8793 # Called once per file for option '--weld-nested-containers'
8795 my $rK_weld_left = $self->[_rK_weld_left_];
8796 my $rK_weld_right = $self->[_rK_weld_right_];
8798 # This routine implements the -wn flag by "welding together"
8799 # the nested closing and opening tokens which were previously
8800 # identified by sub 'find_nested_pairs'. "welding" simply
8801 # involves setting certain hash values which will be checked
8802 # later during formatting.
8804 my $rLL = $self->[_rLL_];
8805 my $rlines = $self->[_rlines_];
8806 my $K_opening_container = $self->[_K_opening_container_];
8807 my $K_closing_container = $self->[_K_closing_container_];
8808 my $rblock_type_of_seqno = $self->[_rblock_type_of_seqno_];
8809 my $ris_excluded_lp_container = $self->[_ris_excluded_lp_container_];
8810 my $ris_asub_block = $self->[_ris_asub_block_];
8811 my $rOpts_asbl = $rOpts->{'opening-anonymous-sub-brace-on-new-line'};
8813 # Find nested pairs of container tokens for any welding.
8814 my $rnested_pairs = $self->find_nested_pairs();
8816 # Return unless there are nested pairs to weld
8817 return unless defined($rnested_pairs) && @{$rnested_pairs};
8819 # NOTE: It would be nice to apply RULE 5 right here by deleting unwanted
8820 # pairs. But it isn't clear if this is possible because we don't know
8821 # which sequences might actually start a weld.
8823 # Setup a hash to avoid instabilities with combination -lp -wn -pvt=2.
8824 # We do this by reducing -vt=2 to -vt=1 where there could be a conflict
8825 # with welding at the same tokens.
8826 # See issues b1338, b1339, b1340, b1341, b1342, b1343.
8827 if ($rOpts_line_up_parentheses) {
8829 # NOTE: just parens for now but this could be applied to all types if
8831 if ( $opening_vertical_tightness{'('} == 2 ) {
8832 my $rreduce_vertical_tightness_by_seqno =
8833 $self->[_rreduce_vertical_tightness_by_seqno_];
8834 foreach my $item ( @{$rnested_pairs} ) {
8835 my ( $inner_seqno, $outer_seqno ) = @{$item};
8836 if ( !$ris_excluded_lp_container->{$outer_seqno} ) {
8838 # Set a flag which means that if a token has -vt=2
8839 # then reduce it to -vt=1.
8840 $rreduce_vertical_tightness_by_seqno->{$outer_seqno} = 1;
8846 my $rOpts_break_at_old_method_breakpoints =
8847 $rOpts->{'break-at-old-method-breakpoints'};
8849 # This array will hold the sequence numbers of the tokens to be welded.
8852 # Variables needed for estimating line lengths
8853 my $maximum_text_length; # maximum spaces available for text
8854 my $starting_lentot; # cumulative text to start of current line
8856 my $iline_outer_opening = -1;
8857 my $weld_count_this_start = 0;
8859 # OLD: $single_line_tol added to fix cases b1180 b1181
8860 # = $rOpts_continuation_indentation > $rOpts_indent_columns ? 1 : 0;
8861 # NEW: $single_line_tol=0; fixes b1212 and b1180-1181 work now
8862 my $single_line_tol = 0;
8864 my $multiline_tol = $single_line_tol + 1 +
8865 max( $rOpts_indent_columns, $rOpts_continuation_indentation );
8867 # Define a welding cutoff level: do not start a weld if the inside
8868 # container level equals or exceeds this level.
8870 # We use the minimum of two criteria, either of which may be more
8871 # restrictive. The 'alpha' value is more restrictive in (b1206, b1252) and
8872 # the 'beta' value is more restrictive in other cases (b1243).
8874 my $weld_cutoff_level = min( $stress_level_alpha, $stress_level_beta + 3 );
8876 # The vertical tightness flags can throw off line length calculations.
8877 # This patch was added to fix instability issue b1284.
8878 # It works to always use a tol of 1 for 1 line block length tests, but
8879 # this restricted value keeps test case wn6.wn working as before.
8880 # It may be necessary to include '[' and '{' here in the future.
8881 my $one_line_tol = $opening_vertical_tightness{'('} ? 1 : 0;
8883 my $length_to_opening_seqno = sub {
8885 my $KK = $K_opening_container->{$seqno};
8886 my $lentot = defined($KK)
8887 && $KK > 0 ? $rLL->[ $KK - 1 ]->[_CUMULATIVE_LENGTH_] : 0;
8891 my $length_to_closing_seqno = sub {
8893 my $KK = $K_closing_container->{$seqno};
8894 my $lentot = defined($KK)
8895 && $KK > 0 ? $rLL->[ $KK - 1 ]->[_CUMULATIVE_LENGTH_] : 0;
8900 # _oo=outer opening, i.e. first of { {
8901 # _io=inner opening, i.e. second of { {
8902 # _oc=outer closing, i.e. second of } {
8903 # _ic=inner closing, i.e. first of } }
8907 # Main loop over nested pairs...
8908 # We are working from outermost to innermost pairs so that
8909 # level changes will be complete when we arrive at the inner pairs.
8910 while ( my $item = pop( @{$rnested_pairs} ) ) {
8911 my ( $inner_seqno, $outer_seqno ) = @{$item};
8913 my $Kouter_opening = $K_opening_container->{$outer_seqno};
8914 my $Kinner_opening = $K_opening_container->{$inner_seqno};
8915 my $Kouter_closing = $K_closing_container->{$outer_seqno};
8916 my $Kinner_closing = $K_closing_container->{$inner_seqno};
8918 # RULE: do not weld if inner container has <= 3 tokens unless the next
8919 # token is a heredoc (so we know there will be multiple lines)
8920 if ( $Kinner_closing - $Kinner_opening <= 4 ) {
8921 my $Knext_nonblank = $self->K_next_nonblank($Kinner_opening);
8922 next unless defined($Knext_nonblank);
8923 my $type = $rLL->[$Knext_nonblank]->[_TYPE_];
8924 next unless ( $type eq 'h' );
8927 my $outer_opening = $rLL->[$Kouter_opening];
8928 my $inner_opening = $rLL->[$Kinner_opening];
8929 my $outer_closing = $rLL->[$Kouter_closing];
8930 my $inner_closing = $rLL->[$Kinner_closing];
8932 # RULE: do not weld to a hash brace. The reason is that it has a very
8933 # strong bond strength to the next token, so a line break after it
8934 # may not work. Previously we allowed welding to something like @{
8935 # but that caused blinking states (cases b751, b779).
8936 if ( $inner_opening->[_TYPE_] eq 'L' ) {
8940 # RULE: do not weld to a square bracket which does not contain commas
8941 if ( $inner_opening->[_TYPE_] eq '[' ) {
8942 my $rtype_count = $self->[_rtype_count_by_seqno_]->{$inner_seqno};
8943 next unless ($rtype_count);
8944 my $comma_count = $rtype_count->{','};
8945 next unless ($comma_count);
8947 # Do not weld if there is text before a '[' such as here:
8948 # curr_opt ( @beg [2,5] )
8949 # It will not break into the desired sandwich structure.
8950 # This fixes case b109, 110.
8951 my $Kdiff = $Kinner_opening - $Kouter_opening;
8952 next if ( $Kdiff > 2 );
8955 && $rLL->[ $Kouter_opening + 1 ]->[_TYPE_] ne 'b' );
8959 # RULE: Avoid welding under stress. The idea is that we need to have a
8960 # little space* within a welded container to avoid instability. Note
8961 # that after each weld the level values are reduced, so long multiple
8962 # welds can still be made. This rule will seldom be a limiting factor
8963 # in actual working code. Fixes b1206, b1243.
8964 my $inner_level = $inner_opening->[_LEVEL_];
8965 if ( $inner_level >= $weld_cutoff_level ) { next }
8967 # Set flag saying if this pair starts a new weld
8968 my $starting_new_weld = !( @welds && $outer_seqno == $welds[-1]->[0] );
8970 # Set flag saying if this pair is adjacent to the previous nesting pair
8971 # (even if previous pair was rejected as a weld)
8972 my $touch_previous_pair =
8973 defined($previous_pair) && $outer_seqno == $previous_pair->[0];
8974 $previous_pair = $item;
8976 my $do_not_weld_rule = 0;
8977 my $Msg = EMPTY_STRING;
8978 my $is_one_line_weld;
8980 my $iline_oo = $outer_opening->[_LINE_INDEX_];
8981 my $iline_io = $inner_opening->[_LINE_INDEX_];
8982 my $iline_ic = $inner_closing->[_LINE_INDEX_];
8983 my $iline_oc = $outer_closing->[_LINE_INDEX_];
8984 my $token_oo = $outer_opening->[_TOKEN_];
8985 my $token_io = $inner_opening->[_TOKEN_];
8987 my $is_multiline_weld =
8988 $iline_oo == $iline_io
8989 && $iline_ic == $iline_oc
8990 && $iline_io != $iline_ic;
8993 my $len_oo = $rLL->[$Kouter_opening]->[_CUMULATIVE_LENGTH_];
8994 my $len_io = $rLL->[$Kinner_opening]->[_CUMULATIVE_LENGTH_];
8996 Pair seqo=$outer_seqno seqi=$inner_seqno lines: loo=$iline_oo lio=$iline_io lic=$iline_ic loc=$iline_oc
8997 Koo=$Kouter_opening Kio=$Kinner_opening Kic=$Kinner_closing Koc=$Kouter_closing lenoo=$len_oo lenio=$len_io
8998 tokens '$token_oo' .. '$token_io'
9002 # DO-NOT-WELD RULE 0:
9003 # Avoid a new paren-paren weld if inner parens are 'sheared' (separated
9004 # by one line). This can produce instabilities (fixes b1250 b1251
9006 if ( !$is_multiline_weld
9007 && $iline_ic == $iline_io + 1
9009 && $token_io eq '(' )
9012 $Msg .= "RULE 0: Not welding due to sheared inner parens\n";
9018 # If this pair is not adjacent to the previous pair (skipped or not),
9019 # then measure lengths from the start of line of oo.
9021 !$touch_previous_pair
9023 # Also do this if restarting at a new line; fixes case b965, s001
9024 || ( !$weld_count_this_start && $iline_oo > $iline_outer_opening )
9028 # Remember the line we are using as a reference
9029 $iline_outer_opening = $iline_oo;
9030 $weld_count_this_start = 0;
9032 ( my $new_weld_ok, $maximum_text_length, $starting_lentot, my $msg )
9033 = $self->setup_new_weld_measurements( $Kouter_opening,
9038 && ( $iline_oo != $iline_io
9039 || $iline_ic != $iline_oc )
9042 if (DEBUG_WELD) { print $msg}
9046 my $rK_range = $rlines->[$iline_oo]->{_rK_range};
9047 my ( $Kfirst, $Klast ) = @{$rK_range};
9049 # An existing one-line weld is a line in which
9050 # (1) the containers are all on one line, and
9051 # (2) the line does not exceed the allowable length
9052 if ( $iline_oo == $iline_oc ) {
9054 # All the tokens are on one line, now check their length.
9055 # Start with the full line index range. We will reduce this
9056 # in the coding below in some cases.
9057 my $Kstart = $Kfirst;
9060 # Note that the following minimal choice for measuring will
9061 # work and will not cause any instabilities because it is
9064 ## my $Kstart = $Kouter_opening;
9065 ## my $Kstop = $Kouter_closing;
9067 # But that can lead to some undesirable welds. So a little
9068 # more complicated method has been developed.
9070 # We are trying to avoid creating bad two-line welds when we are
9071 # working on long, previously un-welded input text, such as
9073 # INPUT (example of a long input line weld candidate):
9074 ## $mutation->transpos( $self->RNA->position($mutation->label, $atg_label));
9076 # GOOD two-line break: (not welded; result marked too long):
9077 ## $mutation->transpos(
9078 ## $self->RNA->position($mutation->label, $atg_label));
9080 # BAD two-line break: (welded; result if we weld):
9081 ## $mutation->transpos($self->RNA->position(
9082 ## $mutation->label, $atg_label));
9084 # We can only get an approximate estimate of the final length,
9085 # since the line breaks may change, and for -lp mode because
9086 # even the indentation is not yet known.
9088 my $level_first = $rLL->[$Kfirst]->[_LEVEL_];
9089 my $level_last = $rLL->[$Klast]->[_LEVEL_];
9090 my $level_oo = $rLL->[$Kouter_opening]->[_LEVEL_];
9091 my $level_oc = $rLL->[$Kouter_closing]->[_LEVEL_];
9093 # - measure to the end of the original line if balanced
9094 # - measure to the closing container if unbalanced (fixes b1230)
9095 #if ( $level_first != $level_last ) { $Kstop = $Kouter_closing }
9096 if ( $level_oc > $level_last ) { $Kstop = $Kouter_closing }
9098 # - measure from the start of the original line if balanced
9099 # - measure from the most previous token with same level
9100 # if unbalanced (b1232)
9101 if ( $Kouter_opening > $Kfirst && $level_oo > $level_first ) {
9102 $Kstart = $Kouter_opening;
9105 my $KK ( reverse( $Kfirst + 1 .. $Kouter_opening - 1 ) )
9107 next if ( $rLL->[$KK]->[_TYPE_] eq 'b' );
9108 last if ( $rLL->[$KK]->[_LEVEL_] < $level_oo );
9114 $self->excess_line_length_for_Krange( $Kstart, $Kstop );
9116 # Coding simplified here for case b1219.
9117 # Increased tol from 0 to 1 when pvt>0 to fix b1284.
9118 $is_one_line_weld = $excess <= $one_line_tol;
9121 # DO-NOT-WELD RULE 1:
9122 # Do not weld something that looks like the start of a two-line
9123 # function call, like this: <<snippets/wn6.in>>
9124 # $trans->add_transformation(
9125 # PDL::Graphics::TriD::Scale->new( $sx, $sy, $sz ) );
9126 # We will look for a semicolon after the closing paren.
9128 # We want to weld something complex, like this though
9129 # my $compass = uc( opposite_direction( line_to_canvas_direction(
9130 # @{ $coords[0] }, @{ $coords[1] } ) ) );
9131 # Otherwise we will get a 'blinker'. For example, the following
9132 # would become a blinker without this rule:
9133 # $Self->_Add( $SortOrderDisplay{ $Field
9134 # ->GenerateFieldForSelectSQL() } );
9135 # But it is okay to weld a two-line statement if it looks like
9136 # it was already welded, meaning that the two opening containers are
9137 # on a different line that the two closing containers. This is
9138 # necessary to prevent blinking of something like this with
9139 # perltidy -wn -pbp (starting indentation two levels deep):
9141 # $top_label->set_text( gettext(
9142 # "Unable to create personal directory - check permissions.") );
9143 if ( $iline_oc == $iline_oo + 1
9144 && $iline_io == $iline_ic
9145 && $token_oo eq '(' )
9148 # Look for following semicolon...
9149 my $Knext_nonblank = $self->K_next_nonblank($Kouter_closing);
9150 my $next_nonblank_type =
9151 defined($Knext_nonblank)
9152 ? $rLL->[$Knext_nonblank]->[_TYPE_]
9154 if ( $next_nonblank_type eq ';' ) {
9156 # Then do not weld if no other containers between inner
9157 # opening and closing.
9158 my $Knext_seq_item = $inner_opening->[_KNEXT_SEQ_ITEM_];
9159 if ( $Knext_seq_item == $Kinner_closing ) {
9160 $do_not_weld_rule = 1;
9164 } ## end starting new weld sequence
9168 # set the 1-line flag if continuing a weld sequence; fixes b1239
9169 $is_one_line_weld = ( $iline_oo == $iline_oc );
9172 # DO-NOT-WELD RULE 2:
9173 # Do not weld an opening paren to an inner one line brace block
9174 # We will just use old line numbers for this test and require
9175 # iterations if necessary for convergence
9177 # For example, otherwise we could cause the opening paren
9178 # in the following example to separate from the caller name
9181 # $_[0]->code_handler
9182 # ( sub { $more .= $_[1] . ":" . $_[0] . "\n" } );
9184 # Here is another example where we do not want to weld:
9185 # $wrapped->add_around_modifier(
9186 # sub { push @tracelog => 'around 1'; $_[0]->(); } );
9188 # If the one line sub block gets broken due to length or by the
9189 # user, then we can weld. The result will then be:
9190 # $wrapped->add_around_modifier( sub {
9191 # push @tracelog => 'around 1';
9195 # Updated to fix cases b1082 b1102 b1106 b1115:
9196 # Also, do not weld to an intact inner block if the outer opening token
9197 # is on a different line. For example, this prevents oscillation
9198 # between these two states in case b1106:
9201 # ($_,[$self->$_(@_[1..$#_])])
9205 # $_, [ $self->$_( @_[ 1 .. $#_ ] ) ]
9208 # The effect of this change on typical code is very minimal. Sometimes
9209 # it may take a second iteration to converge, but this gives protection
9211 if ( !$do_not_weld_rule
9212 && !$is_one_line_weld
9213 && $iline_ic == $iline_io )
9215 $do_not_weld_rule = 2
9216 if ( $token_oo eq '(' || $iline_oo != $iline_io );
9219 # DO-NOT-WELD RULE 2A:
9220 # Do not weld an opening asub brace in -lp mode if -asbl is set. This
9221 # helps avoid instabilities in one-line block formation, and fixes
9222 # b1241. Previously, the '$is_one_line_weld' flag was tested here
9223 # instead of -asbl, and this fixed most cases. But it turns out that
9224 # the real problem was the -asbl flag, and switching to this was
9225 # necessary to fixe b1268. This also fixes b1269, b1277, b1278.
9228 ##&& $is_one_line_weld
9229 && $rOpts_line_up_parentheses
9231 && $ris_asub_block->{$outer_seqno}
9234 $do_not_weld_rule = '2A';
9237 # DO-NOT-WELD RULE 3:
9238 # Do not weld if this makes our line too long.
9239 # Use a tolerance which depends on if the old tokens were welded
9240 # (fixes cases b746 b748 b749 b750 b752 b753 b754 b755 b756 b758 b759)
9241 if ( !$do_not_weld_rule ) {
9243 # Measure to a little beyond the inner opening token if it is
9244 # followed by a bare word, which may have unusual line break rules.
9246 # NOTE: Originally this was OLD RULE 6: do not weld to a container
9247 # which is followed on the same line by an unknown bareword token.
9248 # This can cause blinkers (cases b626, b611). But OK to weld one
9249 # line welds to fix cases b1057 b1064. For generality, OLD RULE 6
9250 # has been merged into RULE 3 here to also fix cases b1078 b1091.
9252 my $K_for_length = $Kinner_opening;
9253 my $Knext_io = $self->K_next_nonblank($Kinner_opening);
9254 next unless ( defined($Knext_io) ); # shouldn't happen
9255 my $type_io_next = $rLL->[$Knext_io]->[_TYPE_];
9257 # Note: may need to eventually also include other types here,
9258 # such as 'Z' and 'Y': if ($type_io_next =~ /^[ZYw]$/) {
9259 if ( $type_io_next eq 'w' ) {
9260 my $Knext_io2 = $self->K_next_nonblank($Knext_io);
9261 next unless ( defined($Knext_io2) );
9262 my $type_io_next2 = $rLL->[$Knext_io2]->[_TYPE_];
9263 if ( !$type_ok_after_bareword{$type_io_next2} ) {
9264 $K_for_length = $Knext_io2;
9268 # Use a tolerance for welds over multiple lines to avoid blinkers.
9269 # We can use zero tolerance if it looks like we are working on an
9272 $is_one_line_weld || $is_multiline_weld
9276 # By how many characters does this exceed the text window?
9278 $self->cumulative_length_before_K($K_for_length) -
9279 $starting_lentot + 1 + $tol -
9280 $maximum_text_length;
9282 # Old patch: Use '>=0' instead of '> 0' here to fix cases b995 b998
9283 # b1000 b1001 b1007 b1008 b1009 b1010 b1011 b1012 b1016 b1017 b1018
9284 # Revised patch: New tolerance definition allows going back to '> 0'
9285 # here. This fixes case b1124. See also cases b1087 and b1087a.
9286 if ( $excess > 0 ) { $do_not_weld_rule = 3 }
9290 "RULE 3 test: excess length to K=$Kinner_opening is $excess > 0 with tol= $tol ?) \n";
9294 # DO-NOT-WELD RULE 4; implemented for git#10:
9295 # Do not weld an opening -ce brace if the next container is on a single
9296 # line, different from the opening brace. (This is very rare). For
9297 # example, given the following with -ce, we will avoid joining the {
9301 # [ $_, length($_) ]
9304 # because this would produce a terminal one-line block:
9306 # } else { [ $_, length($_) ] }
9308 # which may not be what is desired. But given this input:
9310 # } else { [ $_, length($_) ] }
9312 # then we will do the weld and retain the one-line block
9313 if ( !$do_not_weld_rule && $rOpts->{'cuddled-else'} ) {
9314 my $block_type = $rblock_type_of_seqno->{$outer_seqno};
9315 if ( $block_type && $rcuddled_block_types->{'*'}->{$block_type} ) {
9316 my $io_line = $inner_opening->[_LINE_INDEX_];
9317 my $ic_line = $inner_closing->[_LINE_INDEX_];
9318 my $oo_line = $outer_opening->[_LINE_INDEX_];
9319 if ( $oo_line < $io_line && $ic_line == $io_line ) {
9320 $do_not_weld_rule = 4;
9325 # DO-NOT-WELD RULE 5: do not include welds excluded by user
9328 && %weld_nested_exclusion_rules
9329 && ( $self->is_excluded_weld( $Kouter_opening, $starting_new_weld )
9330 || $self->is_excluded_weld( $Kinner_opening, 0 ) )
9333 $do_not_weld_rule = 5;
9336 # DO-NOT-WELD RULE 6: This has been merged into RULE 3 above.
9338 # DO-NOT-WELD RULE 7: Do not weld if this conflicts with -bom
9340 if ( !$do_not_weld_rule
9341 && $rOpts_break_at_old_method_breakpoints
9342 && $iline_io > $iline_oo )
9345 foreach my $iline ( $iline_oo + 1 .. $iline_io ) {
9346 my $rK_range = $rlines->[$iline]->{_rK_range};
9347 next unless defined($rK_range);
9348 my ( $Kfirst, $Klast ) = @{$rK_range};
9349 next unless defined($Kfirst);
9350 if ( $rLL->[$Kfirst]->[_TYPE_] eq '->' ) {
9351 $do_not_weld_rule = 7;
9357 if ($do_not_weld_rule) {
9359 # After neglecting a pair, we start measuring from start of point
9360 # io ... but not if previous type does not like to be separated
9361 # from its container (fixes case b1184)
9362 my $Kprev = $self->K_previous_nonblank($Kinner_opening);
9363 my $type_prev = defined($Kprev) ? $rLL->[$Kprev]->[_TYPE_] : 'w';
9364 if ( !$has_tight_paren{$type_prev} ) {
9365 my $starting_level = $inner_opening->[_LEVEL_];
9366 my $starting_ci_level = $inner_opening->[_CI_LEVEL_];
9368 $self->cumulative_length_before_K($Kinner_opening);
9369 $maximum_text_length =
9370 $maximum_text_length_at_level[$starting_level] -
9371 $starting_ci_level * $rOpts_continuation_indentation;
9375 $Msg .= "Not welding due to RULE $do_not_weld_rule\n";
9379 # Normally, a broken pair should not decrease indentation of
9380 # intermediate tokens:
9381 ## if ( $last_pair_broken ) { next }
9382 # However, for long strings of welded tokens, such as '{{{{{{...'
9383 # we will allow broken pairs to also remove indentation.
9384 # This will keep very long strings of opening and closing
9385 # braces from marching off to the right. We will do this if the
9386 # number of tokens in a weld before the broken weld is 4 or more.
9387 # This rule will mainly be needed for test scripts, since typical
9388 # welds have fewer than about 4 welded tokens.
9389 if ( !@welds || @{ $welds[-1] } < 4 ) { next }
9392 # otherwise start new weld ...
9393 elsif ($starting_new_weld) {
9394 $weld_count_this_start++;
9396 $Msg .= "Starting new weld\n";
9401 $rK_weld_right->{$Kouter_opening} = $Kinner_opening;
9402 $rK_weld_left->{$Kinner_opening} = $Kouter_opening;
9404 $rK_weld_right->{$Kinner_closing} = $Kouter_closing;
9405 $rK_weld_left->{$Kouter_closing} = $Kinner_closing;
9408 # ... or extend current weld
9410 $weld_count_this_start++;
9412 $Msg .= "Extending current weld\n";
9415 unshift @{ $welds[-1] }, $inner_seqno;
9416 $rK_weld_right->{$Kouter_opening} = $Kinner_opening;
9417 $rK_weld_left->{$Kinner_opening} = $Kouter_opening;
9419 $rK_weld_right->{$Kinner_closing} = $Kouter_closing;
9420 $rK_weld_left->{$Kouter_closing} = $Kinner_closing;
9423 # After welding, reduce the indentation level if all intermediate tokens
9424 my $dlevel = $outer_opening->[_LEVEL_] - $inner_opening->[_LEVEL_];
9425 if ( $dlevel != 0 ) {
9426 my $Kstart = $Kinner_opening;
9427 my $Kstop = $Kinner_closing;
9428 foreach my $KK ( $Kstart .. $Kstop ) {
9429 $rLL->[$KK]->[_LEVEL_] += $dlevel;
9432 # Copy opening ci level to help break at = for -lp mode (case b1124)
9433 $rLL->[$Kinner_opening]->[_CI_LEVEL_] =
9434 $rLL->[$Kouter_opening]->[_CI_LEVEL_];
9436 # But do not copy the closing ci level ... it can give poor results
9437 ## $rLL->[$Kinner_closing]->[_CI_LEVEL_] =
9438 ## $rLL->[$Kouter_closing]->[_CI_LEVEL_];
9443 } ## end sub weld_nested_containers
9445 sub weld_nested_quotes {
9447 # Called once per file for option '--weld-nested-containers'. This
9448 # does welding on qw quotes.
9452 # See if quotes are excluded from welding
9453 my $rflags = $weld_nested_exclusion_rules{'q'};
9454 return if ( defined($rflags) && defined( $rflags->[1] ) );
9456 my $rK_weld_left = $self->[_rK_weld_left_];
9457 my $rK_weld_right = $self->[_rK_weld_right_];
9459 my $rLL = $self->[_rLL_];
9460 return unless ( defined($rLL) && @{$rLL} );
9463 my $K_opening_container = $self->[_K_opening_container_];
9464 my $K_closing_container = $self->[_K_closing_container_];
9465 my $rlines = $self->[_rlines_];
9467 my $starting_lentot;
9468 my $maximum_text_length;
9470 my $is_single_quote = sub {
9471 my ( $Kbeg, $Kend, $quote_type ) = @_;
9472 foreach my $K ( $Kbeg .. $Kend ) {
9473 my $test_type = $rLL->[$K]->[_TYPE_];
9474 next if ( $test_type eq 'b' );
9475 return if ( $test_type ne $quote_type );
9480 # Length tolerance - same as previously used for sub weld_nested
9482 1 + max( $rOpts_indent_columns, $rOpts_continuation_indentation );
9484 # look for single qw quotes nested in containers
9485 my $KNEXT = $self->[_K_first_seq_item_];
9486 while ( defined($KNEXT) ) {
9488 $KNEXT = $rLL->[$KNEXT]->[_KNEXT_SEQ_ITEM_];
9489 my $rtoken_vars = $rLL->[$KK];
9490 my $outer_seqno = $rtoken_vars->[_TYPE_SEQUENCE_];
9491 if ( !$outer_seqno ) {
9492 next if ( $KK == 0 ); # first token in file may not be container
9494 # A fault here implies that an error was made in the little loop at
9495 # the bottom of sub 'respace_tokens' which set the values of
9496 # _KNEXT_SEQ_ITEM_. Or an error has been introduced in the
9497 # loop control lines above.
9498 Fault("sequence = $outer_seqno not defined at K=$KK")
9503 my $token = $rtoken_vars->[_TOKEN_];
9504 if ( $is_opening_token{$token} ) {
9506 # see if the next token is a quote of some type
9509 if ( $Kn < $Num && $rLL->[$Kn]->[_TYPE_] eq 'b' );
9510 next unless ( $Kn < $Num );
9512 my $next_token = $rLL->[$Kn]->[_TOKEN_];
9513 my $next_type = $rLL->[$Kn]->[_TYPE_];
9515 unless ( ( $next_type eq 'q' || $next_type eq 'Q' )
9516 && $next_token =~ /^q/ );
9518 # The token before the closing container must also be a quote
9519 my $Kouter_closing = $K_closing_container->{$outer_seqno};
9520 my $Kinner_closing = $self->K_previous_nonblank($Kouter_closing);
9521 next unless $rLL->[$Kinner_closing]->[_TYPE_] eq $next_type;
9523 # This is an inner opening container
9524 my $Kinner_opening = $Kn;
9526 # Do not weld to single-line quotes. Nothing is gained, and it may
9528 next if ( $Kinner_closing == $Kinner_opening );
9530 # Only weld to quotes delimited with container tokens. This is
9531 # because welding to arbitrary quote delimiters can produce code
9532 # which is less readable than without welding.
9533 my $closing_delimiter =
9534 substr( $rLL->[$Kinner_closing]->[_TOKEN_], -1, 1 );
9536 unless ( $is_closing_token{$closing_delimiter}
9537 || $closing_delimiter eq '>' );
9539 # Now make sure that there is just a single quote in the container
9543 $Kinner_opening + 1,
9544 $Kinner_closing - 1,
9549 # OK: This is a candidate for welding
9550 my $Msg = EMPTY_STRING;
9553 my $Kouter_opening = $K_opening_container->{$outer_seqno};
9554 my $iline_oo = $rLL->[$Kouter_opening]->[_LINE_INDEX_];
9555 my $iline_io = $rLL->[$Kinner_opening]->[_LINE_INDEX_];
9556 my $iline_oc = $rLL->[$Kouter_closing]->[_LINE_INDEX_];
9557 my $iline_ic = $rLL->[$Kinner_closing]->[_LINE_INDEX_];
9559 ( $iline_oo == $iline_io && $iline_ic == $iline_oc );
9561 # Fix for case b1189. If quote is marked as type 'Q' then only weld
9562 # if the two closing tokens are on the same input line. Otherwise,
9563 # the closing line will be output earlier in the pipeline than
9564 # other CODE lines and welding will not actually occur. This will
9565 # leave a half-welded structure with potential formatting
9566 # instability. This might be fixed by adding a check for a weld on
9567 # a closing Q token and sending it down the normal channel, but it
9568 # would complicate the code and is potentially risky.
9571 && $next_type eq 'Q'
9572 && $iline_ic != $iline_oc );
9574 # If welded, the line must not exceed allowed line length
9575 ( my $ok_to_weld, $maximum_text_length, $starting_lentot, my $msg )
9576 = $self->setup_new_weld_measurements( $Kouter_opening,
9578 if ( !$ok_to_weld ) {
9579 if (DEBUG_WELD) { print $msg}
9584 $rLL->[$Kinner_opening]->[_CUMULATIVE_LENGTH_] - $starting_lentot;
9585 my $excess = $length + $multiline_tol - $maximum_text_length;
9587 my $excess_max = ( $is_old_weld ? $multiline_tol : 0 );
9588 if ( $excess >= $excess_max ) {
9593 if ( !$is_old_weld ) { $is_old_weld = EMPTY_STRING }
9595 "excess=$excess>=$excess_max, multiline_tol=$multiline_tol, is_old_weld='$is_old_weld'\n";
9598 # Check weld exclusion rules for outer container
9599 if ( !$do_not_weld ) {
9600 my $is_leading = !defined( $rK_weld_left->{$Kouter_opening} );
9601 if ( $self->is_excluded_weld( $KK, $is_leading ) ) {
9604 "No qw weld due to weld exclusion rules for outer container\n";
9610 # Check the length of the last line (fixes case b1039)
9611 if ( !$do_not_weld ) {
9612 my $rK_range_ic = $rlines->[$iline_ic]->{_rK_range};
9613 my ( $Kfirst_ic, $Klast_ic ) = @{$rK_range_ic};
9615 $self->excess_line_length_for_Krange( $Kfirst_ic,
9618 # Allow extra space for additional welded closing container(s)
9619 # and a space and comma or semicolon.
9620 # NOTE: weld len has not been computed yet. Use 2 spaces
9621 # for now, correct for a single weld. This estimate could
9622 # be made more accurate if necessary.
9624 defined( $rK_weld_right->{$Kouter_closing} ) ? 2 : 0;
9625 if ( $excess_ic + $weld_len + 2 > 0 ) {
9628 "No qw weld due to excess ending line length=$excess_ic + $weld_len + 2 > 0\n";
9636 $Msg .= "Not Welding QW\n";
9644 $Msg .= "Welding QW\n";
9648 $rK_weld_right->{$Kouter_opening} = $Kinner_opening;
9649 $rK_weld_left->{$Kinner_opening} = $Kouter_opening;
9651 $rK_weld_right->{$Kinner_closing} = $Kouter_closing;
9652 $rK_weld_left->{$Kouter_closing} = $Kinner_closing;
9654 # Undo one indentation level if an extra level was added to this
9657 $self->[_rstarting_multiline_qw_seqno_by_K_]->{$Kinner_opening};
9659 && $self->[_rmultiline_qw_has_extra_level_]->{$qw_seqno} )
9661 foreach my $K ( $Kinner_opening + 1 .. $Kinner_closing - 1 ) {
9662 $rLL->[$K]->[_LEVEL_] -= 1;
9664 $rLL->[$Kinner_opening]->[_CI_LEVEL_] = 0;
9665 $rLL->[$Kinner_closing]->[_CI_LEVEL_] = 0;
9668 # undo CI for other welded quotes
9671 foreach my $K ( $Kinner_opening .. $Kinner_closing ) {
9672 $rLL->[$K]->[_CI_LEVEL_] = 0;
9676 # Change the level of a closing qw token to be that of the outer
9677 # containing token. This will allow -lp indentation to function
9678 # correctly in the vertical aligner.
9679 # Patch to fix c002: but not if it contains text
9680 if ( length( $rLL->[$Kinner_closing]->[_TOKEN_] ) == 1 ) {
9681 $rLL->[$Kinner_closing]->[_LEVEL_] =
9682 $rLL->[$Kouter_closing]->[_LEVEL_];
9687 } ## end sub weld_nested_quotes
9689 sub is_welded_at_seqno {
9691 my ( $self, $seqno ) = @_;
9693 # given a sequence number:
9694 # return true if it is welded either left or right
9695 # return false otherwise
9696 return unless ( $total_weld_count && defined($seqno) );
9697 my $KK_o = $self->[_K_opening_container_]->{$seqno};
9698 return unless defined($KK_o);
9699 return defined( $self->[_rK_weld_left_]->{$KK_o} )
9700 || defined( $self->[_rK_weld_right_]->{$KK_o} );
9701 } ## end sub is_welded_at_seqno
9703 sub mark_short_nested_blocks {
9705 # This routine looks at the entire file and marks any short nested blocks
9706 # which should not be broken. The results are stored in the hash
9707 # $rshort_nested->{$type_sequence}
9708 # which will be true if the container should remain intact.
9710 # For example, consider the following line:
9712 # sub cxt_two { sort { $a <=> $b } test_if_list() }
9714 # The 'sort' block is short and nested within an outer sub block.
9715 # Normally, the existence of the 'sort' block will force the sub block to
9716 # break open, but this is not always desirable. Here we will set a flag for
9717 # the sort block to prevent this. To give the user control, we will
9718 # follow the input file formatting. If either of the blocks is broken in
9719 # the input file then we will allow it to remain broken. Otherwise we will
9720 # set a flag to keep it together in later formatting steps.
9722 # The flag which is set here will be checked in two places:
9723 # 'sub process_line_of_CODE' and 'sub starting_one_line_block'
9726 return if $rOpts->{'indent-only'};
9728 my $rLL = $self->[_rLL_];
9729 return unless ( defined($rLL) && @{$rLL} );
9731 return unless ( $rOpts->{'one-line-block-nesting'} );
9733 my $K_opening_container = $self->[_K_opening_container_];
9734 my $K_closing_container = $self->[_K_closing_container_];
9735 my $rbreak_container = $self->[_rbreak_container_];
9736 my $rshort_nested = $self->[_rshort_nested_];
9737 my $rlines = $self->[_rlines_];
9738 my $rblock_type_of_seqno = $self->[_rblock_type_of_seqno_];
9740 # Variables needed for estimating line lengths
9741 my $maximum_text_length;
9742 my $starting_lentot;
9745 my $excess_length_to_K = sub {
9748 # Estimate the length from the line start to a given token
9749 my $length = $self->cumulative_length_before_K($K) - $starting_lentot;
9750 my $excess_length = $length + $length_tol - $maximum_text_length;
9751 return ($excess_length);
9754 my $is_broken_block = sub {
9756 # a block is broken if the input line numbers of the braces differ
9758 my $K_opening = $K_opening_container->{$seqno};
9759 return unless ( defined($K_opening) );
9760 my $K_closing = $K_closing_container->{$seqno};
9761 return unless ( defined($K_closing) );
9762 return $rbreak_container->{$seqno}
9763 || $rLL->[$K_closing]->[_LINE_INDEX_] !=
9764 $rLL->[$K_opening]->[_LINE_INDEX_];
9767 # loop over all containers
9768 my @open_block_stack;
9770 my $KNEXT = $self->[_K_first_seq_item_];
9771 while ( defined($KNEXT) ) {
9773 $KNEXT = $rLL->[$KNEXT]->[_KNEXT_SEQ_ITEM_];
9774 my $rtoken_vars = $rLL->[$KK];
9775 my $type_sequence = $rtoken_vars->[_TYPE_SEQUENCE_];
9776 if ( !$type_sequence ) {
9777 next if ( $KK == 0 ); # first token in file may not be container
9779 # A fault here implies that an error was made in the little loop at
9780 # the bottom of sub 'respace_tokens' which set the values of
9781 # _KNEXT_SEQ_ITEM_. Or an error has been introduced in the
9782 # loop control lines above.
9783 Fault("sequence = $type_sequence not defined at K=$KK")
9788 # Patch: do not mark short blocks with welds.
9789 # In some cases blinkers can form (case b690).
9790 if ( $total_weld_count && $self->is_welded_at_seqno($type_sequence) ) {
9794 # We are just looking at code blocks
9795 my $token = $rtoken_vars->[_TOKEN_];
9796 my $type = $rtoken_vars->[_TYPE_];
9797 next unless ( $type eq $token );
9798 next unless ( $rblock_type_of_seqno->{$type_sequence} );
9800 # Keep a stack of all acceptable block braces seen.
9801 # Only consider blocks entirely on one line so dump the stack when line
9803 my $iline_last = $iline;
9804 $iline = $rLL->[$KK]->[_LINE_INDEX_];
9805 if ( $iline != $iline_last ) { @open_block_stack = () }
9807 if ( $token eq '}' ) {
9808 if (@open_block_stack) { pop @open_block_stack }
9810 next unless ( $token eq '{' );
9812 # block must be balanced (bad scripts may be unbalanced)
9813 my $K_opening = $K_opening_container->{$type_sequence};
9814 my $K_closing = $K_closing_container->{$type_sequence};
9815 next unless ( defined($K_opening) && defined($K_closing) );
9817 # require that this block be entirely on one line
9818 next if ( $is_broken_block->($type_sequence) );
9820 # See if this block fits on one line of allowed length (which may
9821 # be different from the input script)
9823 $KK <= 0 ? 0 : $rLL->[ $KK - 1 ]->[_CUMULATIVE_LENGTH_];
9824 my $level = $rLL->[$KK]->[_LEVEL_];
9825 my $ci_level = $rLL->[$KK]->[_CI_LEVEL_];
9826 $maximum_text_length =
9827 $maximum_text_length_at_level[$level] -
9828 $ci_level * $rOpts_continuation_indentation;
9830 # Dump the stack if block is too long and skip this block
9831 if ( $excess_length_to_K->($K_closing) > 0 ) {
9832 @open_block_stack = ();
9836 # OK, Block passes tests, remember it
9837 push @open_block_stack, $type_sequence;
9839 # We are only marking nested code blocks,
9840 # so check for a previous block on the stack
9841 next unless ( @open_block_stack > 1 );
9843 # Looks OK, mark this as a short nested block
9844 $rshort_nested->{$type_sequence} = 1;
9848 } ## end sub mark_short_nested_blocks
9850 sub adjust_indentation_levels {
9854 # Called once per file to do special indentation adjustments.
9855 # These routines adjust levels either by changing _CI_LEVEL_ directly or
9856 # by setting modified levels in the array $self->[_radjusted_levels_].
9858 # Initialize the adjusted levels. These will be the levels actually used
9859 # for computing indentation.
9861 # NOTE: This routine is called after the weld routines, which may have
9862 # already adjusted _LEVEL_, so we are making adjustments on top of those
9863 # levels. It would be much nicer to have the weld routines also use this
9864 # adjustment, but that gets complicated when we combine -gnu -wn and have
9865 # some welded quotes.
9866 my $Klimit = $self->[_Klimit_];
9867 my $rLL = $self->[_rLL_];
9868 my $radjusted_levels = $self->[_radjusted_levels_];
9870 return unless ( defined($Klimit) );
9872 foreach my $KK ( 0 .. $Klimit ) {
9873 $radjusted_levels->[$KK] = $rLL->[$KK]->[_LEVEL_];
9876 # First set adjusted levels for any non-indenting braces.
9877 $self->do_non_indenting_braces();
9879 # Adjust breaks and indentation list containers
9880 $self->break_before_list_opening_containers();
9882 # Set adjusted levels for the whitespace cycle option.
9883 $self->whitespace_cycle_adjustment();
9885 $self->braces_left_setup();
9887 # Adjust continuation indentation if -bli is set
9888 $self->bli_adjustment();
9890 $self->extended_ci()
9891 if ($rOpts_extended_continuation_indentation);
9893 # Now clip any adjusted levels to be non-negative
9894 $self->clip_adjusted_levels();
9897 } ## end sub adjust_indentation_levels
9899 sub clip_adjusted_levels {
9901 # Replace any negative adjusted levels with zero.
9902 # Negative levels can occur in files with brace errors.
9904 my $radjusted_levels = $self->[_radjusted_levels_];
9905 return unless defined($radjusted_levels) && @{$radjusted_levels};
9906 foreach ( @{$radjusted_levels} ) { $_ = 0 if ( $_ < 0 ) }
9908 } ## end sub clip_adjusted_levels
9910 sub do_non_indenting_braces {
9912 # Called once per file to handle the --non-indenting-braces parameter.
9913 # Remove indentation within marked braces if requested
9916 # Any non-indenting braces have been found by sub find_non_indenting_braces
9917 # and are defined by the following hash:
9918 my $rseqno_non_indenting_brace_by_ix =
9919 $self->[_rseqno_non_indenting_brace_by_ix_];
9920 return unless ( %{$rseqno_non_indenting_brace_by_ix} );
9922 my $rLL = $self->[_rLL_];
9923 my $rlines = $self->[_rlines_];
9924 my $K_opening_container = $self->[_K_opening_container_];
9925 my $K_closing_container = $self->[_K_closing_container_];
9926 my $rspecial_side_comment_type = $self->[_rspecial_side_comment_type_];
9927 my $radjusted_levels = $self->[_radjusted_levels_];
9929 # First locate all of the marked blocks
9931 foreach my $ix ( keys %{$rseqno_non_indenting_brace_by_ix} ) {
9932 my $seqno = $rseqno_non_indenting_brace_by_ix->{$ix};
9933 my $KK = $K_opening_container->{$seqno};
9934 my $line_of_tokens = $rlines->[$ix];
9935 my $rK_range = $line_of_tokens->{_rK_range};
9936 my ( $Kfirst, $Klast ) = @{$rK_range};
9937 $rspecial_side_comment_type->{$Klast} = 'NIB';
9938 push @K_stack, [ $KK, 1 ];
9939 my $Kc = $K_closing_container->{$seqno};
9940 push @K_stack, [ $Kc, -1 ] if ( defined($Kc) );
9942 return unless (@K_stack);
9943 @K_stack = sort { $a->[0] <=> $b->[0] } @K_stack;
9945 # Then loop to remove indentation within marked blocks
9948 foreach my $item (@K_stack) {
9949 my ( $KK, $inc ) = @{$item};
9952 foreach ( $KK_last + 1 .. $KK ) {
9953 $radjusted_levels->[$_] -= $ndeep;
9956 # We just subtracted the old $ndeep value, which only applies to a
9957 # '{'. The new $ndeep applies to a '}', so we undo the error.
9958 if ( $inc < 0 ) { $radjusted_levels->[$KK] += 1 }
9965 } ## end sub do_non_indenting_braces
9967 sub whitespace_cycle_adjustment {
9971 # Called once per file to implement the --whitespace-cycle option
9972 my $rLL = $self->[_rLL_];
9973 return unless ( defined($rLL) && @{$rLL} );
9974 my $radjusted_levels = $self->[_radjusted_levels_];
9975 my $maximum_level = $self->[_maximum_level_];
9977 if ( $rOpts_whitespace_cycle
9978 && $rOpts_whitespace_cycle > 0
9979 && $rOpts_whitespace_cycle < $maximum_level )
9982 my $Kmax = @{$rLL} - 1;
9984 my $whitespace_last_level = -1;
9985 my @whitespace_level_stack = ();
9986 my $last_nonblank_type = 'b';
9987 my $last_nonblank_token = EMPTY_STRING;
9988 foreach my $KK ( 0 .. $Kmax ) {
9989 my $level_abs = $radjusted_levels->[$KK];
9990 my $level = $level_abs;
9991 if ( $level_abs < $whitespace_last_level ) {
9992 pop(@whitespace_level_stack);
9994 if ( !@whitespace_level_stack ) {
9995 push @whitespace_level_stack, $level_abs;
9997 elsif ( $level_abs > $whitespace_last_level ) {
9998 $level = $whitespace_level_stack[-1] +
9999 ( $level_abs - $whitespace_last_level );
10002 # 1 Try to break at a block brace
10004 $level > $rOpts_whitespace_cycle
10005 && $last_nonblank_type eq '{'
10006 && $last_nonblank_token eq '{'
10009 # 2 Then either a brace or bracket
10010 || ( $level > $rOpts_whitespace_cycle + 1
10011 && $last_nonblank_token =~ /^[\{\[]$/ )
10013 # 3 Then a paren too
10014 || $level > $rOpts_whitespace_cycle + 2
10019 push @whitespace_level_stack, $level;
10021 $level = $whitespace_level_stack[-1];
10022 $radjusted_levels->[$KK] = $level;
10024 $whitespace_last_level = $level_abs;
10025 my $type = $rLL->[$KK]->[_TYPE_];
10026 my $token = $rLL->[$KK]->[_TOKEN_];
10027 if ( $type ne 'b' ) {
10028 $last_nonblank_type = $type;
10029 $last_nonblank_token = $token;
10034 } ## end sub whitespace_cycle_adjustment
10036 use constant DEBUG_BBX => 0;
10038 sub break_before_list_opening_containers {
10042 # This routine is called once per batch to implement parameters
10043 # --break-before-hash-brace=n and similar -bbx=n flags
10044 # and their associated indentation flags:
10045 # --break-before-hash-brace-and-indent and similar -bbxi=n
10047 # Nothing to do if none of the -bbx=n parameters has been set
10048 return unless %break_before_container_types;
10050 my $rLL = $self->[_rLL_];
10051 return unless ( defined($rLL) && @{$rLL} );
10053 # Loop over all opening container tokens
10054 my $K_opening_container = $self->[_K_opening_container_];
10055 my $K_closing_container = $self->[_K_closing_container_];
10056 my $ris_broken_container = $self->[_ris_broken_container_];
10057 my $ris_permanently_broken = $self->[_ris_permanently_broken_];
10058 my $rhas_list = $self->[_rhas_list_];
10059 my $rhas_broken_list = $self->[_rhas_broken_list_];
10060 my $rhas_broken_list_with_lec = $self->[_rhas_broken_list_with_lec_];
10061 my $radjusted_levels = $self->[_radjusted_levels_];
10062 my $rparent_of_seqno = $self->[_rparent_of_seqno_];
10063 my $rlines = $self->[_rlines_];
10064 my $rtype_count_by_seqno = $self->[_rtype_count_by_seqno_];
10065 my $rlec_count_by_seqno = $self->[_rlec_count_by_seqno_];
10066 my $rno_xci_by_seqno = $self->[_rno_xci_by_seqno_];
10067 my $rK_weld_right = $self->[_rK_weld_right_];
10068 my $rblock_type_of_seqno = $self->[_rblock_type_of_seqno_];
10071 max( 1, $rOpts_continuation_indentation, $rOpts_indent_columns );
10072 if ($rOpts_ignore_old_breakpoints) {
10074 # Patch suggested by b1231; the old tol was excessive.
10075 ## $length_tol += $rOpts_maximum_line_length;
10079 my $rbreak_before_container_by_seqno = {};
10080 my $rwant_reduced_ci = {};
10081 foreach my $seqno ( keys %{$K_opening_container} ) {
10083 #----------------------------------------------------------------
10084 # Part 1: Examine any -bbx=n flags
10085 #----------------------------------------------------------------
10087 next if ( $rblock_type_of_seqno->{$seqno} );
10088 my $KK = $K_opening_container->{$seqno};
10090 # This must be a list or contain a list.
10091 # Note1: switched from 'has_broken_list' to 'has_list' to fix b1024.
10092 # Note2: 'has_list' holds the depth to the sub-list. We will require
10093 # a depth of just 1
10094 my $is_list = $self->is_list_by_seqno($seqno);
10095 my $has_list = $rhas_list->{$seqno};
10097 # Fix for b1173: if welded opening container, use flag of innermost
10098 # seqno. Otherwise, the restriction $has_list==1 prevents triple and
10099 # higher welds from following the -BBX parameters.
10100 if ($total_weld_count) {
10101 my $KK_test = $rK_weld_right->{$KK};
10102 if ( defined($KK_test) ) {
10103 my $seqno_inner = $rLL->[$KK_test]->[_TYPE_SEQUENCE_];
10104 $is_list ||= $self->is_list_by_seqno($seqno_inner);
10105 $has_list = $rhas_list->{$seqno_inner};
10109 next unless ( $is_list || $has_list && $has_list == 1 );
10111 my $has_broken_list = $rhas_broken_list->{$seqno};
10112 my $has_list_with_lec = $rhas_broken_list_with_lec->{$seqno};
10114 # Only for types of container tokens with a non-default break option
10115 my $token = $rLL->[$KK]->[_TOKEN_];
10116 my $break_option = $break_before_container_types{$token};
10117 next unless ($break_option);
10119 # Do not use -bbx under stress for stability ... fixes b1300
10120 my $level = $rLL->[$KK]->[_LEVEL_];
10121 if ( $level >= $stress_level_beta ) {
10124 "BBX: Switching off at $seqno: level=$level exceeds beta stress level=$stress_level_beta\n";
10128 # Require previous nonblank to be '=' or '=>'
10129 my $Kprev = $KK - 1;
10130 next if ( $Kprev < 0 );
10131 my $prev_type = $rLL->[$Kprev]->[_TYPE_];
10132 if ( $prev_type eq 'b' ) {
10134 next if ( $Kprev < 0 );
10135 $prev_type = $rLL->[$Kprev]->[_TYPE_];
10137 next unless ( $is_equal_or_fat_comma{$prev_type} );
10139 my $ci = $rLL->[$KK]->[_CI_LEVEL_];
10141 #--------------------------------------------
10142 # New coding for option 2 (break if complex).
10143 #--------------------------------------------
10144 # This new coding uses clues which are invariant under formatting to
10145 # decide if a list is complex. For now it is only applied when -lp
10146 # and -vmll are used, but eventually it may become the standard method.
10147 # Fixes b1274, b1275, and others, including b1099.
10148 if ( $break_option == 2 ) {
10150 if ( $rOpts_line_up_parentheses
10151 || $rOpts_variable_maximum_line_length )
10154 # Start with the basic definition of a complex list...
10155 my $is_complex = $is_list && $has_list;
10157 # and it is also complex if the parent is a list
10158 if ( !$is_complex ) {
10159 my $parent = $rparent_of_seqno->{$seqno};
10160 if ( $self->is_list_by_seqno($parent) ) {
10165 # finally, we will call it complex if there are inner opening
10166 # and closing container tokens, not parens, within the outer
10167 # container tokens.
10168 if ( !$is_complex ) {
10169 my $Kp = $self->K_next_nonblank($KK);
10170 my $token_p = defined($Kp) ? $rLL->[$Kp]->[_TOKEN_] : 'b';
10171 if ( $is_opening_token{$token_p} && $token_p ne '(' ) {
10173 my $Kc = $K_closing_container->{$seqno};
10174 my $Km = $self->K_previous_nonblank($Kc);
10176 defined($Km) ? $rLL->[$Km]->[_TOKEN_] : 'b';
10178 # ignore any optional ending comma
10179 if ( $token_m eq ',' ) {
10180 $Km = $self->K_previous_nonblank($Km);
10182 defined($Km) ? $rLL->[$Km]->[_TOKEN_] : 'b';
10186 $is_closing_token{$token_m} && $token_m ne ')';
10190 # Convert to option 3 (always break) if complex
10191 next unless ($is_complex);
10196 # Fix for b1231: the has_list_with_lec does not cover all cases.
10197 # A broken container containing a list and with line-ending commas
10198 # will stay broken, so can be treated as if it had a list with lec.
10199 $has_list_with_lec ||=
10201 && $ris_broken_container->{$seqno}
10202 && $rlec_count_by_seqno->{$seqno};
10206 "BBX: Looking at seqno=$seqno, token = $token with option=$break_option\n";
10208 # -bbx=1 = stable, try to follow input
10209 if ( $break_option == 1 ) {
10211 my $iline = $rLL->[$KK]->[_LINE_INDEX_];
10212 my $rK_range = $rlines->[$iline]->{_rK_range};
10213 my ( $Kfirst, $Klast ) = @{$rK_range};
10214 next unless ( $KK == $Kfirst );
10217 # -bbx=2 => apply this style only for a 'complex' list
10218 elsif ( $break_option == 2 ) {
10220 # break if this list contains a broken list with line-ending comma
10222 my $Msg = EMPTY_STRING;
10223 if ($has_list_with_lec) {
10225 DEBUG_BBX && do { $Msg = "has list with lec;" };
10228 if ( !$ok_to_break ) {
10230 # Turn off -xci if -bbx=2 and this container has a sublist but
10231 # not a broken sublist. This avoids creating blinkers. The
10232 # problem is that -xci can cause one-line lists to break open,
10233 # and thereby creating formatting instability.
10234 # This fixes cases b1033 b1036 b1037 b1038 b1042 b1043 b1044
10235 # b1045 b1046 b1047 b1051 b1052 b1061.
10236 if ($has_list) { $rno_xci_by_seqno->{$seqno} = 1 }
10238 my $parent = $rparent_of_seqno->{$seqno};
10239 if ( $self->is_list_by_seqno($parent) ) {
10240 DEBUG_BBX && do { $Msg = "parent is list" };
10245 if ( !$ok_to_break ) {
10247 && print STDOUT "Not breaking at seqno=$seqno: $Msg\n";
10252 && print STDOUT "OK to break at seqno=$seqno: $Msg\n";
10254 # Patch: turn off -xci if -bbx=2 and -lp
10255 # This fixes cases b1090 b1095 b1101 b1116 b1118 b1121 b1122
10256 $rno_xci_by_seqno->{$seqno} = 1 if ($rOpts_line_up_parentheses);
10259 # -bbx=3 = always break
10260 elsif ( $break_option == 3 ) {
10265 # Shouldn't happen! Bad flag, but make behavior same as 3
10270 # Set a flag for actual implementation later in
10271 # sub insert_breaks_before_list_opening_containers
10272 $rbreak_before_container_by_seqno->{$seqno} = 1;
10274 && print STDOUT "BBX: ok to break at seqno=$seqno\n";
10276 # -bbxi=0: Nothing more to do if the ci value remains unchanged
10277 my $ci_flag = $container_indentation_options{$token};
10278 next unless ($ci_flag);
10280 # -bbxi=1: This option removes ci and is handled in
10281 # later sub final_indentation_adjustment
10282 if ( $ci_flag == 1 ) {
10283 $rwant_reduced_ci->{$seqno} = 1;
10287 # -bbxi=2: This option changes the level ...
10288 # This option can conflict with -xci in some cases. We can turn off
10289 # -xci for this container to avoid blinking. For now, only do this if
10290 # -vmll is set. ( fixes b1335, b1336 )
10291 if ($rOpts_variable_maximum_line_length) {
10292 $rno_xci_by_seqno->{$seqno} = 1;
10295 #----------------------------------------------------------------
10296 # Part 2: Perform tests before committing to changing ci and level
10297 #----------------------------------------------------------------
10299 # Before changing the ci level of the opening container, we need
10300 # to be sure that the container will be broken in the later stages of
10301 # formatting. We have to do this because we are working early in the
10302 # formatting pipeline. A problem can occur if we change the ci or
10303 # level of the opening token but do not actually break the container
10304 # open as expected. In most cases it wouldn't make any difference if
10305 # we changed ci or not, but there are some edge cases where this
10306 # can cause blinking states, so we need to try to only change ci if
10307 # the container will really be broken.
10309 # Only consider containers already broken
10310 next if ( !$ris_broken_container->{$seqno} );
10312 # Patch to fix issue b1305: the combination of -naws and ci>i appears
10313 # to cause an instability. It should almost never occur in practice.
10315 if (!$rOpts_add_whitespace
10316 && $rOpts_continuation_indentation > $rOpts_indent_columns );
10318 # Always ok to change ci for permanently broken containers
10319 if ( $ris_permanently_broken->{$seqno} ) {
10323 # Always OK if this list contains a broken sub-container with
10324 # a non-terminal line-ending comma
10325 if ($has_list_with_lec) { goto OK }
10327 # From here on we are considering a single container...
10329 # A single container must have at least 1 line-ending comma:
10330 next unless ( $rlec_count_by_seqno->{$seqno} );
10332 # Since it has a line-ending comma, it will stay broken if the -boc
10334 if ($rOpts_break_at_old_comma_breakpoints) { goto OK }
10336 # OK if the container contains multiple fat commas
10337 # Better: multiple lines with fat commas
10338 if ( !$rOpts_ignore_old_breakpoints ) {
10339 my $rtype_count = $rtype_count_by_seqno->{$seqno};
10340 next unless ($rtype_count);
10341 my $fat_comma_count = $rtype_count->{'=>'};
10343 && print STDOUT "BBX: fat comma count=$fat_comma_count\n";
10344 if ( $fat_comma_count && $fat_comma_count >= 2 ) { goto OK }
10347 # The last check we can make is to see if this container could fit on a
10348 # single line. Use the least possible indentation estimate, ci=0,
10349 # so we are not subtracting $ci * $rOpts_continuation_indentation from
10350 # tabulated $maximum_text_length value.
10351 my $maximum_text_length = $maximum_text_length_at_level[$level];
10352 my $K_closing = $K_closing_container->{$seqno};
10353 my $length = $self->cumulative_length_before_K($K_closing) -
10354 $self->cumulative_length_before_K($KK);
10355 my $excess_length = $length - $maximum_text_length;
10358 "BBX: excess=$excess_length: maximum_text_length=$maximum_text_length, length=$length, ci=$ci\n";
10360 # OK if the net container definitely breaks on length
10361 if ( $excess_length > $length_tol ) {
10363 && print STDOUT "BBX: excess_length=$excess_length\n";
10367 # Otherwise skip it
10370 #################################################################
10371 # Part 3: Looks OK: apply -bbx=n and any related -bbxi=n flag
10372 #################################################################
10376 DEBUG_BBX && print STDOUT "BBX: OK to break\n";
10384 # n=0 default indentation (usually one ci)
10385 # n=1 outdent one ci
10386 # n=2 indent one level (minus one ci)
10387 # n=3 indent one extra ci [This may be dropped]
10389 # NOTE: We are adjusting indentation of the opening container. The
10390 # closing container will normally follow the indentation of the opening
10391 # container automatically, so this is not currently done.
10394 # option 1: outdent
10395 if ( $ci_flag == 1 ) {
10399 # option 2: indent one level
10400 elsif ( $ci_flag == 2 ) {
10402 $radjusted_levels->[$KK] += 1;
10407 # Shouldn't happen - leave ci unchanged
10410 $rLL->[$KK]->[_CI_LEVEL_] = $ci if ( $ci >= 0 );
10413 $self->[_rbreak_before_container_by_seqno_] =
10414 $rbreak_before_container_by_seqno;
10415 $self->[_rwant_reduced_ci_] = $rwant_reduced_ci;
10417 } ## end sub break_before_list_opening_containers
10419 use constant DEBUG_XCI => 0;
10423 # This routine implements the -xci (--extended-continuation-indentation)
10424 # flag. We add CI to interior tokens of a container which itself has CI but
10425 # only if a token does not already have CI.
10427 # To do this, we will locate opening tokens which themselves have
10428 # continuation indentation (CI). We track them with their sequence
10429 # numbers. These sequence numbers are called 'controlling sequence
10430 # numbers'. They apply continuation indentation to the tokens that they
10431 # contain. These inner tokens remember their controlling sequence numbers.
10432 # Later, when these inner tokens are output, they have to see if the output
10433 # lines with their controlling tokens were output with CI or not. If not,
10434 # then they must remove their CI too.
10436 # The controlling CI concept works hierarchically. But CI itself is not
10437 # hierarchical; it is either on or off. There are some rare instances where
10438 # it would be best to have hierarchical CI too, but not enough to be worth
10439 # the programming effort.
10441 # The operations to remove unwanted CI are done in sub 'undo_ci'.
10445 my $rLL = $self->[_rLL_];
10446 return unless ( defined($rLL) && @{$rLL} );
10448 my $ris_list_by_seqno = $self->[_ris_list_by_seqno_];
10449 my $ris_seqno_controlling_ci = $self->[_ris_seqno_controlling_ci_];
10450 my $rseqno_controlling_my_ci = $self->[_rseqno_controlling_my_ci_];
10451 my $rlines = $self->[_rlines_];
10452 my $rno_xci_by_seqno = $self->[_rno_xci_by_seqno_];
10453 my $ris_bli_container = $self->[_ris_bli_container_];
10454 my $rblock_type_of_seqno = $self->[_rblock_type_of_seqno_];
10456 my %available_space;
10458 # Loop over all opening container tokens
10459 my $K_opening_container = $self->[_K_opening_container_];
10460 my $K_closing_container = $self->[_K_closing_container_];
10461 my $ris_broken_container = $self->[_ris_broken_container_];
10465 my $KNEXT = $self->[_K_first_seq_item_];
10467 # The following variable can be used to allow a little extra space to
10468 # avoid blinkers. A value $len_tol = 20 fixed the following
10469 # fixes cases: b1025 b1026 b1027 b1028 b1029 b1030 but NOT b1031.
10470 # It turned out that the real problem was mis-parsing a list brace as
10471 # a code block in a 'use' statement when the line length was extremely
10472 # small. A value of 0 works now, but a slightly larger value can
10473 # be used to minimize the chance of a blinker.
10476 while ( defined($KNEXT) ) {
10478 # Fix all tokens up to the next sequence item if we are changing CI
10481 my $is_list = $ris_list_by_seqno->{$seqno_top};
10482 my $space = $available_space{$seqno_top};
10483 my $length = $rLL->[$KLAST]->[_CUMULATIVE_LENGTH_];
10485 foreach my $Kt ( $KLAST + 1 .. $KNEXT - 1 ) {
10487 # But do not include tokens which might exceed the line length
10488 # and are not in a list.
10489 # ... This fixes case b1031
10490 my $length_before = $length;
10491 $length = $rLL->[$Kt]->[_CUMULATIVE_LENGTH_];
10493 !$rLL->[$Kt]->[_CI_LEVEL_]
10495 || $length - $length_before < $space
10496 || $rLL->[$Kt]->[_TYPE_] eq '#' )
10499 $rLL->[$Kt]->[_CI_LEVEL_] = 1;
10500 $rseqno_controlling_my_ci->{$Kt} = $seqno_top;
10504 $ris_seqno_controlling_ci->{$seqno_top} += $count;
10509 $KNEXT = $rLL->[$KNEXT]->[_KNEXT_SEQ_ITEM_];
10511 my $seqno = $rLL->[$KK]->[_TYPE_SEQUENCE_];
10512 my $K_opening = $K_opening_container->{$seqno};
10514 # see if we have reached the end of the current controlling container
10515 if ( $seqno_top && $seqno == $seqno_top ) {
10516 $seqno_top = pop @seqno_stack;
10519 # Patch to fix some block types...
10520 # Certain block types arrive from the tokenizer without CI but should
10521 # have it for this option. These include anonymous subs and
10522 # do sort map grep eval
10523 my $block_type = $rblock_type_of_seqno->{$seqno};
10524 if ( $block_type && $is_block_with_ci{$block_type} ) {
10525 $rLL->[$KK]->[_CI_LEVEL_] = 1;
10527 $rseqno_controlling_my_ci->{$KK} = $seqno_top;
10528 $ris_seqno_controlling_ci->{$seqno_top}++;
10532 # If this does not have ci, update ci if necessary and continue looking
10533 if ( !$rLL->[$KK]->[_CI_LEVEL_] ) {
10535 $rLL->[$KK]->[_CI_LEVEL_] = 1;
10536 $rseqno_controlling_my_ci->{$KK} = $seqno_top;
10537 $ris_seqno_controlling_ci->{$seqno_top}++;
10542 # Skip if requested by -bbx to avoid blinkers
10543 if ( $rno_xci_by_seqno->{$seqno} ) {
10547 # Skip if this is a -bli container (this fixes case b1065) Note: case
10548 # b1065 is also fixed by the update for b1055, so this update is not
10549 # essential now. But there does not seem to be a good reason to add
10550 # xci and bli together, so the update is retained.
10551 if ( $ris_bli_container->{$seqno} ) {
10555 # We are looking for opening container tokens with ci
10556 next unless ( defined($K_opening) && $KK == $K_opening );
10558 # Make sure there is a corresponding closing container
10559 # (could be missing if the script has a brace error)
10560 my $K_closing = $K_closing_container->{$seqno};
10561 next unless defined($K_closing);
10563 # Require different input lines. This will filter out a large number
10564 # of small hash braces and array brackets. If we accidentally filter
10565 # out an important container, it will get fixed on the next pass.
10567 $rLL->[$K_opening]->[_LINE_INDEX_] ==
10568 $rLL->[$K_closing]->[_LINE_INDEX_]
10569 && ( $rLL->[$K_closing]->[_CUMULATIVE_LENGTH_] -
10570 $rLL->[$K_opening]->[_CUMULATIVE_LENGTH_] >
10571 $rOpts_maximum_line_length )
10575 && print "XCI: Skipping seqno=$seqno, require different lines\n";
10579 # Do not apply -xci if adding extra ci will put the container contents
10580 # beyond the line length limit (fixes cases b899 b935)
10581 my $level = $rLL->[$K_opening]->[_LEVEL_];
10582 my $ci_level = $rLL->[$K_opening]->[_CI_LEVEL_];
10583 my $maximum_text_length =
10584 $maximum_text_length_at_level[$level] -
10585 $ci_level * $rOpts_continuation_indentation;
10587 # Fix for b1197 b1198 b1199 b1200 b1201 b1202
10588 # Do not apply -xci if we are running out of space
10589 if ( $level >= $stress_level_beta ) {
10592 "XCI: Skipping seqno=$seqno, level=$level exceeds stress level=$stress_level_beta\n";
10596 # remember how much space is available for patch b1031 above
10598 $maximum_text_length - $len_tol - $rOpts_continuation_indentation;
10600 if ( $space < 0 ) {
10601 DEBUG_XCI && print "XCI: Skipping seqno=$seqno, space=$space\n";
10604 DEBUG_XCI && print "XCI: OK seqno=$seqno, space=$space\n";
10606 $available_space{$seqno} = $space;
10608 # This becomes the next controlling container
10609 push @seqno_stack, $seqno_top if ($seqno_top);
10610 $seqno_top = $seqno;
10613 } ## end sub extended_ci
10615 sub braces_left_setup {
10617 # Called once per file to mark all -bl, -sbl, and -asbl containers
10620 my $rOpts_bl = $rOpts->{'opening-brace-on-new-line'};
10621 my $rOpts_sbl = $rOpts->{'opening-sub-brace-on-new-line'};
10622 my $rOpts_asbl = $rOpts->{'opening-anonymous-sub-brace-on-new-line'};
10623 return unless ( $rOpts_bl || $rOpts_sbl || $rOpts_asbl );
10625 my $rLL = $self->[_rLL_];
10626 return unless ( defined($rLL) && @{$rLL} );
10628 # We will turn on this hash for braces controlled by these flags:
10629 my $rbrace_left = $self->[_rbrace_left_];
10631 my $rblock_type_of_seqno = $self->[_rblock_type_of_seqno_];
10632 my $ris_asub_block = $self->[_ris_asub_block_];
10633 my $ris_sub_block = $self->[_ris_sub_block_];
10634 foreach my $seqno ( keys %{$rblock_type_of_seqno} ) {
10636 my $block_type = $rblock_type_of_seqno->{$seqno};
10638 # use -asbl flag for an anonymous sub block
10639 if ( $ris_asub_block->{$seqno} ) {
10641 $rbrace_left->{$seqno} = 1;
10645 # use -sbl flag for a named sub
10646 elsif ( $ris_sub_block->{$seqno} ) {
10648 $rbrace_left->{$seqno} = 1;
10652 # use -bl flag if not a sub block of any type
10655 && $block_type =~ /$bl_pattern/
10656 && $block_type !~ /$bl_exclusion_pattern/ )
10658 $rbrace_left->{$seqno} = 1;
10663 } ## end sub braces_left_setup
10665 sub bli_adjustment {
10667 # Called once per file to implement the --brace-left-and-indent option.
10668 # If -bli is set, adds one continuation indentation for certain braces
10670 return unless ( $rOpts->{'brace-left-and-indent'} );
10671 my $rLL = $self->[_rLL_];
10672 return unless ( defined($rLL) && @{$rLL} );
10674 my $rblock_type_of_seqno = $self->[_rblock_type_of_seqno_];
10675 my $ris_bli_container = $self->[_ris_bli_container_];
10676 my $rbrace_left = $self->[_rbrace_left_];
10677 my $K_opening_container = $self->[_K_opening_container_];
10678 my $K_closing_container = $self->[_K_closing_container_];
10680 foreach my $seqno ( keys %{$rblock_type_of_seqno} ) {
10681 my $block_type = $rblock_type_of_seqno->{$seqno};
10683 && $block_type =~ /$bli_pattern/
10684 && $block_type !~ /$bli_exclusion_pattern/ )
10686 $ris_bli_container->{$seqno} = 1;
10687 $rbrace_left->{$seqno} = 1;
10688 my $Ko = $K_opening_container->{$seqno};
10689 my $Kc = $K_closing_container->{$seqno};
10690 if ( defined($Ko) && defined($Kc) ) {
10691 $rLL->[$Kc]->[_CI_LEVEL_] = ++$rLL->[$Ko]->[_CI_LEVEL_];
10696 } ## end sub bli_adjustment
10698 sub find_multiline_qw {
10702 # Multiline qw quotes are not sequenced items like containers { [ (
10703 # but behave in some respects in a similar way. So this routine finds them
10704 # and creates a separate sequence number system for later use.
10706 # This is straightforward because they always begin at the end of one line
10707 # and and at the beginning of a later line. This is true no matter how we
10708 # finally make our line breaks, so we can find them before deciding on new
10711 my $rstarting_multiline_qw_seqno_by_K = {};
10712 my $rending_multiline_qw_seqno_by_K = {};
10713 my $rKrange_multiline_qw_by_seqno = {};
10714 my $rmultiline_qw_has_extra_level = {};
10716 my $ris_excluded_lp_container = $self->[_ris_excluded_lp_container_];
10718 my $rlines = $self->[_rlines_];
10719 my $rLL = $self->[_rLL_];
10721 my $num_qw_seqno = 0;
10722 my $K_start_multiline_qw;
10724 foreach my $line_of_tokens ( @{$rlines} ) {
10726 my $line_type = $line_of_tokens->{_line_type};
10727 next unless ( $line_type eq 'CODE' );
10728 my $rK_range = $line_of_tokens->{_rK_range};
10729 my ( $Kfirst, $Klast ) = @{$rK_range};
10730 next unless ( defined($Kfirst) && defined($Klast) ); # skip blank line
10731 if ( defined($K_start_multiline_qw) ) {
10732 my $type = $rLL->[$Kfirst]->[_TYPE_];
10735 if ( $type ne 'q' ) {
10736 DEVEL_MODE && print STDERR <<EOM;
10737 STRANGE: started multiline qw at K=$K_start_multiline_qw but didn't see q qw at K=$Kfirst\n";
10739 $K_start_multiline_qw = undef;
10742 my $Kprev = $self->K_previous_nonblank($Kfirst);
10743 my $Knext = $self->K_next_nonblank($Kfirst);
10744 my $type_m = defined($Kprev) ? $rLL->[$Kprev]->[_TYPE_] : 'b';
10745 my $type_p = defined($Knext) ? $rLL->[$Knext]->[_TYPE_] : 'b';
10746 if ( $type_m eq 'q' && $type_p ne 'q' ) {
10747 $rending_multiline_qw_seqno_by_K->{$Kfirst} = $qw_seqno;
10748 $rKrange_multiline_qw_by_seqno->{$qw_seqno} =
10749 [ $K_start_multiline_qw, $Kfirst ];
10750 $K_start_multiline_qw = undef;
10754 if ( !defined($K_start_multiline_qw)
10755 && $rLL->[$Klast]->[_TYPE_] eq 'q' )
10757 my $Kprev = $self->K_previous_nonblank($Klast);
10758 my $Knext = $self->K_next_nonblank($Klast);
10759 my $type_m = defined($Kprev) ? $rLL->[$Kprev]->[_TYPE_] : 'b';
10760 my $type_p = defined($Knext) ? $rLL->[$Knext]->[_TYPE_] : 'b';
10761 if ( $type_m ne 'q' && $type_p eq 'q' ) {
10763 $qw_seqno = 'q' . $num_qw_seqno;
10764 $K_start_multiline_qw = $Klast;
10765 $rstarting_multiline_qw_seqno_by_K->{$Klast} = $qw_seqno;
10770 # Give multiline qw lists extra indentation instead of CI. This option
10771 # works well but is currently only activated when the -xci flag is set.
10772 # The reason is to avoid unexpected changes in formatting.
10773 if ($rOpts_extended_continuation_indentation) {
10774 while ( my ( $qw_seqno_x, $rKrange ) =
10775 each %{$rKrange_multiline_qw_by_seqno} )
10777 my ( $Kbeg, $Kend ) = @{$rKrange};
10779 # require isolated closing token
10780 my $token_end = $rLL->[$Kend]->[_TOKEN_];
10782 unless ( length($token_end) == 1
10783 && ( $is_closing_token{$token_end} || $token_end eq '>' ) );
10785 # require isolated opening token
10786 my $token_beg = $rLL->[$Kbeg]->[_TOKEN_];
10788 # allow space(s) after the qw
10789 if ( length($token_beg) > 3 && substr( $token_beg, 2, 1 ) =~ m/\s/ )
10791 $token_beg =~ s/\s+//;
10794 next unless ( length($token_beg) == 3 );
10796 foreach my $KK ( $Kbeg + 1 .. $Kend - 1 ) {
10797 $rLL->[$KK]->[_LEVEL_]++;
10798 $rLL->[$KK]->[_CI_LEVEL_] = 0;
10801 # set flag for -wn option, which will remove the level
10802 $rmultiline_qw_has_extra_level->{$qw_seqno_x} = 1;
10806 # For the -lp option we need to mark all parent containers of
10808 if ( $rOpts_line_up_parentheses && !$rOpts_extended_line_up_parentheses ) {
10810 while ( my ( $qw_seqno_x, $rKrange ) =
10811 each %{$rKrange_multiline_qw_by_seqno} )
10813 my ( $Kbeg, $Kend ) = @{$rKrange};
10814 my $parent_seqno = $self->parent_seqno_by_K($Kend);
10815 next unless ($parent_seqno);
10817 # If the parent container exactly surrounds this qw, then -lp
10818 # formatting seems to work so we will not mark it.
10819 my $is_tightly_contained;
10820 my $Kn = $self->K_next_nonblank($Kend);
10821 my $seqno_n = defined($Kn) ? $rLL->[$Kn]->[_TYPE_SEQUENCE_] : undef;
10822 if ( defined($seqno_n) && $seqno_n eq $parent_seqno ) {
10824 my $Kp = $self->K_previous_nonblank($Kbeg);
10826 defined($Kp) ? $rLL->[$Kp]->[_TYPE_SEQUENCE_] : undef;
10827 if ( defined($seqno_p) && $seqno_p eq $parent_seqno ) {
10828 $is_tightly_contained = 1;
10832 $ris_excluded_lp_container->{$parent_seqno} = 1
10833 unless ($is_tightly_contained);
10835 # continue up the tree marking parent containers
10837 $parent_seqno = $self->[_rparent_of_seqno_]->{$parent_seqno};
10839 unless ( defined($parent_seqno)
10840 && $parent_seqno ne SEQ_ROOT );
10841 $ris_excluded_lp_container->{$parent_seqno} = 1;
10846 $self->[_rstarting_multiline_qw_seqno_by_K_] =
10847 $rstarting_multiline_qw_seqno_by_K;
10848 $self->[_rending_multiline_qw_seqno_by_K_] =
10849 $rending_multiline_qw_seqno_by_K;
10850 $self->[_rKrange_multiline_qw_by_seqno_] = $rKrange_multiline_qw_by_seqno;
10851 $self->[_rmultiline_qw_has_extra_level_] = $rmultiline_qw_has_extra_level;
10854 } ## end sub find_multiline_qw
10856 use constant DEBUG_COLLAPSED_LENGTHS => 0;
10858 # Minimum space reserved for contents of a code block. A value of 40 has given
10859 # reasonable results. With a large line length, say -l=120, this will not
10860 # normally be noticeable but it will prevent making a mess in some edge cases.
10861 use constant MIN_BLOCK_LEN => 40;
10863 my %is_handle_type;
10866 my @q = qw( w C U G i k => );
10867 @is_handle_type{@q} = (1) x scalar(@q);
10871 _max_prong_len_ => $i++,
10872 _handle_len_ => $i++,
10877 _interrupted_list_rule_ => $i++,
10881 sub collapsed_lengths {
10885 #----------------------------------------------------------------
10886 # Define the collapsed lengths of containers for -xlp indentation
10887 #----------------------------------------------------------------
10889 # We need an estimate of the minimum required line length starting at any
10890 # opening container for the -xlp style. This is needed to avoid using too
10891 # much indentation space for lower level containers and thereby running
10892 # out of space for outer container tokens due to the maximum line length
10895 # The basic idea is that at each node in the tree we imagine that we have a
10896 # fork with a handle and collapsible prongs:
10900 # ------------|-------
10901 # handle |------------
10905 # Each prong has a minimum collapsed length. The collapsed length at a node
10906 # is the maximum of these minimum lengths, plus the handle length. Each of
10907 # the prongs may itself be a tree node.
10909 # This is just a rough calculation to get an approximate starting point for
10910 # indentation. Later routines will be more precise. It is important that
10911 # these estimates be independent of the line breaks of the input stream in
10912 # order to avoid instabilities.
10914 my $rLL = $self->[_rLL_];
10915 my $Klimit = $self->[_Klimit_];
10916 my $rlines = $self->[_rlines_];
10917 my $K_opening_container = $self->[_K_opening_container_];
10918 my $K_closing_container = $self->[_K_closing_container_];
10919 my $rblock_type_of_seqno = $self->[_rblock_type_of_seqno_];
10920 my $rcollapsed_length_by_seqno = $self->[_rcollapsed_length_by_seqno_];
10921 my $ris_excluded_lp_container = $self->[_ris_excluded_lp_container_];
10922 my $ris_permanently_broken = $self->[_ris_permanently_broken_];
10923 my $ris_list_by_seqno = $self->[_ris_list_by_seqno_];
10924 my $rhas_broken_list = $self->[_rhas_broken_list_];
10925 my $rtype_count_by_seqno = $self->[_rtype_count_by_seqno_];
10927 my $K_start_multiline_qw;
10928 my $level_start_multiline_qw = 0;
10929 my $max_prong_len = 0;
10930 my $handle_len_x = 0;
10933 my $last_nonblank_type = 'b';
10935 [ $max_prong_len, $handle_len_x, SEQ_ROOT, undef, undef, undef, undef ];
10938 foreach my $line_of_tokens ( @{$rlines} ) {
10940 my $line_type = $line_of_tokens->{_line_type};
10941 next if ( $line_type ne 'CODE' );
10942 my $CODE_type = $line_of_tokens->{_code_type};
10944 # Always skip blank lines
10945 next if ( $CODE_type eq 'BL' );
10947 # Note on other line types:
10948 # 'FS' (Format Skipping) lines may contain opening/closing tokens so
10949 # we have to process them to keep the stack correctly sequenced.
10950 # 'VB' (Verbatim) lines could be skipped, but testing shows that
10951 # results look better if we include their lengths.
10953 # Also note that we could exclude -xlp formatting of containers with
10954 # 'FS' and 'VB' lines, but in testing that was not really beneficial.
10956 # So we process tokens in 'FS' and 'VB' lines like all the rest...
10958 my $rK_range = $line_of_tokens->{_rK_range};
10959 my ( $K_first, $K_last ) = @{$rK_range};
10960 next unless ( defined($K_first) && defined($K_last) );
10962 my $has_comment = $rLL->[$K_last]->[_TYPE_] eq '#';
10964 # Always ignore block comments
10965 next if ( $has_comment && $K_first == $K_last );
10967 # Handle an intermediate line of a multiline qw quote. These may
10968 # require including some -ci or -i spaces. See cases c098/x063.
10969 # Updated to check all lines (not just $K_first==$K_last) to fix b1316
10970 my $K_begin_loop = $K_first;
10971 if ( $rLL->[$K_first]->[_TYPE_] eq 'q' ) {
10974 my $level = $rLL->[$KK]->[_LEVEL_];
10975 my $ci_level = $rLL->[$KK]->[_CI_LEVEL_];
10977 # remember the level of the start
10978 if ( !defined($K_start_multiline_qw) ) {
10979 $K_start_multiline_qw = $K_first;
10980 $level_start_multiline_qw = $level;
10982 $self->[_rstarting_multiline_qw_seqno_by_K_]
10983 ->{$K_start_multiline_qw};
10984 if ( !$seqno_qw ) {
10985 my $Kp = $self->K_previous_nonblank($K_first);
10986 if ( defined($Kp) && $rLL->[$Kp]->[_TYPE_] eq 'q' ) {
10988 $K_start_multiline_qw = $Kp;
10989 $level_start_multiline_qw =
10990 $rLL->[$K_start_multiline_qw]->[_LEVEL_];
10994 # Fix for b1319, b1320
10995 goto NOT_MULTILINE_QW;
11000 $len = $rLL->[$KK]->[_CUMULATIVE_LENGTH_] -
11001 $rLL->[ $KK - 1 ]->[_CUMULATIVE_LENGTH_];
11003 # We may have to add the spaces of one level or ci level ... it
11004 # depends depends on the -xci flag, the -wn flag, and if the qw
11005 # uses a container token as the quote delimiter.
11007 # First rule: add ci if there is a $ci_level
11009 $len += $rOpts_continuation_indentation;
11012 # Second rule: otherwise, look for an extra indentation level
11013 # from the start and add one indentation level if found.
11014 elsif ( $level > $level_start_multiline_qw ) {
11015 $len += $rOpts_indent_columns;
11018 if ( $len > $max_prong_len ) { $max_prong_len = $len }
11020 $last_nonblank_type = 'q';
11022 $K_begin_loop = $K_first + 1;
11024 # We can skip to the next line if more tokens
11025 next if ( $K_begin_loop > $K_last );
11030 $K_start_multiline_qw = undef;
11032 # Find the terminal token, before any side comment
11033 my $K_terminal = $K_last;
11034 if ($has_comment) {
11037 if ( $rLL->[$K_terminal]->[_TYPE_] eq 'b'
11038 && $K_terminal > $K_first );
11041 # Use length to terminal comma if interrupted list rule applies
11042 if ( @stack && $stack[-1]->[_interrupted_list_rule_] ) {
11043 my $K_c = $stack[-1]->[_K_c_];
11046 && $rLL->[$K_terminal]->[_TYPE_] eq ','
11048 # Ignore if terminal comma, causes instability (b1297, b1330)
11050 $K_c - $K_terminal > 2
11051 || ( $K_c - $K_terminal == 2
11052 && $rLL->[ $K_terminal + 1 ]->[_TYPE_] ne 'b' )
11056 my $Kend = $K_terminal;
11058 # This caused an instability in b1311 by making the result
11059 # dependent on input. It is not really necessary because the
11060 # comment length is added at the end of the loop.
11061 ##if ( $has_comment
11062 ## && !$rOpts_ignore_side_comment_lengths )
11064 ## $Kend = $K_last;
11067 # changed from $len to my $leng to fix b1302 b1306 b1317 b1321
11068 my $leng = $rLL->[$Kend]->[_CUMULATIVE_LENGTH_] -
11069 $rLL->[ $K_first - 1 ]->[_CUMULATIVE_LENGTH_];
11071 # Fix for b1331: at a broken => item, include the length of
11072 # the previous half of the item plus one for the missing space
11073 if ( $last_nonblank_type eq '=>' ) {
11077 if ( $leng > $max_prong_len ) { $max_prong_len = $leng }
11081 # Loop over tokens on this line ...
11082 foreach my $KK ( $K_begin_loop .. $K_terminal ) {
11084 my $type = $rLL->[$KK]->[_TYPE_];
11085 next if ( $type eq 'b' );
11087 #------------------------
11088 # Handle sequenced tokens
11089 #------------------------
11090 my $seqno = $rLL->[$KK]->[_TYPE_SEQUENCE_];
11093 my $token = $rLL->[$KK]->[_TOKEN_];
11095 #----------------------------
11096 # Entering a new container...
11097 #----------------------------
11098 if ( $is_opening_token{$token}
11099 && defined( $K_closing_container->{$seqno} ) )
11102 # save current prong length
11103 $stack[-1]->[_max_prong_len_] = $max_prong_len;
11104 $max_prong_len = 0;
11106 # Start new prong one level deeper
11107 my $handle_len = 0;
11108 if ( $rblock_type_of_seqno->{$seqno} ) {
11110 # code blocks do not use -lp indentation, but behave as
11111 # if they had a handle of one indentation length
11112 $handle_len = $rOpts_indent_columns;
11115 elsif ( $is_handle_type{$last_nonblank_type} ) {
11116 $handle_len = $len;
11118 if ( $KK > 0 && $rLL->[ $KK - 1 ]->[_TYPE_] eq 'b' );
11121 # Set a flag if the 'Interrupted List Rule' will be applied
11122 # (see sub copy_old_breakpoints).
11123 # - Added check on has_broken_list to fix issue b1298
11125 my $interrupted_list_rule =
11126 $ris_permanently_broken->{$seqno}
11127 && $ris_list_by_seqno->{$seqno}
11128 && !$rhas_broken_list->{$seqno}
11129 && !$rOpts_ignore_old_breakpoints;
11131 # NOTES: Since we are looking at old line numbers we have
11132 # to be very careful not to introduce an instability.
11134 # This following causes instability (b1288-b1296):
11135 # $interrupted_list_rule ||=
11136 # $rOpts_break_at_old_comma_breakpoints;
11138 # - We could turn off the interrupted list rule if there is
11139 # a broken sublist, to follow 'Compound List Rule 1'.
11140 # - We could use the _rhas_broken_list_ flag for this.
11141 # - But it seems safer not to do this, to avoid
11142 # instability, since the broken sublist could be
11143 # temporary. It seems better to let the formatting
11144 # stabilize by itself after one or two iterations.
11145 # - So, not doing this for now
11147 # Turn off the interrupted list rule if -vmll is set and a
11148 # list has '=>' characters. This avoids instabilities due
11149 # to dependence on old line breaks; issue b1325.
11150 if ( $interrupted_list_rule
11151 && $rOpts_variable_maximum_line_length )
11153 my $rtype_count = $rtype_count_by_seqno->{$seqno};
11154 if ( $rtype_count && $rtype_count->{'=>'} ) {
11155 $interrupted_list_rule = 0;
11159 # Include length to a comma ending this line
11160 if ( $interrupted_list_rule
11161 && $rLL->[$K_terminal]->[_TYPE_] eq ',' )
11163 my $Kend = $K_terminal;
11165 # fix for b1332: side comments handled at end of loop
11166 ##if ( $Kend < $K_last
11167 ## && !$rOpts_ignore_side_comment_lengths )
11169 ## $Kend = $K_last;
11172 # Measure from the next blank if any (fixes b1301)
11174 if ( $rLL->[ $Kbeg + 1 ]->[_TYPE_] eq 'b'
11180 my $leng = $rLL->[$Kend]->[_CUMULATIVE_LENGTH_] -
11181 $rLL->[$Kbeg]->[_CUMULATIVE_LENGTH_];
11182 if ( $leng > $max_prong_len ) { $max_prong_len = $leng }
11185 my $K_c = $K_closing_container->{$seqno};
11189 $max_prong_len, $handle_len,
11192 $interrupted_list_rule
11196 #--------------------
11197 # Exiting a container
11198 #--------------------
11199 elsif ( $is_closing_token{$token} ) {
11202 # The current prong ends - get its handle
11203 my $item = pop @stack;
11204 my $handle_len = $item->[_handle_len_];
11205 my $seqno_o = $item->[_seqno_o_];
11206 my $iline_o = $item->[_iline_o_];
11207 my $K_o = $item->[_K_o_];
11208 my $K_c_expect = $item->[_K_c_];
11209 my $collapsed_len = $max_prong_len;
11211 if ( $seqno_o ne $seqno ) {
11213 # This can happen if input file has brace errors.
11214 # Otherwise it shouldn't happen. Not fatal but -lp
11215 # formatting could get messed up.
11216 if ( DEVEL_MODE && !get_saw_brace_error() ) {
11218 sequence numbers differ; at CLOSING line $iline, seq=$seqno, Kc=$KK .. at OPENING line $iline_o, seq=$seqno_o, Ko=$K_o, expecting Kc=$K_c_expect
11223 #------------------------------------------
11224 # Rules to avoid scrunching code blocks ...
11225 #------------------------------------------
11227 # c098/x107 x108 x110 x112 x114 x115 x117 x118 x119
11228 my $block_type = $rblock_type_of_seqno->{$seqno};
11232 my $block_length = MIN_BLOCK_LEN;
11233 my $is_one_line_block;
11234 my $level = $rLL->[$K_o]->[_LEVEL_];
11235 if ( defined($K_o) && defined($K_c) ) {
11237 # note: fixed 3 May 2022 (removed 'my')
11239 $rLL->[ $K_c - 1 ]->[_CUMULATIVE_LENGTH_] -
11240 $rLL->[$K_o]->[_CUMULATIVE_LENGTH_];
11241 $is_one_line_block = $iline == $iline_o;
11244 # Code block rule 1: Use the total block length if
11245 # it is less than the minimum.
11246 if ( $block_length < MIN_BLOCK_LEN ) {
11247 $collapsed_len = $block_length;
11250 # Code block rule 2: Use the full length of a
11251 # one-line block to avoid breaking it, unless
11252 # extremely long. We do not need to do a precise
11253 # check here, because if it breaks then it will
11254 # stay broken on later iterations.
11258 $maximum_line_length_at_level[$level]
11260 # But skip this for sort/map/grep/eval blocks
11261 # because they can reform (b1345)
11262 && !$is_sort_map_grep_eval{$block_type}
11265 $collapsed_len = $block_length;
11268 # Code block rule 3: Otherwise the length should be
11269 # at least MIN_BLOCK_LEN to avoid scrunching code
11271 elsif ( $collapsed_len < MIN_BLOCK_LEN ) {
11272 $collapsed_len = MIN_BLOCK_LEN;
11276 # Store the result. Some extra space, '2', allows for
11277 # length of an opening token, inside space, comma, ...
11278 # This constant has been tuned to give good overall
11280 $collapsed_len += 2;
11281 $rcollapsed_length_by_seqno->{$seqno} = $collapsed_len;
11283 # Restart scanning the lower level prong
11285 $max_prong_len = $stack[-1]->[_max_prong_len_];
11286 $collapsed_len += $handle_len;
11287 if ( $collapsed_len > $max_prong_len ) {
11288 $max_prong_len = $collapsed_len;
11294 # it is a ternary - no special processing for these yet
11300 $last_nonblank_type = $type;
11304 #----------------------------
11305 # Handle non-container tokens
11306 #----------------------------
11307 my $token_length = $rLL->[$KK]->[_TOKEN_LENGTH_];
11309 # Count lengths of things like 'xx => yy' as a single item
11310 if ( $type eq '=>' ) {
11311 $len += $token_length + 1;
11312 if ( $len > $max_prong_len ) { $max_prong_len = $len }
11314 elsif ( $last_nonblank_type eq '=>' ) {
11315 $len += $token_length;
11316 if ( $len > $max_prong_len ) { $max_prong_len = $len }
11318 # but only include one => per item
11319 $len = $token_length;
11322 # include everything to end of line after a here target
11323 elsif ( $type eq 'h' ) {
11324 $len = $rLL->[$K_last]->[_CUMULATIVE_LENGTH_] -
11325 $rLL->[ $KK - 1 ]->[_CUMULATIVE_LENGTH_];
11326 if ( $len > $max_prong_len ) { $max_prong_len = $len }
11329 # for everything else just use the token length
11331 $len = $token_length;
11332 if ( $len > $max_prong_len ) { $max_prong_len = $len }
11334 $last_nonblank_type = $type;
11336 } ## end loop over tokens on this line
11338 # Now take care of any side comment
11339 if ($has_comment) {
11340 if ($rOpts_ignore_side_comment_lengths) {
11345 # For a side comment when -iscl is not set, measure length from
11346 # the start of the previous nonblank token
11349 ? $rLL->[ $K_terminal - 1 ]->[_CUMULATIVE_LENGTH_]
11351 $len = $rLL->[$K_last]->[_CUMULATIVE_LENGTH_] - $len0;
11352 if ( $len > $max_prong_len ) { $max_prong_len = $len }
11356 } ## end loop over lines
11358 if (DEBUG_COLLAPSED_LENGTHS) {
11359 print "\nCollapsed lengths--\n";
11361 my $key ( sort { $a <=> $b } keys %{$rcollapsed_length_by_seqno} )
11363 my $clen = $rcollapsed_length_by_seqno->{$key};
11364 print "$key -> $clen\n";
11369 } ## end sub collapsed_lengths
11371 sub is_excluded_lp {
11373 # Decide if this container is excluded by user request:
11374 # returns true if this token is excluded (i.e., may not use -lp)
11375 # returns false otherwise
11377 # The control hash can either describe:
11378 # what to exclude: $line_up_parentheses_control_is_lxpl = 1, or
11379 # what to include: $line_up_parentheses_control_is_lxpl = 0
11381 my ( $self, $KK ) = @_;
11382 my $rLL = $self->[_rLL_];
11383 my $rtoken_vars = $rLL->[$KK];
11384 my $token = $rtoken_vars->[_TOKEN_];
11385 my $rflags = $line_up_parentheses_control_hash{$token};
11387 #-----------------------------------------------
11388 # TEST #1: check match to listed container types
11389 #-----------------------------------------------
11390 if ( !defined($rflags) ) {
11392 # There is no entry for this container, so we are done
11393 return !$line_up_parentheses_control_is_lxpl;
11396 my ( $flag1, $flag2 ) = @{$rflags};
11398 #-----------------------------------------------------------
11399 # TEST #2: check match to flag1, the preceding nonblank word
11400 #-----------------------------------------------------------
11401 my $match_flag1 = !defined($flag1) || $flag1 eq '*';
11402 if ( !$match_flag1 ) {
11404 # Find the previous token
11405 my ( $is_f, $is_k, $is_w );
11406 my $Kp = $self->K_previous_nonblank($KK);
11407 if ( defined($Kp) ) {
11408 my $type_p = $rLL->[$Kp]->[_TYPE_];
11409 my $seqno = $rtoken_vars->[_TYPE_SEQUENCE_];
11412 $is_k = $type_p eq 'k';
11415 $is_f = $self->[_ris_function_call_paren_]->{$seqno};
11417 # either keyword or function call?
11418 $is_w = $is_k || $is_f;
11421 # Check for match based on flag1 and the previous token:
11422 if ( $flag1 eq 'k' ) { $match_flag1 = $is_k }
11423 elsif ( $flag1 eq 'K' ) { $match_flag1 = !$is_k }
11424 elsif ( $flag1 eq 'f' ) { $match_flag1 = $is_f }
11425 elsif ( $flag1 eq 'F' ) { $match_flag1 = !$is_f }
11426 elsif ( $flag1 eq 'w' ) { $match_flag1 = $is_w }
11427 elsif ( $flag1 eq 'W' ) { $match_flag1 = !$is_w }
11430 # See if we can exclude this based on the flag1 test...
11431 if ($line_up_parentheses_control_is_lxpl) {
11432 return 1 if ($match_flag1);
11435 return 1 if ( !$match_flag1 );
11438 #-------------------------------------------------------------
11439 # TEST #3: exclusion based on flag2 and the container contents
11440 #-------------------------------------------------------------
11442 # Note that this is an exclusion test for both -lpxl or -lpil input methods
11444 # 0 or blank: ignore container contents
11445 # 1 exclude non-lists or lists with sublists
11446 # 2 same as 1 but also exclude lists with code blocks
11451 my $seqno = $rtoken_vars->[_TYPE_SEQUENCE_];
11453 my $is_list = $self->[_ris_list_by_seqno_]->{$seqno};
11454 my $has_list = $self->[_rhas_list_]->{$seqno};
11455 my $has_code_block = $self->[_rhas_code_block_]->{$seqno};
11456 my $has_ternary = $self->[_rhas_ternary_]->{$seqno};
11460 || $flag2 eq '2' && ( $has_code_block || $has_ternary ) )
11465 return $match_flag2;
11466 } ## end sub is_excluded_lp
11468 sub set_excluded_lp_containers {
11471 return unless ($rOpts_line_up_parentheses);
11472 my $rLL = $self->[_rLL_];
11473 return unless ( defined($rLL) && @{$rLL} );
11475 my $K_opening_container = $self->[_K_opening_container_];
11476 my $ris_excluded_lp_container = $self->[_ris_excluded_lp_container_];
11477 my $rblock_type_of_seqno = $self->[_rblock_type_of_seqno_];
11479 foreach my $seqno ( keys %{$K_opening_container} ) {
11481 # code blocks are always excluded by the -lp coding so we can skip them
11482 next if ( $rblock_type_of_seqno->{$seqno} );
11484 my $KK = $K_opening_container->{$seqno};
11485 next unless defined($KK);
11487 # see if a user exclusion rule turns off -lp for this container
11488 if ( $self->is_excluded_lp($KK) ) {
11489 $ris_excluded_lp_container->{$seqno} = 1;
11493 } ## end sub set_excluded_lp_containers
11495 ######################################
11496 # CODE SECTION 6: Process line-by-line
11497 ######################################
11499 sub process_all_lines {
11501 #----------------------------------------------------------
11502 # Main loop to format all lines of a file according to type
11503 #----------------------------------------------------------
11506 my $rlines = $self->[_rlines_];
11507 my $sink_object = $self->[_sink_object_];
11508 my $fh_tee = $self->[_fh_tee_];
11509 my $rOpts_keep_old_blank_lines = $rOpts->{'keep-old-blank-lines'};
11510 my $file_writer_object = $self->[_file_writer_object_];
11511 my $logger_object = $self->[_logger_object_];
11512 my $vertical_aligner_object = $self->[_vertical_aligner_object_];
11513 my $save_logfile = $self->[_save_logfile_];
11515 # Note for RT#118553, leave only one newline at the end of a file.
11516 # Example code to do this is in comments below:
11517 # my $Opt_trim_ending_blank_lines = 0;
11518 # if ($Opt_trim_ending_blank_lines) {
11519 # while ( my $line_of_tokens = pop @{$rlines} ) {
11520 # my $line_type = $line_of_tokens->{_line_type};
11521 # if ( $line_type eq 'CODE' ) {
11522 # my $CODE_type = $line_of_tokens->{_code_type};
11523 # next if ( $CODE_type eq 'BL' );
11525 # push @{$rlines}, $line_of_tokens;
11530 # But while this would be a trivial update, it would have very undesirable
11531 # side effects when perltidy is run from within an editor on a small snippet.
11532 # So this is best done with a separate filter, such
11533 # as 'delete_ending_blank_lines.pl' in the examples folder.
11535 # Flag to prevent blank lines when POD occurs in a format skipping sect.
11536 my $in_format_skipping_section;
11538 # set locations for blanks around long runs of keywords
11539 my $rwant_blank_line_after = $self->keyword_group_scan();
11541 my $line_type = EMPTY_STRING;
11542 my $i_last_POD_END = -10;
11544 foreach my $line_of_tokens ( @{$rlines} ) {
11547 # insert blank lines requested for keyword sequences
11549 && defined( $rwant_blank_line_after->{ $i - 1 } )
11550 && $rwant_blank_line_after->{ $i - 1 } == 1 )
11552 $self->want_blank_line();
11555 my $last_line_type = $line_type;
11556 $line_type = $line_of_tokens->{_line_type};
11557 my $input_line = $line_of_tokens->{_line_text};
11559 # _line_type codes are:
11560 # SYSTEM - system-specific code before hash-bang line
11561 # CODE - line of perl code (including comments)
11562 # POD_START - line starting pod, such as '=head'
11563 # POD - pod documentation text
11564 # POD_END - last line of pod section, '=cut'
11565 # HERE - text of here-document
11566 # HERE_END - last line of here-doc (target word)
11567 # FORMAT - format section
11568 # FORMAT_END - last line of format section, '.'
11569 # SKIP - code skipping section
11570 # SKIP_END - last line of code skipping section, '#>>V'
11571 # DATA_START - __DATA__ line
11572 # DATA - unidentified text following __DATA__
11573 # END_START - __END__ line
11574 # END - unidentified text following __END__
11575 # ERROR - we are in big trouble, probably not a perl script
11577 # put a blank line after an =cut which comes before __END__ and __DATA__
11578 # (required by podchecker)
11579 if ( $last_line_type eq 'POD_END' && !$self->[_saw_END_or_DATA_] ) {
11580 $i_last_POD_END = $i;
11581 $file_writer_object->reset_consecutive_blank_lines();
11582 if ( !$in_format_skipping_section && $input_line !~ /^\s*$/ ) {
11583 $self->want_blank_line();
11587 # handle line of code..
11588 if ( $line_type eq 'CODE' ) {
11590 my $CODE_type = $line_of_tokens->{_code_type};
11591 $in_format_skipping_section = $CODE_type eq 'FS';
11593 # Handle blank lines
11594 if ( $CODE_type eq 'BL' ) {
11596 # Keep this blank? Start with the flag -kbl=n, where
11597 # n=0 ignore all old blank lines
11598 # n=1 stable: keep old blanks, but limited by -mbl=n
11599 # n=2 keep all old blank lines, regardless of -mbl=n
11600 # If n=0 we delete all old blank lines and let blank line
11601 # rules generate any needed blank lines.
11602 my $kgb_keep = $rOpts_keep_old_blank_lines;
11604 # Then delete lines requested by the keyword-group logic if
11606 if ( $kgb_keep == 1
11607 && defined( $rwant_blank_line_after->{$i} )
11608 && $rwant_blank_line_after->{$i} == 2 )
11613 # But always keep a blank line following an =cut
11614 if ( $i - $i_last_POD_END < 3 && !$kgb_keep ) {
11619 $self->flush($CODE_type);
11620 $file_writer_object->write_blank_code_line(
11621 $rOpts_keep_old_blank_lines == 2 );
11622 $self->[_last_line_leading_type_] = 'b';
11628 # Let logger see all non-blank lines of code. This is a slow
11629 # operation so we avoid it if it is not going to be saved.
11630 if ( $save_logfile && $logger_object ) {
11631 $logger_object->black_box( $line_of_tokens,
11632 $vertical_aligner_object->get_output_line_number );
11636 # Handle Format Skipping (FS) and Verbatim (VB) Lines
11637 if ( $CODE_type eq 'VB' || $CODE_type eq 'FS' ) {
11638 $self->write_unindented_line("$input_line");
11639 $file_writer_object->reset_consecutive_blank_lines();
11643 # Handle all other lines of code
11644 $self->process_line_of_CODE($line_of_tokens);
11647 # handle line of non-code..
11650 # set special flags
11652 if ( substr( $line_type, 0, 3 ) eq 'POD' ) {
11654 # Pod docs should have a preceding blank line. But stay
11655 # out of __END__ and __DATA__ sections, because
11656 # the user may be using this section for any purpose whatsoever
11657 if ( $rOpts->{'delete-pod'} ) { $skip_line = 1; }
11658 if ( $rOpts->{'trim-pod'} ) { $input_line =~ s/\s+$// }
11660 && !$in_format_skipping_section
11661 && $line_type eq 'POD_START'
11662 && !$self->[_saw_END_or_DATA_] )
11664 $self->want_blank_line();
11668 # leave the blank counters in a predictable state
11669 # after __END__ or __DATA__
11670 elsif ( $line_type eq 'END_START' || $line_type eq 'DATA_START' ) {
11671 $file_writer_object->reset_consecutive_blank_lines();
11672 $self->[_saw_END_or_DATA_] = 1;
11675 # Patch to avoid losing blank lines after a code-skipping block;
11677 elsif ( $line_type eq 'SKIP_END' ) {
11678 $file_writer_object->reset_consecutive_blank_lines();
11681 # write unindented non-code line
11682 if ( !$skip_line ) {
11683 $self->write_unindented_line($input_line);
11689 } ## end sub process_all_lines
11691 sub keyword_group_scan {
11694 #-------------------------------------------------------------------------
11695 # Called once per file to process any --keyword-group-blanks-* parameters.
11696 #-------------------------------------------------------------------------
11698 # Manipulate blank lines around keyword groups (kgb* flags)
11699 # Scan all lines looking for runs of consecutive lines beginning with
11700 # selected keywords. Example keywords are 'my', 'our', 'local', ... but
11701 # they may be anything. We will set flags requesting that blanks be
11702 # inserted around and within them according to input parameters. Note
11703 # that we are scanning the lines as they came in in the input stream, so
11704 # they are not necessarily well formatted.
11706 # The output of this sub is a return hash ref whose keys are the indexes of
11707 # lines after which we desire a blank line. For line index i:
11708 # $rhash_of_desires->{$i} = 1 means we want a blank line AFTER line $i
11709 # $rhash_of_desires->{$i} = 2 means we want blank line $i removed
11710 my $rhash_of_desires = {};
11712 # Nothing to do if no blanks can be output. This test added to fix
11714 if ( !$rOpts_maximum_consecutive_blank_lines ) {
11715 return $rhash_of_desires;
11718 my $Opt_blanks_before = $rOpts->{'keyword-group-blanks-before'}; # '-kgbb'
11719 my $Opt_blanks_after = $rOpts->{'keyword-group-blanks-after'}; # '-kgba'
11720 my $Opt_blanks_inside = $rOpts->{'keyword-group-blanks-inside'}; # '-kgbi'
11721 my $Opt_blanks_delete = $rOpts->{'keyword-group-blanks-delete'}; # '-kgbd'
11722 my $Opt_size = $rOpts->{'keyword-group-blanks-size'}; # '-kgbs'
11724 # A range of sizes can be input with decimal notation like 'min.max' with
11725 # any number of dots between the two numbers. Examples:
11726 # string => min max matches
11727 # 1.1 1 1 exactly 1
11728 # 1.3 1 3 1,2, or 3
11729 # 1..3 1 3 1,2, or 3
11734 my ( $Opt_size_min, $Opt_size_max ) = split /\.+/, $Opt_size;
11735 if ( $Opt_size_min && $Opt_size_min !~ /^\d+$/
11736 || $Opt_size_max && $Opt_size_max !~ /^\d+$/ )
11739 Unexpected value for -kgbs: '$Opt_size'; expecting 'min' or 'min.max';
11740 ignoring all -kgb flags
11743 # Turn this option off so that this message does not keep repeating
11744 # during iterations and other files.
11745 $rOpts->{'keyword-group-blanks-size'} = EMPTY_STRING;
11746 return $rhash_of_desires;
11748 $Opt_size_min = 1 unless ($Opt_size_min);
11750 if ( $Opt_size_max && $Opt_size_max < $Opt_size_min ) {
11751 return $rhash_of_desires;
11754 # codes for $Opt_blanks_before and $Opt_blanks_after:
11755 # 0 = never (delete if exist)
11756 # 1 = stable (keep unchanged)
11757 # 2 = always (insert if missing)
11759 return $rhash_of_desires
11760 unless $Opt_size_min > 0
11761 && ( $Opt_blanks_before != 1
11762 || $Opt_blanks_after != 1
11763 || $Opt_blanks_inside
11764 || $Opt_blanks_delete );
11766 my $Opt_pattern = $keyword_group_list_pattern;
11767 my $Opt_comment_pattern = $keyword_group_list_comment_pattern;
11768 my $Opt_repeat_count =
11769 $rOpts->{'keyword-group-blanks-repeat-count'}; # '-kgbr'
11771 my $rlines = $self->[_rlines_];
11772 my $rLL = $self->[_rLL_];
11773 my $K_closing_container = $self->[_K_closing_container_];
11774 my $K_opening_container = $self->[_K_opening_container_];
11775 my $rK_weld_right = $self->[_rK_weld_right_];
11777 # variables for the current group and subgroups:
11778 my ( $ibeg, $iend, $count, $level_beg, $K_closing, @iblanks, @group,
11782 # ($ibeg, $iend) = starting and ending line indexes of this entire group
11783 # $count = total number of keywords seen in this entire group
11784 # $level_beg = indentation level of this group
11785 # @group = [ $i, $token, $count ] =list of all keywords & blanks
11786 # @subgroup = $j, index of group where token changes
11787 # @iblanks = line indexes of blank lines in input stream in this group
11788 # where i=starting line index
11789 # token (the keyword)
11790 # count = number of this token in this subgroup
11791 # j = index in group where token changes
11793 # These vars will contain values for the most recently seen line:
11794 my ( $line_type, $CODE_type, $K_first, $K_last );
11796 my $number_of_groups_seen = 0;
11798 #-------------------
11799 # helper subroutines
11800 #-------------------
11802 my $insert_blank_after = sub {
11804 $rhash_of_desires->{$i} = 1;
11806 if ( defined( $rhash_of_desires->{$ip} )
11807 && $rhash_of_desires->{$ip} == 2 )
11809 $rhash_of_desires->{$ip} = 0;
11814 my $split_into_sub_groups = sub {
11816 # place blanks around long sub-groups of keywords
11818 return unless ($Opt_blanks_inside);
11820 # loop over sub-groups, index k
11821 push @subgroup, scalar @group;
11823 my $kend = @subgroup - 1;
11824 foreach my $k ( $kbeg .. $kend ) {
11826 # index j runs through all keywords found
11827 my $j_b = $subgroup[ $k - 1 ];
11828 my $j_e = $subgroup[$k] - 1;
11830 # index i is the actual line number of a keyword
11831 my ( $i_b, $tok_b, $count_b ) = @{ $group[$j_b] };
11832 my ( $i_e, $tok_e, $count_e ) = @{ $group[$j_e] };
11833 my $num = $count_e - $count_b + 1;
11835 # This subgroup runs from line $ib to line $ie-1, but may contain
11837 if ( $num >= $Opt_size_min ) {
11839 # if there are blank lines, we require that at least $num lines
11840 # be non-blank up to the boundary with the next subgroup.
11841 my $nog_b = my $nog_e = 1;
11842 if ( @iblanks && !$Opt_blanks_delete ) {
11843 my $j_bb = $j_b + $num - 1;
11844 my ( $i_bb, $tok_bb, $count_bb ) = @{ $group[$j_bb] };
11845 $nog_b = $count_bb - $count_b + 1 == $num;
11847 my $j_ee = $j_e - ( $num - 1 );
11848 my ( $i_ee, $tok_ee, $count_ee ) = @{ $group[$j_ee] };
11849 $nog_e = $count_e - $count_ee + 1 == $num;
11851 if ( $nog_b && $k > $kbeg ) {
11852 $insert_blank_after->( $i_b - 1 );
11854 if ( $nog_e && $k < $kend ) {
11855 my ( $i_ep, $tok_ep, $count_ep ) = @{ $group[ $j_e + 1 ] };
11856 $insert_blank_after->( $i_ep - 1 );
11863 my $delete_if_blank = sub {
11866 # delete line $i if it is blank
11867 return unless ( $i >= 0 && $i < @{$rlines} );
11868 return if ( $rlines->[$i]->{_line_type} ne 'CODE' );
11869 my $code_type = $rlines->[$i]->{_code_type};
11870 if ( $code_type eq 'BL' ) { $rhash_of_desires->{$i} = 2; }
11874 my $delete_inner_blank_lines = sub {
11876 # always remove unwanted trailing blank lines from our list
11877 return unless (@iblanks);
11878 while ( my $ibl = pop(@iblanks) ) {
11879 if ( $ibl < $iend ) { push @iblanks, $ibl; last }
11883 # now mark mark interior blank lines for deletion if requested
11884 return unless ($Opt_blanks_delete);
11886 while ( my $ibl = pop(@iblanks) ) { $rhash_of_desires->{$ibl} = 2 }
11891 my $end_group = sub {
11893 # end a group of keywords
11894 my ($bad_ending) = @_;
11895 if ( defined($ibeg) && $ibeg >= 0 ) {
11897 # then handle sufficiently large groups
11898 if ( $count >= $Opt_size_min ) {
11900 $number_of_groups_seen++;
11902 # do any blank deletions regardless of the count
11903 $delete_inner_blank_lines->();
11906 my $code_type = $rlines->[ $ibeg - 1 ]->{_code_type};
11908 # patch for hash bang line which is not currently marked as
11909 # a comment; mark it as a comment
11910 if ( $ibeg == 1 && !$code_type ) {
11911 my $line_text = $rlines->[ $ibeg - 1 ]->{_line_text};
11913 if ( $line_text && $line_text =~ /^#/ );
11916 # Do not insert a blank after a comment
11917 # (this could be subject to a flag in the future)
11918 if ( $code_type !~ /(BC|SBC|SBCX)/ ) {
11919 if ( $Opt_blanks_before == INSERT ) {
11920 $insert_blank_after->( $ibeg - 1 );
11923 elsif ( $Opt_blanks_before == DELETE ) {
11924 $delete_if_blank->( $ibeg - 1 );
11929 # We will only put blanks before code lines. We could loosen
11930 # this rule a little, but we have to be very careful because
11931 # for example we certainly don't want to drop a blank line
11932 # after a line like this:
11934 if ( $line_type eq 'CODE' && defined($K_first) ) {
11936 # - Do not put a blank before a line of different level
11937 # - Do not put a blank line if we ended the search badly
11938 # - Do not put a blank at the end of the file
11939 # - Do not put a blank line before a hanging side comment
11940 my $level = $rLL->[$K_first]->[_LEVEL_];
11941 my $ci_level = $rLL->[$K_first]->[_CI_LEVEL_];
11943 if ( $level == $level_beg
11946 && $iend < @{$rlines}
11947 && $CODE_type ne 'HSC' )
11949 if ( $Opt_blanks_after == INSERT ) {
11950 $insert_blank_after->($iend);
11952 elsif ( $Opt_blanks_after == DELETE ) {
11953 $delete_if_blank->( $iend + 1 );
11958 $split_into_sub_groups->();
11961 # reset for another group
11965 $K_closing = undef;
11973 my $find_container_end = sub {
11975 # If the keyword line is continued onto subsequent lines, find the
11976 # closing token '$K_closing' so that we can easily skip past the
11977 # contents of the container.
11979 # We only set this value if we find a simple list, meaning
11980 # -contents only one level deep
11983 # First check: skip if next line is not one deeper
11984 my $Knext_nonblank = $self->K_next_nonblank($K_last);
11985 goto RETURN if ( !defined($Knext_nonblank) );
11986 my $level_next = $rLL->[$Knext_nonblank]->[_LEVEL_];
11987 goto RETURN if ( $level_next != $level_beg + 1 );
11989 # Find the parent container of the first token on the next line
11990 my $parent_seqno = $self->parent_seqno_by_K($Knext_nonblank);
11991 goto RETURN unless ( defined($parent_seqno) );
11993 # Must not be a weld (can be unstable)
11995 if ( $total_weld_count && $self->is_welded_at_seqno($parent_seqno) );
11997 # Opening container must exist and be on this line
11998 my $Ko = $K_opening_container->{$parent_seqno};
11999 goto RETURN unless ( defined($Ko) && $Ko > $K_first && $Ko <= $K_last );
12001 # Verify that the closing container exists and is on a later line
12002 my $Kc = $K_closing_container->{$parent_seqno};
12003 goto RETURN unless ( defined($Kc) && $Kc > $K_last );
12013 my $add_to_group = sub {
12014 my ( $i, $token, $level ) = @_;
12016 # End the previous group if we have reached the maximum
12018 if ( $Opt_size_max && @group >= $Opt_size_max ) {
12022 if ( @group == 0 ) {
12024 $level_beg = $level;
12032 if ( !@group || $token ne $group[-1]->[1] ) {
12033 push @subgroup, scalar(@group);
12035 push @group, [ $i, $token, $count ];
12037 # remember if this line ends in an open container
12038 $find_container_end->();
12043 #----------------------------------
12044 # loop over all lines of the source
12045 #----------------------------------
12048 foreach my $line_of_tokens ( @{$rlines} ) {
12052 if ( $Opt_repeat_count > 0
12053 && $number_of_groups_seen >= $Opt_repeat_count );
12055 $CODE_type = EMPTY_STRING;
12058 $line_type = $line_of_tokens->{_line_type};
12060 # always end a group at non-CODE
12061 if ( $line_type ne 'CODE' ) { $end_group->(); next }
12063 $CODE_type = $line_of_tokens->{_code_type};
12065 # end any group at a format skipping line
12066 if ( $CODE_type && $CODE_type eq 'FS' ) {
12071 # continue in a verbatim (VB) type; it may be quoted text
12072 if ( $CODE_type eq 'VB' ) {
12073 if ( $ibeg >= 0 ) { $iend = $i; }
12077 # and continue in blank (BL) types
12078 if ( $CODE_type eq 'BL' ) {
12079 if ( $ibeg >= 0 ) {
12081 push @{iblanks}, $i;
12083 # propagate current subgroup token
12084 my $tok = $group[-1]->[1];
12085 push @group, [ $i, $tok, $count ];
12090 # examine the first token of this line
12091 my $rK_range = $line_of_tokens->{_rK_range};
12092 ( $K_first, $K_last ) = @{$rK_range};
12093 if ( !defined($K_first) ) {
12095 # Somewhat unexpected blank line..
12096 # $rK_range is normally defined for line type CODE, but this can
12097 # happen for example if the input line was a single semicolon which
12098 # is being deleted. In that case there was code in the input
12099 # file but it is not being retained. So we can silently return.
12100 return $rhash_of_desires;
12103 my $level = $rLL->[$K_first]->[_LEVEL_];
12104 my $type = $rLL->[$K_first]->[_TYPE_];
12105 my $token = $rLL->[$K_first]->[_TOKEN_];
12106 my $ci_level = $rLL->[$K_first]->[_CI_LEVEL_];
12108 # End a group 'badly' at an unexpected level. This will prevent
12109 # blank lines being incorrectly placed after the end of the group.
12110 # We are looking for any deviation from two acceptable patterns:
12111 # PATTERN 1: a simple list; secondary lines are at level+1
12112 # PATTERN 2: a long statement; all secondary lines same level
12113 # This was added as a fix for case b1177, in which a complex structure
12114 # got incorrectly inserted blank lines.
12115 if ( $ibeg >= 0 ) {
12117 # Check for deviation from PATTERN 1, simple list:
12118 if ( defined($K_closing) && $K_first < $K_closing ) {
12119 $end_group->(1) if ( $level != $level_beg + 1 );
12122 # Check for deviation from PATTERN 2, single statement:
12123 elsif ( $level != $level_beg ) { $end_group->(1) }
12126 # Do not look for keywords in lists ( keyword 'my' can occur in lists,
12127 # see case b760); fixed for c048.
12128 if ( $self->is_list_by_K($K_first) ) {
12129 if ( $ibeg >= 0 ) { $iend = $i }
12133 # see if this is a code type we seek (i.e. comment)
12135 && $Opt_comment_pattern
12136 && $CODE_type =~ /$Opt_comment_pattern/ )
12139 my $tok = $CODE_type;
12141 # Continuing a group
12142 if ( $ibeg >= 0 && $level == $level_beg ) {
12143 $add_to_group->( $i, $tok, $level );
12149 # first end old group if any; we might be starting new
12150 # keywords at different level
12151 if ( $ibeg >= 0 ) { $end_group->(); }
12152 $add_to_group->( $i, $tok, $level );
12157 # See if it is a keyword we seek, but never start a group in a
12158 # continuation line; the code may be badly formatted.
12159 if ( $ci_level == 0
12161 && $token =~ /$Opt_pattern/ )
12164 # Continuing a keyword group
12165 if ( $ibeg >= 0 && $level == $level_beg ) {
12166 $add_to_group->( $i, $token, $level );
12169 # Start new keyword group
12172 # first end old group if any; we might be starting new
12173 # keywords at different level
12174 if ( $ibeg >= 0 ) { $end_group->(); }
12175 $add_to_group->( $i, $token, $level );
12180 # This is not one of our keywords, but we are in a keyword group
12181 # so see if we should continue or quit
12182 elsif ( $ibeg >= 0 ) {
12184 # - bail out on a large level change; we may have walked into a
12185 # data structure or anonymous sub code.
12186 if ( $level > $level_beg + 1 || $level < $level_beg ) {
12191 # - keep going on a continuation line of the same level, since
12192 # it is probably a continuation of our previous keyword,
12193 # - and keep going past hanging side comments because we never
12194 # want to interrupt them.
12195 if ( ( ( $level == $level_beg ) && $ci_level > 0 )
12196 || $CODE_type eq 'HSC' )
12202 # - continue if if we are within in a container which started with
12203 # the line of the previous keyword.
12204 if ( defined($K_closing) && $K_first <= $K_closing ) {
12206 # continue if entire line is within container
12207 if ( $K_last <= $K_closing ) { $iend = $i; next }
12209 # continue at ); or }; or ];
12210 my $KK = $K_closing + 1;
12211 if ( $rLL->[$KK]->[_TYPE_] eq ';' ) {
12212 if ( $KK < $K_last ) {
12213 if ( $rLL->[ ++$KK ]->[_TYPE_] eq 'b' ) { ++$KK }
12214 if ( $KK > $K_last || $rLL->[$KK]->[_TYPE_] ne '#' ) {
12227 # - end the group if none of the above
12232 # not in a keyword group; continue
12236 # end of loop over all lines
12238 return $rhash_of_desires;
12240 } ## end sub keyword_group_scan
12242 #######################################
12243 # CODE SECTION 7: Process lines of code
12244 #######################################
12246 { ## begin closure process_line_of_CODE
12248 # The routines in this closure receive lines of code and combine them into
12249 # 'batches' and send them along. A 'batch' is the unit of code which can be
12250 # processed further as a unit. It has the property that it is the largest
12251 # amount of code into which which perltidy is free to place one or more
12252 # line breaks within it without violating any constraints.
12254 # When a new batch is formed it is sent to sub 'grind_batch_of_code'.
12256 # flags needed by the store routine
12257 my $line_of_tokens;
12258 my $no_internal_newlines;
12261 # range of K of tokens for the current line
12262 my ( $K_first, $K_last );
12264 my ( $rLL, $radjusted_levels, $rparent_of_seqno, $rdepth_of_opening_seqno,
12265 $rblock_type_of_seqno, $ri_starting_one_line_block );
12267 # past stored nonblank tokens and flags
12269 $K_last_nonblank_code, $looking_for_else,
12270 $is_static_block_comment, $last_CODE_type,
12271 $last_line_had_side_comment, $next_parent_seqno,
12275 # Called once at the start of a new file
12276 sub initialize_process_line_of_CODE {
12277 $K_last_nonblank_code = undef;
12278 $looking_for_else = 0;
12279 $is_static_block_comment = 0;
12280 $last_line_had_side_comment = 0;
12281 $next_parent_seqno = SEQ_ROOT;
12282 $next_slevel = undef;
12286 # Batch variables: these describe the current batch of code being formed
12287 # and sent down the pipeline. They are initialized in the next
12289 my ( $rbrace_follower, $index_start_one_line_block,
12290 $semicolons_before_block_self_destruct,
12291 $starting_in_quote, $ending_in_quote, );
12293 # Called before the start of each new batch
12294 sub initialize_batch_variables {
12296 $max_index_to_go = UNDEFINED_INDEX;
12297 $summed_lengths_to_go[0] = 0;
12298 $nesting_depth_to_go[0] = 0;
12299 ##@summed_lengths_to_go = @nesting_depth_to_go = (0);
12300 $ri_starting_one_line_block = [];
12302 # The initialization code for the remaining batch arrays is as follows
12303 # and can be activated for testing. But profiling shows that it is
12304 # time-consuming to re-initialize the batch arrays and is not necessary
12305 # because the maximum valid token, $max_index_to_go, is carefully
12306 # controlled. This means however that it is not possible to do any
12307 # type of filter or map operation directly on these arrays. And it is
12308 # not possible to use negative indexes. As a precaution against program
12309 # changes which might do this, sub pad_array_to_go adds some undefs at
12310 # the end of the current batch of data.
12312 # So 'long story short': this is a waste of time
12314 @block_type_to_go = ();
12315 @type_sequence_to_go = ();
12316 @forced_breakpoint_to_go = ();
12317 @token_lengths_to_go = ();
12318 @levels_to_go = ();
12319 @mate_index_to_go = ();
12320 @ci_levels_to_go = ();
12321 @nobreak_to_go = ();
12322 @old_breakpoint_to_go = ();
12323 @tokens_to_go = ();
12326 @leading_spaces_to_go = ();
12327 @reduced_spaces_to_go = ();
12330 @parent_seqno_to_go = ();
12333 $rbrace_follower = undef;
12334 $ending_in_quote = 0;
12336 # These get re-initialized by calls to sub destroy_one_line_block():
12337 $index_start_one_line_block = UNDEFINED_INDEX;
12338 $semicolons_before_block_self_destruct = 0;
12340 # initialize forced breakpoint vars associated with each output batch
12341 $forced_breakpoint_count = 0;
12342 $index_max_forced_break = UNDEFINED_INDEX;
12343 $forced_breakpoint_undo_count = 0;
12346 } ## end sub initialize_batch_variables
12348 sub leading_spaces_to_go {
12350 # return the number of indentation spaces for a token in the output
12354 return 0 if ( $ii < 0 );
12355 my $indentation = $leading_spaces_to_go[$ii];
12356 return ref($indentation) ? $indentation->get_spaces() : $indentation;
12357 } ## end sub leading_spaces_to_go
12359 sub create_one_line_block {
12360 ( $index_start_one_line_block, $semicolons_before_block_self_destruct )
12365 sub destroy_one_line_block {
12366 $index_start_one_line_block = UNDEFINED_INDEX;
12367 $semicolons_before_block_self_destruct = 0;
12371 # Routine to place the current token into the output stream.
12372 # Called once per output token.
12374 use constant DEBUG_STORE => 0;
12376 sub store_token_to_go {
12378 my ( $self, $Ktoken_vars, $rtoken_vars ) = @_;
12380 # Add one token to the next batch.
12381 # $Ktoken_vars = the index K in the global token array
12382 # $rtoken_vars = $rLL->[$Ktoken_vars] = the corresponding token values
12383 # unless they are temporarily being overridden
12385 #------------------------------------------------------------------
12386 # NOTE: called once per token so coding efficiency is critical here
12387 #------------------------------------------------------------------
12389 my $type = $rtoken_vars->[_TYPE_];
12391 # Check for emergency flush...
12392 # The K indexes in the batch must always be a continuous sequence of
12393 # the global token array. The batch process programming assumes this.
12394 # If storing this token would cause this relation to fail we must dump
12395 # the current batch before storing the new token. It is extremely rare
12396 # for this to happen. One known example is the following two-line
12397 # snippet when run with parameters
12398 # --noadd-newlines --space-terminal-semicolon:
12399 # if ( $_ =~ /PENCIL/ ) { $pencil_flag= 1 } ; ;
12401 if ( $max_index_to_go >= 0 ) {
12402 if ( $Ktoken_vars != $K_to_go[$max_index_to_go] + 1 ) {
12403 $self->flush_batch_of_CODE();
12406 # Do not output consecutive blank tokens ... this should not
12407 # happen, but it is worth checking. Later code can then make the
12408 # simplifying assumption that blank tokens are not consecutive.
12409 elsif ( $type eq 'b' && $types_to_go[$max_index_to_go] eq 'b' ) {
12413 # if this happens, it is may be that consecutive blanks
12414 # were inserted into the token stream in 'respace_tokens'
12415 my $lno = $rLL->[$Ktoken_vars]->[_LINE_INDEX_] + 1;
12416 Fault("consecutive blanks near line $lno; please fix");
12422 # Do not start a batch with a blank token.
12423 # Fixes cases b149 b888 b984 b985 b986 b987
12425 if ( $type eq 'b' ) { return }
12428 #----------------------------
12429 # add this token to the batch
12430 #----------------------------
12431 $K_to_go[ ++$max_index_to_go ] = $Ktoken_vars;
12432 $types_to_go[$max_index_to_go] = $type;
12434 $old_breakpoint_to_go[$max_index_to_go] = 0;
12435 $forced_breakpoint_to_go[$max_index_to_go] = 0;
12436 $mate_index_to_go[$max_index_to_go] = -1;
12438 my $token = $tokens_to_go[$max_index_to_go] = $rtoken_vars->[_TOKEN_];
12440 my $ci_level = $ci_levels_to_go[$max_index_to_go] =
12441 $rtoken_vars->[_CI_LEVEL_];
12443 # Clip levels to zero if there are level errors in the file.
12444 # We had to wait until now for reasons explained in sub 'write_line'.
12445 my $level = $rtoken_vars->[_LEVEL_];
12446 if ( $level < 0 ) { $level = 0 }
12447 $levels_to_go[$max_index_to_go] = $level;
12449 my $seqno = $type_sequence_to_go[$max_index_to_go] =
12450 $rtoken_vars->[_TYPE_SEQUENCE_];
12452 my $in_continued_quote =
12453 ( $Ktoken_vars == $K_first ) && $line_of_tokens->{_starting_in_quote};
12455 # Initializations for first token of new batch
12456 if ( $max_index_to_go == 0 ) {
12458 $starting_in_quote = $in_continued_quote;
12460 # Update the next parent sequence number for each new batch.
12462 #----------------------------------------
12463 # Begin coding from sub parent_seqno_by_K
12464 #----------------------------------------
12466 # The following is equivalent to this call but much faster:
12467 # $next_parent_seqno = $self->parent_seqno_by_K($Ktoken_vars);
12469 $next_parent_seqno = SEQ_ROOT;
12471 $next_parent_seqno = $rparent_of_seqno->{$seqno};
12474 my $Kt = $rLL->[$Ktoken_vars]->[_KNEXT_SEQ_ITEM_];
12475 if ( defined($Kt) ) {
12476 my $type_sequence_t = $rLL->[$Kt]->[_TYPE_SEQUENCE_];
12477 my $type_t = $rLL->[$Kt]->[_TYPE_];
12479 # if next container token is closing, it is the parent seqno
12480 if ( $is_closing_type{$type_t} ) {
12481 $next_parent_seqno = $type_sequence_t;
12484 # otherwise we want its parent container
12486 $next_parent_seqno =
12487 $rparent_of_seqno->{$type_sequence_t};
12491 $next_parent_seqno = SEQ_ROOT
12492 unless ( defined($next_parent_seqno) );
12494 #--------------------------------------
12495 # End coding from sub parent_seqno_by_K
12496 #--------------------------------------
12498 $next_slevel = $rdepth_of_opening_seqno->[$next_parent_seqno] + 1;
12501 # Initialize some sequence-dependent variables to their normal values
12502 $parent_seqno_to_go[$max_index_to_go] = $next_parent_seqno;
12503 $nesting_depth_to_go[$max_index_to_go] = $next_slevel;
12504 $block_type_to_go[$max_index_to_go] = EMPTY_STRING;
12506 # Then fix them at container tokens:
12509 $block_type_to_go[$max_index_to_go] =
12510 $rblock_type_of_seqno->{$seqno}
12511 if ( $rblock_type_of_seqno->{$seqno} );
12513 if ( $is_opening_token{$token} ) {
12515 my $slevel = $rdepth_of_opening_seqno->[$seqno];
12516 $nesting_depth_to_go[$max_index_to_go] = $slevel;
12517 $next_slevel = $slevel + 1;
12519 $next_parent_seqno = $seqno;
12522 elsif ( $is_closing_token{$token} ) {
12524 $next_slevel = $rdepth_of_opening_seqno->[$seqno];
12525 my $slevel = $next_slevel + 1;
12526 $nesting_depth_to_go[$max_index_to_go] = $slevel;
12528 my $parent_seqno = $rparent_of_seqno->{$seqno};
12529 $parent_seqno = SEQ_ROOT unless defined($parent_seqno);
12530 $parent_seqno_to_go[$max_index_to_go] = $parent_seqno;
12531 $next_parent_seqno = $parent_seqno;
12535 # ternary token: nothing to do
12539 $nobreak_to_go[$max_index_to_go] = $no_internal_newlines;
12541 my $length = $rtoken_vars->[_TOKEN_LENGTH_];
12543 # Safety check that length is defined. Should not be needed now.
12544 # Former patch for indent-only, in which the entire set of tokens is
12545 # turned into type 'q'. Lengths may have not been defined because sub
12546 # 'respace_tokens' is bypassed. We do not need lengths in this case,
12547 # but we will use the character count to have a defined value. In the
12548 # future, it would be nicer to have 'respace_tokens' convert the lines
12549 # to quotes and get correct lengths.
12550 if ( !defined($length) ) {
12551 $length = length($token);
12554 $token_lengths_to_go[$max_index_to_go] = $length;
12556 # We keep a running sum of token lengths from the start of this batch:
12557 # summed_lengths_to_go[$i] = total length to just before token $i
12558 # summed_lengths_to_go[$i+1] = total length to just after token $i
12559 $summed_lengths_to_go[ $max_index_to_go + 1 ] =
12560 $summed_lengths_to_go[$max_index_to_go] + $length;
12562 # Define the indentation that this token will have in two cases:
12563 # Without CI = reduced_spaces_to_go
12564 # With CI = leading_spaces_to_go
12565 if ($in_continued_quote) {
12566 $leading_spaces_to_go[$max_index_to_go] = 0;
12567 $reduced_spaces_to_go[$max_index_to_go] = 0;
12570 $leading_spaces_to_go[$max_index_to_go] =
12571 $reduced_spaces_to_go[$max_index_to_go] =
12572 $rOpts_indent_columns * $radjusted_levels->[$Ktoken_vars];
12574 $leading_spaces_to_go[$max_index_to_go] +=
12575 $rOpts_continuation_indentation * $ci_level
12579 DEBUG_STORE && do {
12580 my ( $a, $b, $c ) = caller();
12582 "STORE: from $a $c: storing token $token type $type lev=$level at $max_index_to_go\n";
12585 } ## end sub store_token_to_go
12587 sub flush_batch_of_CODE {
12589 # Finish any batch packaging and call the process routine.
12590 # This must be the only call to grind_batch_of_CODE()
12593 if ( $max_index_to_go >= 0 ) {
12595 # Create an array to hold variables for this batch
12596 my $this_batch = [];
12598 $this_batch->[_starting_in_quote_] = 1 if ($starting_in_quote);
12599 $this_batch->[_ending_in_quote_] = 1 if ($ending_in_quote);
12601 if ( $CODE_type || $last_CODE_type ) {
12602 $this_batch->[_batch_CODE_type_] =
12603 $K_to_go[$max_index_to_go] >= $K_first
12608 $last_line_had_side_comment =
12609 ( $max_index_to_go > 0 && $types_to_go[$max_index_to_go] eq '#' );
12611 # The flag $is_static_block_comment applies to the line which just
12612 # arrived. So it only applies if we are outputting that line.
12613 if ( $is_static_block_comment && !$last_line_had_side_comment ) {
12614 $this_batch->[_is_static_block_comment_] =
12615 $K_to_go[0] == $K_first;
12618 $this_batch->[_ri_starting_one_line_block_] =
12619 $ri_starting_one_line_block;
12621 $self->[_this_batch_] = $this_batch;
12623 $self->grind_batch_of_CODE();
12625 # Done .. this batch is history
12626 $self->[_this_batch_] = undef;
12628 initialize_batch_variables();
12632 } ## end sub flush_batch_of_CODE
12636 # end the current batch, EXCEPT for a few special cases
12639 if ( $max_index_to_go < 0 ) {
12641 # This is harmless but should be eliminated in development
12643 Fault("End batch called with nothing to do; please fix\n");
12648 # Exceptions when a line does not end with a comment... (fixes c058)
12649 if ( $types_to_go[$max_index_to_go] ne '#' ) {
12651 # Exception 1: Do not end line in a weld
12653 if ( $total_weld_count
12654 && $self->[_rK_weld_right_]->{ $K_to_go[$max_index_to_go] } );
12656 # Exception 2: just set a tentative breakpoint if we might be in a
12658 if ( $index_start_one_line_block != UNDEFINED_INDEX ) {
12659 $self->set_forced_breakpoint($max_index_to_go);
12664 $self->flush_batch_of_CODE();
12666 } ## end sub end_batch
12668 sub flush_vertical_aligner {
12670 my $vao = $self->[_vertical_aligner_object_];
12675 # flush is called to output any tokens in the pipeline, so that
12676 # an alternate source of lines can be written in the correct order
12678 my ( $self, $CODE_type_flush ) = @_;
12680 # end the current batch with 1 exception
12682 destroy_one_line_block();
12684 # Exception: if we are flushing within the code stream only to insert
12685 # blank line(s), then we can keep the batch intact at a weld. This
12686 # improves formatting of -ce. See test 'ce1.ce'
12687 if ( $CODE_type_flush && $CODE_type_flush eq 'BL' ) {
12688 $self->end_batch() if ( $max_index_to_go >= 0 );
12691 # otherwise, we have to shut things down completely.
12692 else { $self->flush_batch_of_CODE() }
12694 $self->flush_vertical_aligner();
12698 sub process_line_of_CODE {
12700 my ( $self, $my_line_of_tokens ) = @_;
12702 #----------------------------------------------------------------
12703 # This routine is called once per INPUT line to format all of the
12704 # tokens on that line.
12705 #----------------------------------------------------------------
12707 # It outputs full-line comments and blank lines immediately.
12709 # The tokens are copied one-by-one from the global token array $rLL to
12710 # a set of '_to_go' arrays which collect batches of tokens for a
12711 # further processing via calls to 'sub store_token_to_go', until a well
12712 # defined 'structural' break point* or 'forced' breakpoint* is reached.
12713 # Then, the batch of collected '_to_go' tokens is passed along to 'sub
12714 # grind_batch_of_CODE' for further processing.
12716 # * 'structural' break points are basically line breaks corresponding
12717 # to code blocks. An example is a chain of if-elsif-else statements,
12718 # which should typically be broken at the opening and closing braces.
12720 # * 'forced' break points are breaks required by side comments or by
12721 # special user controls.
12723 # So this routine is just making an initial set of required line
12724 # breaks, basically regardless of the maximum requested line length.
12725 # The subsequent stage of formatting make additional line breaks
12726 # appropriate for lists and logical structures, and to keep line
12727 # lengths below the requested maximum line length.
12729 #-----------------------------------
12730 # begin initialize closure variables
12731 #-----------------------------------
12732 $line_of_tokens = $my_line_of_tokens;
12733 my $rK_range = $line_of_tokens->{_rK_range};
12734 if ( !defined( $rK_range->[0] ) ) {
12736 # Empty line: This can happen if tokens are deleted, for example
12737 # with the -mangle parameter
12741 ( $K_first, $K_last ) = @{$rK_range};
12742 $last_CODE_type = $CODE_type;
12743 $CODE_type = $line_of_tokens->{_code_type};
12745 $rLL = $self->[_rLL_];
12746 $radjusted_levels = $self->[_radjusted_levels_];
12747 $rparent_of_seqno = $self->[_rparent_of_seqno_];
12748 $rdepth_of_opening_seqno = $self->[_rdepth_of_opening_seqno_];
12749 $rblock_type_of_seqno = $self->[_rblock_type_of_seqno_];
12751 #---------------------------------
12752 # end initialize closure variables
12753 #---------------------------------
12755 # This flag will become nobreak_to_go and should be set to 2 to prevent
12756 # a line break AFTER the current token.
12757 $no_internal_newlines = 0;
12758 if ( !$rOpts_add_newlines || $CODE_type eq 'NIN' ) {
12759 $no_internal_newlines = 2;
12762 my $input_line = $line_of_tokens->{_line_text};
12764 my ( $is_block_comment, $has_side_comment );
12765 if ( $rLL->[$K_last]->[_TYPE_] eq '#' ) {
12766 if ( $K_last == $K_first ) { $is_block_comment = 1 }
12767 else { $has_side_comment = 1 }
12770 my $is_static_block_comment_without_leading_space =
12771 $CODE_type eq 'SBCX';
12772 $is_static_block_comment =
12773 $CODE_type eq 'SBC' || $is_static_block_comment_without_leading_space;
12775 # check for a $VERSION statement
12776 if ( $CODE_type eq 'VER' ) {
12777 $self->[_saw_VERSION_in_this_file_] = 1;
12778 $no_internal_newlines = 2;
12781 # Add interline blank if any
12782 my $last_old_nonblank_type = "b";
12783 my $first_new_nonblank_token = EMPTY_STRING;
12784 my $K_first_true = $K_first;
12785 if ( $max_index_to_go >= 0 ) {
12786 $last_old_nonblank_type = $types_to_go[$max_index_to_go];
12787 $first_new_nonblank_token = $rLL->[$K_first]->[_TOKEN_];
12788 if ( !$is_block_comment
12789 && $types_to_go[$max_index_to_go] ne 'b'
12791 && $rLL->[ $K_first - 1 ]->[_TYPE_] eq 'b' )
12797 my $rtok_first = $rLL->[$K_first];
12799 my $in_quote = $line_of_tokens->{_ending_in_quote};
12800 $ending_in_quote = $in_quote;
12802 #------------------------------------
12803 # Handle a block (full-line) comment.
12804 #------------------------------------
12805 if ($is_block_comment) {
12807 if ( $rOpts->{'delete-block-comments'} ) {
12812 destroy_one_line_block();
12813 $self->end_batch() if ( $max_index_to_go >= 0 );
12815 # output a blank line before block comments
12817 # unless we follow a blank or comment line
12818 $self->[_last_line_leading_type_] ne '#'
12819 && $self->[_last_line_leading_type_] ne 'b'
12822 && $rOpts->{'blanks-before-comments'}
12824 # if this is NOT an empty comment, unless it follows a side
12825 # comment and could become a hanging side comment.
12827 $rtok_first->[_TOKEN_] ne '#'
12828 || ( $last_line_had_side_comment
12829 && $rLL->[$K_first]->[_LEVEL_] > 0 )
12832 # not after a short line ending in an opening token
12833 # because we already have space above this comment.
12834 # Note that the first comment in this if block, after
12835 # the 'if (', does not get a blank line because of this.
12836 && !$self->[_last_output_short_opening_token_]
12838 # never before static block comments
12839 && !$is_static_block_comment
12842 $self->flush(); # switching to new output stream
12843 my $file_writer_object = $self->[_file_writer_object_];
12844 $file_writer_object->write_blank_code_line();
12845 $self->[_last_line_leading_type_] = 'b';
12849 $rOpts->{'indent-block-comments'}
12850 && ( !$rOpts->{'indent-spaced-block-comments'}
12851 || $input_line =~ /^\s+/ )
12852 && !$is_static_block_comment_without_leading_space
12855 my $Ktoken_vars = $K_first;
12856 my $rtoken_vars = $rLL->[$Ktoken_vars];
12857 $self->store_token_to_go( $Ktoken_vars, $rtoken_vars );
12858 $self->end_batch();
12862 # switching to new output stream
12865 # Note that last arg in call here is 'undef' for comments
12866 my $file_writer_object = $self->[_file_writer_object_];
12867 $file_writer_object->write_code_line(
12868 $rtok_first->[_TOKEN_] . "\n", undef );
12869 $self->[_last_line_leading_type_] = '#';
12874 # Compare input/output indentation except for:
12875 # - hanging side comments
12876 # - continuation lines (have unknown amount of initial blank space)
12877 # - and lines which are quotes (because they may have been outdented)
12878 my $guessed_indentation_level =
12879 $line_of_tokens->{_guessed_indentation_level};
12881 unless ( $CODE_type eq 'HSC'
12882 || $rtok_first->[_CI_LEVEL_] > 0
12883 || $guessed_indentation_level == 0 && $rtok_first->[_TYPE_] eq 'Q' )
12885 my $input_line_number = $line_of_tokens->{_line_number};
12886 $self->compare_indentation_levels( $K_first,
12887 $guessed_indentation_level, $input_line_number );
12890 #------------------------
12891 # Handle indentation-only
12892 #------------------------
12894 # NOTE: In previous versions we sent all qw lines out immediately here.
12895 # No longer doing this: also write a line which is entirely a 'qw' list
12896 # to allow stacking of opening and closing tokens. Note that interior
12897 # qw lines will still go out at the end of this routine.
12898 if ( $CODE_type eq 'IO' ) {
12900 my $line = $input_line;
12902 # Fix for rt #125506 Unexpected string formating
12903 # in which leading space of a terminal quote was removed
12905 $line =~ s/^\s+// unless ( $line_of_tokens->{_starting_in_quote} );
12907 my $Ktoken_vars = $K_first;
12909 # We work with a copy of the token variables and change the
12910 # first token to be the entire line as a quote variable
12911 my $rtoken_vars = $rLL->[$Ktoken_vars];
12912 $rtoken_vars = copy_token_as_type( $rtoken_vars, 'q', $line );
12914 # Patch: length is not really important here
12915 $rtoken_vars->[_TOKEN_LENGTH_] = length($line);
12917 $self->store_token_to_go( $Ktoken_vars, $rtoken_vars );
12918 $self->end_batch();
12922 #---------------------------
12923 # Handle all other lines ...
12924 #---------------------------
12926 # If we just saw the end of an elsif block, write nag message
12927 # if we do not see another elseif or an else.
12928 if ($looking_for_else) {
12930 ## /^(elsif|else)$/
12931 if ( !$is_elsif_else{ $rLL->[$K_first_true]->[_TOKEN_] } ) {
12932 write_logfile_entry("(No else block)\n");
12934 $looking_for_else = 0;
12937 # This is a good place to kill incomplete one-line blocks
12938 if ( $max_index_to_go >= 0 ) {
12941 ( $semicolons_before_block_self_destruct == 0 )
12942 && ( $last_old_nonblank_type eq ';' )
12943 && ( $first_new_nonblank_token ne '}' )
12946 # Patch for RT #98902. Honor request to break at old commas.
12947 || ( $rOpts_break_at_old_comma_breakpoints
12948 && $last_old_nonblank_type eq ',' )
12951 $forced_breakpoint_to_go[$max_index_to_go] = 1
12952 if ($rOpts_break_at_old_comma_breakpoints);
12953 destroy_one_line_block();
12954 $self->end_batch();
12957 # Keep any requested breaks before this line. Note that we have to
12958 # use the original K_first because it may have been reduced above
12959 # to add a blank. The value of the flag is as follows:
12960 # 1 => hard break, flush the batch
12961 # 2 => soft break, set breakpoint and continue building the batch
12962 if ( $self->[_rbreak_before_Kfirst_]->{$K_first_true} ) {
12963 destroy_one_line_block();
12964 if ( $self->[_rbreak_before_Kfirst_]->{$K_first_true} == 2 ) {
12965 $self->set_forced_breakpoint($max_index_to_go);
12968 $self->end_batch() if ( $max_index_to_go >= 0 );
12973 #--------------------------------------
12974 # loop to process the tokens one-by-one
12975 #--------------------------------------
12977 # We do not want a leading blank if the previous batch just got output
12979 if ( $max_index_to_go < 0 && $rLL->[$K_first]->[_TYPE_] eq 'b' ) {
12983 foreach my $Ktoken_vars ( $K_first .. $K_last ) {
12985 my $rtoken_vars = $rLL->[$Ktoken_vars];
12990 if ( $rtoken_vars->[_TYPE_] eq 'b' ) {
12991 $self->store_token_to_go( $Ktoken_vars, $rtoken_vars );
12995 #------------------
12996 # handle non-blanks
12997 #------------------
12998 my $type = $rtoken_vars->[_TYPE_];
13000 # If we are continuing after seeing a right curly brace, flush
13001 # buffer unless we see what we are looking for, as in
13003 if ($rbrace_follower) {
13004 my $token = $rtoken_vars->[_TOKEN_];
13005 unless ( $rbrace_follower->{$token} ) {
13006 $self->end_batch() if ( $max_index_to_go >= 0 );
13008 $rbrace_follower = undef;
13012 $block_type, $type_sequence,
13013 $is_opening_BLOCK, $is_closing_BLOCK,
13014 $nobreak_BEFORE_BLOCK
13017 if ( $rtoken_vars->[_TYPE_SEQUENCE_] ) {
13019 my $token = $rtoken_vars->[_TOKEN_];
13020 $type_sequence = $rtoken_vars->[_TYPE_SEQUENCE_];
13021 $block_type = $rblock_type_of_seqno->{$type_sequence};
13025 && $block_type ne 't'
13026 && !$self->[_rshort_nested_]->{$type_sequence} )
13029 if ( $type eq '{' ) {
13030 $is_opening_BLOCK = 1;
13031 $nobreak_BEFORE_BLOCK = $no_internal_newlines;
13033 elsif ( $type eq '}' ) {
13034 $is_closing_BLOCK = 1;
13035 $nobreak_BEFORE_BLOCK = $no_internal_newlines;
13040 # if at last token ...
13041 if ( $Ktoken_vars == $K_last ) {
13043 #---------------------
13044 # handle side comments
13045 #---------------------
13046 if ($has_side_comment) {
13047 $self->store_token_to_go( $Ktoken_vars, $rtoken_vars );
13052 # if before last token ... do not allow breaks which would promote
13053 # a side comment to a block comment
13056 && ( $Ktoken_vars == $K_last - 1
13057 || $Ktoken_vars == $K_last - 2
13058 && $rLL->[ $K_last - 1 ]->[_TYPE_] eq 'b' )
13061 $no_internal_newlines = 2;
13064 # Process non-blank and non-comment tokens ...
13069 if ( $type eq ';' ) {
13071 my $next_nonblank_token_type = 'b';
13072 my $next_nonblank_token = EMPTY_STRING;
13073 if ( $Ktoken_vars < $K_last ) {
13074 my $Knnb = $Ktoken_vars + 1;
13075 $Knnb++ if ( $rLL->[$Knnb]->[_TYPE_] eq 'b' );
13076 $next_nonblank_token = $rLL->[$Knnb]->[_TOKEN_];
13077 $next_nonblank_token_type = $rLL->[$Knnb]->[_TYPE_];
13080 my $break_before_semicolon = ( $Ktoken_vars == $K_first )
13081 && $rOpts_break_at_old_semicolon_breakpoints;
13083 # kill one-line blocks with too many semicolons
13084 $semicolons_before_block_self_destruct--;
13086 $break_before_semicolon
13087 || ( $semicolons_before_block_self_destruct < 0 )
13088 || ( $semicolons_before_block_self_destruct == 0
13089 && $next_nonblank_token_type !~ /^[b\}]$/ )
13092 destroy_one_line_block();
13094 if ( $break_before_semicolon
13095 && $max_index_to_go >= 0 );
13098 $self->store_token_to_go( $Ktoken_vars, $rtoken_vars );
13102 $no_internal_newlines
13103 || ( $rOpts_keep_interior_semicolons
13104 && $Ktoken_vars < $K_last )
13105 || ( $next_nonblank_token eq '}' )
13112 elsif ($is_opening_BLOCK) {
13114 # Tentatively output this token. This is required before
13115 # calling starting_one_line_block. We may have to unstore
13116 # it, though, if we have to break before it.
13117 $self->store_token_to_go( $Ktoken_vars, $rtoken_vars );
13119 # Look ahead to see if we might form a one-line block..
13121 $self->starting_one_line_block( $Ktoken_vars,
13122 $K_last_nonblank_code, $K_last );
13123 $self->clear_breakpoint_undo_stack();
13125 # to simplify the logic below, set a flag to indicate if
13126 # this opening brace is far from the keyword which introduces it
13127 my $keyword_on_same_line = 1;
13129 $max_index_to_go >= 0
13130 && defined($K_last_nonblank_code)
13131 && $rLL->[$K_last_nonblank_code]->[_TYPE_] eq ')'
13132 && ( ( $rtoken_vars->[_LEVEL_] < $levels_to_go[0] )
13136 $keyword_on_same_line = 0;
13139 # Break before '{' if requested with -bl or -bli flag
13140 my $want_break = $self->[_rbrace_left_]->{$type_sequence};
13142 # But do not break if this token is welded to the left
13143 if ( $total_weld_count
13144 && defined( $self->[_rK_weld_left_]->{$Ktoken_vars} ) )
13149 # Break BEFORE an opening '{' ...
13155 # and we were unable to start looking for a block,
13156 && $index_start_one_line_block == UNDEFINED_INDEX
13158 # or if it will not be on same line as its keyword, so that
13159 # it will be outdented (eval.t, overload.t), and the user
13160 # has not insisted on keeping it on the right
13161 || ( !$keyword_on_same_line
13162 && !$rOpts_opening_brace_always_on_right )
13166 # but only if allowed
13167 unless ($nobreak_BEFORE_BLOCK) {
13169 # since we already stored this token, we must unstore it
13170 $self->unstore_token_to_go();
13172 # then output the line
13173 $self->end_batch() if ( $max_index_to_go >= 0 );
13175 # and now store this token at the start of a new line
13176 $self->store_token_to_go( $Ktoken_vars, $rtoken_vars );
13180 # now output this line
13182 if ( $max_index_to_go >= 0 && !$no_internal_newlines );
13188 elsif ($is_closing_BLOCK) {
13190 my $next_nonblank_token_type = 'b';
13191 my $next_nonblank_token = EMPTY_STRING;
13193 if ( $Ktoken_vars < $K_last ) {
13194 $Knnb = $Ktoken_vars + 1;
13195 $Knnb++ if ( $rLL->[$Knnb]->[_TYPE_] eq 'b' );
13196 $next_nonblank_token = $rLL->[$Knnb]->[_TOKEN_];
13197 $next_nonblank_token_type = $rLL->[$Knnb]->[_TYPE_];
13200 # If there is a pending one-line block ..
13201 if ( $index_start_one_line_block != UNDEFINED_INDEX ) {
13203 # Fix for b1208: if a side comment follows this closing
13204 # brace then we must include its length in the length test
13205 # ... unless the -issl flag is set (fixes b1307-1309).
13206 # Assume a minimum of 1 blank space to the comment.
13207 my $added_length = 0;
13208 if ( $has_side_comment
13209 && !$rOpts_ignore_side_comment_lengths
13210 && $next_nonblank_token_type eq '#' )
13212 $added_length = 1 + $rLL->[$K_last]->[_TOKEN_LENGTH_];
13215 # we have to terminate it if..
13218 # it is too long (final length may be different from
13219 # initial estimate). note: must allow 1 space for this
13221 $self->excess_line_length( $index_start_one_line_block,
13222 $max_index_to_go ) + $added_length >= 0
13224 # or if it has too many semicolons
13225 || ( $semicolons_before_block_self_destruct == 0
13226 && defined($K_last_nonblank_code)
13227 && $rLL->[$K_last_nonblank_code]->[_TYPE_] ne ';' )
13230 destroy_one_line_block();
13234 # put a break before this closing curly brace if appropriate
13236 if ( $max_index_to_go >= 0
13237 && !$nobreak_BEFORE_BLOCK
13238 && $index_start_one_line_block == UNDEFINED_INDEX );
13240 # store the closing curly brace
13241 $self->store_token_to_go( $Ktoken_vars, $rtoken_vars );
13243 # ok, we just stored a closing curly brace. Often, but
13244 # not always, we want to end the line immediately.
13245 # So now we have to check for special cases.
13247 # if this '}' successfully ends a one-line block..
13248 my $is_one_line_block = 0;
13249 my $keep_going = 0;
13250 if ( $index_start_one_line_block != UNDEFINED_INDEX ) {
13252 # Remember the type of token just before the
13253 # opening brace. It would be more general to use
13254 # a stack, but this will work for one-line blocks.
13255 $is_one_line_block =
13256 $types_to_go[$index_start_one_line_block];
13258 # we have to actually make it by removing tentative
13259 # breaks that were set within it
13260 $self->undo_forced_breakpoint_stack(0);
13262 # For -lp, extend the nobreak to include a trailing
13263 # terminal ','. This is because the -lp indentation was
13264 # not known when making one-line blocks, so we may be able
13265 # to move the line back to fit. Otherwise we may create a
13266 # needlessly stranded comma on the next line.
13267 my $iend_nobreak = $max_index_to_go - 1;
13268 if ( $rOpts_line_up_parentheses
13269 && $next_nonblank_token_type eq ','
13270 && $Knnb eq $K_last )
13272 my $p_seqno = $parent_seqno_to_go[$max_index_to_go];
13274 $self->[_ris_excluded_lp_container_]->{$p_seqno};
13275 $iend_nobreak = $max_index_to_go if ( !$is_excluded );
13278 $self->set_nobreaks( $index_start_one_line_block,
13281 # save starting block indexes so that sub correct_lp can
13282 # check and adjust -lp indentation (c098)
13283 push @{$ri_starting_one_line_block},
13284 $index_start_one_line_block;
13286 # then re-initialize for the next one-line block
13287 destroy_one_line_block();
13289 # then decide if we want to break after the '}' ..
13290 # We will keep going to allow certain brace followers as in:
13291 # do { $ifclosed = 1; last } unless $losing;
13293 # But make a line break if the curly ends a
13294 # significant block:
13297 $is_block_without_semicolon{$block_type}
13299 # Follow users break point for
13300 # one line block types U & G, such as a 'try' block
13301 || $is_one_line_block =~ /^[UG]$/
13302 && $Ktoken_vars == $K_last
13305 # if needless semicolon follows we handle it later
13306 && $next_nonblank_token ne ';'
13310 unless ($no_internal_newlines);
13314 # set string indicating what we need to look for brace follower
13316 if ( $is_if_unless_elsif_else{$block_type} ) {
13317 $rbrace_follower = undef;
13319 elsif ( $block_type eq 'do' ) {
13320 $rbrace_follower = \%is_do_follower;
13322 $self->tight_paren_follows( $K_to_go[0], $Ktoken_vars )
13325 $rbrace_follower = { ')' => 1 };
13329 # added eval for borris.t
13330 elsif ($is_sort_map_grep_eval{$block_type}
13331 || $is_one_line_block eq 'G' )
13333 $rbrace_follower = undef;
13338 elsif ( $self->[_ris_asub_block_]->{$type_sequence} ) {
13339 if ($is_one_line_block) {
13341 $rbrace_follower = \%is_anon_sub_1_brace_follower;
13343 # Exceptions to help keep -lp intact, see git #74 ...
13344 # Exception 1: followed by '}' on this line
13345 if ( $Ktoken_vars < $K_last
13346 && $next_nonblank_token eq '}' )
13348 $rbrace_follower = undef;
13352 # Exception 2: followed by '}' on next line if -lp set.
13353 # The -lp requirement allows the formatting to follow
13354 # old breaks when -lp is not used, minimizing changes.
13355 # Fixes issue c087.
13356 elsif ($Ktoken_vars == $K_last
13357 && $rOpts_line_up_parentheses )
13359 my $K_closing_container =
13360 $self->[_K_closing_container_];
13361 my $K_opening_container =
13362 $self->[_K_opening_container_];
13363 my $p_seqno = $parent_seqno_to_go[$max_index_to_go];
13364 my $Kc = $K_closing_container->{$p_seqno};
13366 $self->[_ris_excluded_lp_container_]->{$p_seqno};
13368 && $rLL->[$Kc]->[_TOKEN_] eq '}'
13370 && $Kc - $Ktoken_vars <= 2 )
13372 $rbrace_follower = undef;
13378 $rbrace_follower = \%is_anon_sub_brace_follower;
13382 # None of the above: specify what can follow a closing
13383 # brace of a block which is not an
13384 # if/elsif/else/do/sort/map/grep/eval
13386 # 'Toolbar.pm', 'Menubar.pm', bless.t, '3rules.pl', 'break1.t
13388 $rbrace_follower = \%is_other_brace_follower;
13391 # See if an elsif block is followed by another elsif or else;
13393 if ( $block_type eq 'elsif' ) {
13395 if ( $next_nonblank_token_type eq 'b' ) { # end of line?
13396 $looking_for_else = 1; # ok, check on next line
13399 ## /^(elsif|else)$/
13400 if ( !$is_elsif_else{$next_nonblank_token} ) {
13401 write_logfile_entry("No else block :(\n");
13406 # keep going after certain block types (map,sort,grep,eval)
13407 # added eval for borris.t
13413 # if no more tokens, postpone decision until re-entering
13414 elsif ( ( $next_nonblank_token_type eq 'b' )
13415 && $rOpts_add_newlines )
13417 unless ($rbrace_follower) {
13419 unless ( $no_internal_newlines
13420 || $max_index_to_go < 0 );
13423 elsif ($rbrace_follower) {
13425 unless ( $rbrace_follower->{$next_nonblank_token} ) {
13427 unless ( $no_internal_newlines
13428 || $max_index_to_go < 0 );
13430 $rbrace_follower = undef;
13435 unless ( $no_internal_newlines
13436 || $max_index_to_go < 0 );
13439 } ## end treatment of closing block token
13441 #------------------------------
13442 # handle here_doc target string
13443 #------------------------------
13444 elsif ( $type eq 'h' ) {
13446 # no newlines after seeing here-target
13447 $no_internal_newlines = 2;
13448 ## destroy_one_line_block(); # deleted to fix case b529
13449 $self->store_token_to_go( $Ktoken_vars, $rtoken_vars );
13452 #-----------------------------
13453 # handle all other token types
13454 #-----------------------------
13457 $self->store_token_to_go( $Ktoken_vars, $rtoken_vars );
13459 # break after a label if requested
13460 if ( $rOpts_break_after_labels
13462 && $rOpts_break_after_labels == 1 )
13465 unless ($no_internal_newlines);
13469 # remember previous nonblank, non-comment OUTPUT token
13470 $K_last_nonblank_code = $Ktoken_vars;
13472 } ## end of loop over all tokens in this line
13474 # if there is anything left in the output buffer ...
13475 if ( $max_index_to_go >= 0 ) {
13477 my $type = $rLL->[$K_last]->[_TYPE_];
13478 my $break_flag = $self->[_rbreak_after_Klast_]->{$K_last};
13480 # we have to flush ..
13483 # if there is a side comment...
13486 # if this line ends in a quote
13487 # NOTE: This is critically important for insuring that quoted
13488 # lines do not get processed by things like -sot and -sct
13491 # if this is a VERSION statement
13492 || $CODE_type eq 'VER'
13494 # to keep a label at the end of a line
13495 || ( $type eq 'J' && $rOpts_break_after_labels != 2 )
13497 # if we have a hard break request
13498 || $break_flag && $break_flag != 2
13500 # if we are instructed to keep all old line breaks
13501 || !$rOpts->{'delete-old-newlines'}
13503 # if this is a line of the form 'use overload'. A break here in
13504 # the input file is a good break because it will allow the
13505 # operators which follow to be formatted well. Without this
13506 # break the formatting with -ci=4 -xci is poor, for example.
13510 # print length $_[2], "\n";
13511 # my ( $x, $y ) = _order(@_);
13512 # Number::Roman->new( int $x + $y );
13515 # my ( $x, $y ) = _order(@_);
13516 # Number::Roman->new( int $x - $y );
13518 || ( $max_index_to_go == 2
13519 && $types_to_go[0] eq 'k'
13520 && $tokens_to_go[0] eq 'use'
13521 && $tokens_to_go[$max_index_to_go] eq 'overload' )
13524 destroy_one_line_block();
13525 $self->end_batch();
13530 # Check for a soft break request
13531 if ( $break_flag && $break_flag == 2 ) {
13532 $self->set_forced_breakpoint($max_index_to_go);
13535 # mark old line breakpoints in current output stream
13536 if ( !$rOpts_ignore_old_breakpoints
13537 || $self->[_ris_essential_old_breakpoint_]->{$K_last} )
13539 my $jobp = $max_index_to_go;
13540 if ( $types_to_go[$max_index_to_go] eq 'b'
13541 && $max_index_to_go > 0 )
13545 $old_breakpoint_to_go[$jobp] = 1;
13551 } ## end sub process_line_of_CODE
13552 } ## end closure process_line_of_CODE
13554 sub tight_paren_follows {
13556 my ( $self, $K_to_go_0, $K_ic ) = @_;
13558 # Input parameters:
13559 # $K_to_go_0 = first token index K of this output batch (=K_to_go[0])
13560 # $K_ic = index of the closing do brace (=K_to_go[$max_index_to_go])
13561 # Return parameter:
13562 # false if we want a break after the closing do brace
13563 # true if we do not want a break after the closing do brace
13565 # We are at the closing brace of a 'do' block. See if this brace is
13566 # followed by a closing paren, and if so, set a flag which indicates
13567 # that we do not want a line break between the '}' and ')'.
13569 # xxxxx ( ...... do { ... } ) {
13570 # ^-------looking at this brace, K_ic
13572 # Subscript notation:
13573 # _i = inner container (braces in this case)
13574 # _o = outer container (parens in this case)
13575 # _io = inner opening = '{'
13576 # _ic = inner closing = '}'
13577 # _oo = outer opening = '('
13578 # _oc = outer closing = ')'
13580 # |--K_oo |--K_oc = outer container
13581 # xxxxx ( ...... do { ...... } ) {
13582 # |--K_io |--K_ic = inner container
13584 # In general, the safe thing to do is return a 'false' value
13585 # if the statement appears to be complex. This will have
13586 # the downstream side-effect of opening up outer containers
13587 # to help make complex code readable. But for simpler
13588 # do blocks it can be preferable to keep the code compact
13589 # by returning a 'true' value.
13591 return unless defined($K_ic);
13592 my $rLL = $self->[_rLL_];
13594 # we should only be called at a closing block
13595 my $seqno_i = $rLL->[$K_ic]->[_TYPE_SEQUENCE_];
13596 return unless ($seqno_i); # shouldn't happen;
13598 # This only applies if the next nonblank is a ')'
13599 my $K_oc = $self->K_next_nonblank($K_ic);
13600 return unless defined($K_oc);
13601 my $token_next = $rLL->[$K_oc]->[_TOKEN_];
13602 return unless ( $token_next eq ')' );
13604 my $seqno_o = $rLL->[$K_oc]->[_TYPE_SEQUENCE_];
13605 my $K_io = $self->[_K_opening_container_]->{$seqno_i};
13606 my $K_oo = $self->[_K_opening_container_]->{$seqno_o};
13607 return unless ( defined($K_io) && defined($K_oo) );
13609 # RULE 1: Do not break before a closing signature paren
13610 # (regardless of complexity). This is a fix for issue git#22.
13611 # Looking for something like:
13612 # sub xxx ( ... do { ... } ) {
13613 # ^----- next block_type
13614 my $K_test = $self->K_next_nonblank($K_oc);
13615 if ( defined($K_test) && $rLL->[$K_test]->[_TYPE_] eq '{' ) {
13616 my $seqno_test = $rLL->[$K_test]->[_TYPE_SEQUENCE_];
13618 if ( $self->[_ris_asub_block_]->{$seqno_test}
13619 || $self->[_ris_sub_block_]->{$seqno_test} )
13626 # RULE 2: Break if the contents within braces appears to be 'complex'. We
13627 # base this decision on the number of tokens between braces.
13629 # xxxxx ( ... do { ... } ) {
13632 # Although very simple, it has the advantages of (1) being insensitive to
13633 # changes in lengths of identifier names, (2) easy to understand, implement
13634 # and test. A test case for this is 't/snippets/long_line.in'.
13636 # Example: $K_ic - $K_oo = 9 [Pass Rule 2]
13637 # if ( do { $2 !~ /&/ } ) { ... }
13639 # Example: $K_ic - $K_oo = 10 [Pass Rule 2]
13640 # for ( split /\s*={70,}\s*/, do { local $/; <DATA> }) { ... }
13642 # Example: $K_ic - $K_oo = 20 [Fail Rule 2]
13643 # test_zero_args( "do-returned list slice", do { ( 10, 11 )[ 2, 3 ]; });
13645 return if ( $K_ic - $K_io > 16 );
13647 # RULE 3: break if the code between the opening '(' and the '{' is 'complex'
13648 # As with the previous rule, we decide based on the token count
13650 # xxxxx ( ... do { ... } ) {
13653 # Example: $K_ic - $K_oo = 9 [Pass Rule 2]
13654 # $K_io - $K_oo = 4 [Pass Rule 3]
13655 # if ( do { $2 !~ /&/ } ) { ... }
13657 # Example: $K_ic - $K_oo = 10 [Pass rule 2]
13658 # $K_io - $K_oo = 9 [Pass rule 3]
13659 # for ( split /\s*={70,}\s*/, do { local $/; <DATA> }) { ... }
13661 return if ( $K_io - $K_oo > 9 );
13663 # RULE 4: Break if we have already broken this batch of output tokens
13664 return if ( $K_oo < $K_to_go_0 );
13666 # RULE 5: Break if input is not on one line
13667 # For example, we will set the flag for the following expression
13668 # written in one line:
13670 # This has: $K_ic - $K_oo = 10 [Pass rule 2]
13671 # $K_io - $K_oo = 8 [Pass rule 3]
13672 # $self->debug( 'Error: ' . do { local $/; <$err> } );
13674 # but we break after the brace if it is on multiple lines on input, since
13675 # the user may prefer it on multiple lines:
13679 # 'Error: ' . do { local $/; <$err> }
13682 if ( !$rOpts_ignore_old_breakpoints ) {
13683 my $iline_oo = $rLL->[$K_oo]->[_LINE_INDEX_];
13684 my $iline_oc = $rLL->[$K_oc]->[_LINE_INDEX_];
13685 return if ( $iline_oo != $iline_oc );
13688 # OK to keep the paren tight
13690 } ## end sub tight_paren_follows
13692 my %is_brace_semicolon_colon;
13695 my @q = qw( { } ; : );
13696 @is_brace_semicolon_colon{@q} = (1) x scalar(@q);
13699 sub starting_one_line_block {
13701 # after seeing an opening curly brace, look for the closing brace and see
13702 # if the entire block will fit on a line. This routine is not always right
13703 # so a check is made later (at the closing brace) to make sure we really
13704 # have a one-line block. We have to do this preliminary check, though,
13705 # because otherwise we would always break at a semicolon within a one-line
13706 # block if the block contains multiple statements.
13708 my ( $self, $Kj, $K_last_nonblank, $K_last ) = @_;
13710 my $rbreak_container = $self->[_rbreak_container_];
13711 my $rshort_nested = $self->[_rshort_nested_];
13712 my $rLL = $self->[_rLL_];
13713 my $K_opening_container = $self->[_K_opening_container_];
13714 my $rblock_type_of_seqno = $self->[_rblock_type_of_seqno_];
13716 # kill any current block - we can only go 1 deep
13717 destroy_one_line_block();
13720 # 1=distance from start of block to opening brace exceeds line length
13725 # This routine should not have been called if there are no tokens in the
13726 # 'to_go' arrays of previously stored tokens. A previous call to
13727 # 'store_token_to_go' should have stored an opening brace. An error here
13728 # indicates that a programming change may have caused a flush operation to
13729 # clean out the previously stored tokens.
13730 if ( !defined($max_index_to_go) || $max_index_to_go < 0 ) {
13731 Fault("program bug: store_token_to_go called incorrectly\n")
13736 # Return if block should be broken
13737 my $type_sequence_j = $rLL->[$Kj]->[_TYPE_SEQUENCE_];
13738 if ( $rbreak_container->{$type_sequence_j} ) {
13742 my $ris_bli_container = $self->[_ris_bli_container_];
13743 my $is_bli = $ris_bli_container->{$type_sequence_j};
13745 my $block_type = $rblock_type_of_seqno->{$type_sequence_j};
13746 $block_type = EMPTY_STRING unless ( defined($block_type) );
13748 my $previous_nonblank_token = EMPTY_STRING;
13749 my $i_last_nonblank = -1;
13750 if ( defined($K_last_nonblank) ) {
13751 $i_last_nonblank = $K_last_nonblank - $K_to_go[0];
13752 if ( $i_last_nonblank >= 0 ) {
13753 $previous_nonblank_token = $rLL->[$K_last_nonblank]->[_TOKEN_];
13757 # find the starting keyword for this block (such as 'if', 'else', ...)
13759 $max_index_to_go == 0
13760 ##|| $block_type =~ /^[\{\}\;\:]$/
13761 || $is_brace_semicolon_colon{$block_type}
13762 || substr( $block_type, 0, 7 ) eq 'package'
13765 $i_start = $max_index_to_go;
13768 # the previous nonblank token should start these block types
13770 $i_last_nonblank >= 0
13771 && ( $previous_nonblank_token eq $block_type
13772 || $self->[_ris_asub_block_]->{$type_sequence_j}
13773 || $self->[_ris_sub_block_]->{$type_sequence_j}
13774 || substr( $block_type, -2, 2 ) eq '()' )
13777 $i_start = $i_last_nonblank;
13779 # For signatures and extended syntax ...
13780 # If this brace follows a parenthesized list, we should look back to
13781 # find the keyword before the opening paren because otherwise we might
13782 # form a one line block which stays intact, and cause the parenthesized
13783 # expression to break open. That looks bad.
13784 if ( $tokens_to_go[$i_start] eq ')' ) {
13786 # Find the opening paren
13787 my $K_start = $K_to_go[$i_start];
13788 return 0 unless defined($K_start);
13789 my $seqno = $type_sequence_to_go[$i_start];
13790 return 0 unless ($seqno);
13791 my $K_opening = $K_opening_container->{$seqno};
13792 return 0 unless defined($K_opening);
13793 my $i_opening = $i_start + ( $K_opening - $K_start );
13795 # give up if not on this line
13796 return 0 unless ( $i_opening >= 0 );
13797 $i_start = $i_opening; ##$index_max_forced_break + 1;
13799 # go back one token before the opening paren
13800 if ( $i_start > 0 ) { $i_start-- }
13801 if ( $types_to_go[$i_start] eq 'b' && $i_start > 0 ) { $i_start--; }
13802 my $lev = $levels_to_go[$i_start];
13803 if ( $lev > $rLL->[$Kj]->[_LEVEL_] ) { return 0 }
13807 elsif ( $previous_nonblank_token eq ')' ) {
13809 # For something like "if (xxx) {", the keyword "if" will be
13810 # just after the most recent break. This will be 0 unless
13811 # we have just killed a one-line block and are starting another.
13813 # Note: cannot use inext_index_to_go[] here because that array
13814 # is still being constructed.
13815 $i_start = $index_max_forced_break + 1;
13816 if ( $types_to_go[$i_start] eq 'b' ) {
13820 # Patch to avoid breaking short blocks defined with extended_syntax:
13821 # Strip off any trailing () which was added in the parser to mark
13822 # the opening keyword. For example, in the following
13823 # create( TypeFoo $e) {$bubba}
13824 # the blocktype would be marked as create()
13825 my $stripped_block_type = $block_type;
13826 if ( substr( $block_type, -2, 2 ) eq '()' ) {
13827 $stripped_block_type = substr( $block_type, 0, -2 );
13829 unless ( $tokens_to_go[$i_start] eq $stripped_block_type ) {
13834 # patch for SWITCH/CASE to retain one-line case/when blocks
13835 elsif ( $block_type eq 'case' || $block_type eq 'when' ) {
13837 # Note: cannot use inext_index_to_go[] here because that array
13838 # is still being constructed.
13839 $i_start = $index_max_forced_break + 1;
13840 if ( $types_to_go[$i_start] eq 'b' ) {
13843 unless ( $tokens_to_go[$i_start] eq $block_type ) {
13852 my $pos = total_line_length( $i_start, $max_index_to_go ) - 1;
13854 my $maximum_line_length =
13855 $maximum_line_length_at_level[ $levels_to_go[$i_start] ];
13857 # see if block starting location is too great to even start
13858 if ( $pos > $maximum_line_length ) {
13862 # See if everything to the closing token will fit on one line
13863 # This is part of an update to fix cases b562 .. b983
13864 my $K_closing = $self->[_K_closing_container_]->{$type_sequence_j};
13865 return 0 unless ( defined($K_closing) );
13866 my $container_length = $rLL->[$K_closing]->[_CUMULATIVE_LENGTH_] -
13867 $rLL->[$Kj]->[_CUMULATIVE_LENGTH_];
13869 my $excess = $pos + 1 + $container_length - $maximum_line_length;
13871 # Add a small tolerance for welded tokens (case b901)
13872 if ( $total_weld_count && $self->is_welded_at_seqno($type_sequence_j) ) {
13876 if ( $excess > 0 ) {
13878 # line is too long... there is no chance of forming a one line block
13879 # if the excess is more than 1 char
13880 return 0 if ( $excess > 1 );
13882 # ... and give up if it is not a one-line block on input.
13883 # note: for a one-line block on input, it may be possible to keep
13884 # it as a one-line block (by removing a needless semicolon ).
13885 my $K_start = $K_to_go[$i_start];
13887 $rLL->[$K_closing]->[_LINE_INDEX_] - $rLL->[$K_start]->[_LINE_INDEX_];
13888 return 0 if ($ldiff);
13891 foreach my $Ki ( $Kj + 1 .. $K_last ) {
13893 # old whitespace could be arbitrarily large, so don't use it
13894 if ( $rLL->[$Ki]->[_TYPE_] eq 'b' ) { $pos += 1 }
13895 else { $pos += $rLL->[$Ki]->[_TOKEN_LENGTH_] }
13897 # ignore some small blocks
13898 my $type_sequence_i = $rLL->[$Ki]->[_TYPE_SEQUENCE_];
13899 my $nobreak = $rshort_nested->{$type_sequence_i};
13901 # Return false result if we exceed the maximum line length,
13902 if ( $pos > $maximum_line_length ) {
13906 # keep going for non-containers
13907 elsif ( !$type_sequence_i ) {
13911 # return if we encounter another opening brace before finding the
13913 elsif ($rLL->[$Ki]->[_TOKEN_] eq '{'
13914 && $rLL->[$Ki]->[_TYPE_] eq '{'
13915 && $rblock_type_of_seqno->{$type_sequence_i}
13921 # if we find our closing brace..
13922 elsif ($rLL->[$Ki]->[_TOKEN_] eq '}'
13923 && $rLL->[$Ki]->[_TYPE_] eq '}'
13924 && $rblock_type_of_seqno->{$type_sequence_i}
13928 # be sure any trailing comment also fits on the line
13929 my $Ki_nonblank = $Ki;
13930 if ( $Ki_nonblank < $K_last ) {
13932 if ( $rLL->[$Ki_nonblank]->[_TYPE_] eq 'b'
13933 && $Ki_nonblank < $K_last )
13939 # Patch for one-line sort/map/grep/eval blocks with side comments:
13940 # We will ignore the side comment length for sort/map/grep/eval
13941 # because this can lead to statements which change every time
13942 # perltidy is run. Here is an example from Denis Moskowitz which
13943 # oscillates between these two states without this patch:
13946 ## grep { $_->foo ne 'bar' } # asdfa asdf asdf asdf asdf asdf asdf asdf asdf asdf asdf
13950 ## $_->foo ne 'bar'
13951 ## } # asdfa asdf asdf asdf asdf asdf asdf asdf asdf asdf asdf
13955 # When the first line is input it gets broken apart by the main
13956 # line break logic in sub process_line_of_CODE.
13957 # When the second line is input it gets recombined by
13958 # process_line_of_CODE and passed to the output routines. The
13959 # output routines (break_long_lines) do not break it apart
13960 # because the bond strengths are set to the highest possible value
13961 # for grep/map/eval/sort blocks, so the first version gets output.
13962 # It would be possible to fix this by changing bond strengths,
13963 # but they are high to prevent errors in older versions of perl.
13964 # See c100 for eval test.
13966 && $rLL->[$K_last]->[_TYPE_] eq '#'
13967 && $rLL->[$K_last]->[_LEVEL_] == $rLL->[$Ki]->[_LEVEL_]
13968 && !$rOpts_ignore_side_comment_lengths
13969 && !$is_sort_map_grep_eval{$block_type}
13970 && $K_last - $Ki_nonblank <= 2 )
13972 # Only include the side comment for if/else/elsif/unless if it
13973 # immediately follows (because the current '$rbrace_follower'
13974 # logic for these will give an immediate brake after these
13975 # closing braces). So for example a line like this
13976 # if (...) { ... } ; # very long comment......
13977 # will already break like this:
13979 # ; # very long comment......
13980 # so we do not need to include the length of the comment, which
13981 # would break the block. Project 'bioperl' has coding like this.
13982 ## !~ /^(if|else|elsif|unless)$/
13983 if ( !$is_if_unless_elsif_else{$block_type}
13984 || $K_last == $Ki_nonblank )
13986 $Ki_nonblank = $K_last;
13987 $pos += $rLL->[$Ki_nonblank]->[_TOKEN_LENGTH_];
13989 if ( $Ki_nonblank > $Ki + 1 ) {
13991 # source whitespace could be anything, assume
13992 # at least one space before the hash on output
13993 if ( $rLL->[ $Ki + 1 ]->[_TYPE_] eq 'b' ) {
13996 else { $pos += $rLL->[ $Ki + 1 ]->[_TOKEN_LENGTH_] }
13999 if ( $pos >= $maximum_line_length ) {
14005 # ok, it's a one-line block
14006 create_one_line_block( $i_start, 20 );
14010 # just keep going for other characters
14015 # We haven't hit the closing brace, but there is still space. So the
14016 # question here is, should we keep going to look at more lines in hopes of
14017 # forming a new one-line block, or should we stop right now. The problem
14018 # with continuing is that we will not be able to honor breaks before the
14019 # opening brace if we continue.
14021 # Typically we will want to keep trying to make one-line blocks for things
14022 # like sort/map/grep/eval. But it is not always a good idea to make as
14023 # many one-line blocks as possible, so other types are not done. The user
14024 # can always use -mangle.
14026 # If we want to keep going, we will create a new one-line block.
14027 # The blocks which we can keep going are in a hash, but we never want
14028 # to continue if we are at a '-bli' block.
14029 if ( $want_one_line_block{$block_type} && !$is_bli ) {
14030 create_one_line_block( $i_start, 1 );
14033 } ## end sub starting_one_line_block
14035 sub unstore_token_to_go {
14037 # remove most recent token from output stream
14039 if ( $max_index_to_go > 0 ) {
14040 $max_index_to_go--;
14043 $max_index_to_go = UNDEFINED_INDEX;
14046 } ## end sub unstore_token_to_go
14048 sub compare_indentation_levels {
14050 # Check to see if output line tabbing agrees with input line
14051 # this can be very useful for debugging a script which has an extra
14052 # or missing brace.
14054 my ( $self, $K_first, $guessed_indentation_level, $line_number ) = @_;
14055 return unless ( defined($K_first) );
14057 my $rLL = $self->[_rLL_];
14059 my $structural_indentation_level = $rLL->[$K_first]->[_LEVEL_];
14060 my $radjusted_levels = $self->[_radjusted_levels_];
14061 if ( defined($radjusted_levels) && @{$radjusted_levels} == @{$rLL} ) {
14062 $structural_indentation_level = $radjusted_levels->[$K_first];
14065 # record max structural depth for log file
14066 if ( $structural_indentation_level > $self->[_maximum_BLOCK_level_] ) {
14067 $self->[_maximum_BLOCK_level_] = $structural_indentation_level;
14068 $self->[_maximum_BLOCK_level_at_line_] = $line_number;
14071 my $type_sequence = $rLL->[$K_first]->[_TYPE_SEQUENCE_];
14072 my $is_closing_block =
14074 && $self->[_rblock_type_of_seqno_]->{$type_sequence}
14075 && $rLL->[$K_first]->[_TYPE_] eq '}';
14077 if ( $guessed_indentation_level ne $structural_indentation_level ) {
14078 $self->[_last_tabbing_disagreement_] = $line_number;
14080 if ($is_closing_block) {
14082 if ( !$self->[_in_brace_tabbing_disagreement_] ) {
14083 $self->[_in_brace_tabbing_disagreement_] = $line_number;
14085 if ( !$self->[_first_brace_tabbing_disagreement_] ) {
14086 $self->[_first_brace_tabbing_disagreement_] = $line_number;
14090 if ( !$self->[_in_tabbing_disagreement_] ) {
14091 $self->[_tabbing_disagreement_count_]++;
14093 if ( $self->[_tabbing_disagreement_count_] <= MAX_NAG_MESSAGES ) {
14094 write_logfile_entry(
14095 "Start indentation disagreement: input=$guessed_indentation_level; output=$structural_indentation_level\n"
14098 $self->[_in_tabbing_disagreement_] = $line_number;
14099 $self->[_first_tabbing_disagreement_] = $line_number
14100 unless ( $self->[_first_tabbing_disagreement_] );
14105 $self->[_in_brace_tabbing_disagreement_] = 0 if ($is_closing_block);
14107 my $in_tabbing_disagreement = $self->[_in_tabbing_disagreement_];
14108 if ($in_tabbing_disagreement) {
14110 if ( $self->[_tabbing_disagreement_count_] <= MAX_NAG_MESSAGES ) {
14111 write_logfile_entry(
14112 "End indentation disagreement from input line $in_tabbing_disagreement\n"
14115 if ( $self->[_tabbing_disagreement_count_] == MAX_NAG_MESSAGES )
14117 write_logfile_entry(
14118 "No further tabbing disagreements will be noted\n");
14121 $self->[_in_tabbing_disagreement_] = 0;
14126 } ## end sub compare_indentation_levels
14128 ###################################################
14129 # CODE SECTION 8: Utilities for setting breakpoints
14130 ###################################################
14132 { ## begin closure set_forced_breakpoint
14134 my @forced_breakpoint_undo_stack;
14136 # These are global vars for efficiency:
14137 # my $forced_breakpoint_count;
14138 # my $forced_breakpoint_undo_count;
14139 # my $index_max_forced_break;
14141 # Break before or after certain tokens based on user settings
14142 my %break_before_or_after_token;
14146 # Updated to use all operators. This fixes case b1054
14147 # Here is the previous simplified version:
14148 ## my @q = qw( . : ? and or xor && || );
14149 my @q = @all_operators;
14152 @break_before_or_after_token{@q} = (1) x scalar(@q);
14155 # This is no longer called - global vars - moved into initialize_batch_vars
14156 sub initialize_forced_breakpoint_vars {
14157 $forced_breakpoint_count = 0;
14158 $index_max_forced_break = UNDEFINED_INDEX;
14159 $forced_breakpoint_undo_count = 0;
14160 ##@forced_breakpoint_undo_stack = (); # not needed
14164 sub set_fake_breakpoint {
14166 # Just bump up the breakpoint count as a signal that there are breaks.
14167 # This is useful if we have breaks but may want to postpone deciding
14168 # where to make them.
14169 $forced_breakpoint_count++;
14173 use constant DEBUG_FORCE => 0;
14175 sub set_forced_breakpoint {
14176 my ( $self, $i ) = @_;
14178 # Set a breakpoint AFTER the token at index $i in the _to_go arrays.
14181 # - If the token at index $i is a blank, backup to $i-1 to
14182 # get to the previous nonblank token.
14183 # - For certain tokens, the break may be placed BEFORE the token
14184 # at index $i, depending on user break preference settings.
14185 # - If a break is made after an opening token, then a break will
14186 # also be made before the corresponding closing token.
14188 # Returns '$i_nonblank':
14189 # = index of the token after which the breakpoint was actually placed
14190 # = undef if breakpoint was not set.
14193 if ( !defined($i) || $i < 0 ) {
14195 # Calls with bad index $i are harmless but waste time and should
14196 # be caught and eliminated during code development.
14198 my ( $a, $b, $c ) = caller();
14200 "Bad call to forced breakpoint from $a $b $c ; called with i=$i; please fix\n"
14206 # Break after token $i
14207 $i_nonblank = $self->set_forced_breakpoint_AFTER($i);
14209 # If we break at an opening container..break at the closing
14211 if ( defined($i_nonblank)
14212 && $is_opening_sequence_token{ $tokens_to_go[$i_nonblank] } )
14215 $self->set_closing_breakpoint($i_nonblank);
14218 DEBUG_FORCE && do {
14219 my ( $a, $b, $c ) = caller();
14221 "FORCE $forced_breakpoint_count after call from $a $c with i=$i max=$max_index_to_go";
14222 if ( !defined($i_nonblank) ) {
14223 $i = EMPTY_STRING unless defined($i);
14224 $msg .= " but could not set break after i='$i'\n";
14228 set break after $i_nonblank: tok=$tokens_to_go[$i_nonblank] type=$types_to_go[$i_nonblank] nobr=$nobreak_to_go[$i_nonblank]
14230 if ( defined($set_closing) ) {
14232 " Also set closing breakpoint corresponding to this token\n";
14238 return $i_nonblank;
14239 } ## end sub set_forced_breakpoint
14241 sub set_forced_breakpoint_AFTER {
14242 my ( $self, $i ) = @_;
14244 # This routine is only called by sub set_forced_breakpoint and
14245 # sub set_closing_breakpoint.
14247 # Set a breakpoint AFTER the token at index $i in the _to_go arrays.
14250 # - If the token at index $i is a blank, backup to $i-1 to
14251 # get to the previous nonblank token.
14252 # - For certain tokens, the break may be placed BEFORE the token
14253 # at index $i, depending on user break preference settings.
14256 # - the index of the token after which the break was set, or
14257 # - undef if no break was set
14259 return unless ( defined($i) && $i >= 0 );
14261 # Back up at a blank so we have a token to examine.
14262 # This was added to fix for cases like b932 involving an '=' break.
14263 if ( $i > 0 && $types_to_go[$i] eq 'b' ) { $i-- }
14265 # Never break between welded tokens
14267 if ( $total_weld_count
14268 && $self->[_rK_weld_right_]->{ $K_to_go[$i] } );
14270 my $token = $tokens_to_go[$i];
14271 my $type = $types_to_go[$i];
14273 # For certain tokens, use user settings to decide if we break before or
14275 if ( $break_before_or_after_token{$token}
14276 && ( $type eq $token || $type eq 'k' ) )
14278 if ( $want_break_before{$token} && $i >= 0 ) { $i-- }
14281 # breaks are forced before 'if' and 'unless'
14282 elsif ( $is_if_unless{$token} && $type eq 'k' ) { $i-- }
14284 if ( $i >= 0 && $i <= $max_index_to_go ) {
14285 my $i_nonblank = ( $types_to_go[$i] ne 'b' ) ? $i : $i - 1;
14287 if ( $i_nonblank >= 0
14288 && $nobreak_to_go[$i_nonblank] == 0
14289 && !$forced_breakpoint_to_go[$i_nonblank] )
14291 $forced_breakpoint_to_go[$i_nonblank] = 1;
14293 if ( $i_nonblank > $index_max_forced_break ) {
14294 $index_max_forced_break = $i_nonblank;
14296 $forced_breakpoint_count++;
14297 $forced_breakpoint_undo_stack[ $forced_breakpoint_undo_count++ ]
14301 return $i_nonblank;
14305 } ## end sub set_forced_breakpoint_AFTER
14307 sub clear_breakpoint_undo_stack {
14309 $forced_breakpoint_undo_count = 0;
14313 use constant DEBUG_UNDOBP => 0;
14315 sub undo_forced_breakpoint_stack {
14317 my ( $self, $i_start ) = @_;
14319 # Given $i_start, a non-negative index the 'undo stack' of breakpoints,
14320 # remove all breakpoints from the top of the 'undo stack' down to and
14321 # including index $i_start.
14323 # The 'undo stack' is a stack of all breakpoints made for a batch of
14326 if ( $i_start < 0 ) {
14328 my ( $a, $b, $c ) = caller();
14330 # Bad call, can only be due to a recent programming change.
14332 "Program Bug: undo_forced_breakpoint_stack from $a $c has bad i=$i_start "
14337 while ( $forced_breakpoint_undo_count > $i_start ) {
14339 $forced_breakpoint_undo_stack[ --$forced_breakpoint_undo_count ];
14340 if ( $i >= 0 && $i <= $max_index_to_go ) {
14341 $forced_breakpoint_to_go[$i] = 0;
14342 $forced_breakpoint_count--;
14344 DEBUG_UNDOBP && do {
14345 my ( $a, $b, $c ) = caller();
14347 "UNDOBP: undo forced_breakpoint i=$i $forced_breakpoint_undo_count from $a $c max=$max_index_to_go\n";
14351 # shouldn't happen, but not a critical error
14353 DEBUG_UNDOBP && do {
14354 my ( $a, $b, $c ) = caller();
14356 "Program Bug: undo_forced_breakpoint from $a $c has i=$i but max=$max_index_to_go";
14361 } ## end sub undo_forced_breakpoint_stack
14362 } ## end closure set_forced_breakpoint
14364 { ## begin closure set_closing_breakpoint
14366 my %postponed_breakpoint;
14368 sub initialize_postponed_breakpoint {
14369 %postponed_breakpoint = ();
14373 sub has_postponed_breakpoint {
14375 return $postponed_breakpoint{$seqno};
14378 sub set_closing_breakpoint {
14380 # set a breakpoint at a matching closing token
14381 my ( $self, $i_break ) = @_;
14383 if ( $mate_index_to_go[$i_break] >= 0 ) {
14385 # Don't reduce the '2' in the statement below.
14386 # Test files: attrib.t, BasicLyx.pm.html
14387 if ( $mate_index_to_go[$i_break] > $i_break + 2 ) {
14389 # break before } ] and ), but sub set_forced_breakpoint will decide
14390 # to break before or after a ? and :
14391 my $inc = ( $tokens_to_go[$i_break] eq '?' ) ? 0 : 1;
14392 $self->set_forced_breakpoint_AFTER(
14393 $mate_index_to_go[$i_break] - $inc );
14397 my $type_sequence = $type_sequence_to_go[$i_break];
14398 if ($type_sequence) {
14399 my $closing_token = $matching_token{ $tokens_to_go[$i_break] };
14400 $postponed_breakpoint{$type_sequence} = 1;
14404 } ## end sub set_closing_breakpoint
14405 } ## end closure set_closing_breakpoint
14407 #########################################
14408 # CODE SECTION 9: Process batches of code
14409 #########################################
14411 { ## begin closure grind_batch_of_CODE
14413 # The routines in this closure begin the processing of a 'batch' of code.
14415 # A variable to keep track of consecutive nonblank lines so that we can
14416 # insert occasional blanks
14417 my @nonblank_lines_at_depth;
14419 # A variable to remember maximum size of previous batches; this is needed
14420 # by the logical padding routine
14421 my $peak_batch_size;
14424 # variables to keep track of unbalanced containers.
14425 my %saved_opening_indentation;
14426 my @unmatched_opening_indexes_in_this_batch;
14428 sub initialize_grind_batch_of_CODE {
14429 @nonblank_lines_at_depth = ();
14430 $peak_batch_size = 0;
14432 %saved_opening_indentation = ();
14436 # sub grind_batch_of_CODE receives sections of code which are the longest
14437 # possible lines without a break. In other words, it receives what is left
14438 # after applying all breaks forced by blank lines, block comments, side
14439 # comments, pod text, and structural braces. Its job is to break this code
14440 # down into smaller pieces, if necessary, which fit within the maximum
14441 # allowed line length. Then it sends the resulting lines of code on down
14442 # the pipeline to the VerticalAligner package, breaking the code into
14443 # continuation lines as necessary. The batch of tokens are in the "to_go"
14444 # arrays. The name 'grind' is slightly suggestive of a machine continually
14445 # breaking down long lines of code, but mainly it is unique and easy to
14446 # remember and find with an editor search.
14448 # The two routines 'process_line_of_CODE' and 'grind_batch_of_CODE' work
14449 # together in the following way:
14451 # - 'process_line_of_CODE' receives the original INPUT lines one-by-one and
14452 # combines them into the largest sequences of tokens which might form a new
14454 # - 'grind_batch_of_CODE' determines which tokens will form the OUTPUT
14457 # So sub 'process_line_of_CODE' builds up the longest possible continuous
14458 # sequences of tokens, regardless of line length, and then
14459 # grind_batch_of_CODE breaks these sequences back down into the new output
14462 # Sub 'grind_batch_of_CODE' ships its output lines to the vertical aligner.
14464 use constant DEBUG_GRIND => 0;
14466 sub check_grind_input {
14468 # Check for valid input to sub grind_batch_of_CODE. An error here
14469 # would most likely be due to an error in 'sub store_token_to_go'.
14472 # Be sure there are tokens in the batch
14473 if ( $max_index_to_go < 0 ) {
14475 sub grind incorrectly called with max_index_to_go=$max_index_to_go
14478 my $Klimit = $self->[_Klimit_];
14480 # The local batch tokens must be a continuous part of the global token
14483 foreach my $ii ( 0 .. $max_index_to_go ) {
14487 $KK = $K_to_go[$ii];
14488 if ( !defined($KK) || $KK < 0 || $KK > $Klimit ) {
14489 $KK = '(undef)' unless defined($KK);
14491 at batch index at i=$ii, the value of K_to_go[$ii] = '$KK' is out of the valid range (0 - $Klimit)
14495 if ( $ii > 0 && $KK != $Km + 1 ) {
14498 Non-sequential K indexes: i=$im has Km=$Km; but i=$ii has K=$KK; expecting K = Km+1
14503 } ## end sub check_grind_input
14505 sub grind_batch_of_CODE {
14509 my $this_batch = $self->[_this_batch_];
14512 $self->check_grind_input() if (DEVEL_MODE);
14514 # This routine is only called from sub flush_batch_of_code, so that
14515 # routine is a better spot for debugging.
14516 DEBUG_GRIND && do {
14517 my $token = my $type = EMPTY_STRING;
14518 if ( $max_index_to_go >= 0 ) {
14519 $token = $tokens_to_go[$max_index_to_go];
14520 $type = $types_to_go[$max_index_to_go];
14522 my $output_str = EMPTY_STRING;
14523 if ( $max_index_to_go > 20 ) {
14524 my $mm = $max_index_to_go - 10;
14526 join( EMPTY_STRING, @tokens_to_go[ 0 .. 10 ] ) . " ... "
14527 . join( EMPTY_STRING,
14528 @tokens_to_go[ $mm .. $max_index_to_go ] );
14531 $output_str = join EMPTY_STRING,
14532 @tokens_to_go[ 0 .. $max_index_to_go ];
14534 print STDERR <<EOM;
14535 grind got batch number $batch_count with $max_index_to_go tokens, last type '$type' tok='$token', text:
14540 return if ( $max_index_to_go < 0 );
14542 $self->set_lp_indentation()
14543 if ($rOpts_line_up_parentheses);
14545 #----------------------------
14546 # Shortcut for block comments
14547 #----------------------------
14549 $max_index_to_go == 0
14550 && $types_to_go[0] eq '#'
14552 # this shortcut does not work for -lp yet
14553 && !$rOpts_line_up_parentheses
14557 $this_batch->[_ri_first_] = [$ibeg];
14558 $this_batch->[_ri_last_] = [$ibeg];
14559 $this_batch->[_peak_batch_size_] = $peak_batch_size;
14560 $this_batch->[_do_not_pad_] = 0;
14561 $this_batch->[_batch_count_] = $batch_count;
14562 $this_batch->[_rix_seqno_controlling_ci_] = [];
14564 $self->convey_batch_to_vertical_aligner();
14566 my $level = $levels_to_go[$ibeg];
14567 $self->[_last_last_line_leading_level_] =
14568 $self->[_last_line_leading_level_];
14569 $self->[_last_line_leading_type_] = $types_to_go[$ibeg];
14570 $self->[_last_line_leading_level_] = $level;
14571 $nonblank_lines_at_depth[$level] = 1;
14579 my $rLL = $self->[_rLL_];
14580 my $ris_seqno_controlling_ci = $self->[_ris_seqno_controlling_ci_];
14581 my $rwant_container_open = $self->[_rwant_container_open_];
14583 #-------------------------------------------------------
14584 # Loop over the batch to initialize some batch variables
14585 #-------------------------------------------------------
14586 my $comma_count_in_batch = 0;
14587 my $ilast_nonblank = -1;
14589 my @ix_seqno_controlling_ci;
14590 my %comma_arrow_count;
14591 my $comma_arrow_count_contained = 0;
14592 my @unmatched_closing_indexes_in_this_batch;
14594 @unmatched_opening_indexes_in_this_batch = ();
14596 foreach my $i ( 0 .. $max_index_to_go ) {
14597 $iprev_to_go[$i] = $ilast_nonblank;
14598 $inext_to_go[$i] = $i + 1;
14600 my $type = $types_to_go[$i];
14601 if ( $type ne 'b' ) {
14602 if ( $ilast_nonblank >= 0 ) {
14603 $inext_to_go[$ilast_nonblank] = $i;
14605 # just in case there are two blanks in a row (shouldn't
14607 if ( ++$ilast_nonblank < $i ) {
14608 $inext_to_go[$ilast_nonblank] = $i;
14611 $ilast_nonblank = $i;
14613 # This is a good spot to efficiently collect information needed
14614 # for breaking lines...
14616 # gather info needed by sub break_long_lines
14617 if ( $type_sequence_to_go[$i] ) {
14618 my $seqno = $type_sequence_to_go[$i];
14619 my $token = $tokens_to_go[$i];
14621 # remember indexes of any tokens controlling xci
14622 # in this batch. This list is needed by sub undo_ci.
14623 if ( $ris_seqno_controlling_ci->{$seqno} ) {
14624 push @ix_seqno_controlling_ci, $i;
14627 if ( $is_opening_sequence_token{$token} ) {
14628 if ( $rwant_container_open->{$seqno} ) {
14629 $self->set_forced_breakpoint($i);
14631 push @unmatched_opening_indexes_in_this_batch, $i;
14632 if ( $type eq '?' ) {
14633 push @colon_list, $type;
14636 elsif ( $is_closing_sequence_token{$token} ) {
14638 if ( $i > 0 && $rwant_container_open->{$seqno} ) {
14639 $self->set_forced_breakpoint( $i - 1 );
14643 pop @unmatched_opening_indexes_in_this_batch;
14644 if ( defined($i_mate) && $i_mate >= 0 ) {
14645 if ( $type_sequence_to_go[$i_mate] ==
14646 $type_sequence_to_go[$i] )
14648 $mate_index_to_go[$i] = $i_mate;
14649 $mate_index_to_go[$i_mate] = $i;
14650 if ( $comma_arrow_count{$seqno} ) {
14651 $comma_arrow_count_contained +=
14652 $comma_arrow_count{$seqno};
14656 push @unmatched_opening_indexes_in_this_batch,
14658 push @unmatched_closing_indexes_in_this_batch,
14663 push @unmatched_closing_indexes_in_this_batch, $i;
14665 if ( $type eq ':' ) {
14666 push @colon_list, $type;
14668 } ## end elsif ( $is_closing_sequence_token...)
14670 } ## end if ($seqno)
14672 elsif ( $type eq ',' ) { $comma_count_in_batch++; }
14673 elsif ( $tokens_to_go[$i] eq '=>' ) {
14674 if (@unmatched_opening_indexes_in_this_batch) {
14675 my $j = $unmatched_opening_indexes_in_this_batch[-1];
14676 my $seqno = $type_sequence_to_go[$j];
14677 $comma_arrow_count{$seqno}++;
14680 } ## end if ( $type ne 'b' )
14681 } ## end for ( my $i = 0 ; $i <=...)
14683 my $is_unbalanced_batch = @unmatched_opening_indexes_in_this_batch +
14684 @unmatched_closing_indexes_in_this_batch;
14686 #------------------------
14687 # Set special breakpoints
14688 #------------------------
14689 # If this line ends in a code block brace, set breaks at any
14690 # previous closing code block braces to breakup a chain of code
14691 # blocks on one line. This is very rare but can happen for
14692 # user-defined subs. For example we might be looking at this:
14693 # BOOL { $server_data{uptime} > 0; } NUM { $server_data{load}; } STR {
14694 my $saw_good_break = 0; # flag to force breaks even if short line
14697 # looking for opening or closing block brace
14698 $block_type_to_go[$max_index_to_go]
14700 # never any good breaks if just one token
14701 && $max_index_to_go > 0
14703 # but not one of these which are never duplicated on a line:
14704 # until|while|for|if|elsif|else
14705 && !$is_block_without_semicolon{ $block_type_to_go[$max_index_to_go]
14709 my $lev = $nesting_depth_to_go[$max_index_to_go];
14711 # Walk backwards from the end and
14712 # set break at any closing block braces at the same level.
14713 # But quit if we are not in a chain of blocks.
14714 foreach my $i ( reverse( 0 .. $max_index_to_go - 1 ) ) {
14715 last if ( $levels_to_go[$i] < $lev ); # stop at a lower level
14716 next if ( $levels_to_go[$i] > $lev ); # skip past higher level
14718 if ( $block_type_to_go[$i] ) {
14719 if ( $tokens_to_go[$i] eq '}' ) {
14720 $self->set_forced_breakpoint($i);
14721 $saw_good_break = 1;
14725 # quit if we see anything besides words, function, blanks
14727 elsif ( $types_to_go[$i] !~ /^[\(\)Gwib]$/ ) { last }
14731 #-----------------------------------------------
14732 # insertion of any blank lines before this batch
14733 #-----------------------------------------------
14736 my $imax = $max_index_to_go;
14738 # trim any blank tokens
14739 if ( $types_to_go[$imin] eq 'b' ) { $imin++ }
14740 if ( $types_to_go[$imax] eq 'b' ) { $imax-- }
14742 if ( $imin > $imax ) {
14744 my $K0 = $K_to_go[0];
14745 my $lno = EMPTY_STRING;
14746 if ( defined($K0) ) { $lno = $rLL->[$K0]->[_LINE_INDEX_] + 1 }
14748 Strange: received batch containing only blanks near input line $lno: after trimming imin=$imin, imax=$imax
14754 my $last_line_leading_type = $self->[_last_line_leading_type_];
14755 my $last_line_leading_level = $self->[_last_line_leading_level_];
14756 my $last_last_line_leading_level =
14757 $self->[_last_last_line_leading_level_];
14759 # add a blank line before certain key types but not after a comment
14760 if ( $last_line_leading_type ne '#' ) {
14761 my $want_blank = 0;
14762 my $leading_token = $tokens_to_go[$imin];
14763 my $leading_type = $types_to_go[$imin];
14765 # break before certain key blocks except one-liners
14766 if ( $leading_type eq 'k' ) {
14767 if ( $leading_token eq 'BEGIN' || $leading_token eq 'END' ) {
14768 $want_blank = $rOpts->{'blank-lines-before-subs'}
14769 if ( terminal_type_i( $imin, $imax ) ne '}' );
14772 # Break before certain block types if we haven't had a
14773 # break at this level for a while. This is the
14774 # difficult decision..
14775 elsif ($last_line_leading_type ne 'b'
14776 && $is_if_unless_while_until_for_foreach{$leading_token} )
14778 my $lc = $nonblank_lines_at_depth[$last_line_leading_level];
14779 if ( !defined($lc) ) { $lc = 0 }
14781 # patch for RT #128216: no blank line inserted at a level
14783 if ( $levels_to_go[$imin] != $last_line_leading_level ) {
14788 $rOpts->{'blanks-before-blocks'}
14789 && $lc >= $rOpts->{'long-block-line-count'}
14790 && $self->consecutive_nonblank_lines() >=
14791 $rOpts->{'long-block-line-count'}
14792 && terminal_type_i( $imin, $imax ) ne '}';
14796 # blank lines before subs except declarations and one-liners
14797 elsif ( $leading_type eq 'i' ) {
14802 substr( $leading_token, 0, 3 ) eq 'sub'
14803 || $rOpts_sub_alias_list
14807 && $leading_token =~ /$SUB_PATTERN/
14810 $want_blank = $rOpts->{'blank-lines-before-subs'}
14811 if ( terminal_type_i( $imin, $imax ) !~ /^[\;\}\,]$/ );
14814 # break before all package declarations
14815 elsif ( substr( $leading_token, 0, 8 ) eq 'package ' ) {
14816 $want_blank = $rOpts->{'blank-lines-before-packages'};
14820 # Check for blank lines wanted before a closing brace
14821 elsif ( $leading_token eq '}' ) {
14822 if ( $rOpts->{'blank-lines-before-closing-block'}
14823 && $block_type_to_go[$imin]
14824 && $block_type_to_go[$imin] =~
14825 /$blank_lines_before_closing_block_pattern/ )
14827 my $nblanks = $rOpts->{'blank-lines-before-closing-block'};
14828 if ( $nblanks > $want_blank ) {
14829 $want_blank = $nblanks;
14836 # future: send blank line down normal path to VerticalAligner
14837 $self->flush_vertical_aligner();
14838 my $file_writer_object = $self->[_file_writer_object_];
14839 $file_writer_object->require_blank_code_lines($want_blank);
14843 # update blank line variables and count number of consecutive
14844 # non-blank, non-comment lines at this level
14845 $last_last_line_leading_level = $last_line_leading_level;
14846 $last_line_leading_level = $levels_to_go[$imin];
14847 if ( $last_line_leading_level < 0 ) { $last_line_leading_level = 0 }
14848 $last_line_leading_type = $types_to_go[$imin];
14849 if ( $last_line_leading_level == $last_last_line_leading_level
14850 && $last_line_leading_type ne 'b'
14851 && $last_line_leading_type ne '#'
14852 && defined( $nonblank_lines_at_depth[$last_line_leading_level] ) )
14854 $nonblank_lines_at_depth[$last_line_leading_level]++;
14857 $nonblank_lines_at_depth[$last_line_leading_level] = 1;
14860 $self->[_last_line_leading_type_] = $last_line_leading_type;
14861 $self->[_last_line_leading_level_] = $last_line_leading_level;
14862 $self->[_last_last_line_leading_level_] = $last_last_line_leading_level;
14864 #--------------------------
14865 # scan lists and long lines
14866 #--------------------------
14868 # Flag to remember if we called sub 'pad_array_to_go'.
14869 # Some routines (break_lists(), break_long_lines() ) need some
14870 # extra tokens added at the end of the batch. Most batches do not
14871 # use these routines, so we will avoid calling 'pad_array_to_go'
14872 # unless it is needed.
14873 my $called_pad_array_to_go;
14875 # set all forced breakpoints for good list formatting
14876 my $is_long_line = $max_index_to_go > 0
14877 && $self->excess_line_length( $imin, $max_index_to_go ) > 0;
14879 my $old_line_count_in_batch = 1;
14880 if ( $max_index_to_go > 0 ) {
14881 my $Kbeg = $K_to_go[0];
14882 my $Kend = $K_to_go[$max_index_to_go];
14883 $old_line_count_in_batch +=
14884 $rLL->[$Kend]->[_LINE_INDEX_] - $rLL->[$Kbeg]->[_LINE_INDEX_];
14887 my $rbond_strength_bias = [];
14890 || $old_line_count_in_batch > 1
14892 # must always call break_lists() with unbalanced batches because
14893 # it is maintaining some stacks
14894 || $is_unbalanced_batch
14896 # call break_lists if we might want to break at commas
14898 $comma_count_in_batch
14899 && ( $rOpts_maximum_fields_per_table > 0
14900 && $rOpts_maximum_fields_per_table <= $comma_count_in_batch
14901 || $rOpts_comma_arrow_breakpoints == 0 )
14904 # call break_lists if user may want to break open some one-line
14906 || ( $comma_arrow_count_contained
14907 && $rOpts_comma_arrow_breakpoints != 3 )
14910 # add a couple of extra terminal blank tokens
14911 $self->pad_array_to_go();
14912 $called_pad_array_to_go = 1;
14914 my $sgb = $self->break_lists( $is_long_line, $rbond_strength_bias );
14915 $saw_good_break ||= $sgb;
14918 # let $ri_first and $ri_last be references to lists of
14919 # first and last tokens of line fragments to output..
14920 my ( $ri_first, $ri_last );
14922 #-------------------------
14923 # write a single line if..
14924 #-------------------------
14927 # we aren't allowed to add any newlines
14928 !$rOpts_add_newlines
14933 # this line is 'short'
14936 # and we didn't see a good breakpoint
14937 && !$saw_good_break
14939 # and we don't already have an interior breakpoint
14940 && !$forced_breakpoint_count
14944 @{$ri_first} = ($imin);
14945 @{$ri_last} = ($imax);
14948 #-----------------------------
14949 # otherwise use multiple lines
14950 #-----------------------------
14953 # add a couple of extra terminal blank tokens if we haven't
14955 $self->pad_array_to_go() unless ($called_pad_array_to_go);
14957 ( $ri_first, $ri_last, my $rbond_strength_to_go ) =
14958 $self->break_long_lines( $saw_good_break, \@colon_list,
14959 $rbond_strength_bias );
14961 $self->break_all_chain_tokens( $ri_first, $ri_last );
14963 $self->break_equals( $ri_first, $ri_last );
14965 # now we do a correction step to clean this up a bit
14966 # (The only time we would not do this is for debugging)
14967 $self->recombine_breakpoints( $ri_first, $ri_last,
14968 $rbond_strength_to_go )
14969 if ( $rOpts_recombine && @{$ri_first} > 1 );
14971 $self->insert_final_ternary_breaks( $ri_first, $ri_last )
14975 $self->insert_breaks_before_list_opening_containers( $ri_first,
14977 if ( %break_before_container_types && $max_index_to_go > 0 );
14979 #-------------------
14980 # -lp corrector step
14981 #-------------------
14982 my $do_not_pad = 0;
14983 if ($rOpts_line_up_parentheses) {
14984 $do_not_pad = $self->correct_lp_indentation( $ri_first, $ri_last );
14987 #--------------------------
14988 # unmask phantom semicolons
14989 #--------------------------
14990 if ( !$tokens_to_go[$imax] && $types_to_go[$imax] eq ';' ) {
14994 if ( $want_left_space{';'} != WS_NO ) {
14998 $tokens_to_go[$i] = $tok;
14999 $token_lengths_to_go[$i] = $tok_len;
15000 my $KK = $K_to_go[$i];
15001 $rLL->[$KK]->[_TOKEN_] = $tok;
15002 $rLL->[$KK]->[_TOKEN_LENGTH_] = $tok_len;
15003 my $line_number = 1 + $rLL->[$KK]->[_LINE_INDEX_];
15004 $self->note_added_semicolon($line_number);
15006 foreach ( $imax .. $max_index_to_go ) {
15007 $summed_lengths_to_go[ $_ + 1 ] += $tok_len;
15011 if ( $rOpts_one_line_block_semicolons == 0 ) {
15012 $self->delete_one_line_semicolons( $ri_first, $ri_last );
15015 #--------------------
15016 # ship this batch out
15017 #--------------------
15018 $this_batch->[_ri_first_] = $ri_first;
15019 $this_batch->[_ri_last_] = $ri_last;
15020 $this_batch->[_peak_batch_size_] = $peak_batch_size;
15021 $this_batch->[_do_not_pad_] = $do_not_pad;
15022 $this_batch->[_batch_count_] = $batch_count;
15023 $this_batch->[_rix_seqno_controlling_ci_] = \@ix_seqno_controlling_ci;
15025 $self->convey_batch_to_vertical_aligner();
15027 #-------------------------------------------------------------------
15028 # Write requested number of blank lines after an opening block brace
15029 #-------------------------------------------------------------------
15030 if ($rOpts_blank_lines_after_opening_block) {
15032 if ( $types_to_go[$iterm] eq '#' && $iterm > $imin ) {
15034 if ( $types_to_go[$iterm] eq 'b' && $iterm > $imin ) {
15039 if ( $types_to_go[$iterm] eq '{'
15040 && $block_type_to_go[$iterm]
15041 && $block_type_to_go[$iterm] =~
15042 /$blank_lines_after_opening_block_pattern/ )
15044 my $nblanks = $rOpts_blank_lines_after_opening_block;
15045 $self->flush_vertical_aligner();
15046 my $file_writer_object = $self->[_file_writer_object_];
15047 $file_writer_object->require_blank_code_lines($nblanks);
15051 # Remember the largest batch size processed. This is needed by the
15052 # logical padding routine to avoid padding the first nonblank token
15053 if ( $max_index_to_go && $max_index_to_go > $peak_batch_size ) {
15054 $peak_batch_size = $max_index_to_go;
15058 } ## end sub grind_batch_of_CODE
15060 sub save_opening_indentation {
15062 # This should be called after each batch of tokens is output. It
15063 # saves indentations of lines of all unmatched opening tokens.
15064 # These will be used by sub get_opening_indentation.
15066 my ( $self, $ri_first, $ri_last, $rindentation_list ) = @_;
15068 # QW INDENTATION PATCH 1:
15069 # Also save indentation for multiline qw quotes
15071 my $seqno_qw_opening;
15072 if ( $types_to_go[$max_index_to_go] eq 'q' ) {
15073 my $KK = $K_to_go[$max_index_to_go];
15074 $seqno_qw_opening =
15075 $self->[_rstarting_multiline_qw_seqno_by_K_]->{$KK};
15076 if ($seqno_qw_opening) {
15077 push @i_qw, $max_index_to_go;
15081 # we need to save indentations of any unmatched opening tokens
15082 # in this batch because we may need them in a subsequent batch.
15083 foreach ( @unmatched_opening_indexes_in_this_batch, @i_qw ) {
15085 my $seqno = $type_sequence_to_go[$_];
15088 if ( $seqno_qw_opening && $_ == $max_index_to_go ) {
15089 $seqno = $seqno_qw_opening;
15094 $seqno = 'UNKNOWN';
15098 $saved_opening_indentation{$seqno} = [
15099 lookup_opening_indentation(
15100 $_, $ri_first, $ri_last, $rindentation_list
15105 } ## end sub save_opening_indentation
15107 sub get_saved_opening_indentation {
15109 my ( $indent, $offset, $is_leading, $exists ) = ( 0, 0, 0, 0 );
15112 if ( $saved_opening_indentation{$seqno} ) {
15113 ( $indent, $offset, $is_leading ) =
15114 @{ $saved_opening_indentation{$seqno} };
15119 # some kind of serious error it doesn't exist
15120 # (example is badfile.t)
15122 return ( $indent, $offset, $is_leading, $exists );
15123 } ## end sub get_saved_opening_indentation
15124 } ## end closure grind_batch_of_CODE
15126 sub lookup_opening_indentation {
15128 # get the indentation of the line in the current output batch
15129 # which output a selected opening token
15132 # $i_opening - index of an opening token in the current output batch
15133 # whose line indentation we need
15134 # $ri_first - reference to list of the first index $i for each output
15135 # line in this batch
15136 # $ri_last - reference to list of the last index $i for each output line
15138 # $rindentation_list - reference to a list containing the indentation
15139 # used for each line. (NOTE: the first slot in
15140 # this list is the last returned line number, and this is
15141 # followed by the list of indentations).
15144 # -the indentation of the line which contained token $i_opening
15145 # -and its offset (number of columns) from the start of the line
15147 my ( $i_opening, $ri_start, $ri_last, $rindentation_list ) = @_;
15149 if ( !@{$ri_last} ) {
15151 # An error here implies a bug introduced by a recent program change.
15152 # Every batch of code has lines, so this should never happen.
15154 Fault("Error in opening_indentation: no lines");
15156 return ( 0, 0, 0 );
15159 my $nline = $rindentation_list->[0]; # line number of previous lookup
15161 # reset line location if necessary
15162 $nline = 0 if ( $i_opening < $ri_start->[$nline] );
15164 # find the correct line
15165 unless ( $i_opening > $ri_last->[-1] ) {
15166 while ( $i_opening > $ri_last->[$nline] ) { $nline++; }
15169 # Error - token index is out of bounds - shouldn't happen
15170 # A program bug has been introduced in one of the calling routines.
15171 # We better stop here.
15173 my $i_last_line = $ri_last->[-1];
15176 Program bug in call to lookup_opening_indentation - index out of range
15177 called with index i_opening=$i_opening > $i_last_line = max index of last line
15178 This batch has max index = $max_index_to_go,
15181 $nline = $#{$ri_last};
15184 $rindentation_list->[0] =
15185 $nline; # save line number to start looking next call
15186 my $ibeg = $ri_start->[$nline];
15187 my $offset = token_sequence_length( $ibeg, $i_opening ) - 1;
15188 my $is_leading = ( $ibeg == $i_opening );
15189 return ( $rindentation_list->[ $nline + 1 ], $offset, $is_leading );
15190 } ## end sub lookup_opening_indentation
15192 sub terminal_type_i {
15194 # returns type of last token on this line (terminal token), as follows:
15195 # returns # for a full-line comment
15196 # returns ' ' for a blank line
15197 # otherwise returns final token type
15199 my ( $ibeg, $iend ) = @_;
15201 # Start at the end and work backwards
15203 my $type_i = $types_to_go[$i];
15205 # Check for side comment
15206 if ( $type_i eq '#' ) {
15208 if ( $i < $ibeg ) {
15209 return wantarray ? ( $type_i, $ibeg ) : $type_i;
15211 $type_i = $types_to_go[$i];
15214 # Skip past a blank
15215 if ( $type_i eq 'b' ) {
15217 if ( $i < $ibeg ) {
15218 return wantarray ? ( $type_i, $ibeg ) : $type_i;
15220 $type_i = $types_to_go[$i];
15223 # Found it..make sure it is a BLOCK termination,
15224 # but hide a terminal } after sort/map/grep/eval/do because it is not
15225 # necessarily the end of the line. (terminal.t)
15226 my $block_type = $block_type_to_go[$i];
15230 || $is_sort_map_grep_eval_do{$block_type} )
15235 return wantarray ? ( $type_i, $i ) : $type_i;
15236 } ## end sub terminal_type_i
15238 sub pad_array_to_go {
15240 # To simplify coding in break_lists and set_bond_strengths, it helps to
15241 # create some extra blank tokens at the end of the arrays. We also add
15242 # some undef's to help guard against using invalid data.
15244 $K_to_go[ $max_index_to_go + 1 ] = undef;
15245 $tokens_to_go[ $max_index_to_go + 1 ] = EMPTY_STRING;
15246 $tokens_to_go[ $max_index_to_go + 2 ] = EMPTY_STRING;
15247 $tokens_to_go[ $max_index_to_go + 3 ] = undef;
15248 $types_to_go[ $max_index_to_go + 1 ] = 'b';
15249 $types_to_go[ $max_index_to_go + 2 ] = 'b';
15250 $types_to_go[ $max_index_to_go + 3 ] = undef;
15251 $nesting_depth_to_go[ $max_index_to_go + 2 ] = undef;
15252 $nesting_depth_to_go[ $max_index_to_go + 1 ] =
15253 $nesting_depth_to_go[$max_index_to_go];
15256 if ( $is_closing_type{ $types_to_go[$max_index_to_go] } ) {
15257 if ( $nesting_depth_to_go[$max_index_to_go] <= 0 ) {
15259 # Nesting depths are set to be >=0 in sub write_line, so it should
15260 # not be possible to get here unless the code has a bracing error
15261 # which leaves a closing brace with zero nesting depth.
15262 unless ( get_saw_brace_error() ) {
15265 Program bug in pad_array_to_go: hit nesting error which should have been caught
15271 $nesting_depth_to_go[ $max_index_to_go + 1 ] -= 1;
15276 elsif ( $is_opening_type{ $types_to_go[$max_index_to_go] } ) {
15277 $nesting_depth_to_go[ $max_index_to_go + 1 ] += 1;
15280 } ## end sub pad_array_to_go
15282 sub break_all_chain_tokens {
15284 # scan the current breakpoints looking for breaks at certain "chain
15285 # operators" (. : && || + etc) which often occur repeatedly in a long
15286 # statement. If we see a break at any one, break at all similar tokens
15287 # within the same container.
15289 my ( $self, $ri_left, $ri_right ) = @_;
15291 my %saw_chain_type;
15292 my %left_chain_type;
15293 my %right_chain_type;
15294 my %interior_chain_type;
15295 my $nmax = @{$ri_right} - 1;
15297 # scan the left and right end tokens of all lines
15299 for my $n ( 0 .. $nmax ) {
15300 my $il = $ri_left->[$n];
15301 my $ir = $ri_right->[$n];
15302 my $typel = $types_to_go[$il];
15303 my $typer = $types_to_go[$ir];
15304 $typel = '+' if ( $typel eq '-' ); # treat + and - the same
15305 $typer = '+' if ( $typer eq '-' );
15306 $typel = '*' if ( $typel eq '/' ); # treat * and / the same
15307 $typer = '*' if ( $typer eq '/' );
15309 my $keyl = $typel eq 'k' ? $tokens_to_go[$il] : $typel;
15310 my $keyr = $typer eq 'k' ? $tokens_to_go[$ir] : $typer;
15311 if ( $is_chain_operator{$keyl} && $want_break_before{$typel} ) {
15312 next if ( $typel eq '?' );
15313 push @{ $left_chain_type{$keyl} }, $il;
15314 $saw_chain_type{$keyl} = 1;
15317 if ( $is_chain_operator{$keyr} && !$want_break_before{$typer} ) {
15318 next if ( $typer eq '?' );
15319 push @{ $right_chain_type{$keyr} }, $ir;
15320 $saw_chain_type{$keyr} = 1;
15324 return unless $count;
15326 # now look for any interior tokens of the same types
15328 for my $n ( 0 .. $nmax ) {
15329 my $il = $ri_left->[$n];
15330 my $ir = $ri_right->[$n];
15331 foreach my $i ( $il + 1 .. $ir - 1 ) {
15332 my $type = $types_to_go[$i];
15333 my $key = $type eq 'k' ? $tokens_to_go[$i] : $type;
15334 $key = '+' if ( $key eq '-' );
15335 $key = '*' if ( $key eq '/' );
15336 if ( $saw_chain_type{$key} ) {
15337 push @{ $interior_chain_type{$key} }, $i;
15342 return unless $count;
15344 # now make a list of all new break points
15347 # loop over all chain types
15348 foreach my $key ( keys %saw_chain_type ) {
15350 # quit if just ONE continuation line with leading . For example--
15351 # print LATEXFILE '\framebox{\parbox[c][' . $h . '][t]{' . $w . '}{'
15353 last if ( $nmax == 1 && $key =~ /^[\.\+]$/ );
15355 # loop over all interior chain tokens
15356 foreach my $itest ( @{ $interior_chain_type{$key} } ) {
15358 # loop over all left end tokens of same type
15359 if ( $left_chain_type{$key} ) {
15360 next if $nobreak_to_go[ $itest - 1 ];
15361 foreach my $i ( @{ $left_chain_type{$key} } ) {
15362 next unless $self->in_same_container_i( $i, $itest );
15363 push @insert_list, $itest - 1;
15365 # Break at matching ? if this : is at a different level.
15366 # For example, the ? before $THRf_DEAD in the following
15367 # should get a break if its : gets a break.
15370 # ( $_ & 1 ) ? ( $_ & 4 ) ? $THRf_DEAD : $THRf_ZOMBIE
15371 # : ( $_ & 4 ) ? $THRf_R_DETACHED
15372 # : $THRf_R_JOINABLE;
15374 && $levels_to_go[$i] != $levels_to_go[$itest] )
15376 my $i_question = $mate_index_to_go[$itest];
15377 if ( $i_question > 0 ) {
15378 push @insert_list, $i_question - 1;
15385 # loop over all right end tokens of same type
15386 if ( $right_chain_type{$key} ) {
15387 next if $nobreak_to_go[$itest];
15388 foreach my $i ( @{ $right_chain_type{$key} } ) {
15389 next unless $self->in_same_container_i( $i, $itest );
15390 push @insert_list, $itest;
15392 # break at matching ? if this : is at a different level
15394 && $levels_to_go[$i] != $levels_to_go[$itest] )
15396 my $i_question = $mate_index_to_go[$itest];
15397 if ( $i_question >= 0 ) {
15398 push @insert_list, $i_question;
15407 # insert any new break points
15408 if (@insert_list) {
15409 $self->insert_additional_breaks( \@insert_list, $ri_left, $ri_right );
15412 } ## end sub break_all_chain_tokens
15414 sub insert_additional_breaks {
15416 # this routine will add line breaks at requested locations after
15417 # sub break_long_lines has made preliminary breaks.
15419 my ( $self, $ri_break_list, $ri_first, $ri_last ) = @_;
15422 my $line_number = 0;
15423 foreach my $i_break_left ( sort { $a <=> $b } @{$ri_break_list} ) {
15425 next if ( $nobreak_to_go[$i_break_left] );
15427 $i_f = $ri_first->[$line_number];
15428 $i_l = $ri_last->[$line_number];
15429 while ( $i_break_left >= $i_l ) {
15432 # shouldn't happen unless caller passes bad indexes
15433 if ( $line_number >= @{$ri_last} ) {
15436 Non-fatal program bug: couldn't set break at $i_break_left
15441 $i_f = $ri_first->[$line_number];
15442 $i_l = $ri_last->[$line_number];
15445 # Do not leave a blank at the end of a line; back up if necessary
15446 if ( $types_to_go[$i_break_left] eq 'b' ) { $i_break_left-- }
15448 my $i_break_right = $inext_to_go[$i_break_left];
15449 if ( $i_break_left >= $i_f
15450 && $i_break_left < $i_l
15451 && $i_break_right > $i_f
15452 && $i_break_right <= $i_l )
15454 splice( @{$ri_first}, $line_number, 1, ( $i_f, $i_break_right ) );
15455 splice( @{$ri_last}, $line_number, 1, ( $i_break_left, $i_l ) );
15459 } ## end sub insert_additional_breaks
15461 { ## begin closure in_same_container_i
15462 my $ris_break_token;
15463 my $ris_comma_token;
15467 # all cases break on seeing commas at same level
15470 @{$ris_comma_token}{@q} = (1) x scalar(@q);
15472 # Non-ternary text also breaks on seeing any of qw(? : || or )
15473 # Example: we would not want to break at any of these .'s
15474 # : "<A HREF=\"#item_" . htmlify( 0, $s2 ) . "\">$str</A>"
15475 push @q, qw( or || ? : );
15476 @{$ris_break_token}{@q} = (1) x scalar(@q);
15479 sub in_same_container_i {
15481 # Check to see if tokens at i1 and i2 are in the same container, and
15482 # not separated by certain characters: => , ? : || or
15483 # This is an interface between the _to_go arrays to the rLL array
15484 my ( $self, $i1, $i2 ) = @_;
15487 my $parent_seqno_1 = $parent_seqno_to_go[$i1];
15488 return if ( $parent_seqno_to_go[$i2] ne $parent_seqno_1 );
15490 if ( $i2 < $i1 ) { ( $i1, $i2 ) = ( $i2, $i1 ) }
15491 my $K1 = $K_to_go[$i1];
15492 my $K2 = $K_to_go[$i2];
15493 my $rLL = $self->[_rLL_];
15495 my $depth_1 = $nesting_depth_to_go[$i1];
15496 return if ( $depth_1 < 0 );
15498 # Shouldn't happen since i1 and i2 have same parent:
15499 return unless ( $nesting_depth_to_go[$i2] == $depth_1 );
15501 # Select character set to scan for
15502 my $type_1 = $types_to_go[$i1];
15503 my $rbreak = ( $type_1 ne ':' ) ? $ris_break_token : $ris_comma_token;
15505 # Fast preliminary loop to verify that tokens are in the same container
15508 $KK = $rLL->[$KK]->[_KNEXT_SEQ_ITEM_];
15509 last if !defined($KK);
15510 last if ( $KK >= $K2 );
15511 my $ii = $i1 + $KK - $K1;
15512 my $depth_i = $nesting_depth_to_go[$ii];
15513 return if ( $depth_i < $depth_1 );
15514 next if ( $depth_i > $depth_1 );
15515 if ( $type_1 ne ':' ) {
15516 my $tok_i = $tokens_to_go[$ii];
15517 return if ( $tok_i eq '?' || $tok_i eq ':' );
15521 # Slow loop checking for certain characters
15523 #-----------------------------------------------------
15524 # This is potentially a slow routine and not critical.
15525 # For safety just give up for large differences.
15526 # See test file 'infinite_loop.txt'
15527 #-----------------------------------------------------
15528 return if ( $i2 - $i1 > 200 );
15530 foreach my $ii ( $i1 + 1 .. $i2 - 1 ) {
15532 my $depth_i = $nesting_depth_to_go[$ii];
15533 next if ( $depth_i > $depth_1 );
15534 return if ( $depth_i < $depth_1 );
15535 my $tok_i = $tokens_to_go[$ii];
15536 return if ( $rbreak->{$tok_i} );
15539 } ## end sub in_same_container_i
15540 } ## end closure in_same_container_i
15544 # Look for assignment operators that could use a breakpoint.
15545 # For example, in the following snippet
15547 # $HOME = $ENV{HOME}
15550 # || die "no home directory for user $<";
15552 # we could break at the = to get this, which is a little nicer:
15557 # || die "no home directory for user $<";
15559 # The logic here follows the logic in set_logical_padding, which
15560 # will add the padding in the second line to improve alignment.
15562 my ( $self, $ri_left, $ri_right ) = @_;
15563 my $nmax = @{$ri_right} - 1;
15564 return unless ( $nmax >= 2 );
15566 # scan the left ends of first two lines
15567 my $tokbeg = EMPTY_STRING;
15569 for my $n ( 1 .. 2 ) {
15570 my $il = $ri_left->[$n];
15571 my $typel = $types_to_go[$il];
15572 my $tokenl = $tokens_to_go[$il];
15573 my $keyl = $typel eq 'k' ? $tokenl : $typel;
15575 my $has_leading_op = $is_chain_operator{$keyl};
15576 return unless ($has_leading_op);
15579 unless ( $tokenl eq $tokbeg
15580 && $nesting_depth_to_go[$il] eq $depth_beg );
15583 $depth_beg = $nesting_depth_to_go[$il];
15586 # now look for any interior tokens of the same types
15587 my $il = $ri_left->[0];
15588 my $ir = $ri_right->[0];
15590 # now make a list of all new break points
15592 foreach my $i ( reverse( $il + 1 .. $ir - 1 ) ) {
15593 my $type = $types_to_go[$i];
15594 if ( $is_assignment{$type}
15595 && $nesting_depth_to_go[$i] eq $depth_beg )
15597 if ( $want_break_before{$type} ) {
15598 push @insert_list, $i - 1;
15601 push @insert_list, $i;
15606 # Break after a 'return' followed by a chain of operators
15607 # return ( $^O !~ /win32|dos/i )
15608 # && ( $^O ne 'VMS' )
15609 # && ( $^O ne 'OS2' )
15610 # && ( $^O ne 'MacOS' );
15613 # ( $^O !~ /win32|dos/i )
15614 # && ( $^O ne 'VMS' )
15615 # && ( $^O ne 'OS2' )
15616 # && ( $^O ne 'MacOS' );
15618 if ( $types_to_go[$i] eq 'k'
15619 && $tokens_to_go[$i] eq 'return'
15621 && $nesting_depth_to_go[$i] eq $depth_beg )
15623 push @insert_list, $i;
15626 return unless (@insert_list);
15628 # One final check...
15629 # scan second and third lines and be sure there are no assignments
15630 # we want to avoid breaking at an = to make something like this:
15632 # $html_icons{"$type-$state"}
15633 # or $icon = $html_icons{$type}
15634 # or $icon = $html_icons{$state} )
15635 for my $n ( 1 .. 2 ) {
15636 my $il_n = $ri_left->[$n];
15637 my $ir_n = $ri_right->[$n];
15638 foreach my $i ( $il_n + 1 .. $ir_n ) {
15639 my $type = $types_to_go[$i];
15641 if ( $is_assignment{$type}
15642 && $nesting_depth_to_go[$i] eq $depth_beg );
15646 # ok, insert any new break point
15647 if (@insert_list) {
15648 $self->insert_additional_breaks( \@insert_list, $ri_left, $ri_right );
15651 } ## end sub break_equals
15653 { ## begin closure recombine_breakpoints
15655 # This routine is called once per batch to see if it would be better
15656 # to combine some of the lines into which the batch has been broken.
15667 @is_amp_amp{@q} = (1) x scalar(@q);
15669 @q = qw( + - * / );
15670 @is_math_op{@q} = (1) x scalar(@q);
15673 @is_plus_minus{@q} = (1) x scalar(@q);
15676 @is_mult_div{@q} = (1) x scalar(@q);
15679 sub Debug_dump_breakpoints {
15681 # Debug routine to dump current breakpoints...not normally called
15682 # We are given indexes to the current lines:
15683 # $ri_beg = ref to array of BEGinning indexes of each line
15684 # $ri_end = ref to array of ENDing indexes of each line
15685 my ( $self, $ri_beg, $ri_end, $msg ) = @_;
15686 print STDERR "----Dumping breakpoints from: $msg----\n";
15687 for my $n ( 0 .. @{$ri_end} - 1 ) {
15688 my $ibeg = $ri_beg->[$n];
15689 my $iend = $ri_end->[$n];
15690 my $text = EMPTY_STRING;
15691 foreach my $i ( $ibeg .. $iend ) {
15692 $text .= $tokens_to_go[$i];
15694 print STDERR "$n ($ibeg:$iend) $text\n";
15696 print STDERR "----\n";
15698 } ## end sub Debug_dump_breakpoints
15700 sub delete_one_line_semicolons {
15702 my ( $self, $ri_beg, $ri_end ) = @_;
15703 my $rLL = $self->[_rLL_];
15704 my $K_opening_container = $self->[_K_opening_container_];
15706 # Walk down the lines of this batch and delete any semicolons
15707 # terminating one-line blocks;
15708 my $nmax = @{$ri_end} - 1;
15710 foreach my $n ( 0 .. $nmax ) {
15711 my $i_beg = $ri_beg->[$n];
15712 my $i_e = $ri_end->[$n];
15713 my $K_beg = $K_to_go[$i_beg];
15714 my $K_e = $K_to_go[$i_e];
15716 my $type_end = $rLL->[$K_end]->[_TYPE_];
15717 if ( $type_end eq '#' ) {
15718 $K_end = $self->K_previous_nonblank($K_end);
15719 if ( defined($K_end) ) { $type_end = $rLL->[$K_end]->[_TYPE_]; }
15722 # we are looking for a line ending in closing brace
15724 unless ( $type_end eq '}' && $rLL->[$K_end]->[_TOKEN_] eq '}' );
15726 # ...and preceded by a semicolon on the same line
15727 my $K_semicolon = $self->K_previous_nonblank($K_end);
15728 next unless defined($K_semicolon);
15729 my $i_semicolon = $i_beg + ( $K_semicolon - $K_beg );
15730 next if ( $i_semicolon <= $i_beg );
15731 next unless ( $rLL->[$K_semicolon]->[_TYPE_] eq ';' );
15733 # Safety check - shouldn't happen - not critical
15734 # This is not worth throwing a Fault, except in DEVEL_MODE
15735 if ( $types_to_go[$i_semicolon] ne ';' ) {
15737 && Fault("unexpected type looking for semicolon");
15741 # ... with the corresponding opening brace on the same line
15742 my $type_sequence = $rLL->[$K_end]->[_TYPE_SEQUENCE_];
15743 my $K_opening = $K_opening_container->{$type_sequence};
15744 next unless ( defined($K_opening) );
15745 my $i_opening = $i_beg + ( $K_opening - $K_beg );
15746 next if ( $i_opening < $i_beg );
15748 # ... and only one semicolon between these braces
15749 my $semicolon_count = 0;
15750 foreach my $K ( $K_opening + 1 .. $K_semicolon - 1 ) {
15751 if ( $rLL->[$K]->[_TYPE_] eq ';' ) {
15752 $semicolon_count++;
15756 next if ($semicolon_count);
15758 # ...ok, then make the semicolon invisible
15759 my $len = $token_lengths_to_go[$i_semicolon];
15760 $tokens_to_go[$i_semicolon] = EMPTY_STRING;
15761 $token_lengths_to_go[$i_semicolon] = 0;
15762 $rLL->[$K_semicolon]->[_TOKEN_] = EMPTY_STRING;
15763 $rLL->[$K_semicolon]->[_TOKEN_LENGTH_] = 0;
15764 foreach ( $i_semicolon .. $max_index_to_go ) {
15765 $summed_lengths_to_go[ $_ + 1 ] -= $len;
15769 } ## end sub delete_one_line_semicolons
15771 use constant DEBUG_RECOMBINE => 0;
15773 sub recombine_breakpoints {
15775 # We are given indexes to the current lines:
15776 # $ri_beg = ref to array of BEGinning indexes of each line
15777 # $ri_end = ref to array of ENDing indexes of each line
15778 my ( $self, $ri_beg, $ri_end, $rbond_strength_to_go ) = @_;
15780 # sub break_long_lines is very liberal in setting line breaks
15781 # for long lines, always setting breaks at good breakpoints, even
15782 # when that creates small lines. Sometimes small line fragments
15783 # are produced which would look better if they were combined.
15784 # That's the task of this routine.
15786 # do nothing under extreme stress
15787 return if ( $stress_level_alpha < 1 && !DEVEL_MODE );
15789 my $rK_weld_right = $self->[_rK_weld_right_];
15790 my $rK_weld_left = $self->[_rK_weld_left_];
15792 my $nmax_start = @{$ri_end} - 1;
15793 return if ( $nmax_start <= 0 );
15795 # Make a list of all good joining tokens between the lines
15799 # Break the total batch sub-sections with lengths short enough to
15801 my $rsections = [];
15804 my $nmax_section = 0;
15805 foreach my $nn ( 1 .. $nmax_start ) {
15806 my $ibeg_1 = $ri_beg->[ $nn - 1 ];
15807 my $iend_1 = $ri_end->[ $nn - 1 ];
15808 my $iend_2 = $ri_end->[$nn];
15809 my $ibeg_2 = $ri_beg->[$nn];
15811 # Define the joint variable
15812 my ( $itok, $itokp, $itokm );
15813 foreach my $itest ( $iend_1, $ibeg_2 ) {
15814 my $type = $types_to_go[$itest];
15815 if ( $is_math_op{$type}
15816 || $is_amp_amp{$type}
15817 || $is_assignment{$type}
15823 $joint[$nn] = [$itok];
15825 # Update the section list
15826 my $excess = $self->excess_line_length( $ibeg_1, $iend_2, 1 );
15830 # The number 5 here is an arbitrary small number intended
15831 # to keep most small matches in one sub-section.
15832 || ( defined($nend_sec)
15833 && ( $nn < 5 || $nmax_start - $nn < 5 ) )
15839 if ( defined($nend_sec) ) {
15840 push @{$rsections}, [ $nbeg_sec, $nend_sec ];
15841 my $num = $nend_sec - $nbeg_sec;
15842 if ( $num > $nmax_section ) { $nmax_section = $num }
15849 if ( defined($nend_sec) ) {
15850 push @{$rsections}, [ $nbeg_sec, $nend_sec ];
15851 my $num = $nend_sec - $nbeg_sec;
15852 if ( $num > $nmax_section ) { $nmax_section = $num }
15855 my $num_sections = @{$rsections};
15857 # This is potentially an O(n-squared) loop, but not critical, so we can
15858 # put a finite limit on the total number of iterations. This is
15859 # suggested by issue c118, which pushed about 5.e5 lines through here
15860 # and caused an excessive run time.
15862 # Three lines of defense have been put in place to prevent excessive
15864 # 1. do nothing if formatting under stress (c118 was under stress)
15865 # 2. break into small sub-sections to decrease the maximum n-squared.
15866 # 3. put a finite limit on the number of iterations.
15868 # Testing shows that most batches only require one or two iterations.
15869 # A very large batch which is broken into sub-sections can require one
15870 # iteration per section. This suggests the limit here, which allows
15871 # up to 10 iterations plus one pass per sub-section.
15874 10 + int( 1000 / ( 1 + $nmax_section ) ) + $num_sections;
15876 if ( DEBUG_RECOMBINE > 1 ) {
15879 "-----\n$num_sections sections found for nmax=$nmax_start\n";
15880 foreach my $sect ( @{$rsections} ) {
15881 my ( $nbeg, $nend ) = @{$sect};
15882 my $num = $nend - $nbeg;
15883 if ( $num > $max ) { $max = $num }
15884 print STDERR "$nbeg $nend\n";
15886 print STDERR "max size=$max of $nmax_start lines\n";
15889 # Loop over all sub-sections. Note that we have to work backwards
15890 # from the end of the batch since the sections use original line
15891 # numbers, and the line numbers change as we go.
15892 while ( my $section = pop @{$rsections} ) {
15893 my ( $nbeg, $nend ) = @{$section};
15895 # number of ending lines to leave untouched in this pass
15896 my $nmax_sec = @{$ri_end} - 1;
15897 my $num_freeze = $nmax_sec - $nend;
15899 my $more_to_do = 1;
15901 # We keep looping over all of the lines of this batch
15902 # until there are no more possible recombinations
15903 my $nmax_last = $nmax_sec + 1;
15906 while ($more_to_do) {
15908 # Safety check for excess total iterations
15910 if ( $it_count > $it_count_max ) {
15916 my $nmax = @{$ri_end} - 1;
15918 # Safety check for infinite loop: the line count must decrease
15919 unless ( $nmax < $nmax_last ) {
15921 # Shouldn't happen because splice below decreases nmax on
15922 # each iteration. An error can only be due to a recent
15923 # programming change. We better stop here.
15926 "Program bug-infinite loop in recombine breakpoints\n"
15932 $nmax_last = $nmax;
15934 my $skip_Section_3;
15935 my $leading_amp_count = 0;
15936 my $this_line_is_semicolon_terminated;
15938 # loop over all remaining lines in this batch
15939 my $nstop = $nmax - $num_freeze;
15940 for my $iter ( $nbeg + 1 .. $nstop ) {
15942 # alternating sweep direction gives symmetric results
15943 # for recombining lines which exceed the line length
15944 # such as eval {{{{.... }}}}
15946 if ($reverse) { $n = $nbeg + 1 + $nstop - $iter; }
15947 else { $n = $iter }
15949 #----------------------------------------------------------
15950 # If we join the current pair of lines,
15951 # line $n-1 will become the left part of the joined line
15952 # line $n will become the right part of the joined line
15954 # Here are Indexes of the endpoint tokens of the two lines:
15956 # -----line $n-1--- | -----line $n-----
15957 # $ibeg_1 $iend_1 | $ibeg_2 $iend_2
15960 # We want to decide if we should remove the line break
15961 # between the tokens at $iend_1 and $ibeg_2
15963 # We will apply a number of ad-hoc tests to see if joining
15964 # here will look ok. The code will just issue a 'next'
15965 # command if the join doesn't look good. If we get through
15966 # the gauntlet of tests, the lines will be recombined.
15967 #----------------------------------------------------------
15969 # beginning and ending tokens of the lines we are working on
15970 my $ibeg_1 = $ri_beg->[ $n - 1 ];
15971 my $iend_1 = $ri_end->[ $n - 1 ];
15972 my $iend_2 = $ri_end->[$n];
15973 my $ibeg_2 = $ri_beg->[$n];
15974 my $ibeg_nmax = $ri_beg->[$nmax];
15976 # combined line cannot be too long
15978 $self->excess_line_length( $ibeg_1, $iend_2, 1 );
15979 next if ( $excess > 0 );
15981 my $type_iend_1 = $types_to_go[$iend_1];
15982 my $type_iend_2 = $types_to_go[$iend_2];
15983 my $type_ibeg_1 = $types_to_go[$ibeg_1];
15984 my $type_ibeg_2 = $types_to_go[$ibeg_2];
15986 # terminal token of line 2 if any side comment is ignored:
15987 my $iend_2t = $iend_2;
15988 my $type_iend_2t = $type_iend_2;
15990 # some beginning indexes of other lines, which may not exist
15991 my $ibeg_0 = $n > 1 ? $ri_beg->[ $n - 2 ] : -1;
15992 my $ibeg_3 = $n < $nmax ? $ri_beg->[ $n + 1 ] : -1;
15993 my $ibeg_4 = $n + 2 <= $nmax ? $ri_beg->[ $n + 2 ] : -1;
15997 #my $depth_increase=( $nesting_depth_to_go[$ibeg_2] -
15998 # $nesting_depth_to_go[$ibeg_1] );
16000 DEBUG_RECOMBINE > 1 && do {
16002 "RECOMBINE: n=$n imid=$iend_1 if=$ibeg_1 type=$type_ibeg_1 =$tokens_to_go[$ibeg_1] next_type=$type_ibeg_2 next_tok=$tokens_to_go[$ibeg_2]\n";
16005 # If line $n is the last line, we set some flags and
16006 # do any special checks for it
16007 if ( $n == $nmax ) {
16009 # a terminal '{' should stay where it is
16010 # unless preceded by a fat comma
16011 next if ( $type_ibeg_2 eq '{' && $type_iend_1 ne '=>' );
16013 if ( $type_iend_2 eq '#'
16014 && $iend_2 - $ibeg_2 >= 2
16015 && $types_to_go[ $iend_2 - 1 ] eq 'b' )
16017 $iend_2t = $iend_2 - 2;
16018 $type_iend_2t = $types_to_go[$iend_2t];
16021 $this_line_is_semicolon_terminated =
16022 $type_iend_2t eq ';';
16025 #----------------------------------------------------------
16026 # Recombine Section 0:
16027 # Examine the special token joining this line pair, if any.
16028 # Put as many tests in this section to avoid duplicate code
16029 # and to make formatting independent of whether breaks are
16030 # to the left or right of an operator.
16031 #----------------------------------------------------------
16033 my ($itok) = @{ $joint[$n] };
16036 my $type = $types_to_go[$itok];
16038 if ( $type eq ':' ) {
16040 # do not join at a colon unless it disobeys the
16042 if ( $itok eq $iend_1 ) {
16043 next unless $want_break_before{$type};
16046 $leading_amp_count++;
16047 next if $want_break_before{$type};
16051 # handle math operators + - * /
16052 elsif ( $is_math_op{$type} ) {
16054 # Combine these lines if this line is a single
16055 # number, or if it is a short term with same
16056 # operator as the previous line. For example, in
16057 # the following code we will combine all of the
16058 # short terms $A, $B, $C, $D, $E, $F, together
16059 # instead of leaving them one per line:
16061 # $A * $B * $C * $D * $E * $F *
16062 # ( 2. * $eps * $sigma * $area ) *
16063 # ( 1. / $tcold**3 - 1. / $thot**3 );
16065 # This can be important in math-intensive code.
16069 my $itokp = min( $inext_to_go[$itok], $iend_2 );
16070 my $itokpp = min( $inext_to_go[$itokp], $iend_2 );
16071 my $itokm = max( $iprev_to_go[$itok], $ibeg_1 );
16072 my $itokmm = max( $iprev_to_go[$itokm], $ibeg_1 );
16074 # check for a number on the right
16075 if ( $types_to_go[$itokp] eq 'n' ) {
16077 # ok if nothing else on right
16078 if ( $itokp == $iend_2 ) {
16083 # look one more token to right..
16084 # okay if math operator or some termination
16086 ( ( $itokpp == $iend_2 )
16087 && $is_math_op{ $types_to_go[$itokpp]
16089 || $types_to_go[$itokpp] =~ /^[#,;]$/;
16093 # check for a number on the left
16094 if ( !$good_combo && $types_to_go[$itokm] eq 'n' ) {
16096 # okay if nothing else to left
16097 if ( $itokm == $ibeg_1 ) {
16101 # otherwise look one more token to left
16104 # okay if math operator, comma, or assignment
16105 $good_combo = ( $itokmm == $ibeg_1 )
16106 && ( $is_math_op{ $types_to_go[$itokmm] }
16107 || $types_to_go[$itokmm] =~ /^[,]$/
16108 || $is_assignment{ $types_to_go[$itokmm]
16113 # look for a single short token either side of the
16115 if ( !$good_combo ) {
16117 # Slight adjustment factor to make results
16118 # independent of break before or after operator
16119 # in long summed lists. (An operator and a
16120 # space make two spaces).
16121 my $two = ( $itok eq $iend_1 ) ? 2 : 0;
16125 # numbers or id's on both sides of this joint
16126 $types_to_go[$itokp] =~ /^[in]$/
16127 && $types_to_go[$itokm] =~ /^[in]$/
16129 # one of the two lines must be short:
16132 # no more than 2 nonblank tokens right
16137 && token_sequence_length(
16140 $rOpts_short_concatenation_item_length
16143 # no more than 2 nonblank tokens left of
16148 && token_sequence_length(
16151 $rOpts_short_concatenation_item_length
16156 # keep pure terms; don't mix +- with */
16158 $is_plus_minus{$type}
16159 && ( $is_mult_div{ $types_to_go[$itokmm] }
16160 || $is_mult_div{ $types_to_go[$itokpp] }
16164 $is_mult_div{$type}
16165 && ( $is_plus_minus{ $types_to_go[$itokmm] }
16166 || $is_plus_minus{ $types_to_go[$itokpp]
16173 # it is also good to combine if we can reduce to 2
16175 if ( !$good_combo ) {
16177 # index on other line where same token would be
16180 ( $itok == $iend_1 ) ? $iend_2 : $ibeg_1;
16185 && $types_to_go[$iother] ne $type;
16188 next unless ($good_combo);
16192 elsif ( $is_amp_amp{$type} ) {
16196 elsif ( $is_assignment{$type} ) {
16198 } ## end assignment
16201 #----------------------------------------------------------
16202 # Recombine Section 1:
16203 # Join welded nested containers immediately
16204 #----------------------------------------------------------
16208 && ( $type_sequence_to_go[$iend_1]
16209 && defined( $rK_weld_right->{ $K_to_go[$iend_1] } )
16210 || $type_sequence_to_go[$ibeg_2]
16211 && defined( $rK_weld_left->{ $K_to_go[$ibeg_2] } ) )
16220 #----------------------------------------------------------
16221 # Recombine Section 2:
16222 # Examine token at $iend_1 (right end of first line of pair)
16223 #----------------------------------------------------------
16225 # an isolated '}' may join with a ';' terminated segment
16226 if ( $type_iend_1 eq '}' ) {
16228 # Check for cases where combining a semicolon terminated
16229 # statement with a previous isolated closing paren will
16230 # allow the combined line to be outdented. This is
16231 # generally a good move. For example, we can join up
16232 # the last two lines here:
16234 # $dev, $ino, $mode, $nlink, $uid, $gid, $rdev,
16235 # $size, $atime, $mtime, $ctime, $blksize, $blocks
16241 # $dev, $ino, $mode, $nlink, $uid, $gid, $rdev,
16242 # $size, $atime, $mtime, $ctime, $blksize, $blocks
16245 # which makes the parens line up.
16247 # Another example, from Joe Matarazzo, probably looks best
16248 # with the 'or' clause appended to the trailing paren:
16249 # $self->some_method(
16252 # ) or die "Some_method didn't work";
16254 # But we do not want to do this for something like the -lp
16255 # option where the paren is not outdentable because the
16256 # trailing clause will be far to the right.
16258 # The logic here is synchronized with the logic in sub
16259 # sub final_indentation_adjustment, which actually does
16262 $skip_Section_3 ||= $this_line_is_semicolon_terminated
16264 # only one token on last line
16265 && $ibeg_1 == $iend_1
16267 # must be structural paren
16268 && $tokens_to_go[$iend_1] eq ')'
16270 # style must allow outdenting,
16271 && !$closing_token_indentation{')'}
16273 # only leading '&&', '||', and ':' if no others seen
16274 # (but note: our count made below could be wrong
16275 # due to intervening comments)
16276 && ( $leading_amp_count == 0
16277 || $type_ibeg_2 !~ /^(:|\&\&|\|\|)$/ )
16279 # but leading colons probably line up with a
16280 # previous colon or question (count could be wrong).
16281 && $type_ibeg_2 ne ':'
16283 # only one step in depth allowed. this line must not
16284 # begin with a ')' itself.
16285 && ( $nesting_depth_to_go[$iend_1] ==
16286 $nesting_depth_to_go[$iend_2] + 1 );
16288 # YVES patch 2 of 2:
16289 # Allow cuddled eval chains, like this:
16296 # This patch works together with a patch in
16297 # setting adjusted indentation (where the closing eval
16298 # brace is outdented if possible).
16299 # The problem is that an 'eval' block has continuation
16300 # indentation and it looks better to undo it in some
16301 # cases. If we do not use this patch we would get:
16309 # The alternative, for uncuddled style, is to create
16310 # a patch in final_indentation_adjustment which undoes
16311 # the indentation of a leading line like 'or do {'.
16312 # This doesn't work well with -icb through
16314 $block_type_to_go[$iend_1] eq 'eval'
16315 && !ref( $leading_spaces_to_go[$iend_1] )
16316 && !$rOpts_indent_closing_brace
16317 && $tokens_to_go[$iend_2] eq '{'
16319 ( $type_ibeg_2 =~ /^(\&\&|\|\|)$/ )
16320 || ( $type_ibeg_2 eq 'k'
16321 && $is_and_or{ $tokens_to_go[$ibeg_2] } )
16322 || $is_if_unless{ $tokens_to_go[$ibeg_2] }
16326 $skip_Section_3 ||= 1;
16333 # handle '.' and '?' specially below
16334 || ( $type_ibeg_2 =~ /^[\.\?]$/ )
16336 # fix for c054 (unusual -pbp case)
16337 || $type_ibeg_2 eq '=='
16342 elsif ( $type_iend_1 eq '{' ) {
16345 # honor breaks at opening brace
16346 # Added to prevent recombining something like this:
16347 # } || eval { package main;
16348 next if $forced_breakpoint_to_go[$iend_1];
16351 # do not recombine lines with ending &&, ||,
16352 elsif ( $is_amp_amp{$type_iend_1} ) {
16353 next unless $want_break_before{$type_iend_1};
16356 # Identify and recombine a broken ?/: chain
16357 elsif ( $type_iend_1 eq '?' ) {
16359 # Do not recombine different levels
16362 $levels_to_go[$ibeg_1] ne $levels_to_go[$ibeg_2] );
16364 # do not recombine unless next line ends in :
16365 next unless $type_iend_2 eq ':';
16368 # for lines ending in a comma...
16369 elsif ( $type_iend_1 eq ',' ) {
16371 # Do not recombine at comma which is following the
16373 # TODO: might be best to make a special flag
16374 next if ( $old_breakpoint_to_go[$iend_1] );
16376 # An isolated '},' may join with an identifier + ';'
16377 # This is useful for the class of a 'bless' statement
16379 if ( $type_ibeg_1 eq '}'
16380 && $type_ibeg_2 eq 'i' )
16383 unless ( ( $ibeg_1 == ( $iend_1 - 1 ) )
16384 && ( $iend_2 == ( $ibeg_2 + 1 ) )
16385 && $this_line_is_semicolon_terminated );
16387 # override breakpoint
16388 $forced_breakpoint_to_go[$iend_1] = 0;
16394 # do not recombine after a comma unless this will
16395 # leave just 1 more line
16396 next unless ( $n + 1 >= $nmax );
16398 # do not recombine if there is a change in
16399 # indentation depth
16401 if ( $levels_to_go[$iend_1] !=
16402 $levels_to_go[$iend_2] );
16404 # do not recombine a "complex expression" after a
16405 # comma. "complex" means no parens.
16407 foreach my $ii ( $ibeg_2 .. $iend_2 ) {
16408 if ( $tokens_to_go[$ii] eq '(' ) {
16413 next if $saw_paren;
16418 elsif ( $type_iend_1 eq '(' ) {
16420 # No longer doing this
16423 elsif ( $type_iend_1 eq ')' ) {
16425 # No longer doing this
16428 # keep a terminal for-semicolon
16429 elsif ( $type_iend_1 eq 'f' ) {
16433 # if '=' at end of line ...
16434 elsif ( $is_assignment{$type_iend_1} ) {
16436 # keep break after = if it was in input stream
16437 # this helps prevent 'blinkers'
16440 $old_breakpoint_to_go[$iend_1]
16442 # don't strand an isolated '='
16443 && $iend_1 != $ibeg_1
16446 my $is_short_quote =
16447 ( $type_ibeg_2 eq 'Q'
16448 && $ibeg_2 == $iend_2
16449 && token_sequence_length( $ibeg_2, $ibeg_2 ) <
16450 $rOpts_short_concatenation_item_length );
16452 $type_ibeg_1 eq '?' && ( $ibeg_3 >= 0
16453 && $types_to_go[$ibeg_3] eq ':' )
16456 # always join an isolated '=', a short quote, or if this
16457 # will put ?/: at start of adjacent lines
16458 if ( $ibeg_1 != $iend_1
16459 && !$is_short_quote
16466 # unless we can reduce this to two lines
16469 # or three lines, the last with a leading
16471 || ( $nmax == $n + 2
16472 && $types_to_go[$ibeg_nmax] eq ';' )
16474 # or the next line ends with a here doc
16475 || $type_iend_2 eq 'h'
16477 # or the next line ends in an open paren or
16478 # brace and the break hasn't been forced
16480 || ( !$forced_breakpoint_to_go[$iend_1]
16481 && $type_iend_2 eq '{' )
16484 # do not recombine if the two lines might align
16485 # well this is a very approximate test for this
16488 # RT#127633 - the leading tokens are not
16490 ( $type_ibeg_2 ne $tokens_to_go[$ibeg_2] )
16492 # or they are different
16495 $types_to_go[$ibeg_3] )
16501 # Recombine if we can make two lines
16504 # -lp users often prefer this:
16505 # my $title = function($env, $env, $sysarea,
16506 # "bubba Borrower Entry");
16507 # so we will recombine if -lp is used we have
16511 && ref( $leading_spaces_to_go[$ibeg_3] )
16512 && $type_iend_2 eq ','
16517 # otherwise, scan the rhs line up to last token
16518 # for complexity. Note that we are not
16519 # counting the last token in case it is an
16522 my $depth = $nesting_depth_to_go[$ibeg_2];
16523 foreach my $i ( $ibeg_2 + 1 .. $iend_2 - 1 ) {
16524 if ( $nesting_depth_to_go[$i] != $depth ) {
16526 last if ( $tv > 1 );
16528 $depth = $nesting_depth_to_go[$i];
16531 # ok to recombine if no level changes before
16535 # otherwise, do not recombine if more than
16536 # two level changes.
16537 next if ( $tv > 1 );
16539 # check total complexity of the two
16540 # adjacent lines that will occur if we do
16544 ? $ri_end->[ $n + 1 ]
16546 foreach my $i ( $iend_2 .. $istop ) {
16548 $nesting_depth_to_go[$i] != $depth )
16551 last if ( $tv > 2 );
16553 $depth = $nesting_depth_to_go[$i];
16556 # do not recombine if total is more than 2
16558 next if ( $tv > 2 );
16563 unless ( $tokens_to_go[$ibeg_2] =~ /^[\{\(\[]$/ ) {
16564 $forced_breakpoint_to_go[$iend_1] = 0;
16569 elsif ( $type_iend_1 eq 'k' ) {
16571 # make major control keywords stand out
16576 #/^(last|next|redo|return)$/
16577 $is_last_next_redo_return{ $tokens_to_go[$iend_1] }
16579 # but only if followed by multiple lines
16583 if ( $is_and_or{ $tokens_to_go[$iend_1] } ) {
16585 unless $want_break_before{ $tokens_to_go[$iend_1]
16590 #----------------------------------------------------------
16591 # Recombine Section 3:
16592 # Examine token at $ibeg_2 (left end of second line of pair)
16593 #----------------------------------------------------------
16595 # join lines identified above as capable of
16596 # causing an outdented line with leading closing paren
16597 # Note that we are skipping the rest of this section
16598 # and the rest of the loop to do the join
16599 if ($skip_Section_3) {
16600 $forced_breakpoint_to_go[$iend_1] = 0;
16605 # handle lines with leading &&, ||
16606 elsif ( $is_amp_amp{$type_ibeg_2} ) {
16608 $leading_amp_count++;
16610 # ok to recombine if it follows a ? or :
16611 # and is followed by an open paren..
16613 ( $is_ternary{$type_ibeg_1}
16614 && $tokens_to_go[$iend_2] eq '(' )
16616 # or is followed by a ? or : at same depth
16618 # We are looking for something like this. We can
16619 # recombine the && line with the line above to make the
16620 # structure more clear:
16622 # exists $G->{Attr}->{V}
16623 # && exists $G->{Attr}->{V}->{$u}
16624 # ? %{ $G->{Attr}->{V}->{$u} }
16627 # We should probably leave something like this alone:
16629 # exists $G->{Attr}->{E}
16630 # && exists $G->{Attr}->{E}->{$u}
16631 # && exists $G->{Attr}->{E}->{$u}->{$v}
16632 # ? %{ $G->{Attr}->{E}->{$u}->{$v} }
16634 # so that we either have all of the &&'s (or ||'s)
16635 # on one line, as in the first example, or break at
16636 # each one as in the second example. However, it
16637 # sometimes makes things worse to check for this because
16638 # it prevents multiple recombinations. So this is not done.
16640 && $is_ternary{ $types_to_go[$ibeg_3] }
16641 && $nesting_depth_to_go[$ibeg_3] ==
16642 $nesting_depth_to_go[$ibeg_2] );
16644 # Combine a trailing && term with an || term: fix for
16645 # c060 This is rare but can happen.
16648 && $type_ibeg_2 eq '&&'
16649 && $type_ibeg_1 eq '||'
16650 && $nesting_depth_to_go[$ibeg_2] ==
16651 $nesting_depth_to_go[$ibeg_1] );
16653 next if !$ok && $want_break_before{$type_ibeg_2};
16654 $forced_breakpoint_to_go[$iend_1] = 0;
16656 # tweak the bond strength to give this joint priority
16661 # Identify and recombine a broken ?/: chain
16662 elsif ( $type_ibeg_2 eq '?' ) {
16664 # Do not recombine different levels
16665 my $lev = $levels_to_go[$ibeg_2];
16666 next if ( $lev ne $levels_to_go[$ibeg_1] );
16668 # Do not recombine a '?' if either next line or
16669 # previous line does not start with a ':'. The reasons
16670 # are that (1) no alignment of the ? will be possible
16671 # and (2) the expression is somewhat complex, so the
16672 # '?' is harder to see in the interior of the line.
16673 my $follows_colon = $ibeg_1 >= 0 && $type_ibeg_1 eq ':';
16674 my $precedes_colon =
16675 $ibeg_3 >= 0 && $types_to_go[$ibeg_3] eq ':';
16676 next unless ( $follows_colon || $precedes_colon );
16678 # we will always combining a ? line following a : line
16679 if ( !$follows_colon ) {
16681 # ...otherwise recombine only if it looks like a
16682 # chain. we will just look at a few nearby lines
16683 # to see if this looks like a chain.
16684 my $local_count = 0;
16686 my $ii ( $ibeg_0, $ibeg_1, $ibeg_3, $ibeg_4 )
16690 && $types_to_go[$ii] eq ':'
16691 && $levels_to_go[$ii] == $lev;
16693 next unless ( $local_count > 1 );
16695 $forced_breakpoint_to_go[$iend_1] = 0;
16698 # do not recombine lines with leading '.'
16699 elsif ( $type_ibeg_2 eq '.' ) {
16700 my $i_next_nonblank =
16701 min( $inext_to_go[$ibeg_2], $iend_2 );
16705 # ... unless there is just one and we can reduce
16706 # this to two lines if we do. For example, this
16710 # '($dummy, $pat) = &get_next_tex_cmd;' . '$args .= $pat;'
16712 # looks better than this:
16713 # $bodyA .= '($dummy, $pat) = &get_next_tex_cmd;'
16714 # . '$args .= $pat;'
16719 && $type_ibeg_1 ne $type_ibeg_2
16722 # ... or this would strand a short quote , like this
16723 # . "some long quote"
16726 || ( $types_to_go[$i_next_nonblank] eq 'Q'
16727 && $i_next_nonblank >= $iend_2 - 1
16728 && $token_lengths_to_go[$i_next_nonblank] <
16729 $rOpts_short_concatenation_item_length )
16733 # handle leading keyword..
16734 elsif ( $type_ibeg_2 eq 'k' ) {
16736 # handle leading "or"
16737 if ( $tokens_to_go[$ibeg_2] eq 'or' ) {
16740 $this_line_is_semicolon_terminated
16742 $type_ibeg_1 eq '}'
16745 # following 'if' or 'unless' or 'or'
16746 $type_ibeg_1 eq 'k'
16747 && $is_if_unless{ $tokens_to_go[$ibeg_1]
16750 # important: only combine a very simple
16751 # or statement because the step below
16752 # may have combined a trailing 'and'
16753 # with this or, and we do not want to
16754 # then combine everything together
16755 && ( $iend_2 - $ibeg_2 <= 7 )
16761 $forced_breakpoint_to_go[$iend_1] = 0
16762 unless ( $old_breakpoint_to_go[$iend_1] );
16765 # handle leading 'and' and 'xor'
16766 elsif ($tokens_to_go[$ibeg_2] eq 'and'
16767 || $tokens_to_go[$ibeg_2] eq 'xor' )
16770 # Decide if we will combine a single terminal 'and'
16771 # after an 'if' or 'unless'.
16773 # This looks best with the 'and' on the same
16774 # line as the 'if':
16777 # if $seconds and $nu < 2;
16779 # But this looks better as shown:
16782 # if !$this->{Parents}{$_}
16783 # or $this->{Parents}{$_} eq $_;
16787 $this_line_is_semicolon_terminated
16790 # following 'if' or 'unless' or 'or'
16791 $type_ibeg_1 eq 'k'
16792 && ( $is_if_unless{ $tokens_to_go[$ibeg_1] }
16793 || $tokens_to_go[$ibeg_1] eq 'or' )
16798 # handle leading "if" and "unless"
16799 elsif ( $is_if_unless{ $tokens_to_go[$ibeg_2] } ) {
16801 # Combine something like:
16803 # if ( $lang !~ /${l}$/i );
16805 # next if ( $lang !~ /${l}$/i );
16808 $this_line_is_semicolon_terminated
16810 # previous line begins with 'and' or 'or'
16811 && $type_ibeg_1 eq 'k'
16812 && $is_and_or{ $tokens_to_go[$ibeg_1] }
16817 # handle all other leading keywords
16820 # keywords look best at start of lines,
16821 # but combine things like "1 while"
16822 unless ( $is_assignment{$type_iend_1} ) {
16824 if ( ( $type_iend_1 ne 'k' )
16825 && ( $tokens_to_go[$ibeg_2] ne 'while' ) );
16830 # similar treatment of && and || as above for 'and' and
16831 # 'or': NOTE: This block of code is currently bypassed
16832 # because of a previous block but is retained for possible
16834 elsif ( $is_amp_amp{$type_ibeg_2} ) {
16836 # maybe looking at something like:
16837 # unless $TEXTONLY || $item =~ m%</?(hr>|p>|a|img)%i;
16841 $this_line_is_semicolon_terminated
16843 # previous line begins with an 'if' or 'unless'
16845 && $type_ibeg_1 eq 'k'
16846 && $is_if_unless{ $tokens_to_go[$ibeg_1] }
16851 # handle line with leading = or similar
16852 elsif ( $is_assignment{$type_ibeg_2} ) {
16853 next unless ( $n == 1 || $n == $nmax );
16854 next if ( $old_breakpoint_to_go[$iend_1] );
16858 # unless we can reduce this to two lines
16861 # or three lines, the last with a leading semicolon
16862 || ( $nmax == 3 && $types_to_go[$ibeg_nmax] eq ';' )
16864 # or the next line ends with a here doc
16865 || $type_iend_2 eq 'h'
16867 # or this is a short line ending in ;
16869 && $this_line_is_semicolon_terminated )
16871 $forced_breakpoint_to_go[$iend_1] = 0;
16874 #----------------------------------------------------------
16875 # Recombine Section 4:
16876 # Combine the lines if we arrive here and it is possible
16877 #----------------------------------------------------------
16879 # honor hard breakpoints
16880 next if ( $forced_breakpoint_to_go[$iend_1] > 0 );
16882 my $bs = $rbond_strength_to_go->[$iend_1] + $bs_tweak;
16884 # Require a few extra spaces before recombining lines if we are
16885 # at an old breakpoint unless this is a simple list or terminal
16886 # line. The goal is to avoid oscillating between two
16887 # quasi-stable end states. For example this snippet caused
16891 ## TText => "[" . ( join ',', map { "\"$_\"" } split "\n", $_ ) . "]"
16895 if ( $old_breakpoint_to_go[$iend_1]
16896 && !$this_line_is_semicolon_terminated
16899 && $type_iend_2 ne ',' );
16901 # do not recombine if we would skip in indentation levels
16902 if ( $n < $nmax ) {
16903 my $if_next = $ri_beg->[ $n + 1 ];
16906 $levels_to_go[$ibeg_1] < $levels_to_go[$ibeg_2]
16907 && $levels_to_go[$ibeg_2] < $levels_to_go[$if_next]
16909 # but an isolated 'if (' is undesirable
16912 && $iend_1 - $ibeg_1 <= 2
16913 && $type_ibeg_1 eq 'k'
16914 && $tokens_to_go[$ibeg_1] eq 'if'
16915 && $tokens_to_go[$iend_1] ne '('
16921 ## next if ( $bs >= NO_BREAK - 1 ); # removed for b1257
16923 # remember the pair with the greatest bond strength
16930 if ( $bs > $bs_best ) {
16937 # recombine the pair with the greatest bond strength
16939 splice @{$ri_beg}, $n_best, 1;
16940 splice @{$ri_end}, $n_best - 1, 1;
16941 splice @joint, $n_best, 1;
16943 # keep going if we are still making progress
16946 } # end iteration loop
16948 } # end loop over sections
16952 if (DEBUG_RECOMBINE) {
16953 my $nmax_last = @{$ri_end} - 1;
16955 "exiting recombine with $nmax_last lines, starting lines=$nmax_start, iterations=$it_count, max_it=$it_count_max numsec=$num_sections\n";
16958 } ## end sub recombine_breakpoints
16959 } ## end closure recombine_breakpoints
16961 sub insert_final_ternary_breaks {
16963 my ( $self, $ri_left, $ri_right ) = @_;
16965 # Called once per batch to look for and do any final line breaks for
16966 # long ternary chains
16968 my $nmax = @{$ri_right} - 1;
16970 # scan the left and right end tokens of all lines
16972 my $i_first_colon = -1;
16973 for my $n ( 0 .. $nmax ) {
16974 my $il = $ri_left->[$n];
16975 my $ir = $ri_right->[$n];
16976 my $typel = $types_to_go[$il];
16977 my $typer = $types_to_go[$ir];
16978 return if ( $typel eq '?' );
16979 return if ( $typer eq '?' );
16980 if ( $typel eq ':' ) { $i_first_colon = $il; last; }
16981 elsif ( $typer eq ':' ) { $i_first_colon = $ir; last; }
16984 # For long ternary chains,
16985 # if the first : we see has its ? is in the interior
16986 # of a preceding line, then see if there are any good
16987 # breakpoints before the ?.
16988 if ( $i_first_colon > 0 ) {
16989 my $i_question = $mate_index_to_go[$i_first_colon];
16990 if ( $i_question > 0 ) {
16992 foreach my $ii ( reverse( 0 .. $i_question - 1 ) ) {
16993 my $token = $tokens_to_go[$ii];
16994 my $type = $types_to_go[$ii];
16996 # For now, a good break is either a comma or,
16997 # in a long chain, a 'return'.
16998 # Patch for RT #126633: added the $nmax>1 check to avoid
16999 # breaking after a return for a simple ternary. For longer
17000 # chains the break after return allows vertical alignment, so
17001 # it is still done. So perltidy -wba='?' will not break
17002 # immediately after the return in the following statement:
17004 # return 0 ? 'aaaaaaaaaaaaaaaaaaaaa' :
17005 # 'bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb';
17010 || $type eq 'k' && ( $nmax > 1 && $token eq 'return' )
17012 && $self->in_same_container_i( $ii, $i_question )
17015 push @insert_list, $ii;
17020 # insert any new break points
17021 if (@insert_list) {
17022 $self->insert_additional_breaks( \@insert_list, $ri_left,
17028 } ## end sub insert_final_ternary_breaks
17030 sub insert_breaks_before_list_opening_containers {
17032 my ( $self, $ri_left, $ri_right ) = @_;
17034 # This routine is called once per batch to implement the parameters
17035 # --break-before-hash-brace, etc.
17037 # Nothing to do if none of these parameters has been set
17038 return unless %break_before_container_types;
17040 my $nmax = @{$ri_right} - 1;
17041 return unless ( $nmax >= 0 );
17043 my $rLL = $self->[_rLL_];
17045 my $rbreak_before_container_by_seqno =
17046 $self->[_rbreak_before_container_by_seqno_];
17047 my $rK_weld_left = $self->[_rK_weld_left_];
17049 # scan the ends of all lines
17051 for my $n ( 0 .. $nmax ) {
17052 my $il = $ri_left->[$n];
17053 my $ir = $ri_right->[$n];
17054 next unless ( $ir > $il );
17055 my $Kl = $K_to_go[$il];
17056 my $Kr = $K_to_go[$ir];
17058 my $type_end = $rLL->[$Kr]->[_TYPE_];
17060 # Backup before any side comment
17061 if ( $type_end eq '#' ) {
17062 $Kend = $self->K_previous_nonblank($Kr);
17063 next unless defined($Kend);
17064 $type_end = $rLL->[$Kend]->[_TYPE_];
17067 # Backup to the start of any weld; fix for b1173.
17068 if ($total_weld_count) {
17069 my $Kend_test = $rK_weld_left->{$Kend};
17070 if ( defined($Kend_test) && $Kend_test > $Kl ) {
17071 $Kend = $Kend_test;
17072 $Kend_test = $rK_weld_left->{$Kend};
17075 # Do not break if we did not back up to the start of a weld
17076 # (shouldn't happen)
17077 next if ( defined($Kend_test) );
17080 my $token = $rLL->[$Kend]->[_TOKEN_];
17081 next unless ( $is_opening_token{$token} );
17082 next unless ( $Kl < $Kend - 1 );
17084 my $seqno = $rLL->[$Kend]->[_TYPE_SEQUENCE_];
17085 next unless ( defined($seqno) );
17087 # Use the flag which was previously set
17088 next unless ( $rbreak_before_container_by_seqno->{$seqno} );
17090 # Install a break before this opening token.
17091 my $Kbreak = $self->K_previous_nonblank($Kend);
17092 my $ibreak = $Kbreak - $Kl + $il;
17093 next if ( $ibreak < $il );
17094 next if ( $nobreak_to_go[$ibreak] );
17095 push @insert_list, $ibreak;
17098 # insert any new break points
17099 if (@insert_list) {
17100 $self->insert_additional_breaks( \@insert_list, $ri_left, $ri_right );
17103 } ## end sub insert_breaks_before_list_opening_containers
17105 sub note_added_semicolon {
17106 my ( $self, $line_number ) = @_;
17107 $self->[_last_added_semicolon_at_] = $line_number;
17108 if ( $self->[_added_semicolon_count_] == 0 ) {
17109 $self->[_first_added_semicolon_at_] = $line_number;
17111 $self->[_added_semicolon_count_]++;
17112 write_logfile_entry("Added ';' here\n");
17114 } ## end sub note_added_semicolon
17116 sub note_deleted_semicolon {
17117 my ( $self, $line_number ) = @_;
17118 $self->[_last_deleted_semicolon_at_] = $line_number;
17119 if ( $self->[_deleted_semicolon_count_] == 0 ) {
17120 $self->[_first_deleted_semicolon_at_] = $line_number;
17122 $self->[_deleted_semicolon_count_]++;
17123 write_logfile_entry("Deleted unnecessary ';' at line $line_number\n");
17125 } ## end sub note_deleted_semicolon
17127 sub note_embedded_tab {
17128 my ( $self, $line_number ) = @_;
17129 $self->[_embedded_tab_count_]++;
17130 $self->[_last_embedded_tab_at_] = $line_number;
17131 if ( !$self->[_first_embedded_tab_at_] ) {
17132 $self->[_first_embedded_tab_at_] = $line_number;
17135 if ( $self->[_embedded_tab_count_] <= MAX_NAG_MESSAGES ) {
17136 write_logfile_entry("Embedded tabs in quote or pattern\n");
17139 } ## end sub note_embedded_tab
17141 use constant DEBUG_CORRECT_LP => 0;
17143 sub correct_lp_indentation {
17145 # When the -lp option is used, we need to make a last pass through
17146 # each line to correct the indentation positions in case they differ
17147 # from the predictions. This is necessary because perltidy uses a
17148 # predictor/corrector method for aligning with opening parens. The
17149 # predictor is usually good, but sometimes stumbles. The corrector
17150 # tries to patch things up once the actual opening paren locations
17152 my ( $self, $ri_first, $ri_last ) = @_;
17153 my $K_opening_container = $self->[_K_opening_container_];
17154 my $K_closing_container = $self->[_K_closing_container_];
17155 my $do_not_pad = 0;
17157 # Note on flag '$do_not_pad':
17158 # We want to avoid a situation like this, where the aligner inserts
17159 # whitespace before the '=' to align it with a previous '=', because
17160 # otherwise the parens might become mis-aligned in a situation like
17161 # this, where the '=' has become aligned with the previous line,
17162 # pushing the opening '(' forward beyond where we want it.
17164 # $mkFloor::currentRoom = '';
17165 # $mkFloor::c_entry = $c->Entry(
17167 # -relief => 'sunken',
17171 # We leave it to the aligner to decide how to do this.
17173 # first remove continuation indentation if appropriate
17174 my $rLL = $self->[_rLL_];
17175 my $max_line = @{$ri_first} - 1;
17177 #---------------------------------------------------------------------------
17178 # PASS 1: reduce indentation if necessary at any long one-line blocks (c098)
17179 #---------------------------------------------------------------------------
17181 # The point is that sub 'starting_one_line_block' made one-line blocks based
17182 # on default indentation, not -lp indentation. So some of the one-line
17183 # blocks may be too long when given -lp indentation. We will fix that now
17184 # if possible, using the list of these closing block indexes.
17185 my $ri_starting_one_line_block =
17186 $self->[_this_batch_]->[_ri_starting_one_line_block_];
17187 if ( @{$ri_starting_one_line_block} ) {
17188 my @ilist = @{$ri_starting_one_line_block};
17189 my $inext = shift(@ilist);
17191 # loop over lines, checking length of each with a one-line block
17192 my ( $ibeg, $iend );
17193 foreach my $line ( 0 .. $max_line ) {
17194 $iend = $ri_last->[$line];
17195 next if ( $inext > $iend );
17196 $ibeg = $ri_first->[$line];
17198 # This is just for lines with indentation objects (c098)
17200 ref( $leading_spaces_to_go[$ibeg] )
17201 ? $self->excess_line_length( $ibeg, $iend )
17204 if ( $excess > 0 ) {
17205 my $available_spaces = $self->get_available_spaces_to_go($ibeg);
17207 if ( $available_spaces > 0 ) {
17208 my $delete_want = min( $available_spaces, $excess );
17209 my $deleted_spaces =
17210 $self->reduce_lp_indentation( $ibeg, $delete_want );
17211 $available_spaces =
17212 $self->get_available_spaces_to_go($ibeg);
17216 # skip forward to next one-line block to check
17218 $inext = shift @ilist;
17219 next if ( $inext <= $iend );
17220 last if ( $inext > $iend );
17222 last if ( $inext <= $iend );
17226 #-------------------------------------------------------------------
17227 # PASS 2: look for and fix other problems in each line of this batch
17228 #-------------------------------------------------------------------
17230 # look at each output line ...
17231 my ( $ibeg, $iend );
17232 foreach my $line ( 0 .. $max_line ) {
17233 $ibeg = $ri_first->[$line];
17234 $iend = $ri_last->[$line];
17236 # looking at each token in this output line ...
17237 foreach my $i ( $ibeg .. $iend ) {
17239 # How many space characters to place before this token
17240 # for special alignment. Actual padding is done in the
17243 # looking for next unvisited indentation item ...
17244 my $indentation = $leading_spaces_to_go[$i];
17246 # This is just for indentation objects (c098)
17247 next unless ( ref($indentation) );
17249 # Visit each indentation object just once
17250 next if ( $indentation->get_marked() );
17253 $indentation->set_marked(1);
17255 # Skip indentation objects which do not align with container tokens
17256 my $align_seqno = $indentation->get_align_seqno();
17257 next unless ($align_seqno);
17259 # Skip a container which is entirely on this line
17260 my $Ko = $K_opening_container->{$align_seqno};
17261 my $Kc = $K_closing_container->{$align_seqno};
17262 if ( defined($Ko) && defined($Kc) ) {
17263 next if ( $Ko >= $K_to_go[$ibeg] && $Kc <= $K_to_go[$iend] );
17266 if ( $line == 1 && $i == $ibeg ) {
17270 #--------------------------------------------
17271 # Now see what the error is and try to fix it
17272 #--------------------------------------------
17273 my $closing_index = $indentation->get_closed();
17274 my $predicted_pos = $indentation->get_spaces();
17276 # Find actual position:
17279 if ( $i == $ibeg ) {
17281 # Case 1: token is first character of of batch - table lookup
17282 if ( $line == 0 ) {
17284 $actual_pos = $predicted_pos;
17286 my ( $indent, $offset, $is_leading, $exists ) =
17287 get_saved_opening_indentation($align_seqno);
17288 if ( defined($indent) ) {
17290 # FIXME: should use '1' here if no space after opening
17291 # and '2' if want space; hardwired at 1 like -gnu-style
17292 $actual_pos = get_spaces($indent) + $offset + 1;
17296 # Case 2: token starts a new line - use length of previous line
17299 my $ibegm = $ri_first->[ $line - 1 ];
17300 my $iendm = $ri_last->[ $line - 1 ];
17301 $actual_pos = total_line_length( $ibegm, $iendm );
17305 if ( $types_to_go[ $iendm + 1 ] eq 'b' );
17310 # Case 3: $i>$ibeg: token is mid-line - use length to previous token
17313 $actual_pos = total_line_length( $ibeg, $i - 1 );
17315 # for mid-line token, we must check to see if all
17316 # additional lines have continuation indentation,
17317 # and remove it if so. Otherwise, we do not get
17319 if ( $closing_index > $iend ) {
17320 my $ibeg_next = $ri_first->[ $line + 1 ];
17321 if ( $ci_levels_to_go[$ibeg_next] > 0 ) {
17322 $self->undo_lp_ci( $line, $i, $closing_index,
17323 $ri_first, $ri_last );
17328 # By how many spaces (plus or minus) would we need to increase the
17329 # indentation to get alignment with the opening token?
17330 my $move_right = $actual_pos - $predicted_pos;
17332 if (DEBUG_CORRECT_LP) {
17333 my $tok = substr( $tokens_to_go[$i], 0, 8 );
17334 my $avail = $self->get_available_spaces_to_go($ibeg);
17336 "CORRECT_LP for seq=$align_seqno, predicted pos=$predicted_pos actual=$actual_pos => move right=$move_right available=$avail i=$i max=$max_index_to_go tok=$tok\n";
17339 # nothing more to do if no error to correct (gnu2.t)
17340 if ( $move_right == 0 ) {
17341 $indentation->set_recoverable_spaces($move_right);
17345 # Get any collapsed length defined for -xlp
17346 my $collapsed_length =
17347 $self->[_rcollapsed_length_by_seqno_]->{$align_seqno};
17348 $collapsed_length = 0 unless ( defined($collapsed_length) );
17350 if (DEBUG_CORRECT_LP) {
17352 "CORRECT_LP for seq=$align_seqno, collapsed length is $collapsed_length\n";
17355 # if we have not seen closure for this indentation in this batch,
17356 # and do not have a collapsed length estimate, we can only pass on
17357 # a request to the vertical aligner
17358 if ( $closing_index < 0 && !$collapsed_length ) {
17359 $indentation->set_recoverable_spaces($move_right);
17363 # If necessary, look ahead to see if there is really any leading
17364 # whitespace dependent on this whitespace, and also find the
17365 # longest line using this whitespace. Since it is always safe to
17366 # move left if there are no dependents, we only need to do this if
17367 # we may have dependent nodes or need to move right.
17369 my $have_child = $indentation->get_have_child();
17370 my %saw_indentation;
17371 my $line_count = 1;
17372 $saw_indentation{$indentation} = $indentation;
17374 # How far can we move right before we hit the limit?
17375 # let $right_margen = the number of spaces that we can increase
17376 # the current indentation before hitting the maximum line length.
17377 my $right_margin = 0;
17379 if ( $have_child || $move_right > 0 ) {
17382 # include estimated collapsed length for incomplete containers
17383 my $max_length = 0;
17384 if ( $Kc > $K_to_go[$max_index_to_go] ) {
17385 $max_length = $collapsed_length + $predicted_pos;
17388 if ( $i == $ibeg ) {
17389 my $length = total_line_length( $ibeg, $iend );
17390 if ( $length > $max_length ) { $max_length = $length }
17393 # look ahead at the rest of the lines of this batch..
17394 foreach my $line_t ( $line + 1 .. $max_line ) {
17395 my $ibeg_t = $ri_first->[$line_t];
17396 my $iend_t = $ri_last->[$line_t];
17397 last if ( $closing_index <= $ibeg_t );
17399 # remember all different indentation objects
17400 my $indentation_t = $leading_spaces_to_go[$ibeg_t];
17401 $saw_indentation{$indentation_t} = $indentation_t;
17404 # remember longest line in the group
17405 my $length_t = total_line_length( $ibeg_t, $iend_t );
17406 if ( $length_t > $max_length ) {
17407 $max_length = $length_t;
17412 $maximum_line_length_at_level[ $levels_to_go[$ibeg] ] -
17414 if ( $right_margin < 0 ) { $right_margin = 0 }
17417 my $first_line_comma_count =
17418 grep { $_ eq ',' } @types_to_go[ $ibeg .. $iend ];
17419 my $comma_count = $indentation->get_comma_count();
17420 my $arrow_count = $indentation->get_arrow_count();
17422 # This is a simple approximate test for vertical alignment:
17423 # if we broke just after an opening paren, brace, bracket,
17424 # and there are 2 or more commas in the first line,
17425 # and there are no '=>'s,
17426 # then we are probably vertically aligned. We could set
17427 # an exact flag in sub break_lists, but this is good
17429 my $indentation_count = keys %saw_indentation;
17430 my $is_vertically_aligned =
17432 && $first_line_comma_count > 1
17433 && $indentation_count == 1
17434 && ( $arrow_count == 0 || $arrow_count == $line_count ) );
17436 # Make the move if possible ..
17439 # we can always move left
17444 # incomplete container
17445 || ( $rOpts_extended_line_up_parentheses
17446 && $Kc > $K_to_go[$max_index_to_go] )
17447 || $closing_index < 0
17449 # but we should only move right if we are sure it will
17450 # not spoil vertical alignment
17451 || ( $comma_count == 0 )
17452 || ( $comma_count > 0 && !$is_vertically_aligned )
17456 ( $move_right <= $right_margin )
17460 if (DEBUG_CORRECT_LP) {
17462 "CORRECT_LP for seq=$align_seqno, moving $move spaces\n";
17465 foreach ( keys %saw_indentation ) {
17466 $saw_indentation{$_}
17467 ->permanently_decrease_available_spaces( -$move );
17471 # Otherwise, record what we want and the vertical aligner
17472 # will try to recover it.
17474 $indentation->set_recoverable_spaces($move_right);
17476 } ## end loop over tokens in a line
17477 } ## end loop over lines
17478 return $do_not_pad;
17479 } ## end sub correct_lp_indentation
17483 # If there is a single, long parameter within parens, like this:
17485 # $self->command( "/msg "
17486 # . $infoline->chan
17487 # . " You said $1, but did you know that it's square was "
17488 # . $1 * $1 . " ?" );
17490 # we can remove the continuation indentation of the 2nd and higher lines
17491 # to achieve this effect, which is more pleasing:
17493 # $self->command("/msg "
17494 # . $infoline->chan
17495 # . " You said $1, but did you know that it's square was "
17496 # . $1 * $1 . " ?");
17498 my ( $self, $line_open, $i_start, $closing_index, $ri_first, $ri_last ) =
17500 my $max_line = @{$ri_first} - 1;
17502 # must be multiple lines
17503 return unless $max_line > $line_open;
17505 my $lev_start = $levels_to_go[$i_start];
17506 my $ci_start_plus = 1 + $ci_levels_to_go[$i_start];
17508 # see if all additional lines in this container have continuation
17510 my $line_1 = 1 + $line_open;
17511 my $n = $line_open;
17513 while ( ++$n <= $max_line ) {
17514 my $ibeg = $ri_first->[$n];
17515 my $iend = $ri_last->[$n];
17516 if ( $ibeg eq $closing_index ) { $n--; last }
17517 return if ( $lev_start != $levels_to_go[$ibeg] );
17518 return if ( $ci_start_plus != $ci_levels_to_go[$ibeg] );
17519 last if ( $closing_index <= $iend );
17522 # we can reduce the indentation of all continuation lines
17523 my $continuation_line_count = $n - $line_open;
17524 @ci_levels_to_go[ @{$ri_first}[ $line_1 .. $n ] ] =
17525 (0) x ($continuation_line_count);
17526 @leading_spaces_to_go[ @{$ri_first}[ $line_1 .. $n ] ] =
17527 @reduced_spaces_to_go[ @{$ri_first}[ $line_1 .. $n ] ];
17529 } ## end sub undo_lp_ci
17531 ###############################################
17532 # CODE SECTION 10: Code to break long statments
17533 ###############################################
17535 sub break_long_lines {
17537 #-----------------------------------------------------------
17538 # Break a batch of tokens into lines which do not exceed the
17539 # maximum line length.
17540 #-----------------------------------------------------------
17542 # Define an array of indexes for inserting newline characters to
17543 # keep the line lengths below the maximum desired length. There is
17544 # an implied break after the last token, so it need not be included.
17547 # This routine is part of series of routines which adjust line
17548 # lengths. It is only called if a statement is longer than the
17549 # maximum line length, or if a preliminary scanning located
17550 # desirable break points. Sub break_lists has already looked at
17551 # these tokens and set breakpoints (in array
17552 # $forced_breakpoint_to_go[$i]) where it wants breaks (for example
17553 # after commas, after opening parens, and before closing parens).
17554 # This routine will honor these breakpoints and also add additional
17555 # breakpoints as necessary to keep the line length below the maximum
17556 # requested. It bases its decision on where the 'bond strength' is
17559 # Output: returns references to the arrays:
17562 # which contain the indexes $i of the first and last tokens on each
17565 # In addition, the array:
17566 # $forced_breakpoint_to_go[$i]
17567 # may be updated to be =1 for any index $i after which there must be
17568 # a break. This signals later routines not to undo the breakpoint.
17570 my ( $self, $saw_good_break, $rcolon_list, $rbond_strength_bias ) = @_;
17572 # @{$rcolon_list} is a list of all the ? and : tokens in the batch, in
17575 use constant DEBUG_BREAK_LINES => 0;
17577 my @i_first = (); # the first index to output
17578 my @i_last = (); # the last index to output
17579 my @i_colon_breaks = (); # needed to decide if we have to break at ?'s
17580 if ( $types_to_go[0] eq ':' ) { push @i_colon_breaks, 0 }
17582 my $rbond_strength_to_go = $self->set_bond_strengths();
17584 # Add any comma bias set by break_lists
17585 if ( @{$rbond_strength_bias} ) {
17586 foreach my $item ( @{$rbond_strength_bias} ) {
17587 my ( $ii, $bias ) = @{$item};
17588 if ( $ii >= 0 && $ii <= $max_index_to_go ) {
17589 $rbond_strength_to_go->[$ii] += $bias;
17591 elsif (DEVEL_MODE) {
17592 my $KK = $K_to_go[0];
17593 my $lno = $self->[_rLL_]->[$KK]->[_LINE_INDEX_];
17595 "Bad bond strength bias near line $lno: i=$ii must be between 0 and $max_index_to_go\n"
17602 my $imax = $max_index_to_go;
17603 if ( $types_to_go[$imin] eq 'b' ) { $imin++ }
17604 if ( $types_to_go[$imax] eq 'b' ) { $imax-- }
17605 my $i_begin = $imin; # index for starting next iteration
17607 my $leading_spaces = leading_spaces_to_go($imin);
17608 my $line_count = 0;
17609 my $last_break_strength = NO_BREAK;
17610 my $i_last_break = -1;
17611 my $max_bias = 0.001;
17612 my $tiny_bias = 0.0001;
17613 my $leading_alignment_token = EMPTY_STRING;
17614 my $leading_alignment_type = EMPTY_STRING;
17616 # see if any ?/:'s are in order
17617 my $colons_in_order = 1;
17618 my $last_tok = EMPTY_STRING;
17619 foreach ( @{$rcolon_list} ) {
17620 if ( $_ eq $last_tok ) { $colons_in_order = 0; last }
17624 # This is a sufficient but not necessary condition for colon chain
17625 my $is_colon_chain = ( $colons_in_order && @{$rcolon_list} > 2 );
17627 my $Msg = EMPTY_STRING;
17629 #-------------------------------------------------------
17630 # BEGINNING of main loop to set continuation breakpoints
17631 # Keep iterating until we reach the end
17632 #-------------------------------------------------------
17633 while ( $i_begin <= $imax ) {
17634 my $lowest_strength = NO_BREAK;
17635 my $starting_sum = $summed_lengths_to_go[$i_begin];
17638 my $lowest_next_token = EMPTY_STRING;
17639 my $lowest_next_type = 'b';
17640 my $i_lowest_next_nonblank = -1;
17641 my $maximum_line_length =
17642 $maximum_line_length_at_level[ $levels_to_go[$i_begin] ];
17644 # Do not separate an isolated bare word from an opening paren.
17645 # Alternate Fix #2 for issue b1299. This waits as long as possible
17646 # to make the decision.
17647 if ( $types_to_go[$i_begin] eq 'i'
17648 && substr( $tokens_to_go[$i_begin], 0, 1 ) =~ /\w/ )
17650 my $i_next_nonblank = $inext_to_go[$i_begin];
17651 if ( $tokens_to_go[$i_next_nonblank] eq '(' ) {
17652 $rbond_strength_to_go->[$i_begin] = NO_BREAK;
17656 #-------------------------------------------------------
17657 # BEGINNING of inner loop to find the best next breakpoint
17658 #-------------------------------------------------------
17659 my $strength = NO_BREAK;
17660 $i_test = $i_begin - 1;
17661 while ( ++$i_test <= $imax ) {
17662 my $type = $types_to_go[$i_test];
17663 my $token = $tokens_to_go[$i_test];
17664 my $next_type = $types_to_go[ $i_test + 1 ];
17665 my $next_token = $tokens_to_go[ $i_test + 1 ];
17666 my $i_next_nonblank = $inext_to_go[$i_test];
17667 my $next_nonblank_type = $types_to_go[$i_next_nonblank];
17668 my $next_nonblank_token = $tokens_to_go[$i_next_nonblank];
17669 my $next_nonblank_block_type = $block_type_to_go[$i_next_nonblank];
17671 # adjustments to the previous bond strength may have been made, and
17672 # we must keep the bond strength of a token and its following blank
17674 my $last_strength = $strength;
17675 $strength = $rbond_strength_to_go->[$i_test];
17676 if ( $type eq 'b' ) { $strength = $last_strength }
17678 # reduce strength a bit to break ties at an old comma breakpoint ...
17681 $old_breakpoint_to_go[$i_test]
17683 # Patch: limited to just commas to avoid blinking states
17686 # which is a 'good' breakpoint, meaning ...
17687 # we don't want to break before it
17688 && !$want_break_before{$type}
17690 # and either we want to break before the next token
17691 # or the next token is not short (i.e. not a '*', '/' etc.)
17692 && $i_next_nonblank <= $imax
17693 && ( $want_break_before{$next_nonblank_type}
17694 || $token_lengths_to_go[$i_next_nonblank] > 2
17695 || $next_nonblank_type eq ','
17696 || $is_opening_type{$next_nonblank_type} )
17699 $strength -= $tiny_bias;
17700 DEBUG_BREAK_LINES && do { $Msg .= " :-bias at i=$i_test" };
17703 # otherwise increase strength a bit if this token would be at the
17704 # maximum line length. This is necessary to avoid blinking
17705 # in the above example when the -iob flag is added.
17709 $summed_lengths_to_go[ $i_test + 1 ] -
17711 if ( $len >= $maximum_line_length ) {
17712 $strength += $tiny_bias;
17713 DEBUG_BREAK_LINES && do { $Msg .= " :+bias at i=$i_test" };
17717 my $must_break = 0;
17719 # Force an immediate break at certain operators
17720 # with lower level than the start of the line,
17721 # unless we've already seen a better break.
17723 #------------------------------------
17724 # Note on an issue with a preceding ?
17725 #------------------------------------
17726 # We don't include a ? in the above list, but there may
17727 # be a break at a previous ? if the line is long.
17728 # Because of this we do not want to force a break if
17729 # there is a previous ? on this line. For now the best way
17730 # to do this is to not break if we have seen a lower strength
17731 # point, which is probably a ?.
17733 # Example of unwanted breaks we are avoiding at a '.' following a ?
17734 # from pod2html using perltidy -gnu:
17736 # ? "\n<A NAME=\""
17738 # . "\">\n$text</A>\n"
17739 # : "\n$type$pod2.html\#" . $value . "\">$text<\/A>\n";
17741 ( $strength <= $lowest_strength )
17742 && ( $nesting_depth_to_go[$i_begin] >
17743 $nesting_depth_to_go[$i_next_nonblank] )
17745 $next_nonblank_type =~ /^(\.|\&\&|\|\|)$/
17747 $next_nonblank_type eq 'k'
17749 ## /^(and|or)$/ # note: includes 'xor' now
17750 && $is_and_or{$next_nonblank_token}
17755 $self->set_forced_breakpoint($i_next_nonblank);
17757 && do { $Msg .= " :Forced break at i=$i_next_nonblank" };
17762 # Try to put a break where requested by break_lists
17763 $forced_breakpoint_to_go[$i_test]
17765 # break between ) { in a continued line so that the '{' can
17767 # See similar logic in break_lists which catches instances
17768 # where a line is just something like ') {'. We have to
17769 # be careful because the corresponding block keyword might
17770 # not be on the first line, such as 'for' here:
17774 # for $x ( 1, 2 ) { local $_ = "b"; s/(.*)/+$1/ }
17780 && ( $token eq ')' )
17781 && ( $next_nonblank_type eq '{' )
17782 && ($next_nonblank_block_type)
17783 && ( $next_nonblank_block_type ne $tokens_to_go[$i_begin] )
17785 # RT #104427: Dont break before opening sub brace because
17786 # sub block breaks handled at higher level, unless
17787 # it looks like the preceding list is long and broken
17791 $next_nonblank_block_type =~ /$SUB_PATTERN/
17792 || $next_nonblank_block_type =~ /$ASUB_PATTERN/
17794 && ( $nesting_depth_to_go[$i_begin] ==
17795 $nesting_depth_to_go[$i_next_nonblank] )
17798 && !$rOpts_opening_brace_always_on_right
17801 # There is an implied forced break at a terminal opening brace
17802 || ( ( $type eq '{' ) && ( $i_test == $imax ) )
17806 # Forced breakpoints must sometimes be overridden, for example
17807 # because of a side comment causing a NO_BREAK. It is easier
17808 # to catch this here than when they are set.
17809 if ( $strength < NO_BREAK - 1 ) {
17810 $strength = $lowest_strength - $tiny_bias;
17813 && do { $Msg .= " :set must_break at i=$i_next_nonblank" };
17817 # quit if a break here would put a good terminal token on
17818 # the next line and we already have a possible break
17821 && ( $next_nonblank_type eq ';' || $next_nonblank_type eq ',' )
17825 $summed_lengths_to_go[ $i_next_nonblank + 1 ] -
17827 ) > $maximum_line_length
17831 if ( $i_lowest >= 0 ) {
17832 DEBUG_BREAK_LINES && do {
17833 $Msg .= " :quit at good terminal='$next_nonblank_type'";
17839 # Avoid a break which would strand a single punctuation
17840 # token. For example, we do not want to strand a leading
17841 # '.' which is followed by a long quoted string.
17842 # But note that we do want to do this with -extrude (l=1)
17843 # so please test any changes to this code on -extrude.
17846 && ( $i_test == $i_begin )
17847 && ( $i_test < $imax )
17848 && ( $token eq $type )
17852 $summed_lengths_to_go[ $i_test + 1 ] -
17854 ) < $maximum_line_length
17858 $i_test = min( $imax, $inext_to_go[$i_test] );
17859 DEBUG_BREAK_LINES && do {
17860 $Msg .= " :redo at i=$i_test";
17865 if ( ( $strength <= $lowest_strength ) && ( $strength < NO_BREAK ) )
17868 # break at previous best break if it would have produced
17869 # a leading alignment of certain common tokens, and it
17870 # is different from the latest candidate break
17871 if ($leading_alignment_type) {
17872 DEBUG_BREAK_LINES && do {
17874 " :last at leading_alignment='$leading_alignment_type'";
17879 # Force at least one breakpoint if old code had good
17880 # break It is only called if a breakpoint is required or
17881 # desired. This will probably need some adjustments
17882 # over time. A goal is to try to be sure that, if a new
17883 # side comment is introduced into formatted text, then
17884 # the same breakpoints will occur. scbreak.t
17886 $i_test == $imax # we are at the end
17887 && !$forced_breakpoint_count
17888 && $saw_good_break # old line had good break
17889 && $type =~ /^[#;\{]$/ # and this line ends in
17890 # ';' or side comment
17891 && $i_last_break < 0 # and we haven't made a break
17892 && $i_lowest >= 0 # and we saw a possible break
17893 && $i_lowest < $imax - 1 # (but not just before this ;)
17894 && $strength - $lowest_strength < 0.5 * WEAK # and it's good
17898 DEBUG_BREAK_LINES && do {
17899 $Msg .= " :last at good old break\n";
17904 # Do not skip past an important break point in a short final
17905 # segment. For example, without this check we would miss the
17906 # break at the final / in the following code:
17909 # ( $tau * $mass_pellet * $q_0 *
17910 # ( 1. - exp( -$t_stop / $tau ) ) -
17911 # 4. * $pi * $factor * $k_ice *
17912 # ( $t_melt - $t_ice ) *
17915 # ( $rho_ice * $Qs * $pi * $r_pellet**2 );
17919 && $i_lowest >= 0 # and we saw a possible break
17920 && $i_lowest < $i_test
17921 && $i_test > $imax - 2
17922 && $nesting_depth_to_go[$i_begin] >
17923 $nesting_depth_to_go[$i_lowest]
17924 && $lowest_strength < $last_break_strength - .5 * WEAK
17927 # Make this break for math operators for now
17928 my $ir = $inext_to_go[$i_lowest];
17929 my $il = $iprev_to_go[$ir];
17930 if ( $types_to_go[$il] =~ /^[\/\*\+\-\%]$/
17931 || $types_to_go[$ir] =~ /^[\/\*\+\-\%]$/ )
17933 DEBUG_BREAK_LINES && do {
17934 $Msg .= " :last-noskip_short";
17940 # Update the minimum bond strength location
17941 $lowest_strength = $strength;
17942 $i_lowest = $i_test;
17943 $lowest_next_token = $next_nonblank_token;
17944 $lowest_next_type = $next_nonblank_type;
17945 $i_lowest_next_nonblank = $i_next_nonblank;
17947 DEBUG_BREAK_LINES && do {
17948 $Msg .= " :last-must_break";
17953 # set flags to remember if a break here will produce a
17954 # leading alignment of certain common tokens
17955 if ( $line_count > 0
17957 && ( $lowest_strength - $last_break_strength <= $max_bias )
17960 my $i_last_end = $iprev_to_go[$i_begin];
17961 my $tok_beg = $tokens_to_go[$i_begin];
17962 my $type_beg = $types_to_go[$i_begin];
17965 # check for leading alignment of certain tokens
17967 $tok_beg eq $next_nonblank_token
17968 && $is_chain_operator{$tok_beg}
17969 && ( $type_beg eq 'k'
17970 || $type_beg eq $tok_beg )
17971 && $nesting_depth_to_go[$i_begin] >=
17972 $nesting_depth_to_go[$i_next_nonblank]
17975 || ( $tokens_to_go[$i_last_end] eq $token
17976 && $is_chain_operator{$token}
17977 && ( $type eq 'k' || $type eq $token )
17978 && $nesting_depth_to_go[$i_last_end] >=
17979 $nesting_depth_to_go[$i_test] )
17982 $leading_alignment_token = $next_nonblank_token;
17983 $leading_alignment_type = $next_nonblank_type;
17988 my $too_long = ( $i_test >= $imax );
17989 if ( !$too_long ) {
17992 $summed_lengths_to_go[ $i_test + 2 ] -
17994 $too_long = $next_length > $maximum_line_length;
17996 # To prevent blinkers we will avoid leaving a token exactly at
17997 # the line length limit unless it is the last token or one of
17998 # several "good" types.
18000 # The following code was a blinker with -pbp before this
18002 ## $last_nonblank_token eq '('
18003 ## && $is_indirect_object_taker{ $paren_type
18004 ## [$paren_depth] }
18005 # The issue causing the problem is that if the
18006 # term [$paren_depth] gets broken across a line then
18007 # the whitespace routine doesn't see both opening and closing
18008 # brackets and will format like '[ $paren_depth ]'. This
18009 # leads to an oscillation in length depending if we break
18010 # before the closing bracket or not.
18012 && $i_test + 1 < $imax
18013 && $next_nonblank_type ne ','
18014 && !$is_closing_type{$next_nonblank_type} )
18016 $too_long = $next_length >= $maximum_line_length;
18017 DEBUG_BREAK_LINES && do {
18018 $Msg .= " :too_long=$too_long" if ($too_long);
18023 DEBUG_BREAK_LINES && do {
18026 $next_nonblank_token ? $next_nonblank_token : EMPTY_STRING;
18027 my $i_testp2 = $i_test + 2;
18028 if ( $i_testp2 > $max_index_to_go + 1 ) {
18029 $i_testp2 = $max_index_to_go + 1;
18031 if ( length($ltok) > 6 ) { $ltok = substr( $ltok, 0, 8 ) }
18032 if ( length($rtok) > 6 ) { $rtok = substr( $rtok, 0, 8 ) }
18034 "BREAK: i=$i_test imax=$imax $types_to_go[$i_test] $next_nonblank_type sp=($leading_spaces) lnext= $summed_lengths_to_go[$i_testp2] 2long=$too_long str=$strength $ltok $rtok\n";
18037 # allow one extra terminal token after exceeding line length
18038 # if it would strand this token.
18039 if ( $rOpts_fuzzy_line_length
18041 && $i_lowest == $i_test
18042 && $token_lengths_to_go[$i_test] > 1
18043 && ( $next_nonblank_type eq ';' || $next_nonblank_type eq ',' )
18047 DEBUG_BREAK_LINES && do {
18048 $Msg .= " :do_not_strand next='$next_nonblank_type'";
18052 # we are done if...
18055 # ... no more space and we have a break
18056 $too_long && $i_lowest >= 0
18058 # ... or no more tokens
18059 || $i_test == $imax
18062 DEBUG_BREAK_LINES && do {
18064 " :Done-too_long=$too_long or i_lowest=$i_lowest or $i_test==imax";
18070 #-------------------------------------------------------
18071 # END of inner loop to find the best next breakpoint
18072 # Now decide exactly where to put the breakpoint
18073 #-------------------------------------------------------
18075 # it's always ok to break at imax if no other break was found
18076 if ( $i_lowest < 0 ) { $i_lowest = $imax }
18078 # semi-final index calculation
18079 my $i_next_nonblank = $inext_to_go[$i_lowest];
18080 my $next_nonblank_type = $types_to_go[$i_next_nonblank];
18081 my $next_nonblank_token = $tokens_to_go[$i_next_nonblank];
18083 #-------------------------------------------------------
18084 # ?/: rule 1 : if a break here will separate a '?' on this
18085 # line from its closing ':', then break at the '?' instead.
18086 #-------------------------------------------------------
18087 foreach my $i ( $i_begin + 1 .. $i_lowest - 1 ) {
18088 next unless ( $tokens_to_go[$i] eq '?' );
18090 # do not break if probable sequence of ?/: statements
18091 next if ($is_colon_chain);
18093 # do not break if statement is broken by side comment
18095 if ( $tokens_to_go[$max_index_to_go] eq '#'
18096 && terminal_type_i( 0, $max_index_to_go ) !~ /^[\;\}]$/ );
18098 # no break needed if matching : is also on the line
18100 if ( $mate_index_to_go[$i] >= 0
18101 && $mate_index_to_go[$i] <= $i_next_nonblank );
18104 if ( $want_break_before{'?'} ) { $i_lowest-- }
18108 #-------------------------------------------------------
18109 # END of inner loop to find the best next breakpoint:
18110 # Break the line after the token with index i=$i_lowest
18111 #-------------------------------------------------------
18113 # final index calculation
18114 $i_next_nonblank = $inext_to_go[$i_lowest];
18115 $next_nonblank_type = $types_to_go[$i_next_nonblank];
18116 $next_nonblank_token = $tokens_to_go[$i_next_nonblank];
18120 "BREAK: best is i = $i_lowest strength = $lowest_strength;\nReason>> $Msg\n";
18121 $Msg = EMPTY_STRING;
18123 #-------------------------------------------------------
18124 # ?/: rule 2 : if we break at a '?', then break at its ':'
18126 # Note: this rule is also in sub break_lists to handle a break
18127 # at the start and end of a line (in case breaks are dictated
18128 # by side comments).
18129 #-------------------------------------------------------
18130 if ( $next_nonblank_type eq '?' ) {
18131 $self->set_closing_breakpoint($i_next_nonblank);
18133 elsif ( $types_to_go[$i_lowest] eq '?' ) {
18134 $self->set_closing_breakpoint($i_lowest);
18137 #-------------------------------------------------------
18138 # ?/: rule 3 : if we break at a ':' then we save
18139 # its location for further work below. We may need to go
18140 # back and break at its '?'.
18141 #-------------------------------------------------------
18142 if ( $next_nonblank_type eq ':' ) {
18143 push @i_colon_breaks, $i_next_nonblank;
18145 elsif ( $types_to_go[$i_lowest] eq ':' ) {
18146 push @i_colon_breaks, $i_lowest;
18149 # here we should set breaks for all '?'/':' pairs which are
18150 # separated by this line
18154 # save this line segment, after trimming blanks at the ends
18156 ( $types_to_go[$i_begin] eq 'b' ) ? $i_begin + 1 : $i_begin );
18158 ( $types_to_go[$i_lowest] eq 'b' ) ? $i_lowest - 1 : $i_lowest );
18160 # set a forced breakpoint at a container opening, if necessary, to
18161 # signal a break at a closing container. Excepting '(' for now.
18164 $tokens_to_go[$i_lowest] eq '{'
18165 || $tokens_to_go[$i_lowest] eq '['
18167 && !$forced_breakpoint_to_go[$i_lowest]
18170 $self->set_closing_breakpoint($i_lowest);
18173 # get ready to go again
18174 $i_begin = $i_lowest + 1;
18175 $last_break_strength = $lowest_strength;
18176 $i_last_break = $i_lowest;
18177 $leading_alignment_token = EMPTY_STRING;
18178 $leading_alignment_type = EMPTY_STRING;
18179 $lowest_next_token = EMPTY_STRING;
18180 $lowest_next_type = 'b';
18182 if ( ( $i_begin <= $imax ) && ( $types_to_go[$i_begin] eq 'b' ) ) {
18186 # update indentation size
18187 if ( $i_begin <= $imax ) {
18188 $leading_spaces = leading_spaces_to_go($i_begin);
18191 "updating leading spaces to be $leading_spaces at i=$i_begin\n";
18195 #-------------------------------------------------------
18196 # END of main loop to set continuation breakpoints
18197 # Now go back and make any necessary corrections
18198 #-------------------------------------------------------
18200 #-------------------------------------------------------
18201 # ?/: rule 4 -- if we broke at a ':', then break at
18202 # corresponding '?' unless this is a chain of ?: expressions
18203 #-------------------------------------------------------
18204 if (@i_colon_breaks) {
18206 # using a simple method for deciding if we are in a ?/: chain --
18207 # this is a chain if it has multiple ?/: pairs all in order;
18209 # Note that if line starts in a ':' we count that above as a break
18210 my $is_chain = ( $colons_in_order && @i_colon_breaks > 1 );
18212 unless ($is_chain) {
18213 my @insert_list = ();
18214 foreach (@i_colon_breaks) {
18215 my $i_question = $mate_index_to_go[$_];
18216 if ( $i_question >= 0 ) {
18217 if ( $want_break_before{'?'} ) {
18218 $i_question = $iprev_to_go[$i_question];
18221 if ( $i_question >= 0 ) {
18222 push @insert_list, $i_question;
18225 $self->insert_additional_breaks( \@insert_list, \@i_first,
18230 return ( \@i_first, \@i_last, $rbond_strength_to_go );
18231 } ## end sub break_long_lines
18233 ###########################################
18234 # CODE SECTION 11: Code to break long lists
18235 ###########################################
18237 { ## begin closure break_lists
18239 # These routines and variables are involved in finding good
18240 # places to break long lists.
18242 use constant DEBUG_BREAK_LISTS => 0;
18245 $block_type, $current_depth,
18247 $i_last_nonblank_token, $last_nonblank_token,
18248 $last_nonblank_type, $last_nonblank_block_type,
18249 $last_old_breakpoint_count, $minimum_depth,
18250 $next_nonblank_block_type, $next_nonblank_token,
18251 $next_nonblank_type, $old_breakpoint_count,
18252 $starting_breakpoint_count, $starting_depth,
18258 @breakpoint_stack, @breakpoint_undo_stack,
18259 @comma_index, @container_type,
18260 @identifier_count_stack, @index_before_arrow,
18261 @interrupted_list, @item_count_stack,
18262 @last_comma_index, @last_dot_index,
18263 @last_nonblank_type, @old_breakpoint_count_stack,
18264 @opening_structure_index_stack, @rfor_semicolon_list,
18265 @has_old_logical_breakpoints, @rand_or_list,
18266 @i_equals, @override_cab3,
18267 @type_sequence_stack,
18270 # these arrays must retain values between calls
18271 my ( @has_broken_sublist, @dont_align, @want_comma_break );
18275 my $list_stress_level;
18277 sub initialize_break_lists {
18279 @has_broken_sublist = ();
18280 @want_comma_break = ();
18282 #---------------------------------------------------
18283 # Set tolerances to prevent formatting instabilities
18284 #---------------------------------------------------
18286 # Define tolerances to use when checking if closed
18287 # containers will fit on one line. This is necessary to avoid
18288 # formatting instability. The basic tolerance is based on the
18291 # - Always allow for at least one extra space after a closing token so
18292 # that we do not strand a comma or semicolon. (oneline.t).
18294 # - Use an increased line length tolerance when -ci > -i to avoid
18295 # blinking states (case b923 and others).
18297 1 + max( 0, $rOpts_continuation_indentation - $rOpts_indent_columns );
18299 # In addition, it may be necessary to use a few extra tolerance spaces
18300 # when -lp is used and/or when -xci is used. The history of this
18301 # so far is as follows:
18303 # FIX1: At least 3 characters were been found to be required for -lp
18304 # to fixes cases b1059 b1063 b1117.
18306 # FIX2: Further testing showed that we need a total of 3 extra spaces
18307 # when -lp is set for non-lists, and at least 2 spaces when -lp and
18309 # Fixes cases b1063 b1103 b1134 b1135 b1136 b1138 b1140 b1143 b1144
18310 # b1145 b1146 b1147 b1148 b1151 b1152 b1153 b1154 b1156 b1157 b1164
18313 # FIX3: To fix cases b1169 b1170 b1171, an update was made in sub
18314 # 'find_token_starting_list' to go back before an initial blank space.
18315 # This fixed these three cases, and allowed the tolerances to be
18316 # reduced to continue to fix all other known cases of instability.
18317 # This gives the current tolerance formulation.
18321 if ($rOpts_line_up_parentheses) {
18323 # boost tol for combination -lp -xci
18324 if ($rOpts_extended_continuation_indentation) {
18328 # boost tol for combination -lp and any -vtc > 0, but only for
18329 # non-list containers
18331 foreach ( keys %closing_vertical_tightness ) {
18333 unless ( $closing_vertical_tightness{$_} );
18334 $lp_tol_boost = 1; # Fixes B1193;
18340 # Define a level where list formatting becomes highly stressed and
18341 # needs to be simplified. Introduced for case b1262.
18342 $list_stress_level = min( $stress_level_alpha, $stress_level_beta + 2 );
18345 } ## end sub initialize_break_lists
18347 # routine to define essential variables when we go 'up' to
18349 sub check_for_new_minimum_depth {
18350 my ( $self, $depth_t, $seqno ) = @_;
18351 if ( $depth_t < $minimum_depth ) {
18353 $minimum_depth = $depth_t;
18355 # these arrays need not retain values between calls
18356 $type_sequence_stack[$depth_t] = $seqno;
18357 $override_cab3[$depth_t] =
18358 $rOpts_comma_arrow_breakpoints == 3
18360 && $self->[_roverride_cab3_]->{$seqno};
18362 $override_cab3[$depth_t] = undef;
18363 $breakpoint_stack[$depth_t] = $starting_breakpoint_count;
18364 $container_type[$depth_t] = EMPTY_STRING;
18365 $identifier_count_stack[$depth_t] = 0;
18366 $index_before_arrow[$depth_t] = -1;
18367 $interrupted_list[$depth_t] = 1;
18368 $item_count_stack[$depth_t] = 0;
18369 $last_nonblank_type[$depth_t] = EMPTY_STRING;
18370 $opening_structure_index_stack[$depth_t] = -1;
18372 $breakpoint_undo_stack[$depth_t] = undef;
18373 $comma_index[$depth_t] = undef;
18374 $last_comma_index[$depth_t] = undef;
18375 $last_dot_index[$depth_t] = undef;
18376 $old_breakpoint_count_stack[$depth_t] = undef;
18377 $has_old_logical_breakpoints[$depth_t] = 0;
18378 $rand_or_list[$depth_t] = [];
18379 $rfor_semicolon_list[$depth_t] = [];
18380 $i_equals[$depth_t] = -1;
18382 # these arrays must retain values between calls
18383 if ( !defined( $has_broken_sublist[$depth_t] ) ) {
18384 $dont_align[$depth_t] = 0;
18385 $has_broken_sublist[$depth_t] = 0;
18386 $want_comma_break[$depth_t] = 0;
18390 } ## end sub check_for_new_minimum_depth
18392 # routine to decide which commas to break at within a container;
18394 # $bp_count = number of comma breakpoints set
18395 # $do_not_break_apart = a flag indicating if container need not
18397 sub set_comma_breakpoints {
18399 my ( $self, $dd, $rbond_strength_bias ) = @_;
18401 my $do_not_break_apart = 0;
18403 # Do not break a list unless there are some non-line-ending commas.
18404 # This avoids getting different results with only non-essential commas,
18406 my $seqno = $type_sequence_stack[$dd];
18407 my $real_comma_count =
18408 $seqno ? $self->[_rtype_count_by_seqno_]->{$seqno}->{','} : 1;
18411 if ( $item_count_stack[$dd] ) {
18413 # handle commas not in containers...
18414 if ( $dont_align[$dd] ) {
18415 $self->do_uncontained_comma_breaks( $dd, $rbond_strength_bias );
18418 # handle commas within containers...
18419 elsif ($real_comma_count) {
18420 my $fbc = $forced_breakpoint_count;
18422 # always open comma lists not preceded by keywords,
18423 # barewords, identifiers (that is, anything that doesn't
18424 # look like a function call)
18425 my $must_break_open = $last_nonblank_type[$dd] !~ /^[kwiU]$/;
18427 $self->set_comma_breakpoints_do(
18430 i_opening_paren => $opening_structure_index_stack[$dd],
18431 i_closing_paren => $i,
18432 item_count => $item_count_stack[$dd],
18433 identifier_count => $identifier_count_stack[$dd],
18434 rcomma_index => $comma_index[$dd],
18435 next_nonblank_type => $next_nonblank_type,
18436 list_type => $container_type[$dd],
18437 interrupted => $interrupted_list[$dd],
18438 rdo_not_break_apart => \$do_not_break_apart,
18439 must_break_open => $must_break_open,
18440 has_broken_sublist => $has_broken_sublist[$dd],
18443 $bp_count = $forced_breakpoint_count - $fbc;
18444 $do_not_break_apart = 0 if $must_break_open;
18447 return ( $bp_count, $do_not_break_apart );
18448 } ## end sub set_comma_breakpoints
18450 # These types are excluded at breakpoints to prevent blinking
18451 # Switched from excluded to included as part of fix for b1214
18452 my %is_uncontained_comma_break_included_type;
18456 my @q = qw< k R } ) ] Y Z U w i q Q .
18457 = **= += *= &= <<= &&= -= /= |= >>= ||= //= .= %= ^= x=>;
18458 @is_uncontained_comma_break_included_type{@q} = (1) x scalar(@q);
18461 sub do_uncontained_comma_breaks {
18463 # Handle commas not in containers...
18464 # This is a catch-all routine for commas that we
18465 # don't know what to do with because the don't fall
18466 # within containers. We will bias the bond strength
18467 # to break at commas which ended lines in the input
18468 # file. This usually works better than just trying
18469 # to put as many items on a line as possible. A
18470 # downside is that if the input file is garbage it
18471 # won't work very well. However, the user can always
18472 # prevent following the old breakpoints with the
18474 my ( $self, $dd, $rbond_strength_bias ) = @_;
18476 # Check added for issue c131; an error here would be due to an
18477 # error initializing @comma_index when entering depth $dd.
18479 foreach my $ii ( @{ $comma_index[$dd] } ) {
18480 if ( $ii < 0 || $ii > $max_index_to_go ) {
18481 my $KK = $K_to_go[0];
18482 my $lno = $self->[_rLL_]->[$KK]->[_LINE_INDEX_];
18484 Bad comma index near line $lno: i=$ii must be between 0 and $max_index_to_go
18491 my $old_comma_break_count = 0;
18492 foreach my $ii ( @{ $comma_index[$dd] } ) {
18494 if ( $old_breakpoint_to_go[$ii] ) {
18495 $old_comma_break_count++;
18497 # Store the bias info for use by sub set_bond_strength
18498 push @{$rbond_strength_bias}, [ $ii, $bias ];
18500 # reduce bias magnitude to force breaks in order
18505 # Also put a break before the first comma if
18506 # (1) there was a break there in the input, and
18507 # (2) there was exactly one old break before the first comma break
18508 # (3) OLD: there are multiple old comma breaks
18509 # (3) NEW: there are one or more old comma breaks (see return example)
18510 # (4) the first comma is at the starting level ...
18511 # ... fixes cases b064 b065 b068 b210 b747
18512 # (5) the batch does not start with a ci>0 [ignore a ci change by -xci]
18513 # ... fixes b1220. If ci>0 we are in the middle of a snippet,
18514 # maybe because -boc has been forcing out previous lines.
18516 # For example, we will follow the user and break after
18517 # 'print' in this snippet:
18519 # "conformability (Not the same dimension)\n",
18520 # "\t", $have, " is ", text_unit($hu), "\n",
18521 # "\t", $want, " is ", text_unit($wu), "\n",
18524 # Another example, just one comma, where we will break after
18527 # $x * cos($a) - $y * sin($a),
18528 # $x * sin($a) + $y * cos($a);
18530 # Breaking a print statement:
18532 # ( $? & 127 ) ? " (SIG#" . ( $? & 127 ) . ")" : "",
18533 # ( $? & 128 ) ? " -- core dumped" : "", "\n";
18535 # But we will not force a break after the opening paren here
18536 # (causes a blinker):
18537 # $heap->{stream}->set_output_filter(
18538 # poe::filter::reference->new('myotherfreezer') ),
18541 my $i_first_comma = $comma_index[$dd]->[0];
18542 my $level_comma = $levels_to_go[$i_first_comma];
18543 my $ci_start = $ci_levels_to_go[0];
18545 # Here we want to use the value of ci before any -xci adjustment
18546 if ( $ci_start && $rOpts_extended_continuation_indentation ) {
18547 my $K0 = $K_to_go[0];
18548 if ( $self->[_rseqno_controlling_my_ci_]->{$K0} ) { $ci_start = 0 }
18551 && $old_breakpoint_to_go[$i_first_comma]
18552 && $level_comma == $levels_to_go[0] )
18556 foreach my $ii ( reverse( 0 .. $i_first_comma - 1 ) ) {
18557 if ( $old_breakpoint_to_go[$ii] ) {
18559 last if ( $obp_count > 1 );
18561 if ( $levels_to_go[$ii] == $level_comma );
18565 # Changed rule from multiple old commas to just one here:
18566 if ( $ibreak >= 0 && $obp_count == 1 && $old_comma_break_count > 0 )
18568 my $ibreak_m = $ibreak;
18569 $ibreak_m-- if ( $types_to_go[$ibreak_m] eq 'b' );
18570 if ( $ibreak_m >= 0 ) {
18572 # In order to avoid blinkers we have to be fairly
18576 # Rule 1: Do not to break before an opening token
18577 # Rule 2: avoid breaking at ternary operators
18578 # (see b931, which is similar to the above print example)
18579 # Rule 3: Do not break at chain operators to fix case b1119
18580 # - The previous test was '$typem !~ /^[\(\{\[L\?\:]$/'
18582 # NEW Rule, replaced above rules after case b1214:
18583 # only break at one of the included types
18585 # Be sure to test any changes to these rules against runs
18586 # with -l=0 such as the 'bbvt' test (perltidyrc_colin)
18588 my $type_m = $types_to_go[$ibreak_m];
18590 # Switched from excluded to included for b1214. If necessary
18591 # the token could also be checked if type_m eq 'k'
18592 if ( $is_uncontained_comma_break_included_type{$type_m} ) {
18593 $self->set_forced_breakpoint($ibreak);
18599 } ## end sub do_uncontained_comma_breaks
18601 my %is_logical_container;
18605 my @q = qw# if elsif unless while and or err not && | || ? : ! #;
18606 @is_logical_container{@q} = (1) x scalar(@q);
18608 # This filter will allow most tokens to skip past a section of code
18609 %quick_filter = %is_assignment;
18610 @q = qw# => . ; < > ~ #;
18612 @quick_filter{@q} = (1) x scalar(@q);
18615 sub set_for_semicolon_breakpoints {
18616 my ( $self, $dd ) = @_;
18617 foreach ( @{ $rfor_semicolon_list[$dd] } ) {
18618 $self->set_forced_breakpoint($_);
18623 sub set_logical_breakpoints {
18624 my ( $self, $dd ) = @_;
18626 $item_count_stack[$dd] == 0
18627 && $is_logical_container{ $container_type[$dd] }
18629 || $has_old_logical_breakpoints[$dd]
18633 # Look for breaks in this order:
18636 foreach my $i ( 0 .. 3 ) {
18637 if ( $rand_or_list[$dd][$i] ) {
18638 foreach ( @{ $rand_or_list[$dd][$i] } ) {
18639 $self->set_forced_breakpoint($_);
18642 # break at any 'if' and 'unless' too
18643 foreach ( @{ $rand_or_list[$dd][4] } ) {
18644 $self->set_forced_breakpoint($_);
18646 $rand_or_list[$dd] = [];
18652 } ## end sub set_logical_breakpoints
18654 sub is_unbreakable_container {
18656 # never break a container of one of these types
18657 # because bad things can happen (map1.t)
18659 return $is_sort_map_grep{ $container_type[$dd] };
18664 my ( $self, $is_long_line, $rbond_strength_bias ) = @_;
18666 #----------------------------------------------------------------------
18667 # This routine is called once per batch, if the batch is a list, to set
18668 # line breaks so that hierarchical structure can be displayed and so
18669 # that list items can be vertically aligned. The output of this
18670 # routine is stored in the array @forced_breakpoint_to_go, which is
18671 # used by sub 'break_long_lines' to set final breakpoints.
18672 #----------------------------------------------------------------------
18674 my $rLL = $self->[_rLL_];
18675 my $ris_list_by_seqno = $self->[_ris_list_by_seqno_];
18676 my $ris_broken_container = $self->[_ris_broken_container_];
18677 my $rbreak_before_container_by_seqno =
18678 $self->[_rbreak_before_container_by_seqno_];
18680 $starting_depth = $nesting_depth_to_go[0];
18682 $block_type = SPACE;
18683 $current_depth = $starting_depth;
18685 $last_nonblank_token = ';';
18686 $last_nonblank_type = ';';
18687 $last_nonblank_block_type = SPACE;
18688 $last_old_breakpoint_count = 0;
18689 $minimum_depth = $current_depth + 1; # forces update in check below
18690 $old_breakpoint_count = 0;
18691 $starting_breakpoint_count = $forced_breakpoint_count;
18694 $type_sequence = EMPTY_STRING;
18696 my $total_depth_variation = 0;
18697 my $i_old_assignment_break;
18698 my $depth_last = $starting_depth;
18699 my $comma_follows_last_closing_token;
18701 $self->check_for_new_minimum_depth( $current_depth,
18702 $parent_seqno_to_go[0] );
18704 my $want_previous_breakpoint = -1;
18706 my $saw_good_breakpoint;
18707 my $i_line_end = -1;
18708 my $i_line_start = -1;
18709 my $i_last_colon = -1;
18711 #----------------------------------------
18712 # Main loop over all tokens in this batch
18713 #----------------------------------------
18714 while ( ++$i <= $max_index_to_go ) {
18715 if ( $type ne 'b' ) {
18716 $i_last_nonblank_token = $i - 1;
18717 $last_nonblank_type = $type;
18718 $last_nonblank_token = $token;
18719 $last_nonblank_block_type = $block_type;
18720 } ## end if ( $type ne 'b' )
18721 $type = $types_to_go[$i];
18722 $block_type = $block_type_to_go[$i];
18723 $token = $tokens_to_go[$i];
18724 $type_sequence = $type_sequence_to_go[$i];
18725 my $next_type = $types_to_go[ $i + 1 ];
18726 my $next_token = $tokens_to_go[ $i + 1 ];
18727 my $i_next_nonblank = ( ( $next_type eq 'b' ) ? $i + 2 : $i + 1 );
18728 $next_nonblank_type = $types_to_go[$i_next_nonblank];
18729 $next_nonblank_token = $tokens_to_go[$i_next_nonblank];
18730 $next_nonblank_block_type = $block_type_to_go[$i_next_nonblank];
18732 # set break if flag was set
18733 if ( $want_previous_breakpoint >= 0 ) {
18734 $self->set_forced_breakpoint($want_previous_breakpoint);
18735 $want_previous_breakpoint = -1;
18738 $last_old_breakpoint_count = $old_breakpoint_count;
18740 # Fixed for case b1097 to not consider old breaks at highly
18741 # stressed locations, such as types 'L' and 'R'. It might be
18742 # useful to generalize this concept in the future by looking at
18743 # actual bond strengths.
18744 if ( $old_breakpoint_to_go[$i]
18746 && $next_nonblank_type ne 'R' )
18749 $i_line_start = $i_next_nonblank;
18751 $old_breakpoint_count++;
18753 # Break before certain keywords if user broke there and
18754 # this is a 'safe' break point. The idea is to retain
18755 # any preferred breaks for sequential list operations,
18756 # like a schwartzian transform.
18757 if ($rOpts_break_at_old_keyword_breakpoints) {
18759 $next_nonblank_type eq 'k'
18760 && $is_keyword_returning_list{$next_nonblank_token}
18761 && ( $type =~ /^[=\)\]\}Riw]$/
18763 && $is_keyword_returning_list{$token} )
18767 # we actually have to set this break next time through
18768 # the loop because if we are at a closing token (such
18769 # as '}') which forms a one-line block, this break might
18772 # And do not do this at an equals if the user wants
18773 # breaks before an equals (blinker cases b434 b903)
18774 unless ( $type eq '=' && $want_break_before{$type} ) {
18775 $want_previous_breakpoint = $i;
18777 } ## end if ( $next_nonblank_type...)
18778 } ## end if ($rOpts_break_at_old_keyword_breakpoints)
18780 # Break before attributes if user broke there
18781 if ($rOpts_break_at_old_attribute_breakpoints) {
18782 if ( $next_nonblank_type eq 'A' ) {
18783 $want_previous_breakpoint = $i;
18787 # remember an = break as possible good break point
18788 if ( $is_assignment{$type} ) {
18789 $i_old_assignment_break = $i;
18791 elsif ( $is_assignment{$next_nonblank_type} ) {
18792 $i_old_assignment_break = $i_next_nonblank;
18794 } ## end if ( $old_breakpoint_to_go...)
18796 next if ( $type eq 'b' );
18797 $depth = $nesting_depth_to_go[ $i + 1 ];
18799 $total_depth_variation += abs( $depth - $depth_last );
18800 $depth_last = $depth;
18802 # safety check - be sure we always break after a comment
18803 # Shouldn't happen .. an error here probably means that the
18804 # nobreak flag did not get turned off correctly during
18806 if ( $type eq '#' ) {
18807 if ( $i != $max_index_to_go ) {
18810 Non-fatal program bug: backup logic required to break after a comment
18813 $nobreak_to_go[$i] = 0;
18814 $self->set_forced_breakpoint($i);
18815 } ## end if ( $i != $max_index_to_go)
18816 } ## end if ( $type eq '#' )
18818 # Force breakpoints at certain tokens in long lines.
18819 # Note that such breakpoints will be undone later if these tokens
18820 # are fully contained within parens on a line.
18823 # break before a keyword within a line
18827 # if one of these keywords:
18828 && $is_if_unless_while_until_for_foreach{$token}
18830 # but do not break at something like '1 while'
18831 && ( $last_nonblank_type ne 'n' || $i > 2 )
18833 # and let keywords follow a closing 'do' brace
18834 && $last_nonblank_block_type ne 'do'
18839 # or container is broken (by side-comment, etc)
18840 || ( $next_nonblank_token eq '('
18841 && $mate_index_to_go[$i_next_nonblank] < $i )
18845 $self->set_forced_breakpoint( $i - 1 );
18846 } ## end if ( $type eq 'k' && $i...)
18848 # remember locations of '||' and '&&' for possible breaks if we
18849 # decide this is a long logical expression.
18850 if ( $type eq '||' ) {
18851 push @{ $rand_or_list[$depth][2] }, $i;
18852 ++$has_old_logical_breakpoints[$depth]
18853 if ( ( $i == $i_line_start || $i == $i_line_end )
18854 && $rOpts_break_at_old_logical_breakpoints );
18855 } ## end elsif ( $type eq '||' )
18856 elsif ( $type eq '&&' ) {
18857 push @{ $rand_or_list[$depth][3] }, $i;
18858 ++$has_old_logical_breakpoints[$depth]
18859 if ( ( $i == $i_line_start || $i == $i_line_end )
18860 && $rOpts_break_at_old_logical_breakpoints );
18861 } ## end elsif ( $type eq '&&' )
18862 elsif ( $type eq 'f' ) {
18863 push @{ $rfor_semicolon_list[$depth] }, $i;
18865 elsif ( $type eq 'k' ) {
18866 if ( $token eq 'and' ) {
18867 push @{ $rand_or_list[$depth][1] }, $i;
18868 ++$has_old_logical_breakpoints[$depth]
18869 if ( ( $i == $i_line_start || $i == $i_line_end )
18870 && $rOpts_break_at_old_logical_breakpoints );
18871 } ## end if ( $token eq 'and' )
18873 # break immediately at 'or's which are probably not in a logical
18874 # block -- but we will break in logical breaks below so that
18875 # they do not add to the forced_breakpoint_count
18876 elsif ( $token eq 'or' ) {
18877 push @{ $rand_or_list[$depth][0] }, $i;
18878 ++$has_old_logical_breakpoints[$depth]
18879 if ( ( $i == $i_line_start || $i == $i_line_end )
18880 && $rOpts_break_at_old_logical_breakpoints );
18881 if ( $is_logical_container{ $container_type[$depth] } ) {
18884 if ($is_long_line) { $self->set_forced_breakpoint($i) }
18885 elsif ( ( $i == $i_line_start || $i == $i_line_end )
18886 && $rOpts_break_at_old_logical_breakpoints )
18888 $saw_good_breakpoint = 1;
18890 } ## end else [ if ( $is_logical_container...)]
18891 } ## end elsif ( $token eq 'or' )
18892 elsif ( $token eq 'if' || $token eq 'unless' ) {
18893 push @{ $rand_or_list[$depth][4] }, $i;
18894 if ( ( $i == $i_line_start || $i == $i_line_end )
18895 && $rOpts_break_at_old_logical_breakpoints )
18897 $self->set_forced_breakpoint($i);
18899 } ## end elsif ( $token eq 'if' ||...)
18900 } ## end elsif ( $type eq 'k' )
18901 elsif ( $is_assignment{$type} ) {
18902 $i_equals[$depth] = $i;
18905 if ($type_sequence) {
18907 # handle any postponed closing breakpoints
18908 if ( $is_closing_sequence_token{$token} ) {
18909 if ( $type eq ':' ) {
18910 $i_last_colon = $i;
18912 # retain break at a ':' line break
18913 if ( ( $i == $i_line_start || $i == $i_line_end )
18914 && $rOpts_break_at_old_ternary_breakpoints
18915 && $levels_to_go[$i] < $list_stress_level )
18918 $self->set_forced_breakpoint($i);
18920 # Break at a previous '=', but only if it is before
18921 # the mating '?'. Mate_index test fixes b1287.
18922 my $ieq = $i_equals[$depth];
18923 if ( $ieq > 0 && $ieq < $mate_index_to_go[$i] ) {
18924 $self->set_forced_breakpoint(
18925 $i_equals[$depth] );
18926 $i_equals[$depth] = -1;
18928 } ## end if ( ( $i == $i_line_start...))
18929 } ## end if ( $type eq ':' )
18930 if ( has_postponed_breakpoint($type_sequence) ) {
18931 my $inc = ( $type eq ':' ) ? 0 : 1;
18932 if ( $i >= $inc ) {
18933 $self->set_forced_breakpoint( $i - $inc );
18936 } ## end if ( $is_closing_sequence_token{$token} )
18938 # set breaks at ?/: if they will get separated (and are
18939 # not a ?/: chain), or if the '?' is at the end of the
18941 elsif ( $token eq '?' ) {
18942 my $i_colon = $mate_index_to_go[$i];
18944 $i_colon <= 0 # the ':' is not in this batch
18945 || $i == 0 # this '?' is the first token of the line
18947 $max_index_to_go # or this '?' is the last token
18951 # don't break if # this has a side comment, and
18952 # don't break at a '?' if preceded by ':' on
18953 # this line of previous ?/: pair on this line.
18954 # This is an attempt to preserve a chain of ?/:
18955 # expressions (elsif2.t).
18959 || $parent_seqno_to_go[$i_last_colon] !=
18960 $parent_seqno_to_go[$i]
18962 && $tokens_to_go[$max_index_to_go] ne '#'
18965 $self->set_forced_breakpoint($i);
18967 $self->set_closing_breakpoint($i);
18968 } ## end if ( $i_colon <= 0 ||...)
18969 } ## end elsif ( $token eq '?' )
18971 elsif ( $is_opening_token{$token} ) {
18973 # do requested -lp breaks at the OPENING token for BROKEN
18974 # blocks. NOTE: this can be done for both -lp and -xlp,
18975 # but only -xlp can really take advantage of this. So this
18976 # is currently restricted to -xlp to avoid excess changes to
18977 # existing -lp formatting.
18978 if ( $rOpts_extended_line_up_parentheses
18979 && $mate_index_to_go[$i] < 0 )
18982 $self->[_rlp_object_by_seqno_]->{$type_sequence};
18984 my $K_begin_line = $lp_object->get_K_begin_line();
18985 my $i_begin_line = $K_begin_line - $K_to_go[0];
18986 $self->set_forced_lp_break( $i_begin_line, $i );
18991 } ## end if ($type_sequence)
18993 #print "LISTX sees: i=$i type=$type tok=$token block=$block_type depth=$depth\n";
18995 #------------------------------------------------------------
18996 # Handle Increasing Depth..
18998 # prepare for a new list when depth increases
18999 # token $i is a '(','{', or '['
19000 #------------------------------------------------------------
19001 # hardened against bad input syntax: depth jump must be 1 and type
19002 # must be opening..fixes c102
19003 if ( $depth == $current_depth + 1 && $is_opening_type{$type} ) {
19005 #----------------------------------------------------------
19006 # BEGIN initialize depth arrays
19007 # ... use the same order as sub check_for_new_minimum_depth
19008 #----------------------------------------------------------
19009 $type_sequence_stack[$depth] = $type_sequence;
19010 $override_cab3[$depth] =
19011 $rOpts_comma_arrow_breakpoints == 3
19013 && $self->[_roverride_cab3_]->{$type_sequence};
19015 $breakpoint_stack[$depth] = $forced_breakpoint_count;
19016 $container_type[$depth] =
19019 $is_container_label_type{$last_nonblank_type}
19020 ? $last_nonblank_token
19022 $identifier_count_stack[$depth] = 0;
19023 $index_before_arrow[$depth] = -1;
19024 $interrupted_list[$depth] = 0;
19025 $item_count_stack[$depth] = 0;
19026 $last_nonblank_type[$depth] = $last_nonblank_type;
19027 $opening_structure_index_stack[$depth] = $i;
19029 $breakpoint_undo_stack[$depth] = $forced_breakpoint_undo_count;
19030 $comma_index[$depth] = undef;
19031 $last_comma_index[$depth] = undef;
19032 $last_dot_index[$depth] = undef;
19033 $old_breakpoint_count_stack[$depth] = $old_breakpoint_count;
19034 $has_old_logical_breakpoints[$depth] = 0;
19035 $rand_or_list[$depth] = [];
19036 $rfor_semicolon_list[$depth] = [];
19037 $i_equals[$depth] = -1;
19039 # if line ends here then signal closing token to break
19040 if ( $next_nonblank_type eq 'b' || $next_nonblank_type eq '#' )
19042 $self->set_closing_breakpoint($i);
19045 # Not all lists of values should be vertically aligned..
19046 $dont_align[$depth] =
19048 # code BLOCKS are handled at a higher level
19049 ( $block_type ne EMPTY_STRING )
19051 # certain paren lists
19052 || ( $type eq '(' ) && (
19054 # it does not usually look good to align a list of
19055 # identifiers in a parameter list, as in:
19056 # my($var1, $var2, ...)
19057 # (This test should probably be refined, for now I'm just
19058 # testing for any keyword)
19059 ( $last_nonblank_type eq 'k' )
19061 # a trailing '(' usually indicates a non-list
19062 || ( $next_nonblank_type eq '(' )
19064 $has_broken_sublist[$depth] = 0;
19065 $want_comma_break[$depth] = 0;
19067 #-------------------------------------
19068 # END initialize depth arrays
19069 #-------------------------------------
19071 # patch to outdent opening brace of long if/for/..
19072 # statements (like this one). See similar coding in
19073 # set_continuation breaks. We have also catch it here for
19074 # short line fragments which otherwise will not go through
19075 # break_long_lines.
19079 # if we have the ')' but not its '(' in this batch..
19080 && ( $last_nonblank_token eq ')' )
19081 && $mate_index_to_go[$i_last_nonblank_token] < 0
19083 # and user wants brace to left
19084 && !$rOpts_opening_brace_always_on_right
19086 && ( $type eq '{' ) # should be true
19087 && ( $token eq '{' ) # should be true
19090 $self->set_forced_breakpoint( $i - 1 );
19091 } ## end if ( $block_type && ( ...))
19092 } ## end if ( $depth > $current_depth)
19094 #------------------------------------------------------------
19095 # Handle Decreasing Depth..
19097 # finish off any old list when depth decreases
19098 # token $i is a ')','}', or ']'
19099 #------------------------------------------------------------
19100 # hardened against bad input syntax: depth jump must be 1 and type
19101 # must be closing .. fixes c102
19102 elsif ( $depth == $current_depth - 1 && $is_closing_type{$type} ) {
19104 $self->check_for_new_minimum_depth( $depth,
19105 $parent_seqno_to_go[$i] );
19107 $comma_follows_last_closing_token =
19108 $next_nonblank_type eq ',' || $next_nonblank_type eq '=>';
19110 # force all outer logical containers to break after we see on
19112 $has_old_logical_breakpoints[$depth] ||=
19113 $has_old_logical_breakpoints[$current_depth];
19115 # Patch to break between ') {' if the paren list is broken.
19116 # There is similar logic in break_long_lines for
19117 # non-broken lists.
19119 && $next_nonblank_block_type
19120 && $interrupted_list[$current_depth]
19121 && $next_nonblank_type eq '{'
19122 && !$rOpts_opening_brace_always_on_right )
19124 $self->set_forced_breakpoint($i);
19125 } ## end if ( $token eq ')' && ...
19127 #print "LISTY sees: i=$i type=$type tok=$token block=$block_type depth=$depth next=$next_nonblank_type next_block=$next_nonblank_block_type inter=$interrupted_list[$current_depth]\n";
19129 # set breaks at commas if necessary
19130 my ( $bp_count, $do_not_break_apart ) =
19131 $self->set_comma_breakpoints( $current_depth,
19132 $rbond_strength_bias );
19134 my $i_opening = $opening_structure_index_stack[$current_depth];
19135 my $saw_opening_structure = ( $i_opening >= 0 );
19137 if ( $rOpts_line_up_parentheses && $saw_opening_structure ) {
19138 $lp_object = $self->[_rlp_object_by_seqno_]
19139 ->{ $type_sequence_to_go[$i_opening] };
19142 # this term is long if we had to break at interior commas..
19143 my $is_long_term = $bp_count > 0;
19145 # If this is a short container with one or more comma arrows,
19146 # then we will mark it as a long term to open it if requested.
19147 # $rOpts_comma_arrow_breakpoints =
19148 # 0 - open only if comma precedes closing brace
19149 # 1 - stable: except for one line blocks
19150 # 2 - try to form 1 line blocks
19152 # 4 - always open up if vt=0
19153 # 5 - stable: even for one line blocks if vt=0
19155 # PATCH: Modify the -cab flag if we are not processing a list:
19156 # We only want the -cab flag to apply to list containers, so
19157 # for non-lists we use the default and stable -cab=5 value.
19158 # Fixes case b939a.
19159 my $cab_flag = $rOpts_comma_arrow_breakpoints;
19160 if ( $type_sequence && !$ris_list_by_seqno->{$type_sequence} ) {
19164 # Ignore old breakpoints when under stress.
19165 # Fixes b1203 b1204 as well as b1197-b1200.
19166 # But not if -lp: fixes b1264, b1265. NOTE: rechecked with
19167 # b1264 to see if this check is still required at all, and
19168 # these still require a check, but at higher level beta+3
19169 # instead of beta: b1193 b780
19170 if ( $saw_opening_structure
19172 && $levels_to_go[$i_opening] >= $list_stress_level )
19176 # Do not break hash braces under stress (fixes b1238)
19177 $do_not_break_apart ||= $types_to_go[$i_opening] eq 'L';
19179 # This option fixes b1235, b1237, b1240 with old and new
19180 # -lp, but formatting is nicer with next option.
19181 ## $is_long_term ||=
19182 ## $levels_to_go[$i_opening] > $stress_level_beta + 1;
19184 # This option fixes b1240 but not b1235, b1237 with new -lp,
19185 # but this gives better formatting than the previous option.
19186 $do_not_break_apart ||=
19187 $levels_to_go[$i_opening] > $stress_level_beta;
19190 if ( !$is_long_term
19191 && $saw_opening_structure
19192 && $is_opening_token{ $tokens_to_go[$i_opening] }
19193 && $index_before_arrow[ $depth + 1 ] > 0
19194 && !$opening_vertical_tightness{ $tokens_to_go[$i_opening] }
19199 || $cab_flag == 0 && $last_nonblank_token eq ','
19200 || $cab_flag == 5 && $old_breakpoint_to_go[$i_opening];
19201 } ## end if ( !$is_long_term &&...)
19203 # mark term as long if the length between opening and closing
19204 # parens exceeds allowed line length
19205 if ( !$is_long_term && $saw_opening_structure ) {
19207 my $i_opening_minus =
19208 $self->find_token_starting_list($i_opening);
19211 $self->excess_line_length( $i_opening_minus, $i );
19213 # Use standard spaces for indentation of lists in -lp mode
19214 # if it gives a longer line length. This helps to avoid an
19215 # instability due to forming and breaking one-line blocks.
19216 # This fixes case b1314.
19217 my $indentation = $leading_spaces_to_go[$i_opening_minus];
19218 if ( ref($indentation)
19219 && $ris_broken_container->{$type_sequence} )
19221 my $lp_spaces = $indentation->get_spaces();
19222 my $std_spaces = $indentation->get_standard_spaces();
19223 my $diff = $std_spaces - $lp_spaces;
19224 if ( $diff > 0 ) { $excess += $diff }
19227 my $tol = $length_tol;
19229 # boost tol for an -lp container
19233 && ( $rOpts_extended_continuation_indentation
19234 || !$ris_list_by_seqno->{$type_sequence} )
19237 $tol += $lp_tol_boost;
19240 # Patch to avoid blinking with -bbxi=2 and -cab=2
19241 # in which variations in -ci cause unstable formatting
19242 # in edge cases. We just always add one ci level so that
19243 # the formatting is independent of the -BBX results.
19244 # Fixes cases b1137 b1149 b1150 b1155 b1158 b1159 b1160
19245 # b1161 b1166 b1167 b1168
19246 if ( !$ci_levels_to_go[$i_opening]
19247 && $rbreak_before_container_by_seqno->{$type_sequence} )
19249 $tol += $rOpts->{'continuation-indentation'};
19252 $is_long_term = $excess + $tol > 0;
19254 } ## end if ( !$is_long_term &&...)
19256 # We've set breaks after all comma-arrows. Now we have to
19257 # undo them if this can be a one-line block
19258 # (the only breakpoints set will be due to comma-arrows)
19262 # user doesn't require breaking after all comma-arrows
19263 ( $cab_flag != 0 ) && ( $cab_flag != 4 )
19265 # and if the opening structure is in this batch
19266 && $saw_opening_structure
19268 # and either on the same old line
19270 $old_breakpoint_count_stack[$current_depth] ==
19271 $last_old_breakpoint_count
19273 # or user wants to form long blocks with arrows
19276 # if -cab=3 is overridden then use -cab=2 behavior
19277 || $cab_flag == 3 && $override_cab3[$current_depth]
19280 # and we made breakpoints between the opening and closing
19281 && ( $breakpoint_undo_stack[$current_depth] <
19282 $forced_breakpoint_undo_count )
19284 # and this block is short enough to fit on one line
19285 # Note: use < because need 1 more space for possible comma
19290 $self->undo_forced_breakpoint_stack(
19291 $breakpoint_undo_stack[$current_depth] );
19292 } ## end if ( ( $rOpts_comma_arrow_breakpoints...))
19294 # now see if we have any comma breakpoints left
19295 my $has_comma_breakpoints =
19296 ( $breakpoint_stack[$current_depth] !=
19297 $forced_breakpoint_count );
19299 # update broken-sublist flag of the outer container
19300 $has_broken_sublist[$depth] =
19301 $has_broken_sublist[$depth]
19302 || $has_broken_sublist[$current_depth]
19304 || $has_comma_breakpoints;
19306 # Having come to the closing ')', '}', or ']', now we have to decide if we
19307 # should 'open up' the structure by placing breaks at the opening and
19308 # closing containers. This is a tricky decision. Here are some of the
19309 # basic considerations:
19311 # -If this is a BLOCK container, then any breakpoints will have already
19312 # been set (and according to user preferences), so we need do nothing here.
19314 # -If we have a comma-separated list for which we can align the list items,
19315 # then we need to do so because otherwise the vertical aligner cannot
19316 # currently do the alignment.
19318 # -If this container does itself contain a container which has been broken
19319 # open, then it should be broken open to properly show the structure.
19321 # -If there is nothing to align, and no other reason to break apart,
19322 # then do not do it.
19324 # We will not break open the parens of a long but 'simple' logical expression.
19327 # This is an example of a simple logical expression and its formatting:
19329 # if ( $bigwasteofspace1 && $bigwasteofspace2
19330 # || $bigwasteofspace3 && $bigwasteofspace4 )
19332 # Most people would prefer this than the 'spacey' version:
19335 # $bigwasteofspace1 && $bigwasteofspace2
19336 # || $bigwasteofspace3 && $bigwasteofspace4
19339 # To illustrate the rules for breaking logical expressions, consider:
19343 # and ( exists $ids_excl_uc{$id_uc}
19344 # or grep $id_uc =~ /$_/, @ids_excl_uc ))
19346 # This is on the verge of being difficult to read. The current default is to
19347 # open it up like this:
19352 # and ( exists $ids_excl_uc{$id_uc}
19353 # or grep $id_uc =~ /$_/, @ids_excl_uc )
19356 # This is a compromise which tries to avoid being too dense and to spacey.
19357 # A more spaced version would be:
19363 # exists $ids_excl_uc{$id_uc}
19364 # or grep $id_uc =~ /$_/, @ids_excl_uc
19368 # Some people might prefer the spacey version -- an option could be added. The
19369 # innermost expression contains a long block '( exists $ids_... ')'.
19371 # Here is how the logic goes: We will force a break at the 'or' that the
19372 # innermost expression contains, but we will not break apart its opening and
19373 # closing containers because (1) it contains no multi-line sub-containers itself,
19374 # and (2) there is no alignment to be gained by breaking it open like this
19377 # exists $ids_excl_uc{$id_uc}
19378 # or grep $id_uc =~ /$_/, @ids_excl_uc
19381 # (although this looks perfectly ok and might be good for long expressions). The
19382 # outer 'if' container, though, contains a broken sub-container, so it will be
19383 # broken open to avoid too much density. Also, since it contains no 'or's, there
19384 # will be a forced break at its 'and'.
19386 # Open-up if parens if requested. We do this by pretending we
19387 # did not see the opening structure, since in that case parens
19388 # always get opened up.
19389 if ( $saw_opening_structure
19390 && $rOpts_break_open_compact_parens )
19393 # This parameter is a one-character flag, as follows:
19394 # '0' matches no parens -> break open NOT OK
19395 # '1' matches all parens -> break open OK
19396 # Other values are same as used by the weld-exclusion-list
19397 my $flag = $rOpts_break_open_compact_parens;
19401 $saw_opening_structure = 0;
19404 my $KK = $K_to_go[$i_opening];
19405 $saw_opening_structure =
19406 !$self->match_paren_flag( $KK, $flag );
19410 # set some flags telling something about this container..
19411 my $is_simple_logical_expression = 0;
19412 if ( $item_count_stack[$current_depth] == 0
19413 && $saw_opening_structure
19414 && $tokens_to_go[$i_opening] eq '('
19415 && $is_logical_container{ $container_type[$current_depth] }
19419 # This seems to be a simple logical expression with
19420 # no existing breakpoints. Set a flag to prevent
19422 if ( !$has_comma_breakpoints ) {
19423 $is_simple_logical_expression = 1;
19426 # This seems to be a simple logical expression with
19427 # breakpoints (broken sublists, for example). Break
19428 # at all 'or's and '||'s.
19430 $self->set_logical_breakpoints($current_depth);
19432 } ## end if ( $item_count_stack...)
19435 && @{ $rfor_semicolon_list[$current_depth] } )
19437 $self->set_for_semicolon_breakpoints($current_depth);
19439 # open up a long 'for' or 'foreach' container to allow
19440 # leading term alignment unless -lp is used.
19441 $has_comma_breakpoints = 1 unless ($lp_object);
19442 } ## end if ( $is_long_term && ...)
19446 # breaks for code BLOCKS are handled at a higher level
19449 # we do not need to break at the top level of an 'if'
19451 && !$is_simple_logical_expression
19453 ## modification to keep ': (' containers vertically tight;
19454 ## but probably better to let user set -vt=1 to avoid
19455 ## inconsistency with other paren types
19456 ## && ($container_type[$current_depth] ne ':')
19458 # otherwise, we require one of these reasons for breaking:
19461 # - this term has forced line breaks
19462 $has_comma_breakpoints
19464 # - the opening container is separated from this batch
19465 # for some reason (comment, blank line, code block)
19466 # - this is a non-paren container spanning multiple lines
19467 || !$saw_opening_structure
19469 # - this is a long block contained in another breakable
19471 || $is_long_term && !$self->is_in_block_by_i($i_opening)
19476 # do special -lp breaks at the CLOSING token for INTACT
19477 # blocks (because we might not do them if the block does
19480 my $K_begin_line = $lp_object->get_K_begin_line();
19481 my $i_begin_line = $K_begin_line - $K_to_go[0];
19482 $self->set_forced_lp_break( $i_begin_line, $i_opening );
19485 # break after opening structure.
19486 # note: break before closing structure will be automatic
19487 if ( $minimum_depth <= $current_depth ) {
19489 if ( $i_opening >= 0 ) {
19490 $self->set_forced_breakpoint($i_opening)
19491 unless ( $do_not_break_apart
19492 || is_unbreakable_container($current_depth) );
19495 # break at ',' of lower depth level before opening token
19496 if ( $last_comma_index[$depth] ) {
19497 $self->set_forced_breakpoint(
19498 $last_comma_index[$depth] );
19501 # break at '.' of lower depth level before opening token
19502 if ( $last_dot_index[$depth] ) {
19503 $self->set_forced_breakpoint(
19504 $last_dot_index[$depth] );
19507 # break before opening structure if preceded by another
19508 # closing structure and a comma. This is normally
19509 # done by the previous closing brace, but not
19510 # if it was a one-line block.
19511 if ( $i_opening > 2 ) {
19513 ( $types_to_go[ $i_opening - 1 ] eq 'b' )
19518 $types_to_go[$i_prev] eq ','
19519 && ( $types_to_go[ $i_prev - 1 ] eq ')'
19520 || $types_to_go[ $i_prev - 1 ] eq '}' )
19523 $self->set_forced_breakpoint($i_prev);
19526 # also break before something like ':(' or '?('
19529 $types_to_go[$i_prev] =~ /^([k\:\?]|&&|\|\|)$/ )
19531 my $token_prev = $tokens_to_go[$i_prev];
19532 if ( $want_break_before{$token_prev} ) {
19533 $self->set_forced_breakpoint($i_prev);
19535 } ## end elsif ( $types_to_go[$i_prev...])
19536 } ## end if ( $i_opening > 2 )
19537 } ## end if ( $minimum_depth <=...)
19539 # break after comma following closing structure
19540 if ( $next_type eq ',' ) {
19541 $self->set_forced_breakpoint( $i + 1 );
19544 # break before an '=' following closing structure
19546 $is_assignment{$next_nonblank_type}
19547 && ( $breakpoint_stack[$current_depth] !=
19548 $forced_breakpoint_count )
19551 $self->set_forced_breakpoint($i);
19552 } ## end if ( $is_assignment{$next_nonblank_type...})
19554 # break at any comma before the opening structure Added
19555 # for -lp, but seems to be good in general. It isn't
19556 # obvious how far back to look; the '5' below seems to
19557 # work well and will catch the comma in something like
19558 # push @list, myfunc( $param, $param, ..
19560 my $icomma = $last_comma_index[$depth];
19561 if ( defined($icomma) && ( $i_opening - $icomma ) < 5 ) {
19562 unless ( $forced_breakpoint_to_go[$icomma] ) {
19563 $self->set_forced_breakpoint($icomma);
19566 } ## end logic to open up a container
19568 # Break open a logical container open if it was already open
19569 elsif ($is_simple_logical_expression
19570 && $has_old_logical_breakpoints[$current_depth] )
19572 $self->set_logical_breakpoints($current_depth);
19575 # Handle long container which does not get opened up
19576 elsif ($is_long_term) {
19578 # must set fake breakpoint to alert outer containers that
19580 set_fake_breakpoint();
19581 } ## end elsif ($is_long_term)
19583 } ## end elsif ( $depth < $current_depth)
19585 #------------------------------------------------------------
19586 # Handle this token
19587 #------------------------------------------------------------
19589 $current_depth = $depth;
19591 # most token types can skip the rest of this loop
19592 next unless ( $quick_filter{$type} );
19594 # handle comma-arrow
19595 if ( $type eq '=>' ) {
19596 next if ( $last_nonblank_type eq '=>' );
19597 next if $rOpts_break_at_old_comma_breakpoints;
19599 if ( $rOpts_comma_arrow_breakpoints == 3
19600 && !$override_cab3[$depth] );
19601 $want_comma_break[$depth] = 1;
19602 $index_before_arrow[$depth] = $i_last_nonblank_token;
19604 } ## end if ( $type eq '=>' )
19606 elsif ( $type eq '.' ) {
19607 $last_dot_index[$depth] = $i;
19610 # Turn off alignment if we are sure that this is not a list
19611 # environment. To be safe, we will do this if we see certain
19612 # non-list tokens, such as ';', and also the environment is
19613 # not a list. Note that '=' could be in any of the = operators
19614 # (lextest.t). We can't just use the reported environment
19615 # because it can be incorrect in some cases.
19616 elsif ( ( $type =~ /^[\;\<\>\~]$/ || $is_assignment{$type} )
19617 && !$self->is_in_list_by_i($i) )
19619 $dont_align[$depth] = 1;
19620 $want_comma_break[$depth] = 0;
19621 $index_before_arrow[$depth] = -1;
19622 } ## end elsif ( ( $type =~ /^[\;\<\>\~]$/...))
19624 # now just handle any commas
19625 next unless ( $type eq ',' );
19627 $last_dot_index[$depth] = undef;
19628 $last_comma_index[$depth] = $i;
19630 # break here if this comma follows a '=>'
19631 # but not if there is a side comment after the comma
19632 if ( $want_comma_break[$depth] ) {
19634 if ( $next_nonblank_type =~ /^[\)\}\]R]$/ ) {
19635 if ($rOpts_comma_arrow_breakpoints) {
19636 $want_comma_break[$depth] = 0;
19641 $self->set_forced_breakpoint($i)
19642 unless ( $next_nonblank_type eq '#' );
19644 # break before the previous token if it looks safe
19645 # Example of something that we will not try to break before:
19646 # DBI::SQL_SMALLINT() => $ado_consts->{adSmallInt},
19647 # Also we don't want to break at a binary operator (like +):
19651 # $y - $R, -fill => 'black',
19653 my $ibreak = $index_before_arrow[$depth] - 1;
19655 && $tokens_to_go[ $ibreak + 1 ] !~ /^[\)\}\]]$/ )
19657 if ( $tokens_to_go[$ibreak] eq '-' ) { $ibreak-- }
19658 if ( $types_to_go[$ibreak] eq 'b' ) { $ibreak-- }
19659 if ( $types_to_go[$ibreak] =~ /^[,wiZCUG\(\{\[]$/ ) {
19661 # don't break pointer calls, such as the following:
19662 # File::Spec->curdir => 1,
19663 # (This is tokenized as adjacent 'w' tokens)
19664 ##if ( $tokens_to_go[ $ibreak + 1 ] !~ /^->/ ) {
19666 # And don't break before a comma, as in the following:
19667 # ( LONGER_THAN,=> 1,
19668 # EIGHTY_CHARACTERS,=> 2,
19669 # CAUSES_FORMATTING,=> 3,
19672 # This example is for -tso but should be general rule
19673 if ( $tokens_to_go[ $ibreak + 1 ] ne '->'
19674 && $tokens_to_go[ $ibreak + 1 ] ne ',' )
19676 $self->set_forced_breakpoint($ibreak);
19678 } ## end if ( $types_to_go[$ibreak...])
19679 } ## end if ( $ibreak > 0 && $tokens_to_go...)
19681 $want_comma_break[$depth] = 0;
19682 $index_before_arrow[$depth] = -1;
19684 # handle list which mixes '=>'s and ','s:
19685 # treat any list items so far as an interrupted list
19686 $interrupted_list[$depth] = 1;
19688 } ## end if ( $want_comma_break...)
19690 # Break after all commas above starting depth...
19691 # But only if the last closing token was followed by a comma,
19692 # to avoid breaking a list operator (issue c119)
19693 if ( $depth < $starting_depth
19694 && $comma_follows_last_closing_token
19695 && !$dont_align[$depth] )
19697 $self->set_forced_breakpoint($i)
19698 unless ( $next_nonblank_type eq '#' );
19702 # add this comma to the list..
19703 my $item_count = $item_count_stack[$depth];
19704 if ( $item_count == 0 ) {
19706 # but do not form a list with no opening structure
19709 # open INFILE_COPY, ">$input_file_copy"
19710 # or die ("very long message");
19711 if ( ( $opening_structure_index_stack[$depth] < 0 )
19712 && $self->is_in_block_by_i($i) )
19714 $dont_align[$depth] = 1;
19716 } ## end if ( $item_count == 0 )
19718 $comma_index[$depth][$item_count] = $i;
19719 ++$item_count_stack[$depth];
19720 if ( $last_nonblank_type =~ /^[iR\]]$/ ) {
19721 $identifier_count_stack[$depth]++;
19723 } ## end while ( ++$i <= $max_index_to_go)
19725 #-------------------------------------------
19726 # end of loop over all tokens in this batch
19727 #-------------------------------------------
19729 # set breaks for any unfinished lists ..
19730 foreach my $dd ( reverse( $minimum_depth .. $current_depth ) ) {
19732 $interrupted_list[$dd] = 1;
19733 $has_broken_sublist[$dd] = 1 if ( $dd < $current_depth );
19734 $self->set_comma_breakpoints( $dd, $rbond_strength_bias );
19735 $self->set_logical_breakpoints($dd)
19736 if ( $has_old_logical_breakpoints[$dd] );
19737 $self->set_for_semicolon_breakpoints($dd);
19739 # break open container...
19740 my $i_opening = $opening_structure_index_stack[$dd];
19741 if ( defined($i_opening) && $i_opening >= 0 ) {
19742 $self->set_forced_breakpoint($i_opening)
19744 is_unbreakable_container($dd)
19746 # Avoid a break which would place an isolated ' or "
19749 && $i_opening >= $max_index_to_go - 2
19750 && ( $token eq "'" || $token eq '"' ) )
19753 } ## end for ( my $dd = $current_depth...)
19755 # Return a flag indicating if the input file had some good breakpoints.
19756 # This flag will be used to force a break in a line shorter than the
19757 # allowed line length.
19758 if ( $has_old_logical_breakpoints[$current_depth] ) {
19759 $saw_good_breakpoint = 1;
19762 # A complex line with one break at an = has a good breakpoint.
19763 # This is not complex ($total_depth_variation=0):
19767 # This is complex ($total_depth_variation=6):
19769 # (is_boundp("a", 'self-insert') && is_boundp("b", 'self-insert'));
19771 # The check ($i_old_.. < $max_index_to_go) was added to fix b1333
19772 elsif ($i_old_assignment_break
19773 && $total_depth_variation > 4
19774 && $old_breakpoint_count == 1
19775 && $i_old_assignment_break < $max_index_to_go )
19777 $saw_good_breakpoint = 1;
19778 } ## end elsif ( $i_old_assignment_break...)
19780 return $saw_good_breakpoint;
19781 } ## end sub break_lists
19782 } ## end closure break_lists
19789 # Added 'w' to fix b1172
19790 my @q = qw(k w i Z ->);
19791 @is_kwiZ{@q} = (1) x scalar(@q);
19793 # added = for b1211
19794 @q = qw<( [ { L R } ] ) = b>;
19796 @is_key_type{@q} = (1) x scalar(@q);
19799 use constant DEBUG_FIND_START => 0;
19801 sub find_token_starting_list {
19803 # When testing to see if a block will fit on one line, some
19804 # previous token(s) may also need to be on the line; particularly
19805 # if this is a sub call. So we will look back at least one
19807 my ( $self, $i_opening_paren ) = @_;
19809 # This will be the return index
19810 my $i_opening_minus = $i_opening_paren;
19812 goto RETURN if ( $i_opening_minus <= 0 );
19814 my $im1 = $i_opening_paren - 1;
19815 my ( $iprev_nb, $type_prev_nb ) = ( $im1, $types_to_go[$im1] );
19816 if ( $type_prev_nb eq 'b' && $iprev_nb > 0 ) {
19818 $type_prev_nb = $types_to_go[$iprev_nb];
19821 if ( $type_prev_nb eq ',' ) {
19823 # a previous comma is a good break point
19824 # $i_opening_minus = $i_opening_paren;
19828 $tokens_to_go[$i_opening_paren] eq '('
19830 # non-parens added here to fix case b1186
19831 || $is_kwiZ{$type_prev_nb}
19834 $i_opening_minus = $im1;
19836 # Walk back to improve length estimate...
19837 # FIX for cases b1169 b1170 b1171: start walking back
19838 # at the previous nonblank. This makes the result insensitive
19839 # to the flag --space-function-paren, and similar.
19840 # previous loop: for ( my $j = $im1 ; $j >= 0 ; $j-- ) {
19841 foreach my $j ( reverse( 0 .. $iprev_nb ) ) {
19842 if ( $is_key_type{ $types_to_go[$j] } ) {
19845 if ( $types_to_go[$j] eq '=' ) { $i_opening_minus = $j }
19848 $i_opening_minus = $j;
19850 if ( $types_to_go[$i_opening_minus] eq 'b' ) { $i_opening_minus++ }
19855 DEBUG_FIND_START && print <<EOM;
19856 FIND_START: i=$i_opening_paren tok=$tokens_to_go[$i_opening_paren] => im=$i_opening_minus tok=$tokens_to_go[$i_opening_minus]
19859 return $i_opening_minus;
19860 } ## end sub find_token_starting_list
19862 { ## begin closure set_comma_breakpoints_do
19864 my %is_keyword_with_special_leading_term;
19868 # These keywords have prototypes which allow a special leading item
19869 # followed by a list
19871 qw(formline grep kill map printf sprintf push chmod join pack unshift);
19872 @is_keyword_with_special_leading_term{@q} = (1) x scalar(@q);
19875 use constant DEBUG_SPARSE => 0;
19877 sub set_comma_breakpoints_do {
19879 # Given a list with some commas, set breakpoints at some of the
19880 # commas, if necessary, to make it easy to read.
19882 my ( $self, $rinput_hash ) = @_;
19884 my $depth = $rinput_hash->{depth};
19885 my $i_opening_paren = $rinput_hash->{i_opening_paren};
19886 my $i_closing_paren = $rinput_hash->{i_closing_paren};
19887 my $item_count = $rinput_hash->{item_count};
19888 my $identifier_count = $rinput_hash->{identifier_count};
19889 my $rcomma_index = $rinput_hash->{rcomma_index};
19890 my $next_nonblank_type = $rinput_hash->{next_nonblank_type};
19891 my $list_type = $rinput_hash->{list_type};
19892 my $interrupted = $rinput_hash->{interrupted};
19893 my $rdo_not_break_apart = $rinput_hash->{rdo_not_break_apart};
19894 my $must_break_open = $rinput_hash->{must_break_open};
19895 my $has_broken_sublist = $rinput_hash->{has_broken_sublist};
19897 # nothing to do if no commas seen
19898 return if ( $item_count < 1 );
19900 my $i_first_comma = $rcomma_index->[0];
19901 my $i_true_last_comma = $rcomma_index->[ $item_count - 1 ];
19902 my $i_last_comma = $i_true_last_comma;
19903 if ( $i_last_comma >= $max_index_to_go ) {
19904 $i_last_comma = $rcomma_index->[ --$item_count - 1 ];
19905 return if ( $item_count < 1 );
19907 my $is_lp_formatting = ref( $leading_spaces_to_go[$i_first_comma] );
19909 #---------------------------------------------------------------
19910 # find lengths of all items in the list to calculate page layout
19911 #---------------------------------------------------------------
19912 my $comma_count = $item_count;
19918 my @max_length = ( 0, 0 );
19919 my $first_term_length;
19920 my $i = $i_opening_paren;
19923 foreach my $j ( 0 .. $comma_count - 1 ) {
19924 $is_odd = 1 - $is_odd;
19925 $i_prev_plus = $i + 1;
19926 $i = $rcomma_index->[$j];
19929 ( $i == 0 || $types_to_go[ $i - 1 ] eq 'b' ) ? $i - 2 : $i - 1;
19931 ( $types_to_go[$i_prev_plus] eq 'b' )
19934 push @i_term_begin, $i_term_begin;
19935 push @i_term_end, $i_term_end;
19936 push @i_term_comma, $i;
19938 # note: currently adding 2 to all lengths (for comma and space)
19940 2 + token_sequence_length( $i_term_begin, $i_term_end );
19941 push @item_lengths, $length;
19944 $first_term_length = $length;
19948 if ( $length > $max_length[$is_odd] ) {
19949 $max_length[$is_odd] = $length;
19954 # now we have to make a distinction between the comma count and item
19955 # count, because the item count will be one greater than the comma
19956 # count if the last item is not terminated with a comma
19958 ( $types_to_go[ $i_last_comma + 1 ] eq 'b' )
19959 ? $i_last_comma + 1
19962 ( $types_to_go[ $i_closing_paren - 1 ] eq 'b' )
19963 ? $i_closing_paren - 2
19964 : $i_closing_paren - 1;
19965 my $i_effective_last_comma = $i_last_comma;
19967 my $last_item_length = token_sequence_length( $i_b + 1, $i_e );
19969 if ( $last_item_length > 0 ) {
19971 # add 2 to length because other lengths include a comma and a blank
19972 $last_item_length += 2;
19973 push @item_lengths, $last_item_length;
19974 push @i_term_begin, $i_b + 1;
19975 push @i_term_end, $i_e;
19976 push @i_term_comma, undef;
19978 my $i_odd = $item_count % 2;
19980 if ( $last_item_length > $max_length[$i_odd] ) {
19981 $max_length[$i_odd] = $last_item_length;
19985 $i_effective_last_comma = $i_e + 1;
19987 if ( $types_to_go[ $i_b + 1 ] =~ /^[iR\]]$/ ) {
19988 $identifier_count++;
19992 #---------------------------------------------------------------
19993 # End of length calculations
19994 #---------------------------------------------------------------
19996 #---------------------------------------------------------------
19997 # Compound List Rule 1:
19998 # Break at (almost) every comma for a list containing a broken
19999 # sublist. This has higher priority than the Interrupted List
20001 #---------------------------------------------------------------
20002 if ($has_broken_sublist) {
20004 # Break at every comma except for a comma between two
20005 # simple, small terms. This prevents long vertical
20006 # columns of, say, just 0's.
20007 my $small_length = 10; # 2 + actual maximum length wanted
20009 # We'll insert a break in long runs of small terms to
20010 # allow alignment in uniform tables.
20011 my $skipped_count = 0;
20012 my $columns = table_columns_available($i_first_comma);
20013 my $fields = int( $columns / $small_length );
20014 if ( $rOpts_maximum_fields_per_table
20015 && $fields > $rOpts_maximum_fields_per_table )
20017 $fields = $rOpts_maximum_fields_per_table;
20019 my $max_skipped_count = $fields - 1;
20021 my $is_simple_last_term = 0;
20022 my $is_simple_next_term = 0;
20023 foreach my $j ( 0 .. $item_count ) {
20024 $is_simple_last_term = $is_simple_next_term;
20025 $is_simple_next_term = 0;
20026 if ( $j < $item_count
20027 && $i_term_end[$j] == $i_term_begin[$j]
20028 && $item_lengths[$j] <= $small_length )
20030 $is_simple_next_term = 1;
20033 if ( $is_simple_last_term
20034 && $is_simple_next_term
20035 && $skipped_count < $max_skipped_count )
20040 $skipped_count = 0;
20041 my $i_tc = $i_term_comma[ $j - 1 ];
20042 last unless defined $i_tc;
20043 $self->set_forced_breakpoint($i_tc);
20047 # always break at the last comma if this list is
20048 # interrupted; we wouldn't want to leave a terminal '{', for
20050 if ($interrupted) {
20051 $self->set_forced_breakpoint($i_true_last_comma);
20056 #my ( $a, $b, $c ) = caller();
20057 #print "LISTX: in set_list $a $c interrupt=$interrupted count=$item_count
20058 #i_first = $i_first_comma i_last=$i_last_comma max=$max_index_to_go\n";
20059 #print "depth=$depth has_broken=$has_broken_sublist[$depth] is_multi=$is_multiline opening_paren=($i_opening_paren) \n";
20061 #---------------------------------------------------------------
20062 # Interrupted List Rule:
20063 # A list is forced to use old breakpoints if it was interrupted
20064 # by side comments or blank lines, or requested by user.
20065 #---------------------------------------------------------------
20066 if ( $rOpts_break_at_old_comma_breakpoints
20068 || $i_opening_paren < 0 )
20070 $self->copy_old_breakpoints( $i_first_comma, $i_true_last_comma );
20074 #---------------------------------------------------------------
20075 # Looks like a list of items. We have to look at it and size it up.
20076 #---------------------------------------------------------------
20078 my $opening_token = $tokens_to_go[$i_opening_paren];
20079 my $opening_is_in_block = $self->is_in_block_by_i($i_opening_paren);
20081 #-------------------------------------------------------------------
20082 # Return if this will fit on one line
20083 #-------------------------------------------------------------------
20085 # The -bbxi=2 parameters can add an extra hidden level of indentation;
20086 # this needs a tolerance to avoid instability. Fixes b1259, 1260.
20088 if ( $break_before_container_types{$opening_token}
20089 && $container_indentation_options{$opening_token}
20090 && $container_indentation_options{$opening_token} == 2 )
20092 $tol = $rOpts_indent_columns;
20094 # use greater of -ci and -i (fix for case b1334)
20095 if ( $tol < $rOpts_continuation_indentation ) {
20096 $tol = $rOpts_continuation_indentation;
20100 my $i_opening_minus = $self->find_token_starting_list($i_opening_paren);
20102 unless $self->excess_line_length( $i_opening_minus, $i_closing_paren )
20105 #-------------------------------------------------------------------
20106 # Now we know that this block spans multiple lines; we have to set
20107 # at least one breakpoint -- real or fake -- as a signal to break
20108 # open any outer containers.
20109 #-------------------------------------------------------------------
20110 set_fake_breakpoint();
20112 # be sure we do not extend beyond the current list length
20113 if ( $i_effective_last_comma >= $max_index_to_go ) {
20114 $i_effective_last_comma = $max_index_to_go - 1;
20117 # Set a flag indicating if we need to break open to keep -lp
20118 # items aligned. This is necessary if any of the list terms
20119 # exceeds the available space after the '('.
20120 my $need_lp_break_open = $must_break_open;
20121 if ( $is_lp_formatting && !$must_break_open ) {
20122 my $columns_if_unbroken =
20123 $maximum_line_length_at_level[ $levels_to_go[$i_opening_minus] ]
20124 - total_line_length( $i_opening_minus, $i_opening_paren );
20125 $need_lp_break_open =
20126 ( $max_length[0] > $columns_if_unbroken )
20127 || ( $max_length[1] > $columns_if_unbroken )
20128 || ( $first_term_length > $columns_if_unbroken );
20131 # Specify if the list must have an even number of fields or not.
20132 # It is generally safest to assume an even number, because the
20133 # list items might be a hash list. But if we can be sure that
20134 # it is not a hash, then we can allow an odd number for more
20136 my $odd_or_even = 2; # 1 = odd field count ok, 2 = want even count
20138 if ( $identifier_count >= $item_count - 1
20139 || $is_assignment{$next_nonblank_type}
20140 || ( $list_type && $list_type ne '=>' && $list_type !~ /^[\:\?]$/ )
20146 # do we have a long first term which should be
20147 # left on a line by itself?
20148 my $use_separate_first_term = (
20149 $odd_or_even == 1 # only if we can use 1 field/line
20150 && $item_count > 3 # need several items
20151 && $first_term_length >
20152 2 * $max_length[0] - 2 # need long first term
20153 && $first_term_length >
20154 2 * $max_length[1] - 2 # need long first term
20157 # or do we know from the type of list that the first term should
20159 if ( !$use_separate_first_term ) {
20160 if ( $is_keyword_with_special_leading_term{$list_type} ) {
20161 $use_separate_first_term = 1;
20163 # should the container be broken open?
20164 if ( $item_count < 3 ) {
20165 if ( $i_first_comma - $i_opening_paren < 4 ) {
20166 ${$rdo_not_break_apart} = 1;
20169 elsif ($first_term_length < 20
20170 && $i_first_comma - $i_opening_paren < 4 )
20172 my $columns = table_columns_available($i_first_comma);
20173 if ( $first_term_length < $columns ) {
20174 ${$rdo_not_break_apart} = 1;
20181 if ($use_separate_first_term) {
20183 # ..set a break and update starting values
20184 $use_separate_first_term = 1;
20185 $self->set_forced_breakpoint($i_first_comma);
20186 $i_opening_paren = $i_first_comma;
20187 $i_first_comma = $rcomma_index->[1];
20189 return if $comma_count == 1;
20190 shift @item_lengths;
20191 shift @i_term_begin;
20193 shift @i_term_comma;
20196 # if not, update the metrics to include the first term
20198 if ( $first_term_length > $max_length[0] ) {
20199 $max_length[0] = $first_term_length;
20203 # Field width parameters
20204 my $pair_width = ( $max_length[0] + $max_length[1] );
20206 ( $max_length[0] > $max_length[1] ) ? $max_length[0] : $max_length[1];
20208 # Number of free columns across the page width for laying out tables
20209 my $columns = table_columns_available($i_first_comma);
20211 # Patch for b1210 and b1216-b1218 when -vmll is set. If we are unable
20212 # to break after an opening paren, then the maximum line length for the
20213 # first line could be less than the later lines. So we need to reduce
20214 # the line length. Normally, we will get a break after an opening
20215 # paren, but in some cases we might not.
20216 if ( $rOpts_variable_maximum_line_length
20217 && $tokens_to_go[$i_opening_paren] eq '('
20219 ##&& !$old_breakpoint_to_go[$i_opening_paren] ) ## in b1210 patch
20221 my $ib = $i_term_begin[0];
20222 my $type = $types_to_go[$ib];
20224 # So far, the only known instance of this problem is when
20225 # a bareword follows an opening paren with -vmll
20226 if ( $type eq 'w' ) {
20228 # If a line starts with paren+space+terms, then its max length
20229 # could be up to ci+2-i spaces less than if the term went out
20230 # on a line after the paren. So..
20231 my $tol_w = max( 0,
20232 2 + $rOpts_continuation_indentation -
20233 $rOpts_indent_columns );
20234 $columns = max( 0, $columns - $tol_w );
20236 ## Here is the original b1210 fix, but it failed on b1216-b1218
20237 ##my $columns2 = table_columns_available($i_opening_paren);
20238 ##$columns = min( $columns, $columns2 );
20242 # Estimated maximum number of fields which fit this space
20243 # This will be our first guess
20244 my $number_of_fields_max =
20245 maximum_number_of_fields( $columns, $odd_or_even, $max_width,
20247 my $number_of_fields = $number_of_fields_max;
20249 # Find the best-looking number of fields
20250 # and make this our second guess if possible
20251 my ( $number_of_fields_best, $ri_ragged_break_list,
20252 $new_identifier_count )
20253 = $self->study_list_complexity( \@i_term_begin, \@i_term_end,
20254 \@item_lengths, $max_width );
20256 if ( $number_of_fields_best != 0
20257 && $number_of_fields_best < $number_of_fields_max )
20259 $number_of_fields = $number_of_fields_best;
20262 # ----------------------------------------------------------------------
20263 # If we are crowded and the -lp option is being used, try to
20264 # undo some indentation
20265 # ----------------------------------------------------------------------
20269 $number_of_fields == 0
20270 || ( $number_of_fields == 1
20271 && $number_of_fields != $number_of_fields_best )
20275 my $available_spaces =
20276 $self->get_available_spaces_to_go($i_first_comma);
20277 if ( $available_spaces > 0 ) {
20279 my $spaces_wanted = $max_width - $columns; # for 1 field
20281 if ( $number_of_fields_best == 0 ) {
20282 $number_of_fields_best =
20283 get_maximum_fields_wanted( \@item_lengths );
20286 if ( $number_of_fields_best != 1 ) {
20287 my $spaces_wanted_2 =
20288 1 + $pair_width - $columns; # for 2 fields
20289 if ( $available_spaces > $spaces_wanted_2 ) {
20290 $spaces_wanted = $spaces_wanted_2;
20294 if ( $spaces_wanted > 0 ) {
20295 my $deleted_spaces =
20296 $self->reduce_lp_indentation( $i_first_comma,
20300 if ( $deleted_spaces > 0 ) {
20301 $columns = table_columns_available($i_first_comma);
20302 $number_of_fields_max =
20303 maximum_number_of_fields( $columns, $odd_or_even,
20304 $max_width, $pair_width );
20305 $number_of_fields = $number_of_fields_max;
20307 if ( $number_of_fields_best == 1
20308 && $number_of_fields >= 1 )
20310 $number_of_fields = $number_of_fields_best;
20317 # try for one column if two won't work
20318 if ( $number_of_fields <= 0 ) {
20319 $number_of_fields = int( $columns / $max_width );
20322 # The user can place an upper bound on the number of fields,
20323 # which can be useful for doing maintenance on tables
20324 if ( $rOpts_maximum_fields_per_table
20325 && $number_of_fields > $rOpts_maximum_fields_per_table )
20327 $number_of_fields = $rOpts_maximum_fields_per_table;
20330 # How many columns (characters) and lines would this container take
20331 # if no additional whitespace were added?
20332 my $packed_columns = token_sequence_length( $i_opening_paren + 1,
20333 $i_effective_last_comma + 1 );
20334 if ( $columns <= 0 ) { $columns = 1 } # avoid divide by zero
20335 my $packed_lines = 1 + int( $packed_columns / $columns );
20337 # are we an item contained in an outer list?
20338 my $in_hierarchical_list = $next_nonblank_type =~ /^[\}\,]$/;
20340 if ( $number_of_fields <= 0 ) {
20342 # #---------------------------------------------------------------
20343 # # We're in trouble. We can't find a single field width that works.
20344 # # There is no simple answer here; we may have a single long list
20346 # #---------------------------------------------------------------
20348 # In many cases, it may be best to not force a break if there is just one
20349 # comma, because the standard continuation break logic will do a better
20352 # In the common case that all but one of the terms can fit
20353 # on a single line, it may look better not to break open the
20354 # containing parens. Consider, for example
20358 # sort { $color_value{$::a} <=> $color_value{$::b}; }
20361 # which will look like this with the container broken:
20365 # sort { $color_value{$::a} <=> $color_value{$::b}; } keys %colors
20368 # Here is an example of this rule for a long last term:
20370 # log_message( 0, 256, 128,
20371 # "Number of routes in adj-RIB-in to be considered: $peercount" );
20373 # And here is an example with a long first term:
20376 # "%2d wallclock secs (%$f usr %$f sys + %$f cusr %$f csys = %$f CPU)",
20377 # $r, $pu, $ps, $cu, $cs, $tt
20379 # if $style eq 'all';
20381 $i_last_comma = $rcomma_index->[ $comma_count - 1 ];
20383 my $long_last_term =
20384 $self->excess_line_length( 0, $i_last_comma ) <= 0;
20385 my $long_first_term =
20386 $self->excess_line_length( $i_first_comma + 1, $max_index_to_go )
20389 # break at every comma ...
20392 # if requested by user or is best looking
20393 $number_of_fields_best == 1
20395 # or if this is a sublist of a larger list
20396 || $in_hierarchical_list
20398 # or if multiple commas and we don't have a long first or last
20400 || ( $comma_count > 1
20401 && !( $long_last_term || $long_first_term ) )
20404 foreach ( 0 .. $comma_count - 1 ) {
20405 $self->set_forced_breakpoint( $rcomma_index->[$_] );
20408 elsif ($long_last_term) {
20410 $self->set_forced_breakpoint($i_last_comma);
20411 ${$rdo_not_break_apart} = 1 unless $must_break_open;
20413 elsif ($long_first_term) {
20415 $self->set_forced_breakpoint($i_first_comma);
20419 # let breaks be defined by default bond strength logic
20424 # --------------------------------------------------------
20425 # We have a tentative field count that seems to work.
20426 # How many lines will this require?
20427 # --------------------------------------------------------
20428 my $formatted_lines = $item_count / ($number_of_fields);
20429 if ( $formatted_lines != int $formatted_lines ) {
20430 $formatted_lines = 1 + int $formatted_lines;
20433 # So far we've been trying to fill out to the right margin. But
20434 # compact tables are easier to read, so let's see if we can use fewer
20435 # fields without increasing the number of lines.
20436 $number_of_fields =
20437 compactify_table( $item_count, $number_of_fields, $formatted_lines,
20440 # How many spaces across the page will we fill?
20441 my $columns_per_line =
20442 ( int $number_of_fields / 2 ) * $pair_width +
20443 ( $number_of_fields % 2 ) * $max_width;
20445 my $formatted_columns;
20447 if ( $number_of_fields > 1 ) {
20448 $formatted_columns =
20449 ( $pair_width * ( int( $item_count / 2 ) ) +
20450 ( $item_count % 2 ) * $max_width );
20453 $formatted_columns = $max_width * $item_count;
20455 if ( $formatted_columns < $packed_columns ) {
20456 $formatted_columns = $packed_columns;
20459 my $unused_columns = $formatted_columns - $packed_columns;
20461 # set some empirical parameters to help decide if we should try to
20462 # align; high sparsity does not look good, especially with few lines
20463 my $sparsity = ($unused_columns) / ($formatted_columns);
20464 my $max_allowed_sparsity =
20465 ( $item_count < 3 ) ? 0.1
20466 : ( $packed_lines == 1 ) ? 0.15
20467 : ( $packed_lines == 2 ) ? 0.4
20470 my $two_line_word_wrap_ok;
20471 if ( $opening_token eq '(' ) {
20473 # default is to allow wrapping of short paren lists
20474 $two_line_word_wrap_ok = 1;
20476 # but turn off word wrap where requested
20477 if ($rOpts_break_open_compact_parens) {
20479 # This parameter is a one-character flag, as follows:
20480 # '0' matches no parens -> break open NOT OK -> word wrap OK
20481 # '1' matches all parens -> break open OK -> word wrap NOT OK
20482 # Other values are the same as used by the weld-exclusion-list
20483 my $flag = $rOpts_break_open_compact_parens;
20487 $two_line_word_wrap_ok = 0;
20489 elsif ( $flag eq '0' ) {
20490 $two_line_word_wrap_ok = 1;
20493 my $KK = $K_to_go[$i_opening_paren];
20494 $two_line_word_wrap_ok =
20495 !$self->match_paren_flag( $KK, $flag );
20500 # Begin check for shortcut methods, which avoid treating a list
20501 # as a table for relatively small parenthesized lists. These
20502 # are usually easier to read if not formatted as tables.
20504 $packed_lines <= 2 # probably can fit in 2 lines
20505 && $item_count < 9 # doesn't have too many items
20506 && $opening_is_in_block # not a sub-container
20507 && $two_line_word_wrap_ok # ok to wrap this paren list
20508 ##&& $opening_token eq '(' # is paren list
20512 # Shortcut method 1: for -lp and just one comma:
20513 # This is a no-brainer, just break at the comma.
20515 $is_lp_formatting # -lp
20516 && $item_count == 2 # two items, one comma
20517 && !$must_break_open
20520 my $i_break = $rcomma_index->[0];
20521 $self->set_forced_breakpoint($i_break);
20522 ${$rdo_not_break_apart} = 1;
20527 # method 2 is for most small ragged lists which might look
20528 # best if not displayed as a table.
20530 ( $number_of_fields == 2 && $item_count == 3 )
20532 $new_identifier_count > 0 # isn't all quotes
20533 && $sparsity > 0.15
20534 ) # would be fairly spaced gaps if aligned
20538 my $break_count = $self->set_ragged_breakpoints( \@i_term_comma,
20539 $ri_ragged_break_list );
20540 ++$break_count if ($use_separate_first_term);
20542 # NOTE: we should really use the true break count here,
20543 # which can be greater if there are large terms and
20544 # little space, but usually this will work well enough.
20545 unless ($must_break_open) {
20547 if ( $break_count <= 1 ) {
20548 ${$rdo_not_break_apart} = 1;
20550 elsif ( $is_lp_formatting && !$need_lp_break_open ) {
20551 ${$rdo_not_break_apart} = 1;
20557 } ## end shortcut methods
20560 DEBUG_SPARSE && do {
20562 "SPARSE:cols=$columns commas=$comma_count items:$item_count ids=$identifier_count pairwidth=$pair_width fields=$number_of_fields lines packed: $packed_lines packed_cols=$packed_columns fmtd:$formatted_lines cols /line:$columns_per_line unused:$unused_columns fmtd:$formatted_columns sparsity=$sparsity allow=$max_allowed_sparsity\n";
20566 #---------------------------------------------------------------
20567 # Compound List Rule 2:
20568 # If this list is too long for one line, and it is an item of a
20569 # larger list, then we must format it, regardless of sparsity
20570 # (ian.t). One reason that we have to do this is to trigger
20571 # Compound List Rule 1, above, which causes breaks at all commas of
20572 # all outer lists. In this way, the structure will be properly
20574 #---------------------------------------------------------------
20576 # Decide if this list is too long for one line unless broken
20577 my $total_columns = table_columns_available($i_opening_paren);
20578 my $too_long = $packed_columns > $total_columns;
20580 # For a paren list, include the length of the token just before the
20581 # '(' because this is likely a sub call, and we would have to
20582 # include the sub name on the same line as the list. This is still
20583 # imprecise, but not too bad. (steve.t)
20584 if ( !$too_long && $i_opening_paren > 0 && $opening_token eq '(' ) {
20586 $too_long = $self->excess_line_length( $i_opening_minus,
20587 $i_effective_last_comma + 1 ) > 0;
20590 # FIXME: For an item after a '=>', try to include the length of the
20591 # thing before the '=>'. This is crude and should be improved by
20592 # actually looking back token by token.
20593 if ( !$too_long && $i_opening_paren > 0 && $list_type eq '=>' ) {
20594 my $i_opening_minus_test = $i_opening_paren - 4;
20595 if ( $i_opening_minus >= 0 ) {
20596 $too_long = $self->excess_line_length( $i_opening_minus_test,
20597 $i_effective_last_comma + 1 ) > 0;
20601 # Always break lists contained in '[' and '{' if too long for 1 line,
20602 # and always break lists which are too long and part of a more complex
20604 my $must_break_open_container = $must_break_open
20606 && ( $in_hierarchical_list || !$two_line_word_wrap_ok ) );
20608 #print "LISTX: next=$next_nonblank_type avail cols=$columns packed=$packed_columns must format = $must_break_open_container too-long=$too_long opening=$opening_token list_type=$list_type formatted_lines=$formatted_lines packed=$packed_lines max_sparsity= $max_allowed_sparsity sparsity=$sparsity \n";
20610 #---------------------------------------------------------------
20611 # The main decision:
20612 # Now decide if we will align the data into aligned columns. Do not
20613 # attempt to align columns if this is a tiny table or it would be
20614 # too spaced. It seems that the more packed lines we have, the
20615 # sparser the list that can be allowed and still look ok.
20616 #---------------------------------------------------------------
20618 if ( ( $formatted_lines < 3 && $packed_lines < $formatted_lines )
20619 || ( $formatted_lines < 2 )
20620 || ( $unused_columns > $max_allowed_sparsity * $formatted_columns )
20624 #---------------------------------------------------------------
20625 # too sparse: would look ugly if aligned in a table;
20626 #---------------------------------------------------------------
20628 # use old breakpoints if this is a 'big' list
20629 if ( $packed_lines > 2 && $item_count > 10 ) {
20630 write_logfile_entry("List sparse: using old breakpoints\n");
20631 $self->copy_old_breakpoints( $i_first_comma, $i_last_comma );
20634 # let the continuation logic handle it if 2 lines
20637 my $break_count = $self->set_ragged_breakpoints( \@i_term_comma,
20638 $ri_ragged_break_list );
20639 ++$break_count if ($use_separate_first_term);
20641 unless ($must_break_open_container) {
20642 if ( $break_count <= 1 ) {
20643 ${$rdo_not_break_apart} = 1;
20645 elsif ( $is_lp_formatting && !$need_lp_break_open ) {
20646 ${$rdo_not_break_apart} = 1;
20653 #---------------------------------------------------------------
20654 # go ahead and format as a table
20655 #---------------------------------------------------------------
20656 write_logfile_entry(
20657 "List: auto formatting with $number_of_fields fields/row\n");
20659 my $j_first_break =
20660 $use_separate_first_term ? $number_of_fields : $number_of_fields - 1;
20662 my $j = $j_first_break;
20663 while ( $j < $comma_count ) {
20664 my $i_comma = $rcomma_index->[$j];
20665 $self->set_forced_breakpoint($i_comma);
20666 $j += $number_of_fields;
20669 } ## end sub set_comma_breakpoints_do
20670 } ## end closure set_comma_breakpoints_do
20672 sub study_list_complexity {
20674 # Look for complex tables which should be formatted with one term per line.
20675 # Returns the following:
20677 # \@i_ragged_break_list = list of good breakpoints to avoid lines
20678 # which are hard to read
20679 # $number_of_fields_best = suggested number of fields based on
20680 # complexity; = 0 if any number may be used.
20682 my ( $self, $ri_term_begin, $ri_term_end, $ritem_lengths, $max_width ) = @_;
20683 my $item_count = @{$ri_term_begin};
20684 my $complex_item_count = 0;
20685 my $number_of_fields_best = $rOpts_maximum_fields_per_table;
20686 my $i_max = @{$ritem_lengths} - 1;
20687 ##my @item_complexity;
20689 my $i_last_last_break = -3;
20690 my $i_last_break = -2;
20691 my @i_ragged_break_list;
20693 my $definitely_complex = 30;
20694 my $definitely_simple = 12;
20695 my $quote_count = 0;
20697 for my $i ( 0 .. $i_max ) {
20698 my $ib = $ri_term_begin->[$i];
20699 my $ie = $ri_term_end->[$i];
20701 # define complexity: start with the actual term length
20702 my $weighted_length = ( $ritem_lengths->[$i] - 2 );
20704 ##TBD: join types here and check for variations
20705 ##my $str=join "", @tokens_to_go[$ib..$ie];
20708 if ( $types_to_go[$ib] =~ /^[qQ]$/ ) {
20712 elsif ( $types_to_go[$ib] =~ /^[w\-]$/ ) {
20716 if ( $ib eq $ie ) {
20717 if ( $is_quote && $tokens_to_go[$ib] =~ /\s/ ) {
20718 $complex_item_count++;
20719 $weighted_length *= 2;
20725 if ( grep { $_ eq 'b' } @types_to_go[ $ib .. $ie ] ) {
20726 $complex_item_count++;
20727 $weighted_length *= 2;
20729 if ( grep { $_ eq '..' } @types_to_go[ $ib .. $ie ] ) {
20730 $weighted_length += 4;
20734 # add weight for extra tokens.
20735 $weighted_length += 2 * ( $ie - $ib );
20737 ## my $BUB = join '', @tokens_to_go[$ib..$ie];
20738 ## print "# COMPLEXITY:$weighted_length $BUB\n";
20740 ##push @item_complexity, $weighted_length;
20742 # now mark a ragged break after this item it if it is 'long and
20744 if ( $weighted_length >= $definitely_complex ) {
20746 # if we broke after the previous term
20747 # then break before it too
20748 if ( $i_last_break == $i - 1
20750 && $i_last_last_break != $i - 2 )
20753 ## FIXME: don't strand a small term
20754 pop @i_ragged_break_list;
20755 push @i_ragged_break_list, $i - 2;
20756 push @i_ragged_break_list, $i - 1;
20759 push @i_ragged_break_list, $i;
20760 $i_last_last_break = $i_last_break;
20761 $i_last_break = $i;
20764 # don't break before a small last term -- it will
20765 # not look good on a line by itself.
20766 elsif ($i == $i_max
20767 && $i_last_break == $i - 1
20768 && $weighted_length <= $definitely_simple )
20770 pop @i_ragged_break_list;
20774 my $identifier_count = $i_max + 1 - $quote_count;
20776 # Need more tuning here..
20777 if ( $max_width > 12
20778 && $complex_item_count > $item_count / 2
20779 && $number_of_fields_best != 2 )
20781 $number_of_fields_best = 1;
20784 return ( $number_of_fields_best, \@i_ragged_break_list, $identifier_count );
20785 } ## end sub study_list_complexity
20787 sub get_maximum_fields_wanted {
20789 # Not all tables look good with more than one field of items.
20790 # This routine looks at a table and decides if it should be
20791 # formatted with just one field or not.
20792 # This coding is still under development.
20793 my ($ritem_lengths) = @_;
20795 my $number_of_fields_best = 0;
20797 # For just a few items, we tentatively assume just 1 field.
20798 my $item_count = @{$ritem_lengths};
20799 if ( $item_count <= 5 ) {
20800 $number_of_fields_best = 1;
20803 # For larger tables, look at it both ways and see what looks best
20807 my @max_length = ( 0, 0 );
20808 my @last_length_2 = ( undef, undef );
20809 my @first_length_2 = ( undef, undef );
20810 my $last_length = undef;
20811 my $total_variation_1 = 0;
20812 my $total_variation_2 = 0;
20813 my @total_variation_2 = ( 0, 0 );
20815 foreach my $j ( 0 .. $item_count - 1 ) {
20817 $is_odd = 1 - $is_odd;
20818 my $length = $ritem_lengths->[$j];
20819 if ( $length > $max_length[$is_odd] ) {
20820 $max_length[$is_odd] = $length;
20823 if ( defined($last_length) ) {
20824 my $dl = abs( $length - $last_length );
20825 $total_variation_1 += $dl;
20827 $last_length = $length;
20829 my $ll = $last_length_2[$is_odd];
20830 if ( defined($ll) ) {
20831 my $dl = abs( $length - $ll );
20832 $total_variation_2[$is_odd] += $dl;
20835 $first_length_2[$is_odd] = $length;
20837 $last_length_2[$is_odd] = $length;
20839 $total_variation_2 = $total_variation_2[0] + $total_variation_2[1];
20841 my $factor = ( $item_count > 10 ) ? 1 : ( $item_count > 5 ) ? 0.75 : 0;
20842 unless ( $total_variation_2 < $factor * $total_variation_1 ) {
20843 $number_of_fields_best = 1;
20846 return ($number_of_fields_best);
20847 } ## end sub get_maximum_fields_wanted
20849 sub table_columns_available {
20850 my $i_first_comma = shift;
20852 $maximum_line_length_at_level[ $levels_to_go[$i_first_comma] ] -
20853 leading_spaces_to_go($i_first_comma);
20855 # Patch: the vertical formatter does not line up lines whose lengths
20856 # exactly equal the available line length because of allowances
20857 # that must be made for side comments. Therefore, the number of
20858 # available columns is reduced by 1 character.
20861 } ## end sub table_columns_available
20863 sub maximum_number_of_fields {
20865 # how many fields will fit in the available space?
20866 my ( $columns, $odd_or_even, $max_width, $pair_width ) = @_;
20867 my $max_pairs = int( $columns / $pair_width );
20868 my $number_of_fields = $max_pairs * 2;
20869 if ( $odd_or_even == 1
20870 && $max_pairs * $pair_width + $max_width <= $columns )
20872 $number_of_fields++;
20874 return $number_of_fields;
20875 } ## end sub maximum_number_of_fields
20877 sub compactify_table {
20879 # given a table with a certain number of fields and a certain number
20880 # of lines, see if reducing the number of fields will make it look
20882 my ( $item_count, $number_of_fields, $formatted_lines, $odd_or_even ) = @_;
20883 if ( $number_of_fields >= $odd_or_even * 2 && $formatted_lines > 0 ) {
20885 my $min_fields = $number_of_fields;
20887 while ($min_fields >= $odd_or_even
20888 && $min_fields * $formatted_lines >= $item_count )
20890 $number_of_fields = $min_fields;
20891 $min_fields -= $odd_or_even;
20894 return $number_of_fields;
20895 } ## end sub compactify_table
20897 sub set_ragged_breakpoints {
20899 # Set breakpoints in a list that cannot be formatted nicely as a
20901 my ( $self, $ri_term_comma, $ri_ragged_break_list ) = @_;
20903 my $break_count = 0;
20904 foreach ( @{$ri_ragged_break_list} ) {
20905 my $j = $ri_term_comma->[$_];
20907 $self->set_forced_breakpoint($j);
20911 return $break_count;
20912 } ## end sub set_ragged_breakpoints
20914 sub copy_old_breakpoints {
20915 my ( $self, $i_first_comma, $i_last_comma ) = @_;
20916 for my $i ( $i_first_comma .. $i_last_comma ) {
20917 if ( $old_breakpoint_to_go[$i] ) {
20918 $self->set_forced_breakpoint($i);
20925 my ( $self, $i, $j ) = @_;
20926 if ( $i >= 0 && $i <= $j && $j <= $max_index_to_go ) {
20929 my ( $a, $b, $c ) = caller();
20931 "NOBREAK: forced_breakpoint $forced_breakpoint_count from $a $c with i=$i max=$max_index_to_go type=$types_to_go[$i]\n";
20934 @nobreak_to_go[ $i .. $j ] = (1) x ( $j - $i + 1 );
20937 # shouldn't happen; non-critical error
20940 my ( $a, $b, $c ) = caller();
20942 "NOBREAK ERROR: from $a $c with i=$i j=$j max=$max_index_to_go\n";
20946 } ## end sub set_nobreaks
20948 ###############################################
20949 # CODE SECTION 12: Code for setting indentation
20950 ###############################################
20952 sub token_sequence_length {
20954 # return length of tokens ($ibeg .. $iend) including $ibeg & $iend
20955 my ( $ibeg, $iend ) = @_;
20957 # fix possible negative starting index
20958 if ( $ibeg < 0 ) { $ibeg = 0 }
20960 # returns 0 if index range is empty (some subs assume this)
20961 if ( $ibeg > $iend ) {
20965 return $summed_lengths_to_go[ $iend + 1 ] - $summed_lengths_to_go[$ibeg];
20966 } ## end sub token_sequence_length
20968 sub total_line_length {
20970 # return length of a line of tokens ($ibeg .. $iend)
20971 my ( $ibeg, $iend ) = @_;
20973 # Start with the leading spaces on this line ...
20974 my $length = $leading_spaces_to_go[$ibeg];
20975 if ( ref($length) ) { $length = $length->get_spaces() }
20977 # ... then add the net token length
20979 $summed_lengths_to_go[ $iend + 1 ] - $summed_lengths_to_go[$ibeg];
20982 } ## end sub total_line_length
20984 sub excess_line_length {
20986 # return number of characters by which a line of tokens ($ibeg..$iend)
20987 # exceeds the allowable line length.
20988 # NOTE: profiling shows that efficiency of this routine is essential.
20990 my ( $self, $ibeg, $iend, $ignore_right_weld ) = @_;
20992 # Start with the leading spaces on this line ...
20993 my $excess = $leading_spaces_to_go[$ibeg];
20994 if ( ref($excess) ) { $excess = $excess->get_spaces() }
20996 # ... then add the net token length, minus the maximum length
20998 $summed_lengths_to_go[ $iend + 1 ] -
20999 $summed_lengths_to_go[$ibeg] -
21000 $maximum_line_length_at_level[ $levels_to_go[$ibeg] ];
21002 # ... and include right weld lengths unless requested not to
21003 if ( $total_weld_count
21004 && $type_sequence_to_go[$iend]
21005 && !$ignore_right_weld )
21007 my $wr = $self->[_rweld_len_right_at_K_]->{ $K_to_go[$iend] };
21008 $excess += $wr if defined($wr);
21012 } ## end sub excess_line_length
21016 # return the number of leading spaces associated with an indentation
21017 # variable $indentation is either a constant number of spaces or an object
21018 # with a get_spaces method.
21019 my $indentation = shift;
21020 return ref($indentation) ? $indentation->get_spaces() : $indentation;
21023 sub get_recoverable_spaces {
21025 # return the number of spaces (+ means shift right, - means shift left)
21026 # that we would like to shift a group of lines with the same indentation
21027 # to get them to line up with their opening parens
21028 my $indentation = shift;
21029 return ref($indentation) ? $indentation->get_recoverable_spaces() : 0;
21032 sub get_available_spaces_to_go {
21034 my ( $self, $ii ) = @_;
21035 my $item = $leading_spaces_to_go[$ii];
21037 # return the number of available leading spaces associated with an
21038 # indentation variable. $indentation is either a constant number of
21039 # spaces or an object with a get_available_spaces method.
21040 return ref($item) ? $item->get_available_spaces() : 0;
21041 } ## end sub get_available_spaces_to_go
21043 { ## begin closure set_lp_indentation
21045 use constant DEBUG_LP => 0;
21047 # Stack of -lp index objects which survives between batches.
21051 # The predicted position of the next opening container which may start
21052 # an -lp indentation level. This survives between batches.
21053 my $lp_position_predictor;
21055 # A level at which the lp format becomes too highly stressed to continue
21056 my $lp_cutoff_level;
21060 # Index names for the -lp stack variables.
21061 # Do not combine with other BEGIN blocks (c101).
21065 _lp_ci_level_ => $i++,
21066 _lp_level_ => $i++,
21067 _lp_object_ => $i++,
21068 _lp_container_seqno_ => $i++,
21069 _lp_space_count_ => $i++,
21073 sub initialize_lp_vars {
21075 # initialize gnu variables for a new file;
21076 # must be called once at the start of a new file.
21078 $lp_position_predictor = 0;
21080 $lp_cutoff_level = min( $stress_level_alpha, $stress_level_beta + 2 );
21082 # we can turn off -lp if all levels will be at or above the cutoff
21083 if ( $lp_cutoff_level <= 1 ) {
21084 $rOpts_line_up_parentheses = 0;
21085 $rOpts_extended_line_up_parentheses = 0;
21090 # initialize the leading whitespace stack to negative levels
21091 # so that we can never run off the end of the stack
21092 $rLP->[$max_lp_stack]->[_lp_ci_level_] = -1;
21093 $rLP->[$max_lp_stack]->[_lp_level_] = -1;
21094 $rLP->[$max_lp_stack]->[_lp_object_] = undef;
21095 $rLP->[$max_lp_stack]->[_lp_container_seqno_] = SEQ_ROOT;
21096 $rLP->[$max_lp_stack]->[_lp_space_count_] = 0;
21099 } ## end sub initialize_lp_vars
21101 # hashes for efficient testing
21107 my @q = qw< } ) ] >;
21108 @hash_test1{@q} = (1) x scalar(@q);
21111 @hash_test2{@q} = (1) x scalar(@q);
21112 @q = qw( . || && );
21113 @hash_test3{@q} = (1) x scalar(@q);
21116 sub set_lp_indentation {
21118 #------------------------------------------------------------------
21119 # Define the leading whitespace for all tokens in the current batch
21120 # when the -lp formatting is selected.
21121 #------------------------------------------------------------------
21125 return unless ($rOpts_line_up_parentheses);
21126 return unless ( defined($max_index_to_go) && $max_index_to_go >= 0 );
21128 # List of -lp indentation objects created in this batch
21129 my $rlp_object_list = [];
21130 my $max_lp_object_list = UNDEFINED_INDEX;
21132 my %last_lp_equals;
21133 my %lp_comma_count;
21134 my %lp_arrow_count;
21135 my $ii_begin_line = 0;
21137 my $rLL = $self->[_rLL_];
21138 my $Klimit = $self->[_Klimit_];
21139 my $rbreak_container = $self->[_rbreak_container_];
21140 my $rshort_nested = $self->[_rshort_nested_];
21141 my $ris_excluded_lp_container = $self->[_ris_excluded_lp_container_];
21142 my $rblock_type_of_seqno = $self->[_rblock_type_of_seqno_];
21143 my $starting_in_quote = $self->[_this_batch_]->[_starting_in_quote_];
21144 my $K_closing_container = $self->[_K_closing_container_];
21145 my $rlp_object_by_seqno = $self->[_rlp_object_by_seqno_];
21146 my $radjusted_levels = $self->[_radjusted_levels_];
21147 my $rbreak_before_container_by_seqno =
21148 $self->[_rbreak_before_container_by_seqno_];
21149 my $rcollapsed_length_by_seqno = $self->[_rcollapsed_length_by_seqno_];
21151 my $nws = @{$radjusted_levels};
21154 # The 'starting_in_quote' flag means that the first token is the first
21155 # token of a line and it is also the continuation of some kind of
21156 # multi-line quote or pattern. It must have no added leading
21157 # whitespace, so we can skip it.
21158 if ($starting_in_quote) {
21162 my $K_last_nonblank;
21163 my $Kpnb = $K_to_go[0] - 1;
21164 if ( $Kpnb > 0 && $rLL->[$Kpnb]->[_TYPE_] eq 'b' ) {
21167 if ( $Kpnb >= 0 && $rLL->[$Kpnb]->[_TYPE_] ne 'b' ) {
21168 $K_last_nonblank = $Kpnb;
21171 my $last_nonblank_token = EMPTY_STRING;
21172 my $last_nonblank_type = EMPTY_STRING;
21173 my $last_last_nonblank_type = EMPTY_STRING;
21175 if ( defined($K_last_nonblank) ) {
21176 $last_nonblank_token = $rLL->[$K_last_nonblank]->[_TOKEN_];
21177 $last_nonblank_type = $rLL->[$K_last_nonblank]->[_TYPE_];
21180 my ( $space_count, $current_level, $current_ci_level, $in_lp_mode );
21181 my $stack_changed = 1;
21183 #-----------------------------------
21184 # Loop over all tokens in this batch
21185 #-----------------------------------
21186 foreach my $ii ( $imin .. $max_index_to_go ) {
21188 my $KK = $K_to_go[$ii];
21189 my $type = $types_to_go[$ii];
21190 my $token = $tokens_to_go[$ii];
21191 my $level = $levels_to_go[$ii];
21192 my $ci_level = $ci_levels_to_go[$ii];
21193 my $total_depth = $nesting_depth_to_go[$ii];
21194 my $standard_spaces = $leading_spaces_to_go[$ii];
21196 #--------------------------------------------------
21197 # Adjust levels if necessary to recycle whitespace:
21198 #--------------------------------------------------
21199 if ( defined($radjusted_levels) && @{$radjusted_levels} == $Klimit )
21201 $level = $radjusted_levels->[$KK];
21202 if ( $level < 0 ) { $level = 0 } # note: this should not happen
21205 # get the top state from the stack if it has changed
21206 if ($stack_changed) {
21207 my $rLP_top = $rLP->[$max_lp_stack];
21208 my $lp_object = $rLP_top->[_lp_object_];
21210 ( $space_count, $current_level, $current_ci_level ) =
21211 @{ $lp_object->get_spaces_level_ci() };
21214 $current_ci_level = $rLP_top->[_lp_ci_level_];
21215 $current_level = $rLP_top->[_lp_level_];
21216 $space_count = $rLP_top->[_lp_space_count_];
21218 $stack_changed = 0;
21221 #------------------------------
21222 # update the position predictor
21223 #------------------------------
21224 if ( $type eq '{' || $type eq '(' ) {
21226 $lp_comma_count{ $total_depth + 1 } = 0;
21227 $lp_arrow_count{ $total_depth + 1 } = 0;
21229 # If we come to an opening token after an '=' token of some
21230 # type, see if it would be helpful to 'break' after the '=' to
21232 my $last_equals = $last_lp_equals{$total_depth};
21234 # Skip an empty set of parens, such as after channel():
21235 # my $exchange = $self->_channel()->exchange(
21236 # This fixes issues b1318 b1322 b1323 b1328
21237 # TODO: maybe also skip parens with just one token?
21238 my $is_empty_container;
21239 if ( $last_equals && $ii < $max_index_to_go ) {
21240 my $seqno = $type_sequence_to_go[$ii];
21241 my $inext_nb = $ii + 1;
21243 if ( $types_to_go[$inext_nb] eq 'b' );
21244 my $seqno_nb = $type_sequence_to_go[$inext_nb];
21245 $is_empty_container =
21246 $seqno && $seqno_nb && $seqno_nb == $seqno;
21250 && $last_equals > $ii_begin_line
21251 && !$is_empty_container )
21254 my $seqno = $type_sequence_to_go[$ii];
21256 # find the position if we break at the '='
21257 my $i_test = $last_equals;
21259 # Fix for issue b1229, check for break before
21260 if ( $want_break_before{ $types_to_go[$i_test] } ) {
21261 if ( $i_test > 0 ) { $i_test-- }
21263 elsif ( $types_to_go[ $i_test + 1 ] eq 'b' ) { $i_test++ }
21265 my $test_position = total_line_length( $i_test, $ii );
21267 $maximum_line_length_at_level[ $levels_to_go[$i_test] ];
21269 #------------------------------------------------------
21270 # Break if structure will reach the maximum line length
21271 #------------------------------------------------------
21273 # Historically, -lp just used one-half line length here
21274 my $len_increase = $rOpts_maximum_line_length / 2;
21276 # For -xlp, we can also use the pre-computed lengths
21277 my $min_len = $rcollapsed_length_by_seqno->{$seqno};
21278 if ( $min_len && $min_len > $len_increase ) {
21279 $len_increase = $min_len;
21284 # if we might exceed the maximum line length
21285 $lp_position_predictor + $len_increase > $mll
21287 # if a -bbx flag WANTS a break before this opening token
21289 && $rbreak_before_container_by_seqno->{$seqno} )
21291 # or we are beyond the 1/4 point and there was an old
21292 # break at an assignment (not '=>') [fix for b1035]
21294 $lp_position_predictor >
21295 $mll - $rOpts_maximum_line_length * 3 / 4
21296 && $types_to_go[$last_equals] ne '=>'
21298 $old_breakpoint_to_go[$last_equals]
21299 || ( $last_equals > 0
21300 && $old_breakpoint_to_go[ $last_equals - 1 ]
21302 || ( $last_equals > 1
21303 && $types_to_go[ $last_equals - 1 ] eq 'b'
21304 && $old_breakpoint_to_go[ $last_equals - 2 ]
21311 # then make the switch -- note that we do not set a
21312 # real breakpoint here because we may not really need
21313 # one; sub break_lists will do that if necessary.
21315 my $Kc = $K_closing_container->{$seqno};
21318 # For -lp, only if the closing token is in this
21319 # batch (c117). Otherwise it cannot be done by sub
21321 defined($Kc) && $Kc <= $K_to_go[$max_index_to_go]
21323 # For -xlp, we only need one nonblank token after
21324 # the opening token.
21325 || $rOpts_extended_line_up_parentheses
21328 $ii_begin_line = $i_test + 1;
21329 $lp_position_predictor = $test_position;
21331 #--------------------------------------------------
21332 # Fix for an opening container terminating a batch:
21333 #--------------------------------------------------
21334 # To get alignment of a -lp container with its
21335 # contents, we have to put a break after $i_test.
21336 # For $ii<$max_index_to_go, this will be done by
21337 # sub break_lists based on the indentation object.
21338 # But for $ii=$max_index_to_go, the indentation
21339 # object for this seqno will not be created until
21340 # the next batch, so we have to set a break at
21341 # $i_test right now in order to get one.
21342 if ( $ii == $max_index_to_go
21343 && !$block_type_to_go[$ii]
21346 && !$ris_excluded_lp_container->{$seqno} )
21348 $self->set_forced_lp_break( $ii_begin_line,
21354 } ## end update position predictor
21356 #------------------------
21357 # Handle decreasing depth
21358 #------------------------
21359 # Note that one token may have both decreasing and then increasing
21360 # depth. For example, (level, ci) can go from (1,1) to (2,0). So,
21361 # in this example we would first go back to (1,0) then up to (2,0)
21362 # in a single call.
21363 if ( $level < $current_level || $ci_level < $current_ci_level ) {
21365 # loop to find the first entry at or completely below this level
21367 if ($max_lp_stack) {
21369 # save index of token which closes this level
21370 if ( $rLP->[$max_lp_stack]->[_lp_object_] ) {
21372 $rLP->[$max_lp_stack]->[_lp_object_];
21374 $lp_object->set_closed($ii);
21376 my $comma_count = 0;
21377 my $arrow_count = 0;
21378 if ( $type eq '}' || $type eq ')' ) {
21379 $comma_count = $lp_comma_count{$total_depth};
21380 $arrow_count = $lp_arrow_count{$total_depth};
21381 $comma_count = 0 unless $comma_count;
21382 $arrow_count = 0 unless $arrow_count;
21385 $lp_object->set_comma_count($comma_count);
21386 $lp_object->set_arrow_count($arrow_count);
21388 # Undo any extra indentation if we saw no commas
21389 my $available_spaces =
21390 $lp_object->get_available_spaces();
21391 my $K_start = $lp_object->get_K_begin_line();
21393 if ( $available_spaces > 0
21394 && $K_start >= $K_to_go[0]
21395 && ( $comma_count <= 0 || $arrow_count > 0 ) )
21398 my $i = $lp_object->get_lp_item_index();
21400 # Safety check for a valid stack index. It
21401 # should be ok because we just checked that the
21402 # index K of the token associated with this
21403 # indentation is in this batch.
21404 if ( $i < 0 || $i > $max_lp_object_list ) {
21406 my $lno = $rLL->[$KK]->[_LINE_INDEX_];
21408 Program bug with -lp near line $lno. Stack index i=$i should be >=0 and <= max=$max_lp_object_list
21413 if ( $arrow_count == 0 ) {
21414 $rlp_object_list->[$i]
21415 ->permanently_decrease_available_spaces
21416 ($available_spaces);
21419 $rlp_object_list->[$i]
21420 ->tentatively_decrease_available_spaces
21421 ($available_spaces);
21424 my $j ( $i + 1 .. $max_lp_object_list )
21426 $rlp_object_list->[$j]
21427 ->decrease_SPACES($available_spaces);
21433 # go down one level
21436 my $rLP_top = $rLP->[$max_lp_stack];
21437 my $ci_lev = $rLP_top->[_lp_ci_level_];
21438 my $lev = $rLP_top->[_lp_level_];
21439 my $spaces = $rLP_top->[_lp_space_count_];
21440 if ( $rLP_top->[_lp_object_] ) {
21441 my $lp_obj = $rLP_top->[_lp_object_];
21442 ( $spaces, $lev, $ci_lev ) =
21443 @{ $lp_obj->get_spaces_level_ci() };
21446 # stop when we reach a level at or below the current
21448 if ( $lev <= $level && $ci_lev <= $ci_level ) {
21449 $space_count = $spaces;
21450 $current_level = $lev;
21451 $current_ci_level = $ci_lev;
21456 # reached bottom of stack .. should never happen because
21457 # only negative levels can get here, and $level was forced
21458 # to be positive above.
21461 # non-fatal, keep going except in DEVEL_MODE
21463 ##program bug with -lp: stack_error. level=$level; lev=$lev; ci_level=$ci_level; ci_lev=$ci_lev; rerun with -nlp
21465 program bug with -lp: stack_error. level=$level; ci_level=$ci_level; rerun with -nlp
21471 } ## end decreasing depth
21473 #------------------------
21474 # handle increasing depth
21475 #------------------------
21476 if ( $level > $current_level || $ci_level > $current_ci_level ) {
21478 $stack_changed = 1;
21480 # Compute the standard incremental whitespace. This will be
21481 # the minimum incremental whitespace that will be used. This
21482 # choice results in a smooth transition between the gnu-style
21483 # and the standard style.
21484 my $standard_increment =
21485 ( $level - $current_level ) *
21486 $rOpts_indent_columns +
21487 ( $ci_level - $current_ci_level ) *
21488 $rOpts_continuation_indentation;
21490 # Now we have to define how much extra incremental space
21491 # ("$available_space") we want. This extra space will be
21492 # reduced as necessary when long lines are encountered or when
21493 # it becomes clear that we do not have a good list.
21494 my $available_spaces = 0;
21495 my $align_seqno = 0;
21497 my $last_nonblank_seqno;
21498 my $last_nonblank_block_type;
21499 if ( defined($K_last_nonblank) ) {
21500 $last_nonblank_seqno =
21501 $rLL->[$K_last_nonblank]->[_TYPE_SEQUENCE_];
21502 $last_nonblank_block_type =
21503 $last_nonblank_seqno
21504 ? $rblock_type_of_seqno->{$last_nonblank_seqno}
21508 $in_lp_mode = $rLP->[$max_lp_stack]->[_lp_object_];
21510 #-----------------------------------------------
21511 # Initialize indentation spaces on empty stack..
21512 #-----------------------------------------------
21513 if ( $max_lp_stack == 0 ) {
21514 $space_count = $level * $rOpts_indent_columns;
21517 #----------------------------------------
21518 # Add the standard space increment if ...
21519 #----------------------------------------
21522 # if this is a BLOCK, add the standard increment
21523 $last_nonblank_block_type
21525 # or if this is not a sequenced item
21526 || !$last_nonblank_seqno
21528 # or this container is excluded by user rules
21529 # or contains here-docs or multiline qw text
21530 || defined($last_nonblank_seqno)
21531 && $ris_excluded_lp_container->{$last_nonblank_seqno}
21533 # or if last nonblank token was not structural indentation
21534 || $last_nonblank_type ne '{'
21536 # and do not start -lp under stress .. fixes b1244, b1255
21537 || !$in_lp_mode && $level >= $lp_cutoff_level
21542 # If we have entered lp mode, use the top lp object to get
21543 # the current indentation spaces because it may have
21544 # changed. Fixes b1285, b1286.
21546 $space_count = $in_lp_mode->get_spaces();
21548 $space_count += $standard_increment;
21551 #---------------------------------------------------------------
21552 # -lp mode: try to use space to the first non-blank level change
21553 #---------------------------------------------------------------
21556 # see how much space we have available
21557 my $test_space_count = $lp_position_predictor;
21560 $rcollapsed_length_by_seqno->{$last_nonblank_seqno};
21561 my $next_opening_too_far;
21563 if ( defined($min_len) ) {
21565 $test_space_count +
21567 $maximum_line_length_at_level[$level];
21568 if ( $excess > 0 ) {
21569 $test_space_count -= $excess;
21571 # will the next opening token be a long way out?
21572 $next_opening_too_far =
21573 $lp_position_predictor + $excess >
21574 $maximum_line_length_at_level[$level];
21578 my $rLP_top = $rLP->[$max_lp_stack];
21579 my $min_gnu_indentation = $rLP_top->[_lp_space_count_];
21580 if ( $rLP_top->[_lp_object_] ) {
21581 $min_gnu_indentation =
21582 $rLP_top->[_lp_object_]->get_spaces();
21584 $available_spaces =
21585 $test_space_count - $min_gnu_indentation;
21587 # Do not startup -lp indentation mode if no space ...
21588 # ... or if it puts the opening far to the right
21590 && ( $available_spaces <= 0 || $next_opening_too_far ) )
21592 $space_count += $standard_increment;
21593 $available_spaces = 0;
21598 $space_count = $test_space_count;
21601 if ( $available_spaces >= $standard_increment ) {
21602 $min_gnu_indentation += $standard_increment;
21604 elsif ( $available_spaces > 1 ) {
21605 $min_gnu_indentation += $available_spaces + 1;
21607 ##elsif ( $last_nonblank_token =~ /^[\{\[\(]$/ ) {
21608 elsif ( $is_opening_token{$last_nonblank_token} ) {
21609 if ( ( $tightness{$last_nonblank_token} < 2 ) ) {
21610 $min_gnu_indentation += 2;
21613 $min_gnu_indentation += 1;
21617 $min_gnu_indentation += $standard_increment;
21619 $available_spaces = $space_count - $min_gnu_indentation;
21621 if ( $available_spaces < 0 ) {
21622 $space_count = $min_gnu_indentation;
21623 $available_spaces = 0;
21625 $align_seqno = $last_nonblank_seqno;
21629 #-------------------------------------------
21630 # update the state, but not on a blank token
21631 #-------------------------------------------
21632 if ( $type ne 'b' ) {
21634 if ( $rLP->[$max_lp_stack]->[_lp_object_] ) {
21635 $rLP->[$max_lp_stack]->[_lp_object_]->set_have_child(1);
21639 #----------------------------------------
21640 # Create indentation object if in lp-mode
21641 #----------------------------------------
21646 # A negative level implies not to store the item in the
21648 my $lp_item_index = 0;
21649 if ( $level >= 0 ) {
21650 $lp_item_index = ++$max_lp_object_list;
21653 my $K_begin_line = 0;
21654 if ( $ii_begin_line >= 0
21655 && $ii_begin_line <= $max_index_to_go )
21657 $K_begin_line = $K_to_go[$ii_begin_line];
21660 # Minor Fix: when creating indentation at a side
21661 # comment we don't know what the space to the actual
21662 # next code token will be. We will allow a space for
21663 # sub correct_lp to move it in if necessary.
21665 && $max_index_to_go > 0
21668 $available_spaces += 1;
21671 $lp_object = Perl::Tidy::IndentationItem->new(
21672 spaces => $space_count,
21674 ci_level => $ci_level,
21675 available_spaces => $available_spaces,
21676 lp_item_index => $lp_item_index,
21677 align_seqno => $align_seqno,
21678 stack_depth => $max_lp_stack,
21679 K_begin_line => $K_begin_line,
21680 standard_spaces => $standard_spaces,
21684 my $tok_beg = $rLL->[$K_begin_line]->[_TOKEN_];
21685 print STDERR <<EOM;
21686 DEBUG_LP: Created object at tok=$token type=$type for seqno $align_seqno level=$level ci=$ci_level spaces=$space_count avail=$available_spaces kbeg=$K_begin_line tokbeg=$tok_beg lp=$lp_position_predictor
21690 if ( $level >= 0 ) {
21691 $rlp_object_list->[$max_lp_object_list] =
21695 ##if ( $last_nonblank_token =~ /^[\{\[\(]$/
21696 if ( $is_opening_token{$last_nonblank_token}
21697 && $last_nonblank_seqno )
21699 $rlp_object_by_seqno->{$last_nonblank_seqno} =
21704 #------------------------------------
21705 # Store this indentation on the stack
21706 #------------------------------------
21707 $rLP->[$max_lp_stack]->[_lp_ci_level_] = $ci_level;
21708 $rLP->[$max_lp_stack]->[_lp_level_] = $level;
21709 $rLP->[$max_lp_stack]->[_lp_object_] = $lp_object;
21710 $rLP->[$max_lp_stack]->[_lp_container_seqno_] =
21711 $last_nonblank_seqno;
21712 $rLP->[$max_lp_stack]->[_lp_space_count_] = $space_count;
21714 # If the opening paren is beyond the half-line length, then
21715 # we will use the minimum (standard) indentation. This will
21716 # help avoid problems associated with running out of space
21717 # near the end of a line. As a result, in deeply nested
21718 # lists, there will be some indentations which are limited
21719 # to this minimum standard indentation. But the most deeply
21720 # nested container will still probably be able to shift its
21721 # parameters to the right for proper alignment, so in most
21722 # cases this will not be noticeable.
21723 if ( $available_spaces > 0 && $lp_object ) {
21725 $maximum_line_length_at_level[$level] -
21726 $rOpts_maximum_line_length / 2;
21727 $lp_object->tentatively_decrease_available_spaces(
21729 if ( $space_count > $halfway );
21732 } ## end increasing depth
21734 #------------------
21735 # Handle all tokens
21736 #------------------
21737 if ( $type ne 'b' ) {
21739 # Count commas and look for non-list characters. Once we see a
21740 # non-list character, we give up and don't look for any more
21742 if ( $type eq '=>' ) {
21743 $lp_arrow_count{$total_depth}++;
21745 # remember '=>' like '=' for estimating breaks (but see
21746 # above note for b1035)
21747 $last_lp_equals{$total_depth} = $ii;
21750 elsif ( $type eq ',' ) {
21751 $lp_comma_count{$total_depth}++;
21754 elsif ( $is_assignment{$type} ) {
21755 $last_lp_equals{$total_depth} = $ii;
21758 # this token might start a new line if ..
21761 # this is the first nonblank token of the line
21762 $ii == 1 && $types_to_go[0] eq 'b'
21764 # or previous character was one of these:
21766 || $hash_test2{$last_nonblank_type}
21768 # or previous character was opening and this is not closing
21769 || ( $last_nonblank_type eq '{' && $type ne '}' )
21770 || ( $last_nonblank_type eq '(' and $type ne ')' )
21772 # or this token is one of these:
21773 # /^([\.]|\|\||\&\&)$/
21774 || $hash_test3{$type}
21776 # or this is a closing structure
21777 || ( $last_nonblank_type eq '}'
21778 && $last_nonblank_token eq $last_nonblank_type )
21780 # or previous token was keyword 'return'
21782 $last_nonblank_type eq 'k'
21783 && ( $last_nonblank_token eq 'return'
21787 # or starting a new line at certain keywords is fine
21789 && $is_if_unless_and_or_last_next_redo_return{$token} )
21791 # or this is after an assignment after a closing structure
21793 $is_assignment{$last_nonblank_type}
21796 $hash_test1{$last_last_nonblank_type}
21798 # and it is significantly to the right
21799 || $lp_position_predictor > (
21800 $maximum_line_length_at_level[$level] -
21801 $rOpts_maximum_line_length / 2
21807 check_for_long_gnu_style_lines( $ii, $rlp_object_list );
21808 $ii_begin_line = $ii;
21810 # back up 1 token if we want to break before that type
21811 # otherwise, we may strand tokens like '?' or ':' on a line
21812 if ( $ii_begin_line > 0 ) {
21813 if ( $last_nonblank_type eq 'k' ) {
21815 if ( $want_break_before{$last_nonblank_token} ) {
21819 elsif ( $want_break_before{$last_nonblank_type} ) {
21823 } ## end if ( $ii == 1 && $types_to_go...)
21825 $K_last_nonblank = $KK;
21827 $last_last_nonblank_type = $last_nonblank_type;
21828 $last_nonblank_type = $type;
21829 $last_nonblank_token = $token;
21831 } ## end if ( $type ne 'b' )
21833 # remember the predicted position of this token on the output line
21834 if ( $ii > $ii_begin_line ) {
21836 ## NOTE: this is a critical loop - the following call has been
21837 ## expanded for about 2x speedup:
21838 ## $lp_position_predictor =
21839 ## total_line_length( $ii_begin_line, $ii );
21841 my $indentation = $leading_spaces_to_go[$ii_begin_line];
21842 if ( ref($indentation) ) {
21843 $indentation = $indentation->get_spaces();
21845 $lp_position_predictor =
21847 $summed_lengths_to_go[ $ii + 1 ] -
21848 $summed_lengths_to_go[$ii_begin_line];
21851 $lp_position_predictor =
21852 $space_count + $token_lengths_to_go[$ii];
21855 # Store the indentation object for this token.
21856 # This allows us to manipulate the leading whitespace
21857 # (in case we have to reduce indentation to fit a line) without
21858 # having to change any token values.
21860 #---------------------------------------------------------------
21861 # replace leading whitespace with indentation objects where used
21862 #---------------------------------------------------------------
21863 if ( $rLP->[$max_lp_stack]->[_lp_object_] ) {
21864 my $lp_object = $rLP->[$max_lp_stack]->[_lp_object_];
21865 $leading_spaces_to_go[$ii] = $lp_object;
21866 if ( $max_lp_stack > 0
21868 && $rLP->[ $max_lp_stack - 1 ]->[_lp_object_] )
21870 $reduced_spaces_to_go[$ii] =
21871 $rLP->[ $max_lp_stack - 1 ]->[_lp_object_];
21874 $reduced_spaces_to_go[$ii] = $lp_object;
21877 } ## end loop over all tokens in this batch
21879 undo_incomplete_lp_indentation($rlp_object_list)
21880 if ( !$rOpts_extended_line_up_parentheses );
21883 } ## end sub set_lp_indentation
21885 sub check_for_long_gnu_style_lines {
21887 # look at the current estimated maximum line length, and
21888 # remove some whitespace if it exceeds the desired maximum
21889 my ( $mx_index_to_go, $rlp_object_list ) = @_;
21891 my $max_lp_object_list = @{$rlp_object_list} - 1;
21893 # nothing can be done if no stack items defined for this line
21894 return if ( $max_lp_object_list < 0 );
21896 # see if we have exceeded the maximum desired line length
21897 # keep 2 extra free because they are needed in some cases
21898 # (result of trial-and-error testing)
21899 my $spaces_needed =
21900 $lp_position_predictor -
21901 $maximum_line_length_at_level[ $levels_to_go[$mx_index_to_go] ] + 2;
21903 return if ( $spaces_needed <= 0 );
21905 # We are over the limit, so try to remove a requested number of
21906 # spaces from leading whitespace. We are only allowed to remove
21907 # from whitespace items created on this batch, since others have
21908 # already been used and cannot be undone.
21909 my @candidates = ();
21911 # loop over all whitespace items created for the current batch
21912 foreach my $i ( 0 .. $max_lp_object_list ) {
21913 my $item = $rlp_object_list->[$i];
21915 # item must still be open to be a candidate (otherwise it
21916 # cannot influence the current token)
21917 next if ( $item->get_closed() >= 0 );
21919 my $available_spaces = $item->get_available_spaces();
21921 if ( $available_spaces > 0 ) {
21922 push( @candidates, [ $i, $available_spaces ] );
21926 return unless (@candidates);
21928 # sort by available whitespace so that we can remove whitespace
21929 # from the maximum available first.
21931 sort { $b->[1] <=> $a->[1] || $a->[0] <=> $b->[0] } @candidates;
21933 # keep removing whitespace until we are done or have no more
21934 foreach my $candidate (@candidates) {
21935 my ( $i, $available_spaces ) = @{$candidate};
21936 my $deleted_spaces =
21937 ( $available_spaces > $spaces_needed )
21939 : $available_spaces;
21941 # remove the incremental space from this item
21942 $rlp_object_list->[$i]->decrease_available_spaces($deleted_spaces);
21946 # update the leading whitespace of this item and all items
21947 # that came after it
21949 while ( ++$i <= $max_lp_object_list ) {
21951 my $old_spaces = $rlp_object_list->[$i]->get_spaces();
21952 if ( $old_spaces >= $deleted_spaces ) {
21953 $rlp_object_list->[$i]->decrease_SPACES($deleted_spaces);
21956 # shouldn't happen except for code bug:
21958 # non-fatal, keep going except in DEVEL_MODE
21960 my $level = $rlp_object_list->[$i_debug]->get_level();
21962 $rlp_object_list->[$i_debug]->get_ci_level();
21963 my $old_level = $rlp_object_list->[$i]->get_level();
21965 $rlp_object_list->[$i]->get_ci_level();
21967 program bug with -lp: want to delete $deleted_spaces from item $i, but old=$old_spaces deleted: lev=$level ci=$ci_level deleted: level=$old_level ci=$ci_level
21972 $lp_position_predictor -= $deleted_spaces;
21973 $spaces_needed -= $deleted_spaces;
21974 last unless ( $spaces_needed > 0 );
21977 } ## end sub check_for_long_gnu_style_lines
21979 sub undo_incomplete_lp_indentation {
21981 #------------------------------------------------------------------
21982 # Undo indentation for all incomplete -lp indentation levels of the
21983 # current batch unless -xlp is set.
21984 #------------------------------------------------------------------
21986 # This routine is called once after each output stream batch is
21987 # finished to undo indentation for all incomplete -lp indentation
21988 # levels. If this routine is called then comments and blank lines will
21989 # disrupt this indentation style. In older versions of perltidy this
21990 # was always done because it could cause problems otherwise, but recent
21991 # improvements allow fairly good results to be obtained by skipping
21992 # this step with the -xlp flag.
21993 my ($rlp_object_list) = @_;
21995 my $max_lp_object_list = @{$rlp_object_list} - 1;
21997 # nothing to do if no stack items defined for this line
21998 return if ( $max_lp_object_list < 0 );
22000 # loop over all whitespace items created for the current batch
22001 foreach my $i ( 0 .. $max_lp_object_list ) {
22002 my $item = $rlp_object_list->[$i];
22004 # only look for open items
22005 next if ( $item->get_closed() >= 0 );
22007 # Tentatively remove all of the available space
22008 # (The vertical aligner will try to get it back later)
22009 my $available_spaces = $item->get_available_spaces();
22010 if ( $available_spaces > 0 ) {
22012 # delete incremental space for this item
22013 $rlp_object_list->[$i]
22014 ->tentatively_decrease_available_spaces($available_spaces);
22016 # Reduce the total indentation space of any nodes that follow
22017 # Note that any such nodes must necessarily be dependents
22019 foreach ( $i + 1 .. $max_lp_object_list ) {
22020 $rlp_object_list->[$_]->decrease_SPACES($available_spaces);
22025 } ## end sub undo_incomplete_lp_indentation
22026 } ## end closure set_lp_indentation
22028 #----------------------------------------------------------------------
22029 # sub to set a requested break before an opening container in -lp mode.
22030 #----------------------------------------------------------------------
22031 sub set_forced_lp_break {
22033 my ( $self, $i_begin_line, $i_opening ) = @_;
22036 # $i_begin_line = index of break in the _to_go arrays
22037 # $i_opening = index of the opening container
22039 # Set any requested break at a token before this opening container
22040 # token. This is often an '=' or '=>' but can also be things like
22041 # '.', ',', 'return'. It was defined by sub set_lp_indentation.
22044 # For intact containers, call this at the closing token.
22045 # For broken containers, call this at the opening token.
22046 # This will avoid needless breaks when it turns out that the
22047 # container does not actually get broken. This isn't known until
22048 # the closing container for intact blocks.
22051 if ( $i_begin_line < 0
22052 || $i_begin_line > $max_index_to_go );
22054 # Handle request to put a break break immediately before this token.
22055 # We may not want to do that since we are also breaking after it.
22056 if ( $i_begin_line == $i_opening ) {
22058 # The following rules should be reviewed. We may want to always
22059 # allow the break. If we do not do the break, the indentation
22062 # RULE: don't break before it unless it is welded to a qw.
22063 # This works well, but we may want to relax this to allow
22064 # breaks in additional cases.
22066 if ( !$self->[_rK_weld_right_]->{ $K_to_go[$i_opening] } );
22067 return unless ( $types_to_go[$max_index_to_go] eq 'q' );
22070 # Only break for breakpoints at the same
22071 # indentation level as the opening paren
22072 my $test1 = $nesting_depth_to_go[$i_opening];
22073 my $test2 = $nesting_depth_to_go[$i_begin_line];
22074 return if ( $test2 != $test1 );
22076 # Back up at a blank (fixes case b932)
22077 my $ibr = $i_begin_line - 1;
22079 && $types_to_go[$ibr] eq 'b' )
22084 my $i_nonblank = $self->set_forced_breakpoint($ibr);
22086 # Crude patch to prevent sub recombine_breakpoints from undoing
22087 # this break, especially after an '='. It will leave old
22088 # breakpoints alone. See c098/x045 for some examples.
22089 if ( defined($i_nonblank) ) {
22090 $old_breakpoint_to_go[$i_nonblank] = 1;
22094 } ## end sub set_forced_lp_break
22096 sub reduce_lp_indentation {
22098 # reduce the leading whitespace at token $i if possible by $spaces_needed
22099 # (a large value of $spaces_needed will remove all excess space)
22100 # NOTE: to be called from break_lists only for a sequence of tokens
22101 # contained between opening and closing parens/braces/brackets
22103 my ( $self, $i, $spaces_wanted ) = @_;
22104 my $deleted_spaces = 0;
22106 my $item = $leading_spaces_to_go[$i];
22107 my $available_spaces = $item->get_available_spaces();
22110 $available_spaces > 0
22111 && ( ( $spaces_wanted <= $available_spaces )
22112 || !$item->get_have_child() )
22116 # we'll remove these spaces, but mark them as recoverable
22118 $item->tentatively_decrease_available_spaces($spaces_wanted);
22121 return $deleted_spaces;
22122 } ## end sub reduce_lp_indentation
22124 ###########################################################
22125 # CODE SECTION 13: Preparing batches for vertical alignment
22126 ###########################################################
22128 sub check_convey_batch_input {
22130 # Check for valid input to sub convey_batch_to_vertical_aligner. An
22131 # error here would most likely be due to an error in the calling
22132 # routine 'sub grind_batch_of_CODE'.
22133 my ( $self, $ri_first, $ri_last ) = @_;
22135 if ( !defined($ri_first) || !defined($ri_last) ) {
22137 Undefined line ranges ri_first and/r ri_last
22141 my $nmax = @{$ri_first} - 1;
22142 my $nmax_check = @{$ri_last} - 1;
22143 if ( $nmax < 0 || $nmax_check < 0 || $nmax != $nmax_check ) {
22145 Line range index error: nmax=$nmax but nmax_check=$nmax_check
22146 These should be equal and >=0
22149 my ( $ibeg, $iend );
22150 foreach my $n ( 0 .. $nmax ) {
22151 my $ibeg_m = $ibeg;
22152 my $iend_m = $iend;
22153 $ibeg = $ri_first->[$n];
22154 $iend = $ri_last->[$n];
22155 if ( $ibeg < 0 || $iend < $ibeg || $iend > $max_index_to_go ) {
22157 Bad line range at line index $n of $nmax: ibeg=$ibeg, iend=$iend
22158 These should have iend >= ibeg and be in the range (0..$max_index_to_go)
22161 next if ( $n == 0 );
22162 if ( $ibeg <= $iend_m ) {
22164 Line ranges overlap: iend=$iend_m at line $n-1 but ibeg=$ibeg for line $n
22169 } ## end sub check_convey_batch_input
22171 sub convey_batch_to_vertical_aligner {
22175 # This routine receives a batch of code for which the final line breaks
22176 # have been defined. Here we prepare the lines for passing to the vertical
22177 # aligner. We do the following tasks:
22178 # - mark certain vertical alignment tokens, such as '=', in each line
22179 # - make minor indentation adjustments
22180 # - do logical padding: insert extra blank spaces to help display certain
22181 # logical constructions
22183 my $this_batch = $self->[_this_batch_];
22184 my $ri_first = $this_batch->[_ri_first_];
22185 my $ri_last = $this_batch->[_ri_last_];
22187 $self->check_convey_batch_input( $ri_first, $ri_last ) if (DEVEL_MODE);
22189 my $n_last_line = @{$ri_first} - 1;
22191 my $do_not_pad = $this_batch->[_do_not_pad_];
22192 my $peak_batch_size = $this_batch->[_peak_batch_size_];
22193 my $starting_in_quote = $this_batch->[_starting_in_quote_];
22194 my $ending_in_quote = $this_batch->[_ending_in_quote_];
22195 my $is_static_block_comment = $this_batch->[_is_static_block_comment_];
22196 my $rix_seqno_controlling_ci = $this_batch->[_rix_seqno_controlling_ci_];
22197 my $batch_CODE_type = $this_batch->[_batch_CODE_type_];
22199 my $rLL = $self->[_rLL_];
22200 my $Klimit = $self->[_Klimit_];
22201 my $rblock_type_of_seqno = $self->[_rblock_type_of_seqno_];
22202 my $ris_list_by_seqno = $self->[_ris_list_by_seqno_];
22204 my $ibeg_next = $ri_first->[0];
22205 my $iend_next = $ri_last->[0];
22207 my $type_beg_next = $types_to_go[$ibeg_next];
22208 my $type_end_next = $types_to_go[$iend_next];
22209 my $token_beg_next = $tokens_to_go[$ibeg_next];
22211 my $is_block_comment = $max_index_to_go == 0 && $types_to_go[0] eq '#';
22213 my $rindentation_list = [0]; # ref to indentations for each line
22214 my ( $cscw_block_comment, $closing_side_comment );
22215 if ($rOpts_closing_side_comments) {
22216 ( $closing_side_comment, $cscw_block_comment ) =
22217 $self->add_closing_side_comment( $ri_first, $ri_last );
22220 # flush before a long if statement to avoid unwanted alignment
22221 if ( $n_last_line > 0
22222 && $type_beg_next eq 'k'
22223 && $is_if_unless{$token_beg_next} )
22225 $self->flush_vertical_aligner();
22228 $self->undo_ci( $ri_first, $ri_last, $rix_seqno_controlling_ci )
22229 if ( $n_last_line > 0 || $rOpts_extended_continuation_indentation );
22231 $self->set_logical_padding( $ri_first, $ri_last, $peak_batch_size,
22232 $starting_in_quote )
22233 if ( $n_last_line > 0 && $rOpts_logical_padding );
22235 if (DEVEL_MODE) { $self->check_batch_summed_lengths() }
22237 # ----------------------------------------------------------
22238 # define the vertical alignments for all lines of this batch
22239 # ----------------------------------------------------------
22240 my $rline_alignments =
22241 $self->make_vertical_alignments( $ri_first, $ri_last );
22243 # ----------------------------------------------
22244 # loop to send each line to the vertical aligner
22245 # ----------------------------------------------
22246 my ( $type_beg, $type_end, $token_beg );
22248 for my $n ( 0 .. $n_last_line ) {
22250 # ----------------------------------------------------------------
22251 # This hash will hold the args for vertical alignment of this line
22252 # We will populate it as we go.
22253 # ----------------------------------------------------------------
22254 my $rvao_args = {};
22256 my $type_beg_last = $type_beg;
22257 my $type_end_last = $type_end;
22259 my $ibeg = $ibeg_next;
22260 my $iend = $iend_next;
22261 my $Kbeg = $K_to_go[$ibeg];
22262 my $Kend = $K_to_go[$iend];
22264 $type_beg = $type_beg_next;
22265 $type_end = $type_end_next;
22266 $token_beg = $token_beg_next;
22268 # ---------------------------------------------------
22269 # Define the check value 'Kend' to send for this line
22270 # ---------------------------------------------------
22271 # The 'Kend' value is an integer for checking that lines come out of
22272 # the far end of the pipeline in the right order. It increases
22273 # linearly along the token stream. But we only send ending K values of
22274 # non-comments down the pipeline. This is equivalent to checking that
22275 # the last CODE_type is blank or equal to 'VER'. See also sub
22276 # resync_lines_and_tokens for related coding. Note that
22277 # '$batch_CODE_type' is the code type of the line to which the ending
22280 $batch_CODE_type && $batch_CODE_type ne 'VER' ? undef : $Kend;
22282 # $ljump is a level jump needed by 'sub final_indentation_adjustment'
22285 # Get some vars on line [n+1], if any:
22286 if ( $n < $n_last_line ) {
22287 $ibeg_next = $ri_first->[ $n + 1 ];
22288 $iend_next = $ri_last->[ $n + 1 ];
22290 $type_beg_next = $types_to_go[$ibeg_next];
22291 $type_end_next = $types_to_go[$iend_next];
22292 $token_beg_next = $tokens_to_go[$ibeg_next];
22294 my $Kbeg_next = $K_to_go[$ibeg_next];
22295 $ljump = $rLL->[$Kbeg_next]->[_LEVEL_] - $rLL->[$Kend]->[_LEVEL_];
22297 elsif ( !$is_block_comment && $Kend < $Klimit ) {
22299 # Patch for git #51, a bare closing qw paren was not outdented
22300 # if the flag '-nodelete-old-newlines is set
22301 # Note that we are just looking ahead for the next nonblank
22302 # character. We could scan past an arbitrary number of block
22303 # comments or hanging side comments by calling K_next_code, but it
22304 # could add significant run time with very little to be gained.
22305 my $Kbeg_next = $Kend + 1;
22306 if ( $Kbeg_next < $Klimit
22307 && $rLL->[$Kbeg_next]->[_TYPE_] eq 'b' )
22312 $rLL->[$Kbeg_next]->[_LEVEL_] - $rLL->[$Kend]->[_LEVEL_];
22315 # ---------------------------------------------
22316 # get the vertical alignment info for this line
22317 # ---------------------------------------------
22319 # The lines are broken into fields which can be spaced by the vertical
22320 # to achieve vertical alignment. These fields are the actual text
22321 # which will be output, so from here on no more changes can be made to
22323 my $rline_alignment = $rline_alignments->[$n];
22324 my ( $rtokens, $rfields, $rpatterns, $rfield_lengths ) =
22325 @{$rline_alignment};
22327 # Programming check: (shouldn't happen)
22328 # The number of tokens which separate the fields must always be
22329 # one less than the number of fields. If this is not true then
22330 # an error has been introduced in sub make_alignment_patterns.
22332 if ( @{$rfields} && ( @{$rtokens} != ( @{$rfields} - 1 ) ) ) {
22333 my $nt = @{$rtokens};
22334 my $nf = @{$rfields};
22336 Program bug in Perl::Tidy::Formatter, probably in sub 'make_alignment_patterns':
22337 The number of tokens = $nt should be one less than number of fields: $nf
22343 # --------------------------------------
22344 # get the final indentation of this line
22345 # --------------------------------------
22346 my ( $indentation, $lev, $level_end, $terminal_type,
22347 $terminal_block_type, $is_semicolon_terminated, $is_outdented_line )
22348 = $self->final_indentation_adjustment( $ibeg, $iend, $rfields,
22349 $rpatterns, $ri_first, $ri_last,
22350 $rindentation_list, $ljump, $starting_in_quote,
22351 $is_static_block_comment, );
22353 # --------------------------------
22354 # define flag 'outdent_long_lines'
22355 # --------------------------------
22357 # we will allow outdenting of long lines..
22358 # which are long quotes, if allowed
22359 ( $type_beg eq 'Q' && $rOpts_outdent_long_quotes )
22361 # which are long block comments, if allowed
22364 && $rOpts_outdent_long_comments
22366 # but not if this is a static block comment
22367 && !$is_static_block_comment
22371 $rvao_args->{outdent_long_lines} = 1;
22373 # convert -lp indentation objects to spaces to allow outdenting
22374 if ( ref($indentation) ) {
22375 $indentation = $indentation->get_spaces();
22379 # --------------------------------------------------
22380 # define flags 'break_alignment_before' and '_after'
22381 # --------------------------------------------------
22383 # These flags tell the vertical aligner to stop alignment before or
22385 if ($is_outdented_line) {
22386 $rvao_args->{break_alignment_before} = 1;
22387 $rvao_args->{break_alignment_after} = 1;
22389 elsif ($do_not_pad) {
22390 $rvao_args->{break_alignment_before} = 1;
22393 # flush at an 'if' which follows a line with (1) terminal semicolon
22394 # or (2) terminal block_type which is not an 'if'. This prevents
22395 # unwanted alignment between the lines.
22396 elsif ( $type_beg eq 'k' && $token_beg eq 'if' ) {
22401 my $Km = $Kbeg - 1;
22402 $type_m = $rLL->[$Km]->[_TYPE_];
22403 if ( $type_m eq 'b' && $Km > 0 ) {
22405 $type_m = $rLL->[$Km]->[_TYPE_];
22407 if ( $type_m eq '#' && $Km > 0 ) {
22409 $type_m = $rLL->[$Km]->[_TYPE_];
22410 if ( $type_m eq 'b' && $Km > 0 ) {
22412 $type_m = $rLL->[$Km]->[_TYPE_];
22416 my $seqno_m = $rLL->[$Km]->[_TYPE_SEQUENCE_];
22418 $block_type_m = $rblock_type_of_seqno->{$seqno_m};
22422 # break after anything that is not if-like
22425 || ( $type_m eq '}'
22427 && $block_type_m ne 'if'
22428 && $block_type_m ne 'unless'
22429 && $block_type_m ne 'elsif'
22430 && $block_type_m ne 'else' )
22433 $rvao_args->{break_alignment_before} = 1;
22437 # ----------------------------------
22438 # define 'rvertical_tightness_flags'
22439 # ----------------------------------
22440 # These flags tell the vertical aligner if/when to combine consecutive
22441 # lines, based on the user input parameters.
22442 $rvao_args->{rvertical_tightness_flags} =
22443 $self->set_vertical_tightness_flags( $n, $n_last_line, $ibeg, $iend,
22444 $ri_first, $ri_last, $ending_in_quote, $closing_side_comment )
22445 if ( !$is_block_comment );
22447 # ----------------------------------
22448 # define 'is_terminal_ternary' flag
22449 # ----------------------------------
22451 # This flag is set at the final ':' of a ternary chain to request
22452 # vertical alignment of the final term. Here is a slightly complex
22455 # $self->{_text} = (
22457 # : $type eq 'item' ? "the $section entry"
22458 # : "the section on $section"
22462 # ? ( $section ? ' in ' : '' ) . "the $page$page_ext manpage"
22463 # : ' elsewhere in this document'
22466 if ( $type_beg eq ':' || $n > 0 && $type_end_last eq ':' ) {
22468 my $is_terminal_ternary = 0;
22469 my $last_leading_type = $n > 0 ? $type_beg_last : ':';
22470 if ( $terminal_type ne ';'
22471 && $n_last_line > $n
22472 && $level_end == $lev )
22474 my $Kbeg_next = $K_to_go[$ibeg_next];
22475 $level_end = $rLL->[$Kbeg_next]->[_LEVEL_];
22476 $terminal_type = $rLL->[$Kbeg_next]->[_TYPE_];
22479 $last_leading_type eq ':'
22480 && ( ( $terminal_type eq ';' && $level_end <= $lev )
22481 || ( $terminal_type ne ':' && $level_end < $lev ) )
22485 # the terminal term must not contain any ternary terms, as in
22487 # $Is_MSWin32 ? ".\\echo$$"
22488 # : $Is_MacOS ? ":echo$$"
22489 # : ( $Is_NetWare ? "echo$$" : "./echo$$" )
22491 $is_terminal_ternary = 1;
22493 my $KP = $rLL->[$Kbeg]->[_KNEXT_SEQ_ITEM_];
22494 while ( defined($KP) && $KP <= $Kend ) {
22495 my $type_KP = $rLL->[$KP]->[_TYPE_];
22496 if ( $type_KP eq '?' || $type_KP eq ':' ) {
22497 $is_terminal_ternary = 0;
22500 $KP = $rLL->[$KP]->[_KNEXT_SEQ_ITEM_];
22503 $rvao_args->{is_terminal_ternary} = $is_terminal_ternary;
22506 # -------------------------------------------------
22507 # add any new closing side comment to the last line
22508 # -------------------------------------------------
22509 if ( $closing_side_comment && $n == $n_last_line && @{$rfields} ) {
22511 $rfields->[-1] .= " $closing_side_comment";
22513 # NOTE: Patch for csc. We can just use 1 for the length of the csc
22514 # because its length should not be a limiting factor from here on.
22515 $rfield_lengths->[-1] += 2;
22519 [ $rtokens, $rfields, $rpatterns, $rfield_lengths ];
22522 # ------------------------
22523 # define flag 'list_seqno'
22524 # ------------------------
22526 # This flag indicates if this line is contained in a multi-line list
22527 if ( !$is_block_comment ) {
22528 my $parent_seqno = $parent_seqno_to_go[$ibeg];
22529 $rvao_args->{list_seqno} = $ris_list_by_seqno->{$parent_seqno};
22532 # The alignment tokens have been marked with nesting_depths, so we need
22533 # to pass nesting depths to the vertical aligner. They remain invariant
22534 # under all formatting operations. Previously, level values were sent
22535 # to the aligner. But they can be altered in welding and other
22536 # operations, and this can lead to alignment errors.
22537 my $nesting_depth_beg = $nesting_depth_to_go[$ibeg];
22538 my $nesting_depth_end = $nesting_depth_to_go[$iend];
22540 # A quirk in the definition of nesting depths is that the closing token
22541 # has the same depth as internal tokens. The vertical aligner is
22542 # programmed to expect them to have the lower depth, so we fix this.
22543 if ( $is_closing_type{ $types_to_go[$ibeg] } ) { $nesting_depth_beg-- }
22544 if ( $is_closing_type{ $types_to_go[$iend] } ) { $nesting_depth_end-- }
22546 # Adjust nesting depths to keep -lp indentation for qw lists. This is
22547 # required because qw lists contained in brackets do not get nesting
22548 # depths, but the vertical aligner is watching nesting depth changes to
22549 # decide if a -lp block is intact. Without this patch, qw lists
22550 # enclosed in angle brackets will not get the correct -lp indentation.
22552 # Looking for line with isolated qw ...
22553 if ( $rOpts_line_up_parentheses
22554 && $type_beg eq 'q'
22555 && $ibeg == $iend )
22558 # ... which is part of a multiline qw
22559 my $Km = $self->K_previous_nonblank($Kbeg);
22560 my $Kp = $self->K_next_nonblank($Kbeg);
22561 if ( defined($Km) && $rLL->[$Km]->[_TYPE_] eq 'q'
22562 || defined($Kp) && $rLL->[$Kp]->[_TYPE_] eq 'q' )
22564 $nesting_depth_beg++;
22565 $nesting_depth_end++;
22569 # ---------------------------------
22570 # define flag 'forget_side_comment'
22571 # ---------------------------------
22573 # This flag tells the vertical aligner to reset the side comment
22574 # location if we are entering a new block from level 0. This is
22575 # intended to keep side comments from drifting too far to the right.
22576 if ( $terminal_block_type
22577 && $nesting_depth_end > $nesting_depth_beg )
22579 my $level_adj = $lev;
22580 my $radjusted_levels = $self->[_radjusted_levels_];
22581 if ( defined($radjusted_levels) && @{$radjusted_levels} == @{$rLL} )
22583 $level_adj = $radjusted_levels->[$Kbeg];
22584 if ( $level_adj < 0 ) { $level_adj = 0 }
22586 if ( $level_adj == 0 ) {
22587 $rvao_args->{forget_side_comment} = 1;
22591 # -----------------------------------
22592 # Store the remaining non-flag values
22593 # -----------------------------------
22594 $rvao_args->{Kend} = $Kend_code;
22595 $rvao_args->{ci_level} = $ci_levels_to_go[$ibeg];
22596 $rvao_args->{indentation} = $indentation;
22597 $rvao_args->{level_end} = $nesting_depth_end;
22598 $rvao_args->{level} = $nesting_depth_beg;
22599 $rvao_args->{rline_alignment} = $rline_alignment;
22600 $rvao_args->{maximum_line_length} =
22601 $maximum_line_length_at_level[ $levels_to_go[$ibeg] ];
22603 # --------------------------------------
22604 # send this line to the vertical aligner
22605 # --------------------------------------
22606 my $vao = $self->[_vertical_aligner_object_];
22607 $vao->valign_input($rvao_args);
22611 # Set flag indicating if this line ends in an opening
22612 # token and is very short, so that a blank line is not
22613 # needed if the subsequent line is a comment.
22614 # Examples of what we are looking for:
22620 $self->[_last_output_short_opening_token_]
22622 # line ends in opening token
22624 = $is_opening_type{$type_end}
22628 # line has either single opening token
22631 # or is a single token followed by opening token.
22632 # Note that sub identifiers have blanks like 'sub doit'
22633 # $token_beg !~ /\s+/
22634 || ( $Kend - $Kbeg <= 2 && index( $token_beg, SPACE ) < 0 )
22637 # and limit total to 10 character widths
22638 && token_sequence_length( $ibeg, $iend ) <= 10;
22640 } ## end of loop to output each line
22642 # remember indentation of lines containing opening containers for
22643 # later use by sub final_indentation_adjustment
22644 $self->save_opening_indentation( $ri_first, $ri_last, $rindentation_list )
22645 if ( !$is_block_comment );
22647 # output any new -cscw block comment
22648 if ($cscw_block_comment) {
22649 $self->flush_vertical_aligner();
22650 my $file_writer_object = $self->[_file_writer_object_];
22651 $file_writer_object->write_code_line( $cscw_block_comment . "\n" );
22654 } ## end sub convey_batch_to_vertical_aligner
22656 sub check_batch_summed_lengths {
22658 my ( $self, $msg ) = @_;
22659 $msg = EMPTY_STRING unless defined($msg);
22660 my $rLL = $self->[_rLL_];
22662 # Verify that the summed lengths are correct. We want to be sure that
22663 # errors have not been introduced by programming changes. Summed lengths
22664 # are defined in sub $store_token. Operations like padding and unmasking
22665 # semicolons can change token lengths, but those operations are expected to
22666 # update the summed lengths when they make changes. So the summed lengths
22667 # should always be correct.
22668 foreach my $i ( 0 .. $max_index_to_go ) {
22670 $summed_lengths_to_go[ $i + 1 ] - $summed_lengths_to_go[$i];
22671 my $len_tok_i = $token_lengths_to_go[$i];
22672 my $KK = $K_to_go[$i];
22674 if ( defined($KK) ) { $len_tok_K = $rLL->[$KK]->[_TOKEN_LENGTH_] }
22675 if ( $len_by_sum != $len_tok_i
22676 || defined($len_tok_K) && $len_by_sum != $len_tok_K )
22678 my $lno = defined($KK) ? $rLL->[$KK]->[_LINE_INDEX_] + 1 : "undef";
22679 $KK = 'undef' unless defined($KK);
22680 my $tok = $tokens_to_go[$i];
22681 my $type = $types_to_go[$i];
22683 Summed lengths are appear to be incorrect. $msg
22684 lengths disagree: token length by sum=$len_by_sum but token_length_to_go[$i] = $len_tok_i and rLL->[$KK]->[_TOKEN_LENGTH_]=$len_tok_K
22685 near line $lno starting with '$tokens_to_go[0]..' at token i=$i K=$KK token_type='$type' token='$tok'
22690 } ## end sub check_batch_summed_lengths
22692 { ## begin closure set_vertical_alignment_markers
22693 my %is_vertical_alignment_type;
22694 my %is_not_vertical_alignment_token;
22695 my %is_vertical_alignment_keyword;
22696 my %is_terminal_alignment_type;
22697 my %is_low_level_alignment_token;
22703 # Replaced =~ and // in the list. // had been removed in RT 119588
22705 = **= += *= &= <<= &&= -= /= |= >>= ||= //= .= %= ^= x=
22706 { ? : => && || ~~ !~~ =~ !~ // <=> ->
22708 @is_vertical_alignment_type{@q} = (1) x scalar(@q);
22710 # These 'tokens' are not aligned. We need this to remove [
22711 # from the above list because it has type ='{'
22713 @is_not_vertical_alignment_token{@q} = (1) x scalar(@q);
22715 # these are the only types aligned at a line end
22717 @is_terminal_alignment_type{@q} = (1) x scalar(@q);
22719 # these tokens only align at line level
22721 @is_low_level_alignment_token{@q} = (1) x scalar(@q);
22723 # eq and ne were removed from this list to improve alignment chances
22724 @q = qw(if unless and or err for foreach while until);
22725 @is_vertical_alignment_keyword{@q} = (1) x scalar(@q);
22728 sub set_vertical_alignment_markers {
22730 # This routine takes the first step toward vertical alignment of the
22731 # lines of output text. It looks for certain tokens which can serve as
22732 # vertical alignment markers (such as an '=').
22734 # Method: We look at each token $i in this output batch and set
22735 # $ralignment_type_to_go->[$i] equal to those tokens at which we would
22736 # accept vertical alignment.
22738 my ( $self, $ri_first, $ri_last ) = @_;
22740 my $ralignment_type_to_go;
22741 my $ralignment_counts = [];
22742 my $ralignment_hash_by_line = [];
22744 # NOTE: closing side comments can insert up to 2 additional tokens
22745 # beyond the original $max_index_to_go, so we need to check ri_last for
22747 my $max_line = @{$ri_first} - 1;
22748 my $max_i = $ri_last->[$max_line];
22749 if ( $max_i < $max_index_to_go ) { $max_i = $max_index_to_go }
22751 # -----------------------------------------------------------------
22753 # - no alignments if there is only 1 token.
22754 # - and nothing to do if we aren't allowed to change whitespace.
22755 # -----------------------------------------------------------------
22756 if ( $max_i <= 0 || !$rOpts_add_whitespace ) {
22757 return ( $ralignment_type_to_go, $ralignment_counts,
22758 $ralignment_hash_by_line );
22761 my $rspecial_side_comment_type = $self->[_rspecial_side_comment_type_];
22762 my $ris_function_call_paren = $self->[_ris_function_call_paren_];
22763 my $rLL = $self->[_rLL_];
22765 # -------------------------------
22766 # First handle any side comment.
22767 # -------------------------------
22768 my $i_terminal = $max_i;
22769 if ( $types_to_go[$max_i] eq '#' ) {
22771 # We know $max_i > 0 if we get here.
22773 if ( $i_terminal > 0 && $types_to_go[$i_terminal] eq 'b' ) {
22777 my $token = $tokens_to_go[$max_i];
22778 my $KK = $K_to_go[$max_i];
22780 # Do not align various special side comments
22781 my $do_not_align = (
22783 # it is any specially marked side comment
22784 ( defined($KK) && $rspecial_side_comment_type->{$KK} )
22786 # or it is a static side comment
22787 || ( $rOpts->{'static-side-comments'}
22788 && $token =~ /$static_side_comment_pattern/ )
22790 # or a closing side comment
22791 || ( $types_to_go[$i_terminal] eq '}'
22792 && $tokens_to_go[$i_terminal] eq '}'
22793 && $token =~ /$closing_side_comment_prefix_pattern/ )
22796 # - For the specific combination -vc -nvsc, we put all side comments
22797 # at fixed locations. Note that we will lose hanging side comment
22798 # alignments. Otherwise, hsc's can move to strange locations.
22799 # - For -nvc -nvsc we make all side comments vertical alignments
22800 # because the vertical aligner will check for -nvsc and be able
22801 # to reduce the final padding to the side comments for long lines.
22802 # and keep hanging side comments aligned.
22803 if ( !$do_not_align
22804 && !$rOpts_valign_side_comments
22805 && $rOpts_valign_code )
22809 my $ipad = $max_i - 1;
22810 if ( $types_to_go[$ipad] eq 'b' ) {
22812 $rOpts->{'minimum-space-to-comment'} -
22813 $token_lengths_to_go[$ipad];
22814 $self->pad_token( $ipad, $pad_spaces );
22818 if ( !$do_not_align ) {
22819 $ralignment_type_to_go->[$max_i] = '#';
22820 $ralignment_hash_by_line->[$max_line]->{$max_i} = '#';
22821 $ralignment_counts->[$max_line]++;
22825 # ----------------------------------------------
22826 # Nothing more to do on this line if -nvc is set
22827 # ----------------------------------------------
22828 if ( !$rOpts_valign_code ) {
22829 return ( $ralignment_type_to_go, $ralignment_counts,
22830 $ralignment_hash_by_line );
22833 # -------------------------------------
22834 # Loop over each line of this batch ...
22835 # -------------------------------------
22836 my $last_vertical_alignment_BEFORE_index;
22837 my $vert_last_nonblank_type;
22838 my $vert_last_nonblank_token;
22840 foreach my $line ( 0 .. $max_line ) {
22842 my $ibeg = $ri_first->[$line];
22843 my $iend = $ri_last->[$line];
22845 next if ( $iend <= $ibeg );
22847 # back up before any side comment
22848 if ( $iend > $i_terminal ) { $iend = $i_terminal }
22850 my $level_beg = $levels_to_go[$ibeg];
22851 my $token_beg = $tokens_to_go[$ibeg];
22852 my $type_beg = $types_to_go[$ibeg];
22853 my $type_beg_special_char =
22854 ( $type_beg eq '.' || $type_beg eq ':' || $type_beg eq '?' );
22856 $last_vertical_alignment_BEFORE_index = -1;
22857 $vert_last_nonblank_type = $type_beg;
22858 $vert_last_nonblank_token = $token_beg;
22860 # ----------------------------------------------------------------
22861 # Initialization code merged from 'sub delete_needless_alignments'
22862 # ----------------------------------------------------------------
22863 my $i_good_paren = -1;
22864 my $i_elsif_close = $ibeg - 1;
22865 my $i_elsif_open = $iend + 1;
22867 if ( $type_beg eq 'k' ) {
22869 # Initialization for paren patch: mark a location of a paren we
22870 # should keep, such as one following something like a leading
22872 $i_good_paren = $ibeg + 1;
22873 if ( $types_to_go[$i_good_paren] eq 'b' ) {
22877 # Initialization for 'elsif' patch: remember the paren range of
22878 # an elsif, and do not make alignments within them because this
22879 # can cause loss of padding and overall brace alignment in the
22880 # vertical aligner.
22881 if ( $token_beg eq 'elsif'
22882 && $i_good_paren < $iend
22883 && $tokens_to_go[$i_good_paren] eq '(' )
22885 $i_elsif_open = $i_good_paren;
22886 $i_elsif_close = $mate_index_to_go[$i_good_paren];
22888 } ## end if ( $type_beg eq 'k' )
22890 # --------------------------------------------
22891 # Loop over each token in this output line ...
22892 # --------------------------------------------
22893 foreach my $i ( $ibeg + 1 .. $iend ) {
22895 next if ( $types_to_go[$i] eq 'b' );
22897 my $type = $types_to_go[$i];
22898 my $token = $tokens_to_go[$i];
22899 my $alignment_type = EMPTY_STRING;
22901 # ----------------------------------------------
22902 # Check for 'paren patch' : Remove excess parens
22903 # ----------------------------------------------
22905 # Excess alignment of parens can prevent other good alignments.
22906 # For example, note the parens in the first two rows of the
22907 # following snippet. They would normally get marked for
22908 # alignment and aligned as follows:
22910 # my $w = $columns * $cell_w + ( $columns + 1 ) * $border;
22911 # my $h = $rows * $cell_h + ( $rows + 1 ) * $border;
22912 # my $img = new Gimp::Image( $w, $h, RGB );
22914 # This causes unnecessary paren alignment and prevents the
22915 # third equals from aligning. If we remove the unwanted
22916 # alignments we get:
22918 # my $w = $columns * $cell_w + ( $columns + 1 ) * $border;
22919 # my $h = $rows * $cell_h + ( $rows + 1 ) * $border;
22920 # my $img = new Gimp::Image( $w, $h, RGB );
22922 # A rule for doing this which works well is to remove alignment
22923 # of parens whose containers do not contain other aligning
22924 # tokens, with the exception that we always keep alignment of
22925 # the first opening paren on a line (for things like 'if' and
22926 # 'elsif' statements).
22927 if ( $token eq ')' && @imatch_list ) {
22929 # undo the corresponding opening paren if:
22930 # - it is at the top of the stack
22931 # - and not the first overall opening paren
22932 # - does not follow a leading keyword on this line
22933 my $imate = $mate_index_to_go[$i];
22934 if ( $imatch_list[-1] eq $imate
22935 && ( $ibeg > 1 || @imatch_list > 1 )
22936 && $imate > $i_good_paren )
22938 if ( $ralignment_type_to_go->[$imate] ) {
22939 $ralignment_type_to_go->[$imate] = EMPTY_STRING;
22940 $ralignment_counts->[$line]--;
22941 delete $ralignment_hash_by_line->[$line]->{$imate};
22947 # do not align tokens at lower level than start of line
22948 # except for side comments
22949 if ( $levels_to_go[$i] < $level_beg ) {
22953 #--------------------------------------------------------
22954 # First see if we want to align BEFORE this token
22955 #--------------------------------------------------------
22957 # The first possible token that we can align before
22958 # is index 2 because: 1) it doesn't normally make sense to
22959 # align before the first token and 2) the second
22960 # token must be a blank if we are to align before
22962 if ( $i < $ibeg + 2 ) { }
22964 # must follow a blank token
22965 elsif ( $types_to_go[ $i - 1 ] ne 'b' ) { }
22967 # otherwise, do not align two in a row to create a
22969 elsif ( $last_vertical_alignment_BEFORE_index == $i - 2 ) { }
22971 # align before one of these keywords
22972 # (within a line, since $i>1)
22973 elsif ( $type eq 'k' ) {
22975 # /^(if|unless|and|or|eq|ne)$/
22976 if ( $is_vertical_alignment_keyword{$token} ) {
22977 $alignment_type = $token;
22981 # align qw in a 'use' statement (issue git #93)
22982 elsif ( $type eq 'q' ) {
22983 if ( $types_to_go[0] eq 'k' && $tokens_to_go[0] eq 'use' ) {
22984 $alignment_type = $type;
22988 # align before one of these types..
22989 elsif ( $is_vertical_alignment_type{$type}
22990 && !$is_not_vertical_alignment_token{$token} )
22992 $alignment_type = $token;
22994 # Do not align a terminal token. Although it might
22995 # occasionally look ok to do this, this has been found to be
22996 # a good general rule. The main problems are:
22997 # (1) that the terminal token (such as an = or :) might get
22998 # moved far to the right where it is hard to see because
22999 # nothing follows it, and
23000 # (2) doing so may prevent other good alignments.
23001 # Current exceptions are && and || and =>
23002 if ( $i == $iend ) {
23003 $alignment_type = EMPTY_STRING
23004 unless ( $is_terminal_alignment_type{$type} );
23007 # Do not align leading ': (' or '. ('. This would prevent
23008 # alignment in something like the following:
23010 # ( $input_line_number < 10 ) ? " "
23011 # : ( $input_line_number < 100 ) ? " "
23015 # ( $case_matters ? $accessor : " lc($accessor) " )
23016 # . ( $yesno ? " eq " : " ne " )
23018 # Also, do not align a ( following a leading ? so we can
23019 # align something like this:
23020 # $converter{$_}->{ushortok} =
23021 # $PDL::IO::Pic::biggrays
23022 # ? ( m/GIF/ ? 0 : 1 )
23023 # : ( m/GIF|RAST|IFF/ ? 0 : 1 );
23024 if ( $type_beg_special_char
23026 && $types_to_go[ $i - 1 ] eq 'b' )
23028 $alignment_type = EMPTY_STRING;
23031 # Certain tokens only align at the same level as the
23032 # initial line level
23033 if ( $is_low_level_alignment_token{$token}
23034 && $levels_to_go[$i] != $level_beg )
23036 $alignment_type = EMPTY_STRING;
23039 # For a paren after keyword, only align something like this:
23041 # elsif ( $b ) { &b }
23042 if ( $token eq '(' ) {
23044 if ( $vert_last_nonblank_type eq 'k' ) {
23045 $alignment_type = EMPTY_STRING
23047 $is_if_unless_elsif{$vert_last_nonblank_token};
23048 ##unless $vert_last_nonblank_token =~ /^(if|unless|elsif)$/;
23051 # Do not align a spaced-function-paren if requested.
23052 # Issue git #53, #73.
23053 if ( !$rOpts_function_paren_vertical_alignment ) {
23054 my $seqno = $type_sequence_to_go[$i];
23055 if ( $ris_function_call_paren->{$seqno} ) {
23056 $alignment_type = EMPTY_STRING;
23060 # make () align with qw in a 'use' statement (git #93)
23061 if ( $tokens_to_go[0] eq 'use'
23062 && $types_to_go[0] eq 'k'
23063 && $mate_index_to_go[$i] == $i + 1 )
23065 $alignment_type = 'q';
23069 # be sure the alignment tokens are unique
23070 # This didn't work well: reason not determined
23071 # if ($token ne $type) {$alignment_type .= $type}
23074 # NOTE: This is deactivated because it causes the previous
23075 # if/elsif alignment to fail
23076 #elsif ( $type eq '}' && $token eq '}' && $block_type_to_go[$i])
23077 #{ $alignment_type = $type; }
23079 if ($alignment_type) {
23080 $last_vertical_alignment_BEFORE_index = $i;
23083 #--------------------------------------------------------
23084 # Next see if we want to align AFTER the previous nonblank
23085 #--------------------------------------------------------
23087 # We want to line up ',' and interior ';' tokens, with the added
23088 # space AFTER these tokens. (Note: interior ';' is included
23089 # because it may occur in short blocks).
23092 # we haven't already set it
23095 # previous token IS one of these:
23097 $vert_last_nonblank_type eq ','
23098 || $vert_last_nonblank_type eq ';'
23101 # and its not the first token of the line
23104 # and it follows a blank
23105 && $types_to_go[ $i - 1 ] eq 'b'
23107 # and it's NOT one of these
23108 && !$is_closing_token{$type}
23110 # then go ahead and align
23114 $alignment_type = $vert_last_nonblank_type;
23117 #-----------------------
23118 # Set the alignment type
23119 #-----------------------
23120 if ($alignment_type) {
23122 # but do not align the opening brace of an anonymous sub
23124 && $block_type_to_go[$i] =~ /$ASUB_PATTERN/ )
23129 # and do not make alignments within 'elsif' parens
23130 elsif ( $i > $i_elsif_open && $i < $i_elsif_close ) {
23134 # and ignore any tokens which have leading padded spaces
23135 # example: perl527/lop.t
23136 elsif ( substr( $alignment_type, 0, 1 ) eq SPACE ) {
23141 $ralignment_type_to_go->[$i] = $alignment_type;
23142 $ralignment_hash_by_line->[$line]->{$i} =
23144 $ralignment_counts->[$line]++;
23145 push @imatch_list, $i;
23149 $vert_last_nonblank_type = $type;
23150 $vert_last_nonblank_token = $token;
23154 return ( $ralignment_type_to_go, $ralignment_counts,
23155 $ralignment_hash_by_line );
23156 } ## end sub set_vertical_alignment_markers
23157 } ## end closure set_vertical_alignment_markers
23159 sub make_vertical_alignments {
23160 my ( $self, $ri_first, $ri_last ) = @_;
23162 #----------------------------
23163 # Shortcut for a single token
23164 #----------------------------
23165 if ( $max_index_to_go == 0 ) {
23166 if ( @{$ri_first} == 1 && $ri_last->[0] == 0 ) {
23168 my $rfields = [ $tokens_to_go[0] ];
23169 my $rpatterns = [ $types_to_go[0] ];
23170 my $rfield_lengths =
23171 [ $summed_lengths_to_go[1] - $summed_lengths_to_go[0] ];
23172 return [ [ $rtokens, $rfields, $rpatterns, $rfield_lengths ] ];
23175 # Strange line packing, not fatal but should not happen
23176 elsif (DEVEL_MODE) {
23177 my $max_line = @{$ri_first} - 1;
23178 my $ibeg = $ri_first->[0];
23179 my $iend = $ri_last->[0];
23180 my $tok_b = $tokens_to_go[$ibeg];
23181 my $tok_e = $tokens_to_go[$iend];
23182 my $type_b = $types_to_go[$ibeg];
23183 my $type_e = $types_to_go[$iend];
23185 "Strange..max_index=0 but nlines=$max_line ibeg=$ibeg tok=$tok_b type=$type_b iend=$iend tok=$tok_e type=$type_e; please check\n"
23190 #---------------------------------------------------------
23191 # Step 1: Define the alignment tokens for the entire batch
23192 #---------------------------------------------------------
23193 my ( $ralignment_type_to_go, $ralignment_counts, $ralignment_hash_by_line )
23194 = $self->set_vertical_alignment_markers( $ri_first, $ri_last );
23196 #----------------------------------------------
23197 # Step 2: Break each line into alignment fields
23198 #----------------------------------------------
23199 my $rline_alignments = [];
23200 my $max_line = @{$ri_first} - 1;
23201 foreach my $line ( 0 .. $max_line ) {
23203 my $ibeg = $ri_first->[$line];
23204 my $iend = $ri_last->[$line];
23206 my $rtok_fld_pat_len = $self->make_alignment_patterns(
23207 $ibeg, $iend, $ralignment_type_to_go,
23208 $ralignment_counts->[$line],
23209 $ralignment_hash_by_line->[$line]
23211 push @{$rline_alignments}, $rtok_fld_pat_len;
23213 return $rline_alignments;
23214 } ## end sub make_vertical_alignments
23218 # get opening and closing sequence numbers of a token for the vertical
23219 # aligner. Assign qw quotes a value to allow qw opening and closing tokens
23220 # to be treated somewhat like opening and closing tokens for stacking
23221 # tokens by the vertical aligner.
23222 my ( $self, $ii, $ending_in_quote ) = @_;
23224 my $rLL = $self->[_rLL_];
23226 my $KK = $K_to_go[$ii];
23227 my $seqno = $rLL->[$KK]->[_TYPE_SEQUENCE_];
23229 if ( $rLL->[$KK]->[_TYPE_] eq 'q' ) {
23231 my $token = $rLL->[$KK]->[_TOKEN_];
23233 $seqno = $SEQ_QW if ( $token =~ /^qw\s*[\(\{\[]/ );
23236 if ( !$ending_in_quote ) {
23237 $seqno = $SEQ_QW if ( $token =~ /[\)\}\]]$/ );
23242 } ## end sub get_seqno
23245 my %undo_extended_ci;
23247 sub initialize_undo_ci {
23248 %undo_extended_ci = ();
23254 # Undo continuation indentation in certain sequences
23255 my ( $self, $ri_first, $ri_last, $rix_seqno_controlling_ci ) = @_;
23256 my ( $line_1, $line_2, $lev_last );
23257 my $this_line_is_semicolon_terminated;
23258 my $max_line = @{$ri_first} - 1;
23260 my $rseqno_controlling_my_ci = $self->[_rseqno_controlling_my_ci_];
23262 # Prepare a list of controlling indexes for each line if required.
23263 # This is used for efficient processing below. Note: this is
23264 # critical for speed. In the initial implementation I just looped
23265 # through the @$rix_seqno_controlling_ci list below. Using NYT_prof, I
23266 # found that this routine was causing a huge run time in large lists.
23267 # On a very large list test case, this new coding dropped the run time
23268 # of this routine from 30 seconds to 169 milliseconds.
23269 my @i_controlling_ci;
23270 if ( @{$rix_seqno_controlling_ci} ) {
23271 my @tmp = reverse @{$rix_seqno_controlling_ci};
23272 my $ix_next = pop @tmp;
23273 foreach my $line ( 0 .. $max_line ) {
23274 my $iend = $ri_last->[$line];
23275 while ( defined($ix_next) && $ix_next <= $iend ) {
23276 push @{ $i_controlling_ci[$line] }, $ix_next;
23277 $ix_next = pop @tmp;
23282 # Loop over all lines of the batch ...
23284 # Workaround originally created for problem c007, in which the
23285 # combination -lp -xci could produce a "Program bug" message in unusual
23287 my $skip_SECTION_1;
23288 if ( $rOpts_line_up_parentheses
23289 && $rOpts_extended_continuation_indentation )
23292 # Only set this flag if -lp is actually used here
23293 foreach my $line ( 0 .. $max_line ) {
23294 my $ibeg = $ri_first->[$line];
23295 if ( ref( $leading_spaces_to_go[$ibeg] ) ) {
23296 $skip_SECTION_1 = 1;
23302 foreach my $line ( 0 .. $max_line ) {
23304 my $ibeg = $ri_first->[$line];
23305 my $iend = $ri_last->[$line];
23306 my $lev = $levels_to_go[$ibeg];
23308 #-----------------------------------
23309 # SECTION 1: Undo needless common CI
23310 #-----------------------------------
23312 # We are looking at leading tokens and looking for a sequence all
23313 # at the same level and all at a higher level than enclosing lines.
23315 # For example, we can undo continuation indentation in sort/map/grep
23318 # my $dat1 = pack( "n*",
23319 # map { $_, $lookup->{$_} }
23320 # sort { $a <=> $b }
23321 # grep { $lookup->{$_} ne $default } keys %$lookup );
23325 # my $dat1 = pack( "n*",
23326 # map { $_, $lookup->{$_} }
23327 # sort { $a <=> $b }
23328 # grep { $lookup->{$_} ne $default } keys %$lookup );
23330 if ( $line > 0 && !$skip_SECTION_1 ) {
23332 # if we have started a chain..
23335 # see if it continues..
23336 if ( $lev == $lev_last ) {
23337 if ( $types_to_go[$ibeg] eq 'k'
23338 && $is_sort_map_grep{ $tokens_to_go[$ibeg] } )
23341 # chain continues...
23342 # check for chain ending at end of a statement
23343 if ( $line == $max_line ) {
23345 # see of this line ends a statement
23346 $this_line_is_semicolon_terminated =
23347 $types_to_go[$iend] eq ';'
23349 # with possible side comment
23350 || ( $types_to_go[$iend] eq '#'
23351 && $iend - $ibeg >= 2
23352 && $types_to_go[ $iend - 2 ] eq ';'
23353 && $types_to_go[ $iend - 1 ] eq 'b' );
23356 if ($this_line_is_semicolon_terminated);
23364 elsif ( $lev < $lev_last ) {
23366 # chain ends with previous line
23367 $line_2 = $line - 1;
23369 elsif ( $lev > $lev_last ) {
23375 # undo the continuation indentation if a chain ends
23376 if ( defined($line_2) && defined($line_1) ) {
23377 my $continuation_line_count = $line_2 - $line_1 + 1;
23378 @ci_levels_to_go[ @{$ri_first}[ $line_1 .. $line_2 ] ]
23379 = (0) x ($continuation_line_count)
23380 if ( $continuation_line_count >= 0 );
23381 @leading_spaces_to_go[ @{$ri_first}
23382 [ $line_1 .. $line_2 ] ] =
23383 @reduced_spaces_to_go[ @{$ri_first}
23384 [ $line_1 .. $line_2 ] ];
23389 # not in a chain yet..
23392 # look for start of a new sort/map/grep chain
23393 if ( $lev > $lev_last ) {
23394 if ( $types_to_go[$ibeg] eq 'k'
23395 && $is_sort_map_grep{ $tokens_to_go[$ibeg] } )
23403 #-------------------------------------
23404 # SECTION 2: Undo ci at cuddled blocks
23405 #-------------------------------------
23407 # Note that sub final_indentation_adjustment will be called later to
23408 # actually do this, but for now we will tentatively mark cuddled
23409 # lines with ci=0 so that the the -xci loop which follows will be
23410 # correct at cuddles.
23412 $types_to_go[$ibeg] eq '}'
23413 && ( $nesting_depth_to_go[$iend] + 1 ==
23414 $nesting_depth_to_go[$ibeg] )
23417 my $terminal_type = $types_to_go[$iend];
23418 if ( $terminal_type eq '#' && $iend > $ibeg ) {
23419 $terminal_type = $types_to_go[ $iend - 1 ];
23420 if ( $terminal_type eq '#' && $iend - 1 > $ibeg ) {
23421 $terminal_type = $types_to_go[ $iend - 2 ];
23424 if ( $terminal_type eq '{' ) {
23425 my $Kbeg = $K_to_go[$ibeg];
23426 $ci_levels_to_go[$ibeg] = 0;
23430 #--------------------------------------------------------
23431 # SECTION 3: Undo ci set by sub extended_ci if not needed
23432 #--------------------------------------------------------
23434 # Undo the ci of the leading token if its controlling token
23435 # went out on a previous line without ci
23436 if ( $ci_levels_to_go[$ibeg] ) {
23437 my $Kbeg = $K_to_go[$ibeg];
23438 my $seqno = $rseqno_controlling_my_ci->{$Kbeg};
23439 if ( $seqno && $undo_extended_ci{$seqno} ) {
23441 # but do not undo ci set by the -lp flag
23442 if ( !ref( $reduced_spaces_to_go[$ibeg] ) ) {
23443 $ci_levels_to_go[$ibeg] = 0;
23444 $leading_spaces_to_go[$ibeg] =
23445 $reduced_spaces_to_go[$ibeg];
23450 # Flag any controlling opening tokens in lines without ci. This
23451 # will be used later in the above if statement to undo the ci which
23452 # they added. The array i_controlling_ci[$line] was prepared at
23453 # the top of this routine.
23454 if ( !$ci_levels_to_go[$ibeg]
23455 && defined( $i_controlling_ci[$line] ) )
23457 foreach my $i ( @{ $i_controlling_ci[$line] } ) {
23458 my $seqno = $type_sequence_to_go[$i];
23459 $undo_extended_ci{$seqno} = 1;
23467 } ## end sub undo_ci
23470 { ## begin closure set_logical_padding
23475 my @q = qw( + - * / );
23476 @is_math_op{@q} = (1) x scalar(@q);
23479 sub set_logical_padding {
23481 # Look at a batch of lines and see if extra padding can improve the
23482 # alignment when there are certain leading operators. Here is an
23483 # example, in which some extra space is introduced before
23484 # '( $year' to make it line up with the subsequent lines:
23486 # if ( ( $Year < 1601 )
23487 # || ( $Year > 2899 )
23488 # || ( $EndYear < 1601 )
23489 # || ( $EndYear > 2899 ) )
23491 # &Error_OutOfRange;
23494 my ( $self, $ri_first, $ri_last, $peak_batch_size, $starting_in_quote )
23496 my $max_line = @{$ri_first} - 1;
23498 my ( $ibeg, $ibeg_next, $ibegm, $iend, $iendm, $ipad, $pad_spaces,
23499 $tok_next, $type_next, $has_leading_op_next, $has_leading_op );
23501 # Patch to produce padding in the first line of short code blocks.
23502 # This is part of an update to fix cases b562 .. b983.
23503 # This is needed to compensate for a change which was made in 'sub
23504 # starting_one_line_block' to prevent blinkers. Previously, that sub
23505 # would not look at the total block size and rely on sub
23506 # break_long_lines to break up long blocks. Consequently, the
23507 # first line of those batches would end in the opening block brace of a
23508 # sort/map/grep/eval block. When this was changed to immediately check
23509 # for blocks which were too long, the opening block brace would go out
23510 # in a single batch, and the block contents would go out as the next
23511 # batch. This caused the logic in this routine which decides if the
23512 # first line should be padded to be incorrect. To fix this, we set a
23513 # flag if the previous batch ended in an opening sort/map/grep/eval
23514 # block brace, and use it to adjust the logic to compensate.
23516 # For example, the following would have previously been a single batch
23517 # but now is two batches. We want to pad the line starting in '$dir':
23518 # my (@indices) = # batch n-1 (prev batch n)
23519 # sort { # batch n-1 (prev batch n)
23520 # $dir eq 'left' # batch n
23521 # ? $cells[$a] <=> $cells[$b] # batch n
23522 # : $cells[$b] <=> $cells[$a]; # batch n
23523 # } ( 0 .. $#cells ); # batch n
23525 my $rLL = $self->[_rLL_];
23526 my $rblock_type_of_seqno = $self->[_rblock_type_of_seqno_];
23528 my $is_short_block;
23529 if ( $K_to_go[0] > 0 ) {
23530 my $Kp = $K_to_go[0] - 1;
23531 if ( $Kp > 0 && $rLL->[$Kp]->[_TYPE_] eq 'b' ) {
23534 if ( $Kp > 0 && $rLL->[$Kp]->[_TYPE_] eq '#' ) {
23536 if ( $Kp > 0 && $rLL->[$Kp]->[_TYPE_] eq 'b' ) {
23540 my $seqno = $rLL->[$Kp]->[_TYPE_SEQUENCE_];
23542 my $block_type = $rblock_type_of_seqno->{$seqno};
23544 $is_short_block = $is_sort_map_grep_eval{$block_type};
23545 $is_short_block ||= $want_one_line_block{$block_type};
23550 # looking at each line of this batch..
23551 foreach my $line ( 0 .. $max_line - 1 ) {
23553 # see if the next line begins with a logical operator
23554 $ibeg = $ri_first->[$line];
23555 $iend = $ri_last->[$line];
23556 $ibeg_next = $ri_first->[ $line + 1 ];
23557 $tok_next = $tokens_to_go[$ibeg_next];
23558 $type_next = $types_to_go[$ibeg_next];
23560 $has_leading_op_next = ( $tok_next =~ /^\w/ )
23561 ? $is_chain_operator{$tok_next} # + - * / : ? && ||
23562 : $is_chain_operator{$type_next}; # and, or
23564 next unless ($has_leading_op_next);
23566 # next line must not be at lesser depth
23568 if ( $nesting_depth_to_go[$ibeg] >
23569 $nesting_depth_to_go[$ibeg_next] );
23571 # identify the token in this line to be padded on the left
23574 # handle lines at same depth...
23575 if ( $nesting_depth_to_go[$ibeg] ==
23576 $nesting_depth_to_go[$ibeg_next] )
23579 # if this is not first line of the batch ...
23582 # and we have leading operator..
23583 next if $has_leading_op;
23585 # Introduce padding if..
23586 # 1. the previous line is at lesser depth, or
23587 # 2. the previous line ends in an assignment
23588 # 3. the previous line ends in a 'return'
23589 # 4. the previous line ends in a comma
23590 # Example 1: previous line at lesser depth
23591 # if ( ( $Year < 1601 ) # <- we are here but
23592 # || ( $Year > 2899 ) # list has not yet
23593 # || ( $EndYear < 1601 ) # collapsed vertically
23594 # || ( $EndYear > 2899 ) )
23597 # Example 2: previous line ending in assignment:
23599 # $year % 4 ? 0 # <- We are here
23600 # : $year % 100 ? 1
23601 # : $year % 400 ? 0
23604 # Example 3: previous line ending in comma:
23611 # be sure levels agree (never indent after an indented 'if')
23613 if ( $levels_to_go[$ibeg] ne $levels_to_go[$ibeg_next] );
23615 # allow padding on first line after a comma but only if:
23616 # (1) this is line 2 and
23617 # (2) there are at more than three lines and
23618 # (3) lines 3 and 4 have the same leading operator
23619 # These rules try to prevent padding within a long
23620 # comma-separated list.
23622 if ( $types_to_go[$iendm] eq ','
23626 my $ibeg_next_next = $ri_first->[ $line + 2 ];
23627 my $tok_next_next = $tokens_to_go[$ibeg_next_next];
23628 $ok_comma = $tok_next_next eq $tok_next;
23633 $is_assignment{ $types_to_go[$iendm] }
23635 || ( $nesting_depth_to_go[$ibegm] <
23636 $nesting_depth_to_go[$ibeg] )
23637 || ( $types_to_go[$iendm] eq 'k'
23638 && $tokens_to_go[$iendm] eq 'return' )
23641 # we will add padding before the first token
23645 # for first line of the batch..
23648 # WARNING: Never indent if first line is starting in a
23649 # continued quote, which would change the quote.
23650 next if $starting_in_quote;
23652 # if this is text after closing '}'
23653 # then look for an interior token to pad
23654 if ( $types_to_go[$ibeg] eq '}' ) {
23658 # otherwise, we might pad if it looks really good
23659 elsif ($is_short_block) {
23664 # we might pad token $ibeg, so be sure that it
23665 # is at the same depth as the next line.
23667 if ( $nesting_depth_to_go[$ibeg] !=
23668 $nesting_depth_to_go[$ibeg_next] );
23670 # We can pad on line 1 of a statement if at least 3
23671 # lines will be aligned. Otherwise, it
23672 # can look very confusing.
23674 # We have to be careful not to pad if there are too few
23675 # lines. The current rule is:
23676 # (1) in general we require at least 3 consecutive lines
23677 # with the same leading chain operator token,
23678 # (2) but an exception is that we only require two lines
23679 # with leading colons if there are no more lines. For example,
23680 # the first $i in the following snippet would get padding
23681 # by the second rule:
23683 # $i == 1 ? ( "First", "Color" )
23684 # : $i == 2 ? ( "Then", "Rarity" )
23685 # : ( "Then", "Name" );
23687 if ( $max_line > 1 ) {
23688 my $leading_token = $tokens_to_go[$ibeg_next];
23691 # never indent line 1 of a '.' series because
23692 # previous line is most likely at same level.
23693 # TODO: we should also look at the leading_spaces
23694 # of the last output line and skip if it is same
23696 next if ( $leading_token eq '.' );
23699 foreach my $l ( 2 .. 3 ) {
23700 last if ( $line + $l > $max_line );
23701 my $ibeg_next_next = $ri_first->[ $line + $l ];
23702 if ( $tokens_to_go[$ibeg_next_next] ne
23705 $tokens_differ = 1;
23710 next if ($tokens_differ);
23711 next if ( $count < 3 && $leading_token ne ':' );
23721 # find interior token to pad if necessary
23722 if ( !defined($ipad) ) {
23724 foreach my $i ( $ibeg .. $iend - 1 ) {
23726 # find any unclosed container
23728 unless ( $type_sequence_to_go[$i]
23729 && $mate_index_to_go[$i] > $iend );
23731 # find next nonblank token to pad
23732 $ipad = $inext_to_go[$i];
23735 last if ( !$ipad || $ipad > $iend );
23738 # We cannot pad the first leading token of a file because
23739 # it could cause a bug in which the starting indentation
23740 # level is guessed incorrectly each time the code is run
23741 # though perltidy, thus causing the code to march off to
23742 # the right. For example, the following snippet would have
23745 ## ov_method mycan( $package, '(""' ), $package
23746 ## or ov_method mycan( $package, '(0+' ), $package
23747 ## or ov_method mycan( $package, '(bool' ), $package
23748 ## or ov_method mycan( $package, '(nomethod' ), $package;
23750 # If this snippet is within a block this won't happen
23751 # unless the user just processes the snippet alone within
23752 # an editor. In that case either the user will see and
23753 # fix the problem or it will be corrected next time the
23754 # entire file is processed with perltidy.
23755 next if ( $ipad == 0 && $peak_batch_size <= 1 );
23757 ## THIS PATCH REMOVES THE FOLLOWING POOR PADDING (math.t) with -pbp, BUT
23758 ## IT DID MORE HARM THAN GOOD
23760 ## $font->{'loca'}->{'glyphs'}[$x]->read->{'xMin'} * 1000
23763 ## # do not put leading padding for just 2 lines of math
23764 ## if ( $ipad == $ibeg
23766 ## && $levels_to_go[$ipad] > $levels_to_go[ $ipad - 1 ]
23767 ## && $is_math_op{$type_next}
23768 ## && $line + 2 <= $max_line )
23770 ## my $ibeg_next_next = $ri_first->[ $line + 2 ];
23771 ## my $type_next_next = $types_to_go[$ibeg_next_next];
23772 ## next if !$is_math_op{$type_next_next};
23775 # next line must not be at greater depth
23776 my $iend_next = $ri_last->[ $line + 1 ];
23778 if ( $nesting_depth_to_go[ $iend_next + 1 ] >
23779 $nesting_depth_to_go[$ipad] );
23781 # lines must be somewhat similar to be padded..
23782 my $inext_next = $inext_to_go[$ibeg_next];
23783 my $type = $types_to_go[$ipad];
23785 # see if there are multiple continuation lines
23786 my $logical_continuation_lines = 1;
23787 if ( $line + 2 <= $max_line ) {
23788 my $leading_token = $tokens_to_go[$ibeg_next];
23789 my $ibeg_next_next = $ri_first->[ $line + 2 ];
23790 if ( $tokens_to_go[$ibeg_next_next] eq $leading_token
23791 && $nesting_depth_to_go[$ibeg_next] eq
23792 $nesting_depth_to_go[$ibeg_next_next] )
23794 $logical_continuation_lines++;
23798 # see if leading types match
23799 my $types_match = $types_to_go[$inext_next] eq $type;
23800 my $matches_without_bang;
23802 # if first line has leading ! then compare the following token
23803 if ( !$types_match && $type eq '!' ) {
23804 $types_match = $matches_without_bang =
23805 $types_to_go[$inext_next] eq $types_to_go[ $ipad + 1 ];
23809 # either we have multiple continuation lines to follow
23810 # and we are not padding the first token
23812 $logical_continuation_lines > 1
23813 && ( $ipad > 0 || $is_short_block )
23822 # and keywords must match if keyword
23825 && $tokens_to_go[$ipad] ne $tokens_to_go[$inext_next]
23831 #----------------------begin special checks--------------
23834 # A check is needed before we can make the pad.
23835 # If we are in a list with some long items, we want each
23836 # item to stand out. So in the following example, the
23837 # first line beginning with '$casefold->' would look good
23838 # padded to align with the next line, but then it
23839 # would be indented more than the last line, so we
23843 # $casefold->{code} eq '0041'
23844 # && $casefold->{status} eq 'C'
23845 # && $casefold->{mapping} eq '0061',
23850 # It would be faster, and almost as good, to use a comma
23851 # count, and not pad if comma_count > 1 and the previous
23852 # line did not end with a comma.
23856 my $ibg = $ri_first->[ $line + 1 ];
23857 my $depth = $nesting_depth_to_go[ $ibg + 1 ];
23859 # just use simplified formula for leading spaces to avoid
23860 # needless sub calls
23861 my $lsp = $levels_to_go[$ibg] + $ci_levels_to_go[$ibg];
23863 # look at each line beyond the next ..
23865 foreach my $ltest ( $line + 2 .. $max_line ) {
23867 my $ibeg_t = $ri_first->[$l];
23869 # quit looking at the end of this container
23871 if ( $nesting_depth_to_go[ $ibeg_t + 1 ] < $depth )
23872 || ( $nesting_depth_to_go[$ibeg_t] < $depth );
23874 # cannot do the pad if a later line would be
23876 if ( $levels_to_go[$ibeg_t] + $ci_levels_to_go[$ibeg_t] <
23884 # don't pad if we end in a broken list
23885 if ( $l == $max_line ) {
23886 my $i2 = $ri_last->[$l];
23887 if ( $types_to_go[$i2] eq '#' ) {
23888 my $i1 = $ri_first->[$l];
23889 next if terminal_type_i( $i1, $i2 ) eq ',';
23894 # a minus may introduce a quoted variable, and we will
23895 # add the pad only if this line begins with a bare word,
23896 # such as for the word 'Button' here:
23898 # Button => "Print letter \"~$_\"",
23899 # -command => [ sub { print "$_[0]\n" }, $_ ],
23900 # -accelerator => "Meta+$_"
23903 # On the other hand, if 'Button' is quoted, it looks best
23906 # 'Button' => "Print letter \"~$_\"",
23907 # -command => [ sub { print "$_[0]\n" }, $_ ],
23908 # -accelerator => "Meta+$_"
23910 if ( $types_to_go[$ibeg_next] eq 'm' ) {
23911 $ok_to_pad = 0 if $types_to_go[$ibeg] eq 'Q';
23914 next unless $ok_to_pad;
23916 #----------------------end special check---------------
23918 my $length_1 = total_line_length( $ibeg, $ipad - 1 );
23919 my $length_2 = total_line_length( $ibeg_next, $inext_next - 1 );
23920 $pad_spaces = $length_2 - $length_1;
23922 # If the first line has a leading ! and the second does
23923 # not, then remove one space to try to align the next
23924 # leading characters, which are often the same. For example:
23926 # || $ts == $self->Holder
23927 # || $self->Holder->Type eq "Arena" )
23929 # This usually helps readability, but if there are subsequent
23930 # ! operators things will still get messed up. For example:
23932 # if ( !exists $Net::DNS::typesbyname{$qtype}
23933 # && exists $Net::DNS::classesbyname{$qtype}
23934 # && !exists $Net::DNS::classesbyname{$qclass}
23935 # && exists $Net::DNS::typesbyname{$qclass} )
23936 # We can't fix that.
23937 if ($matches_without_bang) { $pad_spaces-- }
23939 # make sure this won't change if -lp is used
23940 my $indentation_1 = $leading_spaces_to_go[$ibeg];
23941 if ( ref($indentation_1)
23942 && $indentation_1->get_recoverable_spaces() == 0 )
23944 my $indentation_2 = $leading_spaces_to_go[$ibeg_next];
23945 if ( ref($indentation_2)
23946 && $indentation_2->get_recoverable_spaces() != 0 )
23952 # we might be able to handle a pad of -1 by removing a blank
23954 if ( $pad_spaces < 0 ) {
23956 # Deactivated for -kpit due to conflict. This block deletes
23957 # a space in an attempt to improve alignment in some cases,
23958 # but it may conflict with user spacing requests. For now
23959 # it is just deactivated if the -kpit option is used.
23960 if ( $pad_spaces == -1 ) {
23962 && $types_to_go[ $ipad - 1 ] eq 'b'
23963 && !%keyword_paren_inner_tightness )
23965 $self->pad_token( $ipad - 1, $pad_spaces );
23971 # now apply any padding for alignment
23972 if ( $ipad >= 0 && $pad_spaces ) {
23974 my $length_t = total_line_length( $ibeg, $iend );
23975 if ( $pad_spaces + $length_t <=
23976 $maximum_line_length_at_level[ $levels_to_go[$ibeg] ] )
23978 $self->pad_token( $ipad, $pad_spaces );
23986 $has_leading_op = $has_leading_op_next;
23987 } ## end of loop over lines
23989 } ## end sub set_logical_padding
23990 } ## end closure set_logical_padding
23994 # insert $pad_spaces before token number $ipad
23995 my ( $self, $ipad, $pad_spaces ) = @_;
23996 my $rLL = $self->[_rLL_];
23997 my $KK = $K_to_go[$ipad];
23998 my $tok = $rLL->[$KK]->[_TOKEN_];
23999 my $tok_len = $rLL->[$KK]->[_TOKEN_LENGTH_];
24001 if ( $pad_spaces > 0 ) {
24002 $tok = SPACE x $pad_spaces . $tok;
24003 $tok_len += $pad_spaces;
24005 elsif ( $pad_spaces == -1 && $tokens_to_go[$ipad] eq SPACE ) {
24006 $tok = EMPTY_STRING;
24015 $tok = $rLL->[$KK]->[_TOKEN_] = $tok;
24016 $tok_len = $rLL->[$KK]->[_TOKEN_LENGTH_] = $tok_len;
24018 $token_lengths_to_go[$ipad] += $pad_spaces;
24019 $tokens_to_go[$ipad] = $tok;
24021 foreach my $i ( $ipad .. $max_index_to_go ) {
24022 $summed_lengths_to_go[ $i + 1 ] += $pad_spaces;
24025 } ## end sub pad_token
24027 { ## begin closure make_alignment_patterns
24032 my %is_my_local_our;
24035 my %is_binary_type;
24036 my %is_binary_keyword;
24041 # Note: %block_type_map is now global to enable the -gal=s option
24043 # map certain keywords to the same 'if' class to align
24044 # long if/elsif sequences. [elsif.pl]
24050 'default' => 'given',
24051 'case' => 'switch',
24053 # treat an 'undef' similar to numbers and quotes
24057 # map certain operators to the same class for pattern matching
24072 # leading keywords which to skip for efficiency when making parenless
24074 my @q = qw( my local our return );
24075 @{is_my_local_our}{@q} = (1) x scalar(@q);
24077 # leading keywords where we should just join one token to form
24080 @{is_use_like}{@q} = (1) x scalar(@q);
24082 # leading token types which may be used to make a container name
24084 @{is_kwU}{@q} = (1) x scalar(@q);
24086 # token types which prevent using leading word as a container name
24088 x / : % . | ^ < = > || >= != *= => !~ == && |= .= -= =~ += <= %= ^= x= ~~ ** << /=
24089 &= // >> ~. &. |. ^.
24090 **= <<= >>= &&= ||= //= <=> !~~ &.= |.= ^.= <<~
24093 @{is_binary_type}{@q} = (1) x scalar(@q);
24095 # token keywords which prevent using leading word as a container name
24096 @_ = qw(and or err eq ne cmp);
24097 @is_binary_keyword{@_} = (1) x scalar(@_);
24099 # Some common function calls whose args can be aligned. These do not
24100 # give good alignments if the lengths differ significantly.
24102 'unlike' => 'like',
24104 ##'is_deeply' => 'is', # poor; names lengths too different
24109 sub make_alignment_patterns {
24111 # Here we do some important preliminary work for the
24112 # vertical aligner. We create four arrays for one
24113 # output line. These arrays contain strings that can
24114 # be tested by the vertical aligner to see if
24115 # consecutive lines can be aligned vertically.
24117 # The four arrays are indexed on the vertical
24118 # alignment fields and are:
24119 # @tokens - a list of any vertical alignment tokens for this line.
24120 # These are tokens, such as '=' '&&' '#' etc which
24121 # we want to might align vertically. These are
24122 # decorated with various information such as
24123 # nesting depth to prevent unwanted vertical
24124 # alignment matches.
24125 # @fields - the actual text of the line between the vertical alignment
24127 # @patterns - a modified list of token types, one for each alignment
24128 # field. These should normally each match before alignment is
24129 # allowed, even when the alignment tokens match.
24130 # @field_lengths - the display width of each field
24132 my ( $self, $ibeg, $iend, $ralignment_type_to_go, $alignment_count,
24136 # The var $ralignment_hash contains all of the alignments for this
24137 # line. It is not yet used but is available for future coding in case
24138 # there is a need to do a preliminary scan of the alignment tokens.
24141 if ( defined($ralignment_hash) ) {
24142 $new_count = keys %{$ralignment_hash};
24144 my $old_count = $alignment_count;
24145 $old_count = 0 unless ($old_count);
24146 if ( $new_count != $old_count ) {
24147 my $K = $K_to_go[$ibeg];
24148 my $rLL = $self->[_rLL_];
24149 my $lnl = $rLL->[$K]->[_LINE_INDEX_];
24151 "alignment hash token count gives count=$new_count but old count is $old_count near line=$lnl\n"
24156 # -------------------------------------
24157 # Shortcut for lines without alignments
24158 # -------------------------------------
24159 if ( !$alignment_count ) {
24161 my $rfield_lengths = [ $summed_lengths_to_go[ $iend + 1 ] -
24162 $summed_lengths_to_go[$ibeg] ];
24165 if ( $ibeg == $iend ) {
24166 $rfields = [ $tokens_to_go[$ibeg] ];
24167 $rpatterns = [ $types_to_go[$ibeg] ];
24171 [ join( EMPTY_STRING, @tokens_to_go[ $ibeg .. $iend ] ) ];
24173 [ join( EMPTY_STRING, @types_to_go[ $ibeg .. $iend ] ) ];
24175 return [ $rtokens, $rfields, $rpatterns, $rfield_lengths ];
24178 my $i_start = $ibeg;
24180 my %container_name = ( 0 => EMPTY_STRING );
24185 my @field_lengths = ();
24187 #-------------------------------------------------------------
24188 # Make a container name for any uncontained commas, issue c089
24189 #-------------------------------------------------------------
24190 # This is a generalization of the fix for rt136416 which was a
24191 # specialized patch just for 'use Module' statements.
24192 # We restrict this to semicolon-terminated statements; that way
24193 # we know that the top level commas are not in a list container.
24194 if ( $ibeg == 0 && $iend == $max_index_to_go ) {
24195 my $iterm = $max_index_to_go;
24196 if ( $types_to_go[$iterm] eq '#' ) {
24197 $iterm = $iprev_to_go[$iterm];
24200 # Alignment lines ending like '=> sub {'; fixes issue c093
24201 my $term_type_ok = $types_to_go[$iterm] eq ';';
24203 $tokens_to_go[$iterm] eq '{' && $block_type_to_go[$iterm];
24205 if ( $iterm > $ibeg
24207 && !$is_my_local_our{ $tokens_to_go[$ibeg] }
24208 && $levels_to_go[$ibeg] eq $levels_to_go[$iterm] )
24211 # Make a container name by combining all leading barewords,
24212 # keywords and functions.
24213 my $name = EMPTY_STRING;
24218 for ( $ibeg .. $iterm ) {
24219 my $type = $types_to_go[$_];
24221 if ( $type eq 'b' ) {
24226 my $token = $tokens_to_go[$_];
24228 # Give up if we find an opening paren, binary operator or
24229 # comma within or after the proposed container name.
24231 || $is_binary_type{$type}
24232 || $type eq 'k' && $is_binary_keyword{$token} )
24234 $name = EMPTY_STRING;
24238 # The container name is only built of certain types:
24239 last if ( !$is_kwU{$type} );
24241 # Normally it is made of one word, but two words for 'use'
24242 if ( $count == 0 ) {
24244 && $is_use_like{ $tokens_to_go[$_] } )
24252 elsif ( defined($count_max) && $count >= $count_max ) {
24256 if ( defined( $name_map{$token} ) ) {
24257 $token = $name_map{$token};
24260 $name .= SPACE . $token;
24265 # Require a space after the container name token(s)
24267 && defined($ilast_blank)
24268 && $ilast_blank > $iname_end )
24270 $name = substr( $name, 1 );
24271 $container_name{'0'} = $name;
24276 # --------------------
24277 # Loop over all tokens
24278 # --------------------
24279 my $j = 0; # field index
24281 $patterns[0] = EMPTY_STRING;
24283 for my $i ( $ibeg .. $iend ) {
24285 # Keep track of containers balanced on this line only.
24286 # These are used below to prevent unwanted cross-line alignments.
24287 # Unbalanced containers already avoid aligning across
24288 # container boundaries.
24290 my $type = $types_to_go[$i];
24291 my $token = $tokens_to_go[$i];
24292 my $depth_last = $depth;
24293 if ( $type_sequence_to_go[$i] ) {
24294 if ( $is_opening_token{$token} ) {
24296 # if container is balanced on this line...
24297 my $i_mate = $mate_index_to_go[$i];
24298 if ( $i_mate > $i && $i_mate <= $iend ) {
24301 # Append the previous token name to make the container name
24302 # more unique. This name will also be given to any commas
24303 # within this container, and it helps avoid undesirable
24304 # alignments of different types of containers.
24306 # Containers beginning with { and [ are given those names
24307 # for uniqueness. That way commas in different containers
24308 # will not match. Here is an example of what this prevents:
24309 # a => [ 1, 2, 3 ],
24310 # b => { b1 => 4, b2 => 5 },
24311 # Here is another example of what we avoid by labeling the
24314 # is_d( [ $a, $a ], [ $b, $c ] );
24315 # is_d( { foo => $a, bar => $a }, { foo => $b, bar => $c } );
24316 # is_d( [ \$a, \$a ], [ \$b, \$c ] );
24319 if ( $token eq '(' ) {
24320 $name = $self->make_paren_name($i);
24323 # name cannot be '.', so change to something else if so
24324 if ( $name eq '.' ) { $name = 'dot' }
24326 $container_name{$depth} = "+" . $name;
24328 # Make the container name even more unique if necessary.
24329 # If we are not vertically aligning this opening paren,
24330 # append a character count to avoid bad alignment since
24331 # it usually looks bad to align commas within containers
24332 # for which the opening parens do not align. Here
24333 # is an example very BAD alignment of commas (because
24334 # the atan2 functions are not all aligned):
24336 # $X * $RTYSQP1 * atan2( $X, $RTYSQP1 ) +
24337 # $Y * $RTXSQP1 * atan2( $Y, $RTXSQP1 ) -
24338 # $X * atan2( $X, 1 ) -
24339 # $Y * atan2( $Y, 1 );
24341 # On the other hand, it is usually okay to align commas
24342 # if opening parens align, such as:
24343 # glVertex3d( $cx + $s * $xs, $cy, $z );
24344 # glVertex3d( $cx, $cy + $s * $ys, $z );
24345 # glVertex3d( $cx - $s * $xs, $cy, $z );
24346 # glVertex3d( $cx, $cy - $s * $ys, $z );
24348 # To distinguish between these situations, we append
24349 # the length of the line from the previous matching
24350 # token, or beginning of line, to the function name.
24351 # This will allow the vertical aligner to reject
24352 # undesirable matches.
24354 # if we are not aligning on this paren...
24355 if ( !$ralignment_type_to_go->[$i] ) {
24357 # Sum length from previous alignment
24358 my $len = token_sequence_length( $i_start, $i - 1 );
24360 # Minor patch: do not include the length of any '!'.
24361 # Otherwise, commas in the following line will not
24363 # ok( 20, tapprox( ( pdl 2, 3 ), ( pdl 2, 3 ) ) );
24364 # ok( 21, !tapprox( ( pdl 2, 3 ), ( pdl 2, 4 ) ) );
24365 if ( grep { $_ eq '!' }
24366 @types_to_go[ $i_start .. $i - 1 ] )
24371 if ( $i_start == $ibeg ) {
24373 # For first token, use distance from start of
24374 # line but subtract off the indentation due to
24375 # level. Otherwise, results could vary with
24378 leading_spaces_to_go($ibeg) -
24379 $levels_to_go[$i_start] *
24380 $rOpts_indent_columns;
24381 if ( $len < 0 ) { $len = 0 }
24384 # tack this length onto the container name to try
24385 # to make a unique token name
24386 $container_name{$depth} .= "-" . $len;
24387 } ## end if ( !$ralignment_type_to_go...)
24388 } ## end if ( $i_mate > $i && $i_mate...)
24389 } ## end if ( $is_opening_token...)
24391 elsif ( $is_closing_type{$token} ) {
24392 $depth-- if $depth > 0;
24394 } ## end if ( $type_sequence_to_go...)
24396 # if we find a new synchronization token, we are done with
24398 if ( $i > $i_start && $ralignment_type_to_go->[$i] ) {
24400 my $tok = my $raw_tok = $ralignment_type_to_go->[$i];
24402 # map similar items
24403 my $tok_map = $operator_map{$tok};
24404 $tok = $tok_map if ($tok_map);
24406 # make separators in different nesting depths unique
24407 # by appending the nesting depth digit.
24408 if ( $raw_tok ne '#' ) {
24409 $tok .= "$nesting_depth_to_go[$i]";
24412 # also decorate commas with any container name to avoid
24413 # unwanted cross-line alignments.
24414 if ( $raw_tok eq ',' || $raw_tok eq '=>' ) {
24416 # If we are at an opening token which increased depth, we have
24417 # to use the name from the previous depth.
24419 ( $depth_last < $depth ? $depth_last : $depth );
24420 if ( $container_name{$depth_p} ) {
24421 $tok .= $container_name{$depth_p};
24425 # Patch to avoid aligning leading and trailing if, unless.
24426 # Mark trailing if, unless statements with container names.
24427 # This makes them different from leading if, unless which
24428 # are not so marked at present. If we ever need to name
24429 # them too, we could use ci to distinguish them.
24430 # Example problem to avoid:
24431 # return ( 2, "DBERROR" )
24432 # if ( $retval == 2 );
24433 # if ( scalar @_ ) {
24434 # my ( $a, $b, $c, $d, $e, $f ) = @_;
24436 if ( $raw_tok eq '(' ) {
24437 if ( $ci_levels_to_go[$ibeg]
24438 && $container_name{$depth} =~ /^\+(if|unless)/ )
24440 $tok .= $container_name{$depth};
24444 # Decorate block braces with block types to avoid
24445 # unwanted alignments such as the following:
24446 # foreach ( @{$routput_array} ) { $fh->print($_) }
24447 # eval { $fh->close() };
24448 if ( $raw_tok eq '{' && $block_type_to_go[$i] ) {
24449 my $block_type = $block_type_to_go[$i];
24451 # map certain related block types to allow
24452 # else blocks to align
24453 $block_type = $block_type_map{$block_type}
24454 if ( defined( $block_type_map{$block_type} ) );
24456 # remove sub names to allow one-line sub braces to align
24457 # regardless of name
24458 if ( $block_type =~ /$SUB_PATTERN/ ) { $block_type = 'sub' }
24460 # allow all control-type blocks to align
24461 if ( $block_type =~ /^[A-Z]+$/ ) { $block_type = 'BEGIN' }
24463 $tok .= $block_type;
24466 # Mark multiple copies of certain tokens with the copy number
24467 # This will allow the aligner to decide if they are matched.
24468 # For now, only do this for equals. For example, the two
24469 # equals on the next line will be labeled '=0' and '=0.2'.
24470 # Later, the '=0.2' will be ignored in alignment because it
24473 # $| = $debug = 1 if $opt_d;
24474 # $full_index = 1 if $opt_i;
24476 if ( $raw_tok eq '=' || $raw_tok eq '=>' ) {
24477 $token_count{$tok}++;
24478 if ( $token_count{$tok} > 1 ) {
24479 $tok .= '.' . $token_count{$tok};
24483 # concatenate the text of the consecutive tokens to form
24486 join( EMPTY_STRING, @tokens_to_go[ $i_start .. $i - 1 ] ) );
24488 push @field_lengths,
24489 $summed_lengths_to_go[$i] - $summed_lengths_to_go[$i_start];
24491 # store the alignment token for this field
24492 push( @tokens, $tok );
24494 # get ready for the next batch
24497 $patterns[$j] = EMPTY_STRING;
24498 } ## end if ( new synchronization token
24500 # continue accumulating tokens
24502 # for keywords we have to use the actual text
24503 if ( $type eq 'k' ) {
24505 my $tok_fix = $tokens_to_go[$i];
24507 # but map certain keywords to a common string to allow
24509 $tok_fix = $keyword_map{$tok_fix}
24510 if ( defined( $keyword_map{$tok_fix} ) );
24511 $patterns[$j] .= $tok_fix;
24514 elsif ( $type eq 'b' ) {
24515 $patterns[$j] .= $type;
24518 # Mark most things before arrows as a quote to
24519 # get them to line up. Testfile: mixed.pl.
24521 # handle $type =~ /^[wnC]$/
24522 elsif ( $is_w_n_C{$type} ) {
24524 my $type_fix = $type;
24526 if ( $i < $iend - 1 ) {
24527 my $next_type = $types_to_go[ $i + 1 ];
24528 my $i_next_nonblank =
24529 ( ( $next_type eq 'b' ) ? $i + 2 : $i + 1 );
24531 if ( $types_to_go[$i_next_nonblank] eq '=>' ) {
24534 # Patch to ignore leading minus before words,
24535 # by changing pattern 'mQ' into just 'Q',
24536 # so that we can align things like this:
24537 # Button => "Print letter \"~$_\"",
24538 # -command => [ sub { print "$_[0]\n" }, $_ ],
24539 if ( $patterns[$j] eq 'm' ) {
24540 $patterns[$j] = EMPTY_STRING;
24545 # Convert a bareword within braces into a quote for
24546 # matching. This will allow alignment of expressions like
24548 # local ( $SIG{'INT'} ) = IGNORE;
24549 # local ( $SIG{ALRM} ) = 'POSTMAN';
24553 && $types_to_go[ $i - 1 ] eq 'L'
24554 && $types_to_go[ $i + 1 ] eq 'R' )
24559 # patch to make numbers and quotes align
24560 if ( $type eq 'n' ) { $type_fix = 'Q' }
24562 $patterns[$j] .= $type_fix;
24563 } ## end elsif ( $is_w_n_C{$type} )
24565 # ignore any ! in patterns
24566 elsif ( $type eq '!' ) { }
24570 $patterns[$j] .= $type;
24573 # remove any zero-level name at first fat comma
24574 if ( $depth == 0 && $type eq '=>' ) {
24575 $container_name{$depth} = EMPTY_STRING;
24577 } ## end for my $i ( $ibeg .. $iend)
24579 # done with this line .. join text of tokens to make the last field
24581 join( EMPTY_STRING, @tokens_to_go[ $i_start .. $iend ] ) );
24582 push @field_lengths,
24583 $summed_lengths_to_go[ $iend + 1 ] - $summed_lengths_to_go[$i_start];
24585 return [ \@tokens, \@fields, \@patterns, \@field_lengths ];
24586 } ## end sub make_alignment_patterns
24588 } ## end closure make_alignment_patterns
24590 sub make_paren_name {
24591 my ( $self, $i ) = @_;
24593 # The token at index $i is a '('.
24594 # Create an alignment name for it to avoid incorrect alignments.
24596 # Start with the name of the previous nonblank token...
24597 my $name = EMPTY_STRING;
24599 return EMPTY_STRING if ( $im < 0 );
24600 if ( $types_to_go[$im] eq 'b' ) { $im--; }
24601 return EMPTY_STRING if ( $im < 0 );
24602 $name = $tokens_to_go[$im];
24604 # Prepend any sub name to an isolated -> to avoid unwanted alignments
24605 # [test case is test8/penco.pl]
24606 if ( $name eq '->' ) {
24608 if ( $im >= 0 && $types_to_go[$im] ne 'b' ) {
24609 $name = $tokens_to_go[$im] . $name;
24613 # Finally, remove any leading arrows
24614 if ( substr( $name, 0, 2 ) eq '->' ) {
24615 $name = substr( $name, 2 );
24618 } ## end sub make_paren_name
24620 { ## begin closure final_indentation_adjustment
24622 my ( $last_indentation_written, $last_unadjusted_indentation,
24623 $last_leading_token );
24625 sub initialize_final_indentation_adjustment {
24626 $last_indentation_written = 0;
24627 $last_unadjusted_indentation = 0;
24628 $last_leading_token = EMPTY_STRING;
24632 sub final_indentation_adjustment {
24634 #--------------------------------------------------------------------
24635 # This routine sets the final indentation of a line in the Formatter.
24636 #--------------------------------------------------------------------
24638 # It starts with the basic indentation which has been defined for the
24639 # leading token, and then takes into account any options that the user
24640 # has set regarding special indenting and outdenting.
24642 # This routine has to resolve a number of complex interacting issues,
24644 # 1. The various -cti=n type flags, which contain the desired change in
24645 # indentation for lines ending in commas and semicolons, should be
24647 # 2. qw quotes require special processing and do not fit perfectly
24648 # with normal containers,
24649 # 3. formatting with -wn can complicate things, especially with qw
24651 # 4. formatting with the -lp option is complicated, and does not
24652 # work well with qw quotes and with -wn formatting.
24653 # 5. a number of special situations, such as 'cuddled' formatting.
24654 # 6. This routine is mainly concerned with outdenting closing tokens
24655 # but note that there is some overlap with the functions of sub
24656 # undo_ci, which was processed earlier, so care has to be taken to
24657 # keep them coordinated.
24662 $rpatterns, $ri_first,
24663 $ri_last, $rindentation_list,
24664 $level_jump, $starting_in_quote,
24665 $is_static_block_comment,
24668 my $rLL = $self->[_rLL_];
24669 my $Klimit = $self->[_Klimit_];
24670 my $ris_bli_container = $self->[_ris_bli_container_];
24671 my $rseqno_controlling_my_ci = $self->[_rseqno_controlling_my_ci_];
24672 my $rwant_reduced_ci = $self->[_rwant_reduced_ci_];
24673 my $rK_weld_left = $self->[_rK_weld_left_];
24675 # Find the last code token of this line
24676 my $i_terminal = $iend;
24677 my $terminal_type = $types_to_go[$iend];
24678 if ( $terminal_type eq '#' && $i_terminal > $ibeg ) {
24680 $terminal_type = $types_to_go[$i_terminal];
24681 if ( $terminal_type eq 'b' && $i_terminal > $ibeg ) {
24683 $terminal_type = $types_to_go[$i_terminal];
24687 my $terminal_block_type = $block_type_to_go[$i_terminal];
24688 my $is_outdented_line = 0;
24690 my $type_beg = $types_to_go[$ibeg];
24691 my $token_beg = $tokens_to_go[$ibeg];
24692 my $block_type_beg = $block_type_to_go[$ibeg];
24693 my $level_beg = $levels_to_go[$ibeg];
24694 my $leading_spaces_beg = $leading_spaces_to_go[$ibeg];
24695 my $K_beg = $K_to_go[$ibeg];
24696 my $seqno_beg = $type_sequence_to_go[$ibeg];
24697 my $ibeg_weld_fix = $ibeg;
24698 my $is_closing_type_beg = $is_closing_type{$type_beg};
24699 my $is_bli_beg = $seqno_beg ? $ris_bli_container->{$seqno_beg} : 0;
24701 # QW INDENTATION PATCH 3:
24702 my $seqno_qw_closing;
24703 if ( $type_beg eq 'q' && $ibeg == 0 ) {
24704 my $KK = $K_to_go[$ibeg];
24705 $seqno_qw_closing =
24706 $self->[_rending_multiline_qw_seqno_by_K_]->{$KK};
24709 my $is_semicolon_terminated = $terminal_type eq ';'
24710 && ( $nesting_depth_to_go[$iend] < $nesting_depth_to_go[$ibeg]
24711 || $seqno_qw_closing );
24713 # NOTE: A future improvement would be to make it semicolon terminated
24714 # even if it does not have a semicolon but is followed by a closing
24715 # block brace. This would undo ci even for something like the
24716 # following, in which the final paren does not have a semicolon because
24717 # it is a possible weld location:
24719 # if ($BOLD_MATH) {
24721 # $labels, $comment,
24722 # join( '', '<B>', &make_math( $mode, '', '', $_ ), '</B>' )
24727 # MOJO: Set a flag if this lines begins with ')->'
24728 my $leading_paren_arrow = (
24729 $is_closing_type_beg
24730 && $token_beg eq ')'
24732 ( $ibeg < $i_terminal && $types_to_go[ $ibeg + 1 ] eq '->' )
24733 || ( $ibeg < $i_terminal - 1
24734 && $types_to_go[ $ibeg + 1 ] eq 'b'
24735 && $types_to_go[ $ibeg + 2 ] eq '->' )
24739 #---------------------------------------------------------
24740 # Section 1: set a flag and a default indentation
24742 # Most lines are indented according to the initial token.
24743 # But it is common to outdent to the level just after the
24744 # terminal token in certain cases...
24745 # adjust_indentation flag:
24746 # 0 - do not adjust
24748 # 2 - vertically align with opening token
24750 #---------------------------------------------------------
24751 my $adjust_indentation = 0;
24752 my $default_adjust_indentation = $adjust_indentation;
24755 $opening_indentation, $opening_offset,
24756 $is_leading, $opening_exists
24759 # Honor any flag to reduce -ci set by the -bbxi=n option
24760 if ( $seqno_beg && $rwant_reduced_ci->{$seqno_beg} ) {
24762 # if this is an opening, it must be alone on the line ...
24763 if ( $is_closing_type{$type_beg} || $ibeg == $i_terminal ) {
24764 $adjust_indentation = 1;
24767 # ... or a single welded unit (fix for b1173)
24768 elsif ($total_weld_count) {
24769 my $Kterm = $K_to_go[$i_terminal];
24770 my $Kterm_test = $rK_weld_left->{$Kterm};
24771 if ( defined($Kterm_test) && $Kterm_test >= $K_beg ) {
24772 $Kterm = $Kterm_test;
24774 if ( $Kterm == $K_beg ) { $adjust_indentation = 1 }
24778 # Update the $is_bli flag as we go. It is initially 1.
24779 # We note seeing a leading opening brace by setting it to 2.
24780 # If we get to the closing brace without seeing the opening then we
24781 # turn it off. This occurs if the opening brace did not get output
24782 # at the start of a line, so we will then indent the closing brace
24783 # in the default way.
24784 if ( $is_bli_beg && $is_bli_beg == 1 ) {
24785 my $K_opening_container = $self->[_K_opening_container_];
24786 my $K_opening = $K_opening_container->{$seqno_beg};
24787 if ( $K_beg eq $K_opening ) {
24788 $ris_bli_container->{$seqno_beg} = $is_bli_beg = 2;
24790 else { $is_bli_beg = 0 }
24793 # QW PATCH for the combination -lp -wn
24794 # For -lp formatting use $ibeg_weld_fix to get around the problem
24795 # that with -lp type formatting the opening and closing tokens to not
24796 # have sequence numbers.
24797 if ( $seqno_qw_closing && $total_weld_count ) {
24798 my $i_plus = $inext_to_go[$ibeg];
24799 if ( $i_plus <= $max_index_to_go ) {
24800 my $K_plus = $K_to_go[$i_plus];
24801 if ( defined( $rK_weld_left->{$K_plus} ) ) {
24802 $ibeg_weld_fix = $i_plus;
24807 # if we are at a closing token of some type..
24808 if ( $is_closing_type_beg || $seqno_qw_closing ) {
24810 # get the indentation of the line containing the corresponding
24813 $opening_indentation, $opening_offset,
24814 $is_leading, $opening_exists
24816 = $self->get_opening_indentation( $ibeg_weld_fix, $ri_first,
24817 $ri_last, $rindentation_list, $seqno_qw_closing );
24819 my $terminal_is_in_list = $self->is_in_list_by_i($i_terminal);
24821 # First set the default behavior:
24824 # default behavior is to outdent closing lines
24825 # of the form: "); }; ]; )->xxx;"
24826 $is_semicolon_terminated
24828 # and 'cuddled parens' of the form: ")->pack("
24829 # Bug fix for RT #123749]: the types here were
24830 # incorrectly '(' and ')'. Corrected to be '{' and '}'
24832 $terminal_type eq '{'
24833 && $type_beg eq '}'
24834 && ( $nesting_depth_to_go[$iend] + 1 ==
24835 $nesting_depth_to_go[$ibeg] )
24838 # remove continuation indentation for any line like
24840 # or without ending '{' and unbalanced, such as
24841 # such as '}->{$operator}'
24845 && ( $types_to_go[$iend] eq '{'
24846 || $levels_to_go[$iend] < $level_beg )
24849 # and when the next line is at a lower indentation level...
24851 # PATCH #1: and only if the style allows undoing continuation
24852 # for all closing token types. We should really wait until
24853 # the indentation of the next line is known and then make
24854 # a decision, but that would require another pass.
24856 # PATCH #2: and not if this token is under -xci control
24857 || ( $level_jump < 0
24858 && !$some_closing_token_indentation
24859 && !$rseqno_controlling_my_ci->{$K_beg} )
24861 # Patch for -wn=2, multiple welded closing tokens
24862 || ( $i_terminal > $ibeg
24863 && $is_closing_type{ $types_to_go[$iend] } )
24865 # Alternate Patch for git #51, isolated closing qw token not
24866 # outdented if no-delete-old-newlines is set. This works, but
24867 # a more general patch elsewhere fixes the real problem: ljump.
24868 # || ( $seqno_qw_closing && $ibeg == $i_terminal )
24872 $adjust_indentation = 1;
24875 # outdent something like '),'
24877 $terminal_type eq ','
24879 # Removed this constraint for -wn
24880 # OLD: allow just one character before the comma
24881 # && $i_terminal == $ibeg + 1
24883 # require LIST environment; otherwise, we may outdent too much -
24884 # this can happen in calls without parentheses (overload.t);
24885 && $terminal_is_in_list
24888 $adjust_indentation = 1;
24891 # undo continuation indentation of a terminal closing token if
24892 # it is the last token before a level decrease. This will allow
24893 # a closing token to line up with its opening counterpart, and
24894 # avoids an indentation jump larger than 1 level.
24895 if ( $i_terminal == $ibeg
24896 && $is_closing_type_beg
24898 && $K_beg < $Klimit )
24900 my $K_plus = $K_beg + 1;
24901 my $type_plus = $rLL->[$K_plus]->[_TYPE_];
24903 if ( $type_plus eq 'b' && $K_plus < $Klimit ) {
24904 $type_plus = $rLL->[ ++$K_plus ]->[_TYPE_];
24907 if ( $type_plus eq '#' && $K_plus < $Klimit ) {
24908 $type_plus = $rLL->[ ++$K_plus ]->[_TYPE_];
24909 if ( $type_plus eq 'b' && $K_plus < $Klimit ) {
24910 $type_plus = $rLL->[ ++$K_plus ]->[_TYPE_];
24913 # Note: we have skipped past just one comment (perhaps a
24914 # side comment). There could be more, and we could easily
24915 # skip past all the rest with the following code, or with a
24916 # while loop. It would be rare to have to do this, and
24917 # those block comments would still be indented, so it would
24918 # to leave them indented. So it seems best to just stop at
24919 # a maximum of one comment.
24920 ##if ($type_plus eq '#') {
24921 ## $K_plus = $self->K_next_code($K_plus);
24925 if ( !$is_bli_beg && defined($K_plus) ) {
24926 my $lev = $level_beg;
24927 my $level_next = $rLL->[$K_plus]->[_LEVEL_];
24929 # and do not undo ci if it was set by the -xci option
24930 $adjust_indentation = 1
24931 if ( $level_next < $lev
24932 && !$rseqno_controlling_my_ci->{$K_beg} );
24935 # Patch for RT #96101, in which closing brace of anonymous subs
24936 # was not outdented. We should look ahead and see if there is
24937 # a level decrease at the next token (i.e., a closing token),
24938 # but right now we do not have that information. For now
24939 # we see if we are in a list, and this works well.
24940 # See test files 'sub*.t' for good test cases.
24941 if ( $terminal_is_in_list
24942 && !$rOpts_indent_closing_brace
24944 && $block_type_beg =~ /$ASUB_PATTERN/ )
24947 $opening_indentation, $opening_offset,
24948 $is_leading, $opening_exists
24950 = $self->get_opening_indentation( $ibeg, $ri_first,
24951 $ri_last, $rindentation_list );
24952 my $indentation = $leading_spaces_beg;
24953 if ( defined($opening_indentation)
24954 && get_spaces($indentation) >
24955 get_spaces($opening_indentation) )
24957 $adjust_indentation = 1;
24962 # YVES patch 1 of 2:
24963 # Undo ci of line with leading closing eval brace,
24964 # but not beyond the indentation of the line with
24965 # the opening brace.
24967 $block_type_beg eq 'eval'
24968 ##&& !$rOpts_line_up_parentheses
24969 && !ref($leading_spaces_beg)
24970 && !$rOpts_indent_closing_brace
24974 $opening_indentation, $opening_offset,
24975 $is_leading, $opening_exists
24977 = $self->get_opening_indentation( $ibeg, $ri_first, $ri_last,
24978 $rindentation_list );
24979 my $indentation = $leading_spaces_beg;
24980 if ( defined($opening_indentation)
24981 && get_spaces($indentation) >
24982 get_spaces($opening_indentation) )
24984 $adjust_indentation = 1;
24988 # patch for issue git #40: -bli setting has priority
24989 $adjust_indentation = 0 if ($is_bli_beg);
24991 $default_adjust_indentation = $adjust_indentation;
24993 # Now modify default behavior according to user request:
24994 # handle option to indent non-blocks of the form ); }; ];
24995 # But don't do special indentation to something like ')->pack('
24996 if ( !$block_type_beg ) {
24998 # Note that logical padding has already been applied, so we may
24999 # need to remove some spaces to get a valid hash key.
25000 my $tok = $token_beg;
25001 my $cti = $closing_token_indentation{$tok};
25003 # Fix the value of 'cti' for an isolated non-welded closing qw
25005 if ( $seqno_qw_closing && $ibeg_weld_fix == $ibeg ) {
25007 # A quote delimiter which is not a container will not have
25008 # a cti value defined. In this case use the style of a
25009 # paren. For example
25017 if ( !defined($cti) && length($tok) == 1 ) {
25019 # something other than ')', '}', ']' ; use flag for ')'
25020 $cti = $closing_token_indentation{')'};
25022 # But for now, do not outdent non-container qw
25023 # delimiters because it would would change existing
25025 if ( $tok ne '>' ) { $cti = 3 }
25028 # A non-welded closing qw cannot currently use -cti=1
25029 # because that option requires a sequence number to find
25030 # the opening indentation, and qw quote delimiters are not
25032 if ( defined($cti) && $cti == 1 ) { $cti = 0 }
25035 if ( !defined($cti) ) {
25037 # $cti may not be defined for several reasons.
25038 # -padding may have been applied so the character
25040 # - we may have welded to a closing quote token.
25041 # Here is an example (perltidy -wn):
25042 # __PACKAGE__->load_components( qw(
25046 $adjust_indentation = 0;
25049 elsif ( $cti == 1 ) {
25050 if ( $i_terminal <= $ibeg + 1
25051 || $is_semicolon_terminated )
25053 $adjust_indentation = 2;
25056 $adjust_indentation = 0;
25059 elsif ( $cti == 2 ) {
25060 if ($is_semicolon_terminated) {
25061 $adjust_indentation = 3;
25064 $adjust_indentation = 0;
25067 elsif ( $cti == 3 ) {
25068 $adjust_indentation = 3;
25072 # handle option to indent blocks
25075 $rOpts_indent_closing_brace
25077 $i_terminal == $ibeg # isolated terminal '}'
25078 || $is_semicolon_terminated
25082 $adjust_indentation = 3;
25087 # if at ');', '};', '>;', and '];' of a terminal qw quote
25089 substr( $rpatterns->[0], 0, 2 ) eq 'qb'
25090 && substr( $rfields->[0], -1, 1 ) eq ';'
25091 ##&& $rpatterns->[0] =~ /^qb*;$/
25092 && $rfields->[0] =~ /^([\)\}\]\>]);$/
25095 if ( $closing_token_indentation{$1} == 0 ) {
25096 $adjust_indentation = 1;
25099 $adjust_indentation = 3;
25103 # if line begins with a ':', align it with any
25104 # previous line leading with corresponding ?
25105 elsif ( $type_beg eq ':' ) {
25107 $opening_indentation, $opening_offset,
25108 $is_leading, $opening_exists
25110 = $self->get_opening_indentation( $ibeg, $ri_first, $ri_last,
25111 $rindentation_list );
25112 if ($is_leading) { $adjust_indentation = 2; }
25115 #---------------------------------------------------------
25116 # Section 2: set indentation according to flag set above
25118 # Select the indentation object to define leading
25119 # whitespace. If we are outdenting something like '} } );'
25120 # then we want to use one level below the last token
25121 # ($i_terminal) in order to get it to fully outdent through
25123 #---------------------------------------------------------
25126 my $level_end = $levels_to_go[$iend];
25128 if ( $adjust_indentation == 0 ) {
25129 $indentation = $leading_spaces_beg;
25132 elsif ( $adjust_indentation == 1 ) {
25134 # Change the indentation to be that of a different token on the line
25135 # Previously, the indentation of the terminal token was used:
25137 # $indentation = $reduced_spaces_to_go[$i_terminal];
25138 # $lev = $levels_to_go[$i_terminal];
25140 # Generalization for MOJO:
25141 # Use the lowest level indentation of the tokens on the line.
25142 # For example, here we can use the indentation of the ending ';':
25143 # } until ($selection > 0 and $selection < 10); # ok to use ';'
25144 # But this will not outdent if we use the terminal indentation:
25145 # )->then( sub { # use indentation of the ->, not the {
25146 # Warning: reduced_spaces_to_go[] may be a reference, do not
25147 # do numerical checks with it
25150 $indentation = $reduced_spaces_to_go[$i_ind];
25151 $lev = $levels_to_go[$i_ind];
25152 while ( $i_ind < $i_terminal ) {
25154 if ( $levels_to_go[$i_ind] < $lev ) {
25155 $indentation = $reduced_spaces_to_go[$i_ind];
25156 $lev = $levels_to_go[$i_ind];
25161 # handle indented closing token which aligns with opening token
25162 elsif ( $adjust_indentation == 2 ) {
25164 # handle option to align closing token with opening token
25167 # calculate spaces needed to align with opening token
25169 get_spaces($opening_indentation) + $opening_offset;
25171 # Indent less than the previous line.
25173 # Problem: For -lp we don't exactly know what it was if there
25174 # were recoverable spaces sent to the aligner. A good solution
25175 # would be to force a flush of the vertical alignment buffer, so
25176 # that we would know. For now, this rule is used for -lp:
25178 # When the last line did not start with a closing token we will
25179 # be optimistic that the aligner will recover everything wanted.
25181 # This rule will prevent us from breaking a hierarchy of closing
25182 # tokens, and in a worst case will leave a closing paren too far
25183 # indented, but this is better than frequently leaving it not
25185 my $last_spaces = get_spaces($last_indentation_written);
25187 if ( ref($last_indentation_written)
25188 && !$is_closing_token{$last_leading_token} )
25191 get_recoverable_spaces($last_indentation_written);
25194 # reset the indentation to the new space count if it works
25195 # only options are all or none: nothing in-between looks good
25198 my $diff = $last_spaces - $space_count;
25200 $indentation = $space_count;
25204 # We need to fix things ... but there is no good way to do it.
25205 # The best solution is for the user to use a longer maximum
25206 # line length. We could get a smooth variation if we just move
25207 # the paren in using
25208 # $space_count -= ( 1 - $diff );
25209 # But unfortunately this can give a rather unbalanced look.
25211 # For -xlp we currently allow a tolerance of one indentation
25212 # level and then revert to a simpler default. This will jump
25213 # suddenly but keeps a balanced look.
25214 if ( $rOpts_extended_line_up_parentheses
25215 && $diff >= -$rOpts_indent_columns
25216 && $space_count > $leading_spaces_beg )
25218 $indentation = $space_count;
25221 # Otherwise revert to defaults
25222 elsif ( $default_adjust_indentation == 0 ) {
25223 $indentation = $leading_spaces_beg;
25225 elsif ( $default_adjust_indentation == 1 ) {
25226 $indentation = $reduced_spaces_to_go[$i_terminal];
25227 $lev = $levels_to_go[$i_terminal];
25232 # Full indentation of closing tokens (-icb and -icp or -cti=2)
25235 # handle -icb (indented closing code block braces)
25236 # Updated method for indented block braces: indent one full level if
25237 # there is no continuation indentation. This will occur for major
25238 # structures such as sub, if, else, but not for things like map
25241 # Note: only code blocks without continuation indentation are
25242 # handled here (if, else, unless, ..). In the following snippet,
25243 # the terminal brace of the sort block will have continuation
25244 # indentation as shown so it will not be handled by the coding
25245 # here. We would have to undo the continuation indentation to do
25246 # this, but it probably looks ok as is. This is a possible future
25247 # update for semicolon terminated lines.
25249 # if ($sortby eq 'date' or $sortby eq 'size') {
25251 # $file_data{$a}{$sortby} <=> $file_data{$b}{$sortby}
25256 if ( $block_type_beg
25257 && $ci_levels_to_go[$i_terminal] == 0 )
25259 my $spaces = get_spaces( $leading_spaces_to_go[$i_terminal] );
25260 $indentation = $spaces + $rOpts_indent_columns;
25262 # NOTE: for -lp we could create a new indentation object, but
25263 # there is probably no need to do it
25266 # handle -icp and any -icb block braces which fall through above
25267 # test such as the 'sort' block mentioned above.
25270 # There are currently two ways to handle -icp...
25271 # One way is to use the indentation of the previous line:
25272 # $indentation = $last_indentation_written;
25274 # The other way is to use the indentation that the previous line
25275 # would have had if it hadn't been adjusted:
25276 $indentation = $last_unadjusted_indentation;
25278 # Current method: use the minimum of the two. This avoids
25279 # inconsistent indentation.
25280 if ( get_spaces($last_indentation_written) <
25281 get_spaces($indentation) )
25283 $indentation = $last_indentation_written;
25287 # use previous indentation but use own level
25288 # to cause list to be flushed properly
25292 # remember indentation except for multi-line quotes, which get
25294 unless ( $ibeg == 0 && $starting_in_quote ) {
25295 $last_indentation_written = $indentation;
25296 $last_unadjusted_indentation = $leading_spaces_beg;
25297 $last_leading_token = $token_beg;
25299 # Patch to make a line which is the end of a qw quote work with the
25300 # -lp option. Make $token_beg look like a closing token as some
25301 # type even if it is not. This variable will become
25302 # $last_leading_token at the end of this loop. Then, if the -lp
25303 # style is selected, and the next line is also a
25304 # closing token, it will not get more indentation than this line.
25305 # We need to do this because qw quotes (at present) only get
25306 # continuation indentation, not one level of indentation, so we
25307 # need to turn off the -lp indentation.
25309 # ... a picture is worth a thousand words:
25311 # perltidy -wn -gnu (Without this patch):
25313 # $seqio = $gb->get_Stream_by_batch([qw(J00522 AF303112
25317 # perltidy -wn -gnu (With this patch):
25319 # $seqio = $gb->get_Stream_by_batch([qw(J00522 AF303112
25322 if ( $seqno_qw_closing
25323 && ( length($token_beg) > 1 || $token_beg eq '>' ) )
25325 $last_leading_token = ')';
25329 # be sure lines with leading closing tokens are not outdented more
25330 # than the line which contained the corresponding opening token.
25332 #--------------------------------------------------------
25333 # updated per bug report in alex_bug.pl: we must not
25334 # mess with the indentation of closing logical braces so
25335 # we must treat something like '} else {' as if it were
25336 # an isolated brace
25337 #--------------------------------------------------------
25338 my $is_isolated_block_brace = $block_type_beg
25339 && ( $i_terminal == $ibeg
25340 || $is_if_elsif_else_unless_while_until_for_foreach{$block_type_beg}
25343 # only do this for a ':; which is aligned with its leading '?'
25344 my $is_unaligned_colon = $type_beg eq ':' && !$is_leading;
25347 defined($opening_indentation)
25348 && !$leading_paren_arrow # MOJO
25349 && !$is_isolated_block_brace
25350 && !$is_unaligned_colon
25353 if ( get_spaces($opening_indentation) > get_spaces($indentation) ) {
25354 $indentation = $opening_indentation;
25358 # remember the indentation of each line of this batch
25359 push @{$rindentation_list}, $indentation;
25361 # outdent lines with certain leading tokens...
25364 # must be first word of this batch
25370 # certain leading keywords if requested
25371 $rOpts_outdent_keywords
25372 && $type_beg eq 'k'
25373 && $outdent_keyword{$token_beg}
25375 # or labels if requested
25376 || $rOpts_outdent_labels && $type_beg eq 'J'
25378 # or static block comments if requested
25379 || $is_static_block_comment
25380 && $rOpts_outdent_static_block_comments
25384 my $space_count = leading_spaces_to_go($ibeg);
25385 if ( $space_count > 0 ) {
25386 $space_count -= $rOpts_continuation_indentation;
25387 $is_outdented_line = 1;
25388 if ( $space_count < 0 ) { $space_count = 0 }
25390 # do not promote a spaced static block comment to non-spaced;
25391 # this is not normally necessary but could be for some
25392 # unusual user inputs (such as -ci = -i)
25393 if ( $type_beg eq '#' && $space_count == 0 ) {
25397 $indentation = $space_count;
25401 return ( $indentation, $lev, $level_end, $terminal_type,
25402 $terminal_block_type, $is_semicolon_terminated,
25403 $is_outdented_line );
25404 } ## end sub final_indentation_adjustment
25405 } ## end closure final_indentation_adjustment
25407 sub get_opening_indentation {
25409 # get the indentation of the line which output the opening token
25410 # corresponding to a given closing token in the current output batch.
25413 # $i_closing - index in this line of a closing token ')' '}' or ']'
25415 # $ri_first - reference to list of the first index $i for each output
25416 # line in this batch
25417 # $ri_last - reference to list of the last index $i for each output line
25419 # $rindentation_list - reference to a list containing the indentation
25420 # used for each line.
25421 # $qw_seqno - optional sequence number to use if normal seqno not defined
25422 # (TODO: would be more general to just look this up from index i)
25425 # -the indentation of the line which contained the opening token
25426 # which matches the token at index $i_opening
25427 # -and its offset (number of columns) from the start of the line
25429 my ( $self, $i_closing, $ri_first, $ri_last, $rindentation_list, $qw_seqno )
25432 # first, see if the opening token is in the current batch
25433 my $i_opening = $mate_index_to_go[$i_closing];
25434 my ( $indent, $offset, $is_leading, $exists );
25436 if ( defined($i_opening) && $i_opening >= 0 ) {
25438 # it is..look up the indentation
25439 ( $indent, $offset, $is_leading ) =
25440 lookup_opening_indentation( $i_opening, $ri_first, $ri_last,
25441 $rindentation_list );
25444 # if not, it should have been stored in the hash by a previous batch
25446 my $seqno = $type_sequence_to_go[$i_closing];
25447 $seqno = $qw_seqno unless ($seqno);
25448 ( $indent, $offset, $is_leading, $exists ) =
25449 get_saved_opening_indentation($seqno);
25451 return ( $indent, $offset, $is_leading, $exists );
25452 } ## end sub get_opening_indentation
25454 sub set_vertical_tightness_flags {
25456 my ( $self, $n, $n_last_line, $ibeg, $iend, $ri_first, $ri_last,
25457 $ending_in_quote, $closing_side_comment )
25460 # Define vertical tightness controls for the nth line of a batch.
25462 # These parameters are passed to the vertical aligner to indicated
25463 # if we should combine this line with the next line to achieve the
25464 # desired vertical tightness. This was previously an array but
25465 # has been converted to a hash:
25470 # 0 _vt_type: 1=opening non-block 2=closing non-block
25471 # 3=opening block brace 4=closing block brace
25473 # 1a _vt_opening_flag: 1=no multiple steps, 2=multiple steps ok
25474 # 1b _vt_closing_flag: spaces of padding to use if closing
25475 # 2 _vt_seqno: sequence number of container
25476 # 3 _vt_valid flag: do not append if this flag is false. Will be
25477 # true if appropriate -vt flag is set. Otherwise, Will be
25478 # made true only for 2 line container in parens with -lp
25479 # 4 _vt_seqno_beg: sequence number of first token of line
25480 # 5 _vt_seqno_end: sequence number of last token of line
25481 # 6 _vt_min_lines: min number of lines for joining opening cache,
25483 # 7 _vt_max_lines: max number of lines for joining opening cache,
25486 # The vertical tightness mechanism can add whitespace, so whitespace can
25487 # continually increase if we allowed it when the -fws flag is set.
25488 # See case b499 for an example.
25490 # Speedup: just return for a comment
25491 if ( $max_index_to_go == 0 && $types_to_go[0] eq '#' ) {
25495 # Define these values...
25497 my $vt_opening_flag = 0;
25498 my $vt_closing_flag = 0;
25500 my $vt_valid_flag = 0;
25501 my $vt_seqno_beg = 0;
25502 my $vt_seqno_end = 0;
25503 my $vt_min_lines = 0;
25504 my $vt_max_lines = 0;
25507 if ($rOpts_freeze_whitespace);
25509 # Uses these global parameters:
25510 # $rOpts_block_brace_tightness
25511 # $rOpts_block_brace_vertical_tightness
25512 # $rOpts_stack_closing_block_brace
25513 # %opening_vertical_tightness
25514 # %closing_vertical_tightness
25515 # %opening_token_right
25516 # %stack_closing_token
25517 # %stack_opening_token
25519 #--------------------------------------------------------------
25520 # Vertical Tightness Flags Section 1:
25521 # Handle Lines 1 .. n-1 but not the last line
25522 # For non-BLOCK tokens, we will need to examine the next line
25523 # too, so we won't consider the last line.
25524 #--------------------------------------------------------------
25525 if ( $n < $n_last_line ) {
25527 #--------------------------------------------------------------
25528 # Vertical Tightness Flags Section 1a:
25529 # Look for Type 1, last token of this line is a non-block opening token
25530 #--------------------------------------------------------------
25531 my $ibeg_next = $ri_first->[ $n + 1 ];
25532 my $token_end = $tokens_to_go[$iend];
25533 my $iend_next = $ri_last->[ $n + 1 ];
25536 $type_sequence_to_go[$iend]
25537 && !$block_type_to_go[$iend]
25538 && $is_opening_token{$token_end}
25540 $opening_vertical_tightness{$token_end} > 0
25542 # allow 2-line method call to be closed up
25543 || ( $rOpts_line_up_parentheses
25544 && $token_end eq '('
25545 && $self->[_rlp_object_by_seqno_]
25546 ->{ $type_sequence_to_go[$iend] }
25548 && $types_to_go[ $iend - 1 ] ne 'b' )
25552 # avoid multiple jumps in nesting depth in one line if
25554 my $ovt = $opening_vertical_tightness{$token_end};
25556 # Turn off the -vt flag if the next line ends in a weld.
25557 # This avoids an instability with one-line welds (fixes b1183).
25558 my $type_end_next = $types_to_go[$iend_next];
25560 if ( $self->[_rK_weld_left_]->{ $K_to_go[$iend_next] }
25561 && $is_closing_type{$type_end_next} );
25563 # Avoid conflict of -bom and -pt=1 or -pt=2, fixes b1270
25564 # See similar patch above for $cvt.
25565 my $seqno = $type_sequence_to_go[$iend];
25566 if ( $ovt && $self->[_rwant_container_open_]->{$seqno} ) {
25571 && $self->[_rreduce_vertical_tightness_by_seqno_]->{$seqno} )
25578 && ( $nesting_depth_to_go[ $iend_next + 1 ] !=
25579 $nesting_depth_to_go[$ibeg_next] )
25583 # If -vt flag has not been set, mark this as invalid
25584 # and aligner will validate it if it sees the closing paren
25586 my $valid_flag = $ovt;
25589 $vt_opening_flag = $ovt;
25590 $vt_seqno = $type_sequence_to_go[$iend];
25591 $vt_valid_flag = $valid_flag;
25595 #--------------------------------------------------------------
25596 # Vertical Tightness Flags Section 1b:
25597 # Look for Type 2, first token of next line is a non-block closing
25598 # token .. and be sure this line does not have a side comment
25599 #--------------------------------------------------------------
25600 my $token_next = $tokens_to_go[$ibeg_next];
25601 if ( $type_sequence_to_go[$ibeg_next]
25602 && !$block_type_to_go[$ibeg_next]
25603 && $is_closing_token{$token_next}
25604 && $types_to_go[$iend] ne '#' ) # for safety, shouldn't happen!
25606 my $ovt = $opening_vertical_tightness{$token_next};
25607 my $cvt = $closing_vertical_tightness{$token_next};
25609 # Avoid conflict of -bom and -pvt=1 or -pvt=2, fixes b977, b1303
25610 # See similar patch above for $ovt.
25611 my $seqno = $type_sequence_to_go[$ibeg_next];
25612 if ( $cvt && $self->[_rwant_container_open_]->{$seqno} ) {
25616 # Implement cvt=3: like cvt=0 for assigned structures, like cvt=1
25617 # otherwise. Added for rt136417.
25619 $cvt = $self->[_ris_assigned_structure_]->{$seqno} ? 0 : 1;
25622 # The unusual combination -pvtc=2 -dws -naws can be unstable.
25623 # This fixes b1282, b1283. This can be moved to set_options.
25625 && $rOpts_delete_old_whitespace
25626 && !$rOpts_add_whitespace )
25633 # Never append a trailing line like ')->pack(' because it
25634 # will throw off later alignment. So this line must start at a
25635 # deeper level than the next line (fix1 for welding, git #45).
25637 $nesting_depth_to_go[$ibeg_next] >=
25638 $nesting_depth_to_go[ $iend_next + 1 ] + 1
25643 !$self->is_in_list_by_i($ibeg_next)
25647 # allow closing up 2-line method calls
25648 || ( $rOpts_line_up_parentheses
25649 && $token_next eq ')'
25650 && $self->[_rlp_object_by_seqno_]
25651 ->{ $type_sequence_to_go[$ibeg_next] } )
25658 # decide which trailing closing tokens to append..
25660 if ( $cvt == 2 || $iend_next == $ibeg_next ) { $ok = 1 }
25662 my $str = join( EMPTY_STRING,
25663 @types_to_go[ $ibeg_next + 1 .. $ibeg_next + 2 ] );
25665 # append closing token if followed by comment or ';'
25666 # or another closing token (fix2 for welding, git #45)
25667 if ( $str =~ /^b?[\)\]\}R#;]/ ) { $ok = 1 }
25671 my $valid_flag = $cvt;
25675 # Fix for b1187 and b1188: Blinking can occur if we allow
25676 # welded tokens to re-form into one-line blocks during
25677 # vertical alignment when -lp used. So for this case we
25678 # set the minimum number of lines to be 1 instead of 0.
25679 # The maximum should be 1 if -vtc is not used. If -vtc is
25680 # used, we turn the valid
25681 # flag off and set the maximum to 0. This is equivalent to
25682 # using a large number.
25683 my $seqno_ibeg_next = $type_sequence_to_go[$ibeg_next];
25684 if ( $rOpts_line_up_parentheses
25685 && $total_weld_count
25686 && $self->[_rlp_object_by_seqno_]->{$seqno_ibeg_next}
25687 && $self->is_welded_at_seqno($seqno_ibeg_next) )
25690 $max_lines = $cvt ? 0 : 1;
25695 $vt_closing_flag = $tightness{$token_next} == 2 ? 0 : 1;
25696 $vt_seqno = $type_sequence_to_go[$ibeg_next];
25697 $vt_valid_flag = $valid_flag;
25698 $vt_min_lines = $min_lines;
25699 $vt_max_lines = $max_lines;
25704 #--------------------------------------------------------------
25705 # Vertical Tightness Flags Section 1c:
25706 # Implement the Opening Token Right flag (Type 2)..
25707 # If requested, move an isolated trailing opening token to the end of
25708 # the previous line which ended in a comma. We could do this
25709 # in sub recombine_breakpoints but that would cause problems
25710 # with -lp formatting. The problem is that indentation will
25711 # quickly move far to the right in nested expressions. By
25712 # doing it after indentation has been set, we avoid changes
25713 # to the indentation. Actual movement of the token takes place
25714 # in sub valign_output_step_B.
25716 # Note added 4 May 2021: the man page suggests that the -otr flags
25717 # are mainly for opening tokens following commas. But this seems
25718 # to have been generalized long ago to include other situations.
25719 # I checked the coding back to 2012 and it is essentially the same
25720 # as here, so it is best to leave this unchanged for now.
25721 #--------------------------------------------------------------
25723 $opening_token_right{ $tokens_to_go[$ibeg_next] }
25725 # previous line is not opening
25726 # (use -sot to combine with it)
25727 && !$is_opening_token{$token_end}
25729 # previous line ended in one of these
25730 # (add other cases if necessary; '=>' and '.' are not necessary
25731 && !$block_type_to_go[$ibeg_next]
25733 # this is a line with just an opening token
25734 && ( $iend_next == $ibeg_next
25735 || $iend_next == $ibeg_next + 2
25736 && $types_to_go[$iend_next] eq '#' )
25738 # Fix for case b1060 when both -baoo and -otr are set:
25739 # to avoid blinking, honor the -baoo flag over the -otr flag.
25740 && $token_end ne '||' && $token_end ne '&&'
25742 # Keep break after '=' if -lp. Fixes b964 b1040 b1062 b1083 b1089.
25745 && $rOpts_line_up_parentheses
25746 && $self->[_rlp_object_by_seqno_]
25747 ->{ $type_sequence_to_go[$ibeg_next] }
25750 # looks bad if we align vertically with the wrong container
25751 && $tokens_to_go[$ibeg] ne $tokens_to_go[$ibeg_next]
25754 my $spaces = ( $types_to_go[ $ibeg_next - 1 ] eq 'b' ) ? 1 : 0;
25757 $vt_closing_flag = $spaces;
25758 $vt_seqno = $type_sequence_to_go[$ibeg_next];
25759 $vt_valid_flag = 1;
25762 #--------------------------------------------------------------
25763 # Vertical Tightness Flags Section 1d:
25764 # Stacking of opening and closing tokens (Type 2)
25765 #--------------------------------------------------------------
25767 my $token_beg_next = $tokens_to_go[$ibeg_next];
25769 # patch to make something like 'qw(' behave like an opening paren
25771 if ( $types_to_go[$ibeg_next] eq 'q' ) {
25772 if ( $token_beg_next =~ /^qw\s*([\[\(\{])$/ ) {
25773 $token_beg_next = $1;
25777 if ( $is_closing_token{$token_end}
25778 && $is_closing_token{$token_beg_next} )
25781 # avoid instability of combo -bom and -sct; b1179
25782 my $seq_next = $type_sequence_to_go[$ibeg_next];
25783 $stackable = $stack_closing_token{$token_beg_next}
25784 unless ( $block_type_to_go[$ibeg_next]
25785 || $seq_next && $self->[_rwant_container_open_]->{$seq_next} );
25787 elsif ($is_opening_token{$token_end}
25788 && $is_opening_token{$token_beg_next} )
25790 $stackable = $stack_opening_token{$token_beg_next}
25791 unless ( $block_type_to_go[$ibeg_next] )
25792 ; # shouldn't happen; just checking
25797 my $is_semicolon_terminated;
25798 if ( $n + 1 == $n_last_line ) {
25799 my ( $terminal_type, $i_terminal ) =
25800 terminal_type_i( $ibeg_next, $iend_next );
25801 $is_semicolon_terminated = $terminal_type eq ';'
25802 && $nesting_depth_to_go[$iend_next] <
25803 $nesting_depth_to_go[$ibeg_next];
25806 # this must be a line with just an opening token
25807 # or end in a semicolon
25809 $is_semicolon_terminated
25810 || ( $iend_next == $ibeg_next
25811 || $iend_next == $ibeg_next + 2
25812 && $types_to_go[$iend_next] eq '#' )
25815 my $spaces = ( $types_to_go[ $ibeg_next - 1 ] eq 'b' ) ? 1 : 0;
25818 $vt_closing_flag = $spaces;
25819 $vt_seqno = $type_sequence_to_go[$ibeg_next];
25820 $vt_valid_flag = 1;
25826 #--------------------------------------------------------------
25827 # Vertical Tightness Flags Section 2:
25828 # Handle type 3, opening block braces on last line of the batch
25829 # Check for a last line with isolated opening BLOCK curly
25830 #--------------------------------------------------------------
25831 elsif ($rOpts_block_brace_vertical_tightness
25833 && $types_to_go[$iend] eq '{'
25834 && $block_type_to_go[$iend] =~
25835 /$block_brace_vertical_tightness_pattern/ )
25838 $vt_opening_flag = $rOpts_block_brace_vertical_tightness;
25840 $vt_valid_flag = 1;
25843 #--------------------------------------------------------------
25844 # Vertical Tightness Flags Section 3:
25845 # Handle type 4, a closing block brace on the last line of the batch Check
25846 # for a last line with isolated closing BLOCK curly
25847 # Patch: added a check for any new closing side comment which the
25848 # -csc option may generate. If it exists, there will be a side comment
25849 # so we cannot combine with a brace on the next line. This issue
25850 # occurs for the combination -scbb and -csc is used.
25851 #--------------------------------------------------------------
25852 elsif ($rOpts_stack_closing_block_brace
25854 && $block_type_to_go[$iend]
25855 && $types_to_go[$iend] eq '}'
25856 && ( !$closing_side_comment || $n < $n_last_line ) )
25858 my $spaces = $rOpts_block_brace_tightness == 2 ? 0 : 1;
25861 $vt_closing_flag = $spaces;
25862 $vt_seqno = $type_sequence_to_go[$iend];
25863 $vt_valid_flag = 1;
25867 # get the sequence numbers of the ends of this line
25868 $vt_seqno_beg = $type_sequence_to_go[$ibeg];
25869 if ( !$vt_seqno_beg && $types_to_go[$ibeg] eq 'q' ) {
25870 $vt_seqno_beg = $self->get_seqno( $ibeg, $ending_in_quote );
25873 $vt_seqno_end = $type_sequence_to_go[$iend];
25874 if ( !$vt_seqno_end && $types_to_go[$iend] eq 'q' ) {
25875 $vt_seqno_end = $self->get_seqno( $iend, $ending_in_quote );
25880 my $rvertical_tightness_flags = {
25881 _vt_type => $vt_type,
25882 _vt_opening_flag => $vt_opening_flag,
25883 _vt_closing_flag => $vt_closing_flag,
25884 _vt_seqno => $vt_seqno,
25885 _vt_valid_flag => $vt_valid_flag,
25886 _vt_seqno_beg => $vt_seqno_beg,
25887 _vt_seqno_end => $vt_seqno_end,
25888 _vt_min_lines => $vt_min_lines,
25889 _vt_max_lines => $vt_max_lines,
25892 return ($rvertical_tightness_flags);
25893 } ## end sub set_vertical_tightness_flags
25895 ##########################################################
25896 # CODE SECTION 14: Code for creating closing side comments
25897 ##########################################################
25899 { ## begin closure accumulate_csc_text
25901 # These routines are called once per batch when the --closing-side-comments flag
25904 my %block_leading_text;
25905 my %block_opening_line_number;
25906 my $csc_new_statement_ok;
25907 my $csc_last_label;
25908 my %csc_block_label;
25909 my $accumulating_text_for_block;
25910 my $leading_block_text;
25911 my $rleading_block_if_elsif_text;
25912 my $leading_block_text_level;
25913 my $leading_block_text_length_exceeded;
25914 my $leading_block_text_line_length;
25915 my $leading_block_text_line_number;
25917 sub initialize_csc_vars {
25918 %block_leading_text = ();
25919 %block_opening_line_number = ();
25920 $csc_new_statement_ok = 1;
25921 $csc_last_label = EMPTY_STRING;
25922 %csc_block_label = ();
25923 $rleading_block_if_elsif_text = [];
25924 $accumulating_text_for_block = EMPTY_STRING;
25925 reset_block_text_accumulator();
25927 } ## end sub initialize_csc_vars
25929 sub reset_block_text_accumulator {
25931 # save text after 'if' and 'elsif' to append after 'else'
25932 if ($accumulating_text_for_block) {
25934 ## ( $accumulating_text_for_block =~ /^(if|elsif)$/ ) {
25935 if ( $is_if_elsif{$accumulating_text_for_block} ) {
25936 push @{$rleading_block_if_elsif_text}, $leading_block_text;
25939 $accumulating_text_for_block = EMPTY_STRING;
25940 $leading_block_text = EMPTY_STRING;
25941 $leading_block_text_level = 0;
25942 $leading_block_text_length_exceeded = 0;
25943 $leading_block_text_line_number = 0;
25944 $leading_block_text_line_length = 0;
25946 } ## end sub reset_block_text_accumulator
25948 sub set_block_text_accumulator {
25949 my ( $self, $i ) = @_;
25950 $accumulating_text_for_block = $tokens_to_go[$i];
25951 if ( $accumulating_text_for_block !~ /^els/ ) {
25952 $rleading_block_if_elsif_text = [];
25954 $leading_block_text = EMPTY_STRING;
25955 $leading_block_text_level = $levels_to_go[$i];
25956 $leading_block_text_line_number = $self->get_output_line_number();
25957 $leading_block_text_length_exceeded = 0;
25959 # this will contain the column number of the last character
25960 # of the closing side comment
25961 $leading_block_text_line_length =
25962 length($csc_last_label) +
25963 length($accumulating_text_for_block) +
25964 length( $rOpts->{'closing-side-comment-prefix'} ) +
25965 $leading_block_text_level * $rOpts_indent_columns + 3;
25967 } ## end sub set_block_text_accumulator
25969 sub accumulate_block_text {
25970 my ( $self, $i ) = @_;
25972 # accumulate leading text for -csc, ignoring any side comments
25973 if ( $accumulating_text_for_block
25974 && !$leading_block_text_length_exceeded
25975 && $types_to_go[$i] ne '#' )
25978 my $added_length = $token_lengths_to_go[$i];
25979 $added_length += 1 if $i == 0;
25980 my $new_line_length =
25981 $leading_block_text_line_length + $added_length;
25983 # we can add this text if we don't exceed some limits..
25986 # we must not have already exceeded the text length limit
25987 length($leading_block_text) <
25988 $rOpts_closing_side_comment_maximum_text
25991 # the new total line length must be below the line length limit
25992 # or the new length must be below the text length limit
25993 # (ie, we may allow one token to exceed the text length limit)
25996 $maximum_line_length_at_level[$leading_block_text_level]
25998 || length($leading_block_text) + $added_length <
25999 $rOpts_closing_side_comment_maximum_text
26002 # UNLESS: we are adding a closing paren before the brace we seek.
26003 # This is an attempt to avoid situations where the ... to be
26004 # added are longer than the omitted right paren, as in:
26006 # foreach my $item (@a_rather_long_variable_name_here) {
26008 # } ## end foreach my $item (@a_rather_long_variable_name_here...
26011 $tokens_to_go[$i] eq ')'
26014 $i + 1 <= $max_index_to_go
26015 && $block_type_to_go[ $i + 1 ] eq
26016 $accumulating_text_for_block
26018 || ( $i + 2 <= $max_index_to_go
26019 && $block_type_to_go[ $i + 2 ] eq
26020 $accumulating_text_for_block )
26026 # add an extra space at each newline
26027 if ( $i == 0 && $types_to_go[$i] ne 'b' ) {
26028 $leading_block_text .= SPACE;
26031 # add the token text
26032 $leading_block_text .= $tokens_to_go[$i];
26033 $leading_block_text_line_length = $new_line_length;
26036 # show that text was truncated if necessary
26037 elsif ( $types_to_go[$i] ne 'b' ) {
26038 $leading_block_text_length_exceeded = 1;
26039 $leading_block_text .= '...';
26043 } ## end sub accumulate_block_text
26045 sub accumulate_csc_text {
26049 # called once per output buffer when -csc is used. Accumulates
26050 # the text placed after certain closing block braces.
26051 # Defines and returns the following for this buffer:
26053 my $block_leading_text =
26054 EMPTY_STRING; # the leading text of the last '}'
26055 my $rblock_leading_if_elsif_text;
26056 my $i_block_leading_text =
26057 -1; # index of token owning block_leading_text
26058 my $block_line_count = 100; # how many lines the block spans
26059 my $terminal_type = 'b'; # type of last nonblank token
26060 my $i_terminal = 0; # index of last nonblank token
26061 my $terminal_block_type = EMPTY_STRING;
26063 # update most recent statement label
26064 $csc_last_label = EMPTY_STRING unless ($csc_last_label);
26065 if ( $types_to_go[0] eq 'J' ) { $csc_last_label = $tokens_to_go[0] }
26066 my $block_label = $csc_last_label;
26068 # Loop over all tokens of this batch
26069 for my $i ( 0 .. $max_index_to_go ) {
26070 my $type = $types_to_go[$i];
26071 my $block_type = $block_type_to_go[$i];
26072 my $token = $tokens_to_go[$i];
26074 # remember last nonblank token type
26075 if ( $type ne '#' && $type ne 'b' ) {
26076 $terminal_type = $type;
26077 $terminal_block_type = $block_type;
26081 my $type_sequence = $type_sequence_to_go[$i];
26082 if ( $block_type && $type_sequence ) {
26084 if ( $token eq '}' ) {
26086 # restore any leading text saved when we entered this block
26087 if ( defined( $block_leading_text{$type_sequence} ) ) {
26088 ( $block_leading_text, $rblock_leading_if_elsif_text )
26089 = @{ $block_leading_text{$type_sequence} };
26090 $i_block_leading_text = $i;
26091 delete $block_leading_text{$type_sequence};
26092 $rleading_block_if_elsif_text =
26093 $rblock_leading_if_elsif_text;
26096 if ( defined( $csc_block_label{$type_sequence} ) ) {
26097 $block_label = $csc_block_label{$type_sequence};
26098 delete $csc_block_label{$type_sequence};
26101 # if we run into a '}' then we probably started accumulating
26102 # at something like a trailing 'if' clause..no harm done.
26103 if ( $accumulating_text_for_block
26104 && $levels_to_go[$i] <= $leading_block_text_level )
26106 my $lev = $levels_to_go[$i];
26107 reset_block_text_accumulator();
26110 if ( defined( $block_opening_line_number{$type_sequence} ) )
26112 my $output_line_number =
26113 $self->get_output_line_number();
26114 $block_line_count =
26115 $output_line_number -
26116 $block_opening_line_number{$type_sequence} + 1;
26117 delete $block_opening_line_number{$type_sequence};
26121 # Error: block opening line undefined for this line..
26122 # This shouldn't be possible, but it is not a
26123 # significant problem.
26127 elsif ( $token eq '{' ) {
26129 my $line_number = $self->get_output_line_number();
26130 $block_opening_line_number{$type_sequence} = $line_number;
26132 # set a label for this block, except for
26133 # a bare block which already has the label
26134 # A label can only be used on the next {
26135 if ( $block_type =~ /:$/ ) {
26136 $csc_last_label = EMPTY_STRING;
26138 $csc_block_label{$type_sequence} = $csc_last_label;
26139 $csc_last_label = EMPTY_STRING;
26141 if ( $accumulating_text_for_block
26142 && $levels_to_go[$i] == $leading_block_text_level )
26145 if ( $accumulating_text_for_block eq $block_type ) {
26147 # save any leading text before we enter this block
26148 $block_leading_text{$type_sequence} = [
26149 $leading_block_text,
26150 $rleading_block_if_elsif_text
26152 $block_opening_line_number{$type_sequence} =
26153 $leading_block_text_line_number;
26154 reset_block_text_accumulator();
26158 # shouldn't happen, but not a serious error.
26159 # We were accumulating -csc text for block type
26160 # $accumulating_text_for_block and unexpectedly
26161 # encountered a '{' for block type $block_type.
26168 && $csc_new_statement_ok
26169 && $is_if_elsif_else_unless_while_until_for_foreach{$token}
26170 && $token =~ /$closing_side_comment_list_pattern/ )
26172 $self->set_block_text_accumulator($i);
26176 # note: ignoring type 'q' because of tricks being played
26177 # with 'q' for hanging side comments
26178 if ( $type ne 'b' && $type ne '#' && $type ne 'q' ) {
26179 $csc_new_statement_ok =
26180 ( $block_type || $type eq 'J' || $type eq ';' );
26183 && $accumulating_text_for_block
26184 && $levels_to_go[$i] == $leading_block_text_level )
26186 reset_block_text_accumulator();
26189 $self->accumulate_block_text($i);
26194 # Treat an 'else' block specially by adding preceding 'if' and
26195 # 'elsif' text. Otherwise, the 'end else' is not helpful,
26196 # especially for cuddled-else formatting.
26197 if ( $terminal_block_type =~ /^els/ && $rblock_leading_if_elsif_text ) {
26198 $block_leading_text =
26199 $self->make_else_csc_text( $i_terminal, $terminal_block_type,
26200 $block_leading_text, $rblock_leading_if_elsif_text );
26203 # if this line ends in a label then remember it for the next pass
26204 $csc_last_label = EMPTY_STRING;
26205 if ( $terminal_type eq 'J' ) {
26206 $csc_last_label = $tokens_to_go[$i_terminal];
26209 return ( $terminal_type, $i_terminal, $i_block_leading_text,
26210 $block_leading_text, $block_line_count, $block_label );
26211 } ## end sub accumulate_csc_text
26213 sub make_else_csc_text {
26215 # create additional -csc text for an 'else' and optionally 'elsif',
26216 # depending on the value of switch
26218 # = 0 add 'if' text to trailing else
26219 # = 1 same as 0 plus:
26220 # add 'if' to 'elsif's if can fit in line length
26221 # add last 'elsif' to trailing else if can fit in one line
26222 # = 2 same as 1 but do not check if exceed line length
26224 # $rif_elsif_text = a reference to a list of all previous closing
26225 # side comments created for this if block
26227 my ( $self, $i_terminal, $block_type, $block_leading_text,
26230 my $csc_text = $block_leading_text;
26232 if ( $block_type eq 'elsif'
26233 && $rOpts_closing_side_comment_else_flag == 0 )
26238 my $count = @{$rif_elsif_text};
26239 return $csc_text unless ($count);
26241 my $if_text = '[ if' . $rif_elsif_text->[0];
26243 # always show the leading 'if' text on 'else'
26244 if ( $block_type eq 'else' ) {
26245 $csc_text .= $if_text;
26248 # see if that's all
26249 if ( $rOpts_closing_side_comment_else_flag == 0 ) {
26253 my $last_elsif_text = EMPTY_STRING;
26254 if ( $count > 1 ) {
26255 $last_elsif_text = ' [elsif' . $rif_elsif_text->[ $count - 1 ];
26256 if ( $count > 2 ) { $last_elsif_text = ' [...' . $last_elsif_text; }
26259 # tentatively append one more item
26260 my $saved_text = $csc_text;
26261 if ( $block_type eq 'else' ) {
26262 $csc_text .= $last_elsif_text;
26265 $csc_text .= SPACE . $if_text;
26268 # all done if no length checks requested
26269 if ( $rOpts_closing_side_comment_else_flag == 2 ) {
26273 # undo it if line length exceeded
26275 length($csc_text) +
26276 length($block_type) +
26277 length( $rOpts->{'closing-side-comment-prefix'} ) +
26278 $levels_to_go[$i_terminal] * $rOpts_indent_columns + 3;
26280 $length > $maximum_line_length_at_level[$leading_block_text_level] )
26282 $csc_text = $saved_text;
26285 } ## end sub make_else_csc_text
26286 } ## end closure accumulate_csc_text
26288 { ## begin closure balance_csc_text
26290 # Some additional routines for handling the --closing-side-comments option
26305 sub balance_csc_text {
26307 # Append characters to balance a closing side comment so that editors
26308 # such as vim can correctly jump through code.
26310 # input = ## end foreach my $foo ( sort { $b ...
26311 # output = ## end foreach my $foo ( sort { $b ...})
26313 # NOTE: This routine does not currently filter out structures within
26314 # quoted text because the bounce algorithms in text editors do not
26315 # necessarily do this either (a version of vim was checked and
26316 # did not do this).
26318 # Some complex examples which will cause trouble for some editors:
26319 # while ( $mask_string =~ /\{[^{]*?\}/g ) {
26320 # if ( $mask_str =~ /\}\s*els[^\{\}]+\{$/ ) {
26321 # if ( $1 eq '{' ) {
26322 # test file test1/braces.pl has many such examples.
26326 # loop to examine characters one-by-one, RIGHT to LEFT and
26327 # build a balancing ending, LEFT to RIGHT.
26328 foreach my $pos ( reverse( 0 .. length($csc) - 1 ) ) {
26330 my $char = substr( $csc, $pos, 1 );
26332 # ignore everything except structural characters
26333 next unless ( $matching_char{$char} );
26335 # pop most recently appended character
26336 my $top = chop($csc);
26338 # push it back plus the mate to the newest character
26339 # unless they balance each other.
26340 $csc = $csc . $top . $matching_char{$char} unless $top eq $char;
26343 # return the balanced string
26345 } ## end sub balance_csc_text
26346 } ## end closure balance_csc_text
26348 sub add_closing_side_comment {
26350 my ( $self, $ri_first, $ri_last ) = @_;
26351 my $rLL = $self->[_rLL_];
26353 # add closing side comments after closing block braces if -csc used
26354 my ( $closing_side_comment, $cscw_block_comment );
26356 #---------------------------------------------------------------
26357 # Step 1: loop through all tokens of this line to accumulate
26358 # the text needed to create the closing side comments. Also see
26359 # how the line ends.
26360 #---------------------------------------------------------------
26362 my ( $terminal_type, $i_terminal, $i_block_leading_text,
26363 $block_leading_text, $block_line_count, $block_label )
26364 = $self->accumulate_csc_text();
26366 #---------------------------------------------------------------
26367 # Step 2: make the closing side comment if this ends a block
26368 #---------------------------------------------------------------
26369 my $have_side_comment = $types_to_go[$max_index_to_go] eq '#';
26371 # if this line might end in a block closure..
26373 $terminal_type eq '}'
26375 # Fix 1 for c091, this is only for blocks
26376 && $block_type_to_go[$i_terminal]
26381 # the block is long enough
26382 ( $block_line_count >= $rOpts->{'closing-side-comment-interval'} )
26384 # or there is an existing comment to check
26385 || ( $have_side_comment
26386 && $rOpts->{'closing-side-comment-warnings'} )
26389 # .. and if this is one of the types of interest
26390 && $block_type_to_go[$i_terminal] =~
26391 /$closing_side_comment_list_pattern/
26393 # .. but not an anonymous sub
26394 # These are not normally of interest, and their closing braces are
26395 # often followed by commas or semicolons anyway. This also avoids
26396 # possible erratic output due to line numbering inconsistencies
26397 # in the cases where their closing braces terminate a line.
26398 && $block_type_to_go[$i_terminal] ne 'sub'
26400 # ..and the corresponding opening brace must is not in this batch
26401 # (because we do not need to tag one-line blocks, although this
26402 # should also be caught with a positive -csci value)
26403 && $mate_index_to_go[$i_terminal] < 0
26408 # this is the last token (line doesn't have a side comment)
26409 !$have_side_comment
26411 # or the old side comment is a closing side comment
26412 || $tokens_to_go[$max_index_to_go] =~
26413 /$closing_side_comment_prefix_pattern/
26418 # then make the closing side comment text
26419 if ($block_label) { $block_label .= SPACE }
26421 "$rOpts->{'closing-side-comment-prefix'} $block_label$block_type_to_go[$i_terminal]";
26423 # append any extra descriptive text collected above
26424 if ( $i_block_leading_text == $i_terminal ) {
26425 $token .= $block_leading_text;
26428 $token = balance_csc_text($token)
26429 if $rOpts->{'closing-side-comments-balanced'};
26431 $token =~ s/\s*$//; # trim any trailing whitespace
26433 # handle case of existing closing side comment
26434 if ($have_side_comment) {
26436 # warn if requested and tokens differ significantly
26437 if ( $rOpts->{'closing-side-comment-warnings'} ) {
26438 my $old_csc = $tokens_to_go[$max_index_to_go];
26439 my $new_csc = $token;
26440 $new_csc =~ s/\s+//g; # trim all whitespace
26441 $old_csc =~ s/\s+//g; # trim all whitespace
26442 $new_csc =~ s/[\]\)\}\s]*$//; # trim trailing structures
26443 $old_csc =~ s/[\]\)\}\s]*$//; # trim trailing structures
26444 $new_csc =~ s/(\.\.\.)$//; # trim trailing '...'
26445 my $new_trailing_dots = $1;
26446 $old_csc =~ s/(\.\.\.)\s*$//; # trim trailing '...'
26448 # Patch to handle multiple closing side comments at
26449 # else and elsif's. These have become too complicated
26450 # to check, so if we see an indication of
26451 # '[ if' or '[ # elsif', then assume they were made
26453 if ( $block_type_to_go[$i_terminal] eq 'else' ) {
26454 if ( $old_csc =~ /\[\s*elsif/ ) { $old_csc = $new_csc }
26456 elsif ( $block_type_to_go[$i_terminal] eq 'elsif' ) {
26457 if ( $old_csc =~ /\[\s*if/ ) { $old_csc = $new_csc }
26460 # if old comment is contained in new comment,
26461 # only compare the common part.
26462 if ( length($new_csc) > length($old_csc) ) {
26463 $new_csc = substr( $new_csc, 0, length($old_csc) );
26466 # if the new comment is shorter and has been limited,
26467 # only compare the common part.
26468 if ( length($new_csc) < length($old_csc)
26469 && $new_trailing_dots )
26471 $old_csc = substr( $old_csc, 0, length($new_csc) );
26474 # any remaining difference?
26475 if ( $new_csc ne $old_csc ) {
26477 # just leave the old comment if we are below the threshold
26478 # for creating side comments
26479 if ( $block_line_count <
26480 $rOpts->{'closing-side-comment-interval'} )
26485 # otherwise we'll make a note of it
26489 "perltidy -cscw replaced: $tokens_to_go[$max_index_to_go]\n"
26492 # save the old side comment in a new trailing block
26494 my $timestamp = EMPTY_STRING;
26495 if ( $rOpts->{'timestamp'} ) {
26496 my ( $day, $month, $year ) = (localtime)[ 3, 4, 5 ];
26499 $timestamp = "$year-$month-$day";
26501 $cscw_block_comment =
26502 "## perltidy -cscw $timestamp: $tokens_to_go[$max_index_to_go]";
26503 ## "## perltidy -cscw $year-$month-$day: $tokens_to_go[$max_index_to_go]";
26508 # No differences.. we can safely delete old comment if we
26509 # are below the threshold
26510 if ( $block_line_count <
26511 $rOpts->{'closing-side-comment-interval'} )
26513 # Since the line breaks have already been set, we have
26514 # to remove the token from the _to_go array and also
26515 # from the line range (this fixes issue c081).
26516 # Note that we can only get here if -cscw has been set
26517 # because otherwise the old comment is already deleted.
26519 my $ibeg = $ri_first->[-1];
26520 my $iend = $ri_last->[-1];
26522 && $iend == $max_index_to_go
26523 && $types_to_go[$max_index_to_go] eq '#' )
26526 $max_index_to_go--;
26528 && $types_to_go[$max_index_to_go] eq 'b' )
26531 $max_index_to_go--;
26533 $ri_last->[-1] = $iend;
26539 # switch to the new csc (unless we deleted it!)
26542 my $len_tok = length($token); # NOTE: length no longer important
26544 $len_tok - $token_lengths_to_go[$max_index_to_go];
26546 $tokens_to_go[$max_index_to_go] = $token;
26547 $token_lengths_to_go[$max_index_to_go] = $len_tok;
26548 my $K = $K_to_go[$max_index_to_go];
26549 $rLL->[$K]->[_TOKEN_] = $token;
26550 $rLL->[$K]->[_TOKEN_LENGTH_] = $len_tok;
26551 $summed_lengths_to_go[ $max_index_to_go + 1 ] += $added_len;
26555 # handle case of NO existing closing side comment
26558 # To avoid inserting a new token in the token arrays, we
26559 # will just return the new side comment so that it can be
26560 # inserted just before it is needed in the call to the
26561 # vertical aligner.
26562 $closing_side_comment = $token;
26565 return ( $closing_side_comment, $cscw_block_comment );
26566 } ## end sub add_closing_side_comment
26568 ############################
26569 # CODE SECTION 15: Summarize
26570 ############################
26574 # This is the last routine called when a file is formatted.
26575 # Flush buffer and write any informative messages
26579 my $file_writer_object = $self->[_file_writer_object_];
26580 $file_writer_object->decrement_output_line_number()
26581 ; # fix up line number since it was incremented
26582 we_are_at_the_last_line();
26584 my $max_depth = $self->[_maximum_BLOCK_level_];
26585 my $at_line = $self->[_maximum_BLOCK_level_at_line_];
26586 write_logfile_entry(
26587 "Maximum leading structural depth is $max_depth in input at line $at_line\n"
26590 my $added_semicolon_count = $self->[_added_semicolon_count_];
26591 my $first_added_semicolon_at = $self->[_first_added_semicolon_at_];
26592 my $last_added_semicolon_at = $self->[_last_added_semicolon_at_];
26594 if ( $added_semicolon_count > 0 ) {
26595 my $first = ( $added_semicolon_count > 1 ) ? "First" : EMPTY_STRING;
26597 ( $added_semicolon_count > 1 ) ? "semicolons were" : "semicolon was";
26598 write_logfile_entry("$added_semicolon_count $what added:\n");
26599 write_logfile_entry(
26600 " $first at input line $first_added_semicolon_at\n");
26602 if ( $added_semicolon_count > 1 ) {
26603 write_logfile_entry(
26604 " Last at input line $last_added_semicolon_at\n");
26606 write_logfile_entry(" (Use -nasc to prevent semicolon addition)\n");
26607 write_logfile_entry("\n");
26610 my $deleted_semicolon_count = $self->[_deleted_semicolon_count_];
26611 my $first_deleted_semicolon_at = $self->[_first_deleted_semicolon_at_];
26612 my $last_deleted_semicolon_at = $self->[_last_deleted_semicolon_at_];
26613 if ( $deleted_semicolon_count > 0 ) {
26614 my $first = ( $deleted_semicolon_count > 1 ) ? "First" : EMPTY_STRING;
26616 ( $deleted_semicolon_count > 1 )
26617 ? "semicolons were"
26619 write_logfile_entry(
26620 "$deleted_semicolon_count unnecessary $what deleted:\n");
26621 write_logfile_entry(
26622 " $first at input line $first_deleted_semicolon_at\n");
26624 if ( $deleted_semicolon_count > 1 ) {
26625 write_logfile_entry(
26626 " Last at input line $last_deleted_semicolon_at\n");
26628 write_logfile_entry(" (Use -ndsm to prevent semicolon deletion)\n");
26629 write_logfile_entry("\n");
26632 my $embedded_tab_count = $self->[_embedded_tab_count_];
26633 my $first_embedded_tab_at = $self->[_first_embedded_tab_at_];
26634 my $last_embedded_tab_at = $self->[_last_embedded_tab_at_];
26635 if ( $embedded_tab_count > 0 ) {
26636 my $first = ( $embedded_tab_count > 1 ) ? "First" : EMPTY_STRING;
26638 ( $embedded_tab_count > 1 )
26639 ? "quotes or patterns"
26640 : "quote or pattern";
26641 write_logfile_entry("$embedded_tab_count $what had embedded tabs:\n");
26642 write_logfile_entry(
26643 "This means the display of this script could vary with device or software\n"
26645 write_logfile_entry(" $first at input line $first_embedded_tab_at\n");
26647 if ( $embedded_tab_count > 1 ) {
26648 write_logfile_entry(
26649 " Last at input line $last_embedded_tab_at\n");
26651 write_logfile_entry("\n");
26654 my $first_tabbing_disagreement = $self->[_first_tabbing_disagreement_];
26655 my $last_tabbing_disagreement = $self->[_last_tabbing_disagreement_];
26656 my $tabbing_disagreement_count = $self->[_tabbing_disagreement_count_];
26657 my $in_tabbing_disagreement = $self->[_in_tabbing_disagreement_];
26659 if ($first_tabbing_disagreement) {
26660 write_logfile_entry(
26661 "First indentation disagreement seen at input line $first_tabbing_disagreement\n"
26665 my $first_btd = $self->[_first_brace_tabbing_disagreement_];
26668 "First closing brace indentation disagreement started at input line $first_btd\n";
26669 write_logfile_entry($msg);
26671 # leave a hint in the .ERR file if there was a brace error
26672 if ( get_saw_brace_error() ) { warning("NOTE: $msg") }
26675 my $in_btd = $self->[_in_brace_tabbing_disagreement_];
26678 "Ending with brace indentation disagreement which started at input line $in_btd\n";
26679 write_logfile_entry($msg);
26681 # leave a hint in the .ERR file if there was a brace error
26682 if ( get_saw_brace_error() ) { warning("NOTE: $msg") }
26685 if ($in_tabbing_disagreement) {
26687 "Ending with indentation disagreement which started at input line $in_tabbing_disagreement\n";
26688 write_logfile_entry($msg);
26692 if ($last_tabbing_disagreement) {
26694 write_logfile_entry(
26695 "Last indentation disagreement seen at input line $last_tabbing_disagreement\n"
26699 write_logfile_entry("No indentation disagreement seen\n");
26703 if ($first_tabbing_disagreement) {
26704 write_logfile_entry(
26705 "Note: Indentation disagreement detection is not accurate for outdenting and -lp.\n"
26708 write_logfile_entry("\n");
26710 my $vao = $self->[_vertical_aligner_object_];
26711 $vao->report_anything_unusual();
26713 $file_writer_object->report_line_length_errors();
26715 $self->[_converged_] = $file_writer_object->get_convergence_check()
26716 || $rOpts->{'indent-only'};
26719 } ## end sub wrapup
26721 } ## end package Perl::Tidy::Formatter