1 ####################################################################
3 # The Perl::Tidy::Formatter package adds indentation, whitespace, and
4 # line breaks to the token stream
6 #####################################################################
9 # CODE SECTION 1: Preliminary code, global definitions and sub new
11 # CODE SECTION 2: Some Basic Utilities
12 # CODE SECTION 3: Check and process options
14 # CODE SECTION 4: Receive lines from the tokenizer
16 # CODE SECTION 5: Pre-process the entire file
17 # sub finish_formatting
18 # CODE SECTION 6: Process line-by-line
19 # sub process_all_lines
20 # CODE SECTION 7: Process lines of code
21 # process_line_of_CODE
22 # CODE SECTION 8: Utilities for setting breakpoints
23 # sub set_forced_breakpoint
24 # CODE SECTION 9: Process batches of code
25 # sub grind_batch_of_CODE
26 # CODE SECTION 10: Code to break long statements
27 # sub break_long_lines
28 # CODE SECTION 11: Code to break long lists
30 # CODE SECTION 12: Code for setting indentation
31 # CODE SECTION 13: Preparing batch of lines for vertical alignment
32 # sub convey_batch_to_vertical_aligner
33 # CODE SECTION 14: Code for creating closing side comments
34 # sub add_closing_side_comment
35 # CODE SECTION 15: Summarize
38 #######################################################################
39 # CODE SECTION 1: Preliminary code and global definitions up to sub new
40 #######################################################################
42 package Perl::Tidy::Formatter;
46 # DEVEL_MODE gets switched on during automated testing for extra checking
47 use constant DEVEL_MODE => 0;
48 use constant EMPTY_STRING => q{};
49 use constant SPACE => q{ };
51 { #<<< A non-indenting brace to contain all lexical variables
54 use English qw( -no_match_vars );
55 use List::Util qw( min max ); # min, max are in Perl 5.8
56 our $VERSION = '20221112';
58 # The Tokenizer will be loaded with the Formatter
59 ##use Perl::Tidy::Tokenizer; # for is_keyword()
63 # Catch any undefined sub calls so that we are sure to get
64 # some diagnostic information. This sub should never be called
65 # except for a programming error.
67 return if ( $AUTOLOAD =~ /\bDESTROY$/ );
68 my ( $pkg, $fname, $lno ) = caller();
69 my $my_package = __PACKAGE__;
71 ======================================================================
72 Error detected in package '$my_package', version $VERSION
73 Received unexpected AUTOLOAD call for sub '$AUTOLOAD'
74 Called from package: '$pkg'
75 Called from File '$fname' at line '$lno'
76 This error is probably due to a recent programming change
77 ======================================================================
84 $self->_decrement_count();
90 Perl::Tidy::Die($msg);
91 croak "unexpected return from Perl::Tidy::Die";
96 Perl::Tidy::Warn($msg);
103 # This routine is called for errors that really should not occur
104 # except if there has been a bug introduced by a recent program change.
105 # Please add comments at calls to Fault to explain why the call
106 # should not occur, and where to look to fix it.
107 my ( $package0, $filename0, $line0, $subroutine0 ) = caller(0);
108 my ( $package1, $filename1, $line1, $subroutine1 ) = caller(1);
109 my ( $package2, $filename2, $line2, $subroutine2 ) = caller(2);
110 my $input_stream_name = get_input_stream_name();
113 ==============================================================================
114 While operating on input stream with name: '$input_stream_name'
115 A fault was detected at line $line0 of sub '$subroutine1'
117 which was called from line $line1 of sub '$subroutine2'
119 This is probably an error introduced by a recent programming change.
120 Perl::Tidy::Formatter.pm reports VERSION='$VERSION'.
121 ==============================================================================
124 # We shouldn't get here, but this return is to keep Perl-Critic from
132 # This is the same as Fault except that it calls Warn instead of Die
134 my ( $package0, $filename0, $line0, $subroutine0 ) = caller(0);
135 my ( $package1, $filename1, $line1, $subroutine1 ) = caller(1);
136 my ( $package2, $filename2, $line2, $subroutine2 ) = caller(2);
137 my $input_stream_name = get_input_stream_name();
140 ==============================================================================
141 While operating on input stream with name: '$input_stream_name'
142 A fault was detected at line $line0 of sub '$subroutine1'
144 which was called from line $line1 of sub '$subroutine2'
146 This is probably an error introduced by a recent programming change.
147 Perl::Tidy::Formatter.pm reports VERSION='$VERSION'.
148 ==============================================================================
152 } ## end sub Fault_Warn
156 Perl::Tidy::Exit($msg);
157 croak "unexpected return from Perl::Tidy::Exit";
160 # Global variables ...
163 #-----------------------------------------------------------------
164 # Section 1: Global variables which are either always constant or
165 # are constant after being configured by user-supplied
166 # parameters. They remain constant as a file is being processed.
167 #-----------------------------------------------------------------
169 # user parameters and shortcuts
172 $rOpts_add_whitespace,
173 $rOpts_add_trailing_commas,
174 $rOpts_blank_lines_after_opening_block,
175 $rOpts_block_brace_tightness,
176 $rOpts_block_brace_vertical_tightness,
177 $rOpts_break_after_labels,
178 $rOpts_break_at_old_attribute_breakpoints,
179 $rOpts_break_at_old_comma_breakpoints,
180 $rOpts_break_at_old_keyword_breakpoints,
181 $rOpts_break_at_old_logical_breakpoints,
182 $rOpts_break_at_old_semicolon_breakpoints,
183 $rOpts_break_at_old_ternary_breakpoints,
184 $rOpts_break_open_compact_parens,
185 $rOpts_closing_side_comments,
186 $rOpts_closing_side_comment_else_flag,
187 $rOpts_closing_side_comment_maximum_text,
188 $rOpts_comma_arrow_breakpoints,
189 $rOpts_continuation_indentation,
190 $rOpts_delete_closing_side_comments,
191 $rOpts_delete_old_whitespace,
192 $rOpts_delete_side_comments,
193 $rOpts_delete_trailing_commas,
194 $rOpts_delete_weld_interfering_commas,
195 $rOpts_extended_continuation_indentation,
196 $rOpts_format_skipping,
197 $rOpts_freeze_whitespace,
198 $rOpts_function_paren_vertical_alignment,
199 $rOpts_fuzzy_line_length,
200 $rOpts_ignore_old_breakpoints,
201 $rOpts_ignore_side_comment_lengths,
202 $rOpts_indent_closing_brace,
203 $rOpts_indent_columns,
205 $rOpts_keep_interior_semicolons,
206 $rOpts_line_up_parentheses,
207 $rOpts_logical_padding,
208 $rOpts_maximum_consecutive_blank_lines,
209 $rOpts_maximum_fields_per_table,
210 $rOpts_maximum_line_length,
211 $rOpts_one_line_block_semicolons,
212 $rOpts_opening_brace_always_on_right,
213 $rOpts_outdent_keywords,
214 $rOpts_outdent_labels,
215 $rOpts_outdent_long_comments,
216 $rOpts_outdent_long_quotes,
217 $rOpts_outdent_static_block_comments,
219 $rOpts_short_concatenation_item_length,
220 $rOpts_space_prototype_paren,
221 $rOpts_stack_closing_block_brace,
222 $rOpts_static_block_comments,
223 $rOpts_sub_alias_list,
224 $rOpts_tee_block_comments,
226 $rOpts_tee_side_comments,
227 $rOpts_variable_maximum_line_length,
230 $rOpts_valign_side_comments,
231 $rOpts_whitespace_cycle,
232 $rOpts_extended_line_up_parentheses,
234 # Static hashes initialized in a BEGIN block
237 %is_if_unless_and_or_last_next_redo_return,
238 %is_if_elsif_else_unless_while_until_for_foreach,
239 %is_if_unless_while_until_for_foreach,
240 %is_last_next_redo_return,
244 %is_if_unless_elsif_else,
248 %is_block_without_semicolon,
249 %ok_to_add_semicolon_for_block_type,
255 %is_equal_or_fat_comma,
257 %is_opening_sequence_token,
258 %is_closing_sequence_token,
259 %is_container_label_type,
260 %is_die_confess_croak_warn,
265 # Initialized in check_options. These are constants and could
266 # just as well be initialized in a BEGIN block.
268 %is_anon_sub_brace_follower,
269 %is_anon_sub_1_brace_follower,
270 %is_other_brace_follower,
272 # Initialized and re-initialized in sub initialize_grep_and_friends;
273 # These can be modified by grep-alias-list
275 %is_sort_map_grep_eval,
276 %is_sort_map_grep_eval_do,
278 %is_keyword_returning_list,
281 # Initialized in sub initialize_whitespace_hashes;
282 # Some can be modified according to user parameters.
287 # Configured in sub initialize_bond_strength_hashes
288 %right_bond_strength,
291 # Hashes for -kbb=s and -kba=s
292 %keep_break_before_type,
293 %keep_break_after_type,
295 # Initialized in check_options, modified by prepare_cuddled_block_types:
296 %want_one_line_block,
298 # Initialized in sub prepare_cuddled_block_types
299 $rcuddled_block_types,
301 # Initialized and configured in check_options
303 %keyword_paren_inner_tightness,
307 %break_before_container_types,
308 %container_indentation_options,
310 %space_after_keyword,
315 %opening_vertical_tightness,
316 %closing_vertical_tightness,
317 %closing_token_indentation,
318 $some_closing_token_indentation,
320 %opening_token_right,
321 %stack_opening_token,
322 %stack_closing_token,
324 %weld_nested_exclusion_rules,
325 %weld_fat_comma_rules,
326 %line_up_parentheses_control_hash,
327 $line_up_parentheses_control_is_lxpl,
329 %trailing_comma_rules,
330 $controlled_comma_style,
332 # regex patterns for text identification.
333 # Most are initialized in a sub make_**_pattern during configuration.
334 # Most can be configured by user parameters.
337 $static_block_comment_pattern,
338 $static_side_comment_pattern,
339 $format_skipping_pattern_begin,
340 $format_skipping_pattern_end,
341 $non_indenting_brace_pattern,
342 $bl_exclusion_pattern,
344 $bli_exclusion_pattern,
346 $block_brace_vertical_tightness_pattern,
347 $blank_lines_after_opening_block_pattern,
348 $blank_lines_before_closing_block_pattern,
349 $keyword_group_list_pattern,
350 $keyword_group_list_comment_pattern,
351 $closing_side_comment_prefix_pattern,
352 $closing_side_comment_list_pattern,
354 # Table to efficiently find indentation and max line length
356 @maximum_line_length_at_level,
357 @maximum_text_length_at_level,
362 # Total number of sequence items in a weld, for quick checks
365 #--------------------------------------------------------
366 # Section 2: Work arrays for the current batch of tokens.
367 #--------------------------------------------------------
369 # These are re-initialized for each batch of code
370 # in sub initialize_batch_variables.
373 @type_sequence_to_go,
374 @forced_breakpoint_to_go,
375 @token_lengths_to_go,
376 @summed_lengths_to_go,
378 @leading_spaces_to_go,
379 @reduced_spaces_to_go,
382 @nesting_depth_to_go,
384 @old_breakpoint_to_go,
392 # forced breakpoint variables associated with each batch of code
393 $forced_breakpoint_count,
394 $forced_breakpoint_undo_count,
395 $index_max_forced_break,
400 # Index names for token variables.
401 # Do not combine with other BEGIN blocks (c101).
405 _CUMULATIVE_LENGTH_ => $i++,
406 _LINE_INDEX_ => $i++,
407 _KNEXT_SEQ_ITEM_ => $i++,
410 _TOKEN_LENGTH_ => $i++,
412 _TYPE_SEQUENCE_ => $i++,
414 # Number of token variables; must be last in list:
421 # Index names for $self variables.
422 # Do not combine with other BEGIN blocks (c101).
428 _rdepth_of_opening_seqno_ => $i++,
430 _Iss_opening_ => $i++,
431 _Iss_closing_ => $i++,
432 _rblock_type_of_seqno_ => $i++,
433 _ris_asub_block_ => $i++,
434 _ris_sub_block_ => $i++,
435 _K_opening_container_ => $i++,
436 _K_closing_container_ => $i++,
437 _K_opening_ternary_ => $i++,
438 _K_closing_ternary_ => $i++,
439 _K_first_seq_item_ => $i++,
440 _rtype_count_by_seqno_ => $i++,
441 _ris_function_call_paren_ => $i++,
442 _rlec_count_by_seqno_ => $i++,
443 _ris_broken_container_ => $i++,
444 _ris_permanently_broken_ => $i++,
445 _rblank_and_comment_count_ => $i++,
447 _rhas_broken_list_ => $i++,
448 _rhas_broken_list_with_lec_ => $i++,
449 _rfirst_comma_line_index_ => $i++,
450 _rhas_code_block_ => $i++,
451 _rhas_broken_code_block_ => $i++,
452 _rhas_ternary_ => $i++,
453 _ris_excluded_lp_container_ => $i++,
454 _rlp_object_by_seqno_ => $i++,
455 _rwant_reduced_ci_ => $i++,
456 _rno_xci_by_seqno_ => $i++,
457 _rbrace_left_ => $i++,
458 _ris_bli_container_ => $i++,
459 _rparent_of_seqno_ => $i++,
460 _rchildren_of_seqno_ => $i++,
461 _ris_list_by_seqno_ => $i++,
462 _ris_cuddled_closing_brace_ => $i++,
463 _rbreak_container_ => $i++,
464 _rshort_nested_ => $i++,
465 _length_function_ => $i++,
466 _is_encoded_data_ => $i++,
468 _sink_object_ => $i++,
469 _file_writer_object_ => $i++,
470 _vertical_aligner_object_ => $i++,
471 _logger_object_ => $i++,
472 _radjusted_levels_ => $i++,
473 _this_batch_ => $i++,
475 _last_output_short_opening_token_ => $i++,
477 _last_line_leading_type_ => $i++,
478 _last_line_leading_level_ => $i++,
479 _last_last_line_leading_level_ => $i++,
481 _added_semicolon_count_ => $i++,
482 _first_added_semicolon_at_ => $i++,
483 _last_added_semicolon_at_ => $i++,
485 _deleted_semicolon_count_ => $i++,
486 _first_deleted_semicolon_at_ => $i++,
487 _last_deleted_semicolon_at_ => $i++,
489 _embedded_tab_count_ => $i++,
490 _first_embedded_tab_at_ => $i++,
491 _last_embedded_tab_at_ => $i++,
493 _first_tabbing_disagreement_ => $i++,
494 _last_tabbing_disagreement_ => $i++,
495 _tabbing_disagreement_count_ => $i++,
496 _in_tabbing_disagreement_ => $i++,
497 _first_brace_tabbing_disagreement_ => $i++,
498 _in_brace_tabbing_disagreement_ => $i++,
500 _saw_VERSION_in_this_file_ => $i++,
501 _saw_END_or_DATA_ => $i++,
503 _rK_weld_left_ => $i++,
504 _rK_weld_right_ => $i++,
505 _rweld_len_right_at_K_ => $i++,
507 _rspecial_side_comment_type_ => $i++,
509 _rseqno_controlling_my_ci_ => $i++,
510 _ris_seqno_controlling_ci_ => $i++,
511 _save_logfile_ => $i++,
512 _maximum_level_ => $i++,
513 _maximum_level_at_line_ => $i++,
514 _maximum_BLOCK_level_ => $i++,
515 _maximum_BLOCK_level_at_line_ => $i++,
517 _rKrange_code_without_comments_ => $i++,
518 _rbreak_before_Kfirst_ => $i++,
519 _rbreak_after_Klast_ => $i++,
520 _rwant_container_open_ => $i++,
523 _rstarting_multiline_qw_seqno_by_K_ => $i++,
524 _rending_multiline_qw_seqno_by_K_ => $i++,
525 _rKrange_multiline_qw_by_seqno_ => $i++,
526 _rmultiline_qw_has_extra_level_ => $i++,
528 _rcollapsed_length_by_seqno_ => $i++,
529 _rbreak_before_container_by_seqno_ => $i++,
530 _ris_essential_old_breakpoint_ => $i++,
531 _roverride_cab3_ => $i++,
532 _ris_assigned_structure_ => $i++,
533 _ris_short_broken_eval_block_ => $i++,
534 _ris_bare_trailing_comma_by_seqno_ => $i++,
536 _rseqno_non_indenting_brace_by_ix_ => $i++,
537 _rmax_vertical_tightness_ => $i++,
539 _no_vertical_tightness_flags_ => $i++,
541 _LAST_SELF_INDEX_ => $i - 1,
547 # Index names for batch variables.
548 # Do not combine with other BEGIN blocks (c101).
549 # These are stored in _this_batch_, which is a sub-array of $self.
552 _starting_in_quote_ => $i++,
553 _ending_in_quote_ => $i++,
554 _is_static_block_comment_ => $i++,
557 _do_not_pad_ => $i++,
558 _peak_batch_size_ => $i++,
559 _batch_count_ => $i++,
560 _rix_seqno_controlling_ci_ => $i++,
561 _batch_CODE_type_ => $i++,
562 _ri_starting_one_line_block_ => $i++,
563 _runmatched_opening_indexes_ => $i++,
569 # Sequence number assigned to the root of sequence tree.
570 # The minimum of the actual sequences numbers is 4, so we can use 1
571 use constant SEQ_ROOT => 1;
573 # Codes for insertion and deletion of blanks
574 use constant DELETE => 0;
575 use constant STABLE => 1;
576 use constant INSERT => 2;
579 use constant WS_YES => 1;
580 use constant WS_OPTIONAL => 0;
581 use constant WS_NO => -1;
583 # Token bond strengths.
584 use constant NO_BREAK => 10_000;
585 use constant VERY_STRONG => 100;
586 use constant STRONG => 2.1;
587 use constant NOMINAL => 1.1;
588 use constant WEAK => 0.8;
589 use constant VERY_WEAK => 0.55;
591 # values for testing indexes in output array
592 use constant UNDEFINED_INDEX => -1;
594 # Maximum number of little messages; probably need not be changed.
595 use constant MAX_NAG_MESSAGES => 6;
597 # This is the decimal range of printable characters in ASCII. It is used to
598 # make quick preliminary checks before resorting to using a regex.
599 use constant ORD_PRINTABLE_MIN => 33;
600 use constant ORD_PRINTABLE_MAX => 126;
602 # Initialize constant hashes ...
606 = **= += *= &= <<= &&=
611 @is_assignment{@q} = (1) x scalar(@q);
613 # a hash needed by break_lists for efficiency:
614 push @q, qw{ ; < > ~ f };
615 @is_non_list_type{@q} = (1) x scalar(@q);
617 @q = qw(is if unless and or err last next redo return);
618 @is_if_unless_and_or_last_next_redo_return{@q} = (1) x scalar(@q);
620 # These block types may have text between the keyword and opening
621 # curly. Note: 'else' does not, but must be included to allow trailing
622 # if/elsif text to be appended.
623 # patch for SWITCH/CASE: added 'case' and 'when'
624 @q = qw(if elsif else unless while until for foreach case when catch);
625 @is_if_elsif_else_unless_while_until_for_foreach{@q} =
628 @q = qw(if unless while until for foreach);
629 @is_if_unless_while_until_for_foreach{@q} =
632 @q = qw(last next redo return);
633 @is_last_next_redo_return{@q} = (1) x scalar(@q);
635 # Map related block names into a common name to allow vertical alignment
636 # used by sub make_alignment_patterns. Note: this is normally unchanged,
637 # but it contains 'grep' and can be re-initialized in
638 # sub initialize_grep_and_friends in a testing mode.
651 @is_if_unless{@q} = (1) x scalar(@q);
654 @is_if_elsif{@q} = (1) x scalar(@q);
656 @q = qw(if unless elsif);
657 @is_if_unless_elsif{@q} = (1) x scalar(@q);
659 @q = qw(if unless elsif else);
660 @is_if_unless_elsif_else{@q} = (1) x scalar(@q);
663 @is_elsif_else{@q} = (1) x scalar(@q);
666 @is_and_or{@q} = (1) x scalar(@q);
668 # Identify certain operators which often occur in chains.
669 # Note: the minus (-) causes a side effect of padding of the first line in
670 # something like this (by sub set_logical_padding):
671 # Checkbutton => 'Transmission checked',
672 # -variable => \$TRANS
673 # This usually improves appearance so it seems ok.
674 @q = qw(&& || and or : ? . + - * /);
675 @is_chain_operator{@q} = (1) x scalar(@q);
677 # Operators that the user can request break before or after.
678 # Note that some are keywords
679 @all_operators = qw(% + - * / x != == >= <= =~ !~ < > | &
680 = **= += *= &= <<= &&= -= /= |= >>= ||= //= .= %= ^= x=
681 . : ? && || and or err xor
684 # We can remove semicolons after blocks preceded by these keywords
686 qw(BEGIN END CHECK INIT AUTOLOAD DESTROY UNITCHECK continue if elsif else
687 unless while until for foreach given when default);
688 @is_block_without_semicolon{@q} = (1) x scalar(@q);
690 # We will allow semicolons to be added within these block types
691 # as well as sub and package blocks.
693 # 1. Note that these keywords are omitted:
694 # switch case given when default sort map grep
695 # 2. It is also ok to add for sub and package blocks and a labeled block
696 # 3. But not okay for other perltidy types including:
698 # 4. Test files: blktype.t, blktype1.t, semicolon.t
700 qw( BEGIN END CHECK INIT AUTOLOAD DESTROY UNITCHECK continue if elsif else
701 unless do while until eval for foreach );
702 @ok_to_add_semicolon_for_block_type{@q} = (1) x scalar(@q);
704 # 'L' is token for opening { at hash key
706 @is_opening_type{@q} = (1) x scalar(@q);
708 # 'R' is token for closing } at hash key
710 @is_closing_type{@q} = (1) x scalar(@q);
713 @is_opening_token{@q} = (1) x scalar(@q);
716 @is_closing_token{@q} = (1) x scalar(@q);
719 @is_ternary{@q} = (1) x scalar(@q);
722 @is_opening_sequence_token{@q} = (1) x scalar(@q);
725 @is_closing_sequence_token{@q} = (1) x scalar(@q);
727 # a hash needed by sub break_lists for labeling containers
728 @q = qw( k => && || ? : . );
729 @is_container_label_type{@q} = (1) x scalar(@q);
731 @q = qw( die confess croak warn );
732 @is_die_confess_croak_warn{@q} = (1) x scalar(@q);
734 @q = qw( my our local );
735 @is_my_our_local{@q} = (1) x scalar(@q);
737 # Braces -bbht etc must follow these. Note: experimentation with
738 # including a simple comma shows that it adds little and can lead
739 # to poor formatting in complex lists.
741 @is_equal_or_fat_comma{@q} = (1) x scalar(@q);
745 @is_counted_type{@q} = (1) x scalar(@q);
749 { ## begin closure to count instances
751 # methods to count instances
753 sub get_count { return $_count; }
754 sub _increment_count { return ++$_count }
755 sub _decrement_count { return --$_count }
756 } ## end closure to count instances
760 my ( $class, @args ) = @_;
762 # we are given an object with a write_line() method to take lines
764 sink_object => undef,
765 diagnostics_object => undef,
766 logger_object => undef,
767 length_function => sub { return length( $_[0] ) },
768 is_encoded_data => EMPTY_STRING,
771 my %args = ( %defaults, @args );
773 my $length_function = $args{length_function};
774 my $is_encoded_data = $args{is_encoded_data};
775 my $fh_tee = $args{fh_tee};
776 my $logger_object = $args{logger_object};
777 my $diagnostics_object = $args{diagnostics_object};
779 # we create another object with a get_line() and peek_ahead() method
780 my $sink_object = $args{sink_object};
781 my $file_writer_object =
782 Perl::Tidy::FileWriter->new( $sink_object, $rOpts, $logger_object );
784 # initialize closure variables...
785 set_logger_object($logger_object);
786 set_diagnostics_object($diagnostics_object);
787 initialize_lp_vars();
788 initialize_csc_vars();
789 initialize_break_lists();
790 initialize_undo_ci();
791 initialize_process_line_of_CODE();
792 initialize_grind_batch_of_CODE();
793 initialize_get_final_indentation();
794 initialize_postponed_breakpoint();
795 initialize_batch_variables();
796 initialize_write_line();
798 my $vertical_aligner_object = Perl::Tidy::VerticalAligner->new(
800 file_writer_object => $file_writer_object,
801 logger_object => $logger_object,
802 diagnostics_object => $diagnostics_object,
803 length_function => $length_function,
806 write_logfile_entry("\nStarting tokenization pass...\n");
808 if ( $rOpts->{'entab-leading-whitespace'} ) {
810 "Leading whitespace will be entabbed with $rOpts->{'entab-leading-whitespace'} spaces per tab\n"
813 elsif ( $rOpts->{'tabs'} ) {
814 write_logfile_entry("Indentation will be with a tab character\n");
818 "Indentation will be with $rOpts->{'indent-columns'} spaces\n");
821 # Initialize the $self array reference.
822 # To add an item, first add a constant index in the BEGIN block above.
825 # Basic data structures...
826 $self->[_rlines_] = []; # = ref to array of lines of the file
828 # 'rLL' = reference to the continuous liner array of all tokens in a file.
829 # 'LL' stands for 'Linked List'. Using a linked list was a disaster, but
830 # 'LL' stuck because it is easy to type. The 'rLL' array is updated
831 # by sub 'respace_tokens' during reformatting. The indexes in 'rLL' begin
832 # with '$K' by convention.
834 $self->[_Klimit_] = undef; # = maximum K index for rLL.
836 # Indexes into the rLL list
837 $self->[_K_opening_container_] = {};
838 $self->[_K_closing_container_] = {};
839 $self->[_K_opening_ternary_] = {};
840 $self->[_K_closing_ternary_] = {};
841 $self->[_K_first_seq_item_] = undef; # K of first token with a sequence #
843 # 'rSS' is the 'Signed Sequence' list, a continuous list of all sequence
844 # numbers with + or - indicating opening or closing. This list represents
845 # the entire container tree and is invariant under reformatting. It can be
846 # used to quickly travel through the tree. Indexes in the rSS array begin
847 # with '$I' by convention. The 'Iss' arrays give the indexes in this list
848 # of opening and closing sequence numbers.
850 $self->[_Iss_opening_] = [];
851 $self->[_Iss_closing_] = [];
853 # Arrays to help traverse the tree
854 $self->[_rdepth_of_opening_seqno_] = [];
855 $self->[_rblock_type_of_seqno_] = {};
856 $self->[_ris_asub_block_] = {};
857 $self->[_ris_sub_block_] = {};
859 # Mostly list characteristics and processing flags
860 $self->[_rtype_count_by_seqno_] = {};
861 $self->[_ris_function_call_paren_] = {};
862 $self->[_rlec_count_by_seqno_] = {};
863 $self->[_ris_broken_container_] = {};
864 $self->[_ris_permanently_broken_] = {};
865 $self->[_rblank_and_comment_count_] = {};
866 $self->[_rhas_list_] = {};
867 $self->[_rhas_broken_list_] = {};
868 $self->[_rhas_broken_list_with_lec_] = {};
869 $self->[_rfirst_comma_line_index_] = {};
870 $self->[_rhas_code_block_] = {};
871 $self->[_rhas_broken_code_block_] = {};
872 $self->[_rhas_ternary_] = {};
873 $self->[_ris_excluded_lp_container_] = {};
874 $self->[_rlp_object_by_seqno_] = {};
875 $self->[_rwant_reduced_ci_] = {};
876 $self->[_rno_xci_by_seqno_] = {};
877 $self->[_rbrace_left_] = {};
878 $self->[_ris_bli_container_] = {};
879 $self->[_rparent_of_seqno_] = {};
880 $self->[_rchildren_of_seqno_] = {};
881 $self->[_ris_list_by_seqno_] = {};
882 $self->[_ris_cuddled_closing_brace_] = {};
884 $self->[_rbreak_container_] = {}; # prevent one-line blocks
885 $self->[_rshort_nested_] = {}; # blocks not forced open
886 $self->[_length_function_] = $length_function;
887 $self->[_is_encoded_data_] = $is_encoded_data;
890 $self->[_fh_tee_] = $fh_tee;
891 $self->[_sink_object_] = $sink_object;
892 $self->[_file_writer_object_] = $file_writer_object;
893 $self->[_vertical_aligner_object_] = $vertical_aligner_object;
894 $self->[_logger_object_] = $logger_object;
896 # Reference to the batch being processed
897 $self->[_this_batch_] = [];
899 # Memory of processed text...
900 $self->[_last_last_line_leading_level_] = 0;
901 $self->[_last_line_leading_level_] = 0;
902 $self->[_last_line_leading_type_] = '#';
903 $self->[_last_output_short_opening_token_] = 0;
904 $self->[_added_semicolon_count_] = 0;
905 $self->[_first_added_semicolon_at_] = 0;
906 $self->[_last_added_semicolon_at_] = 0;
907 $self->[_deleted_semicolon_count_] = 0;
908 $self->[_first_deleted_semicolon_at_] = 0;
909 $self->[_last_deleted_semicolon_at_] = 0;
910 $self->[_embedded_tab_count_] = 0;
911 $self->[_first_embedded_tab_at_] = 0;
912 $self->[_last_embedded_tab_at_] = 0;
913 $self->[_first_tabbing_disagreement_] = 0;
914 $self->[_last_tabbing_disagreement_] = 0;
915 $self->[_tabbing_disagreement_count_] = 0;
916 $self->[_in_tabbing_disagreement_] = 0;
917 $self->[_saw_VERSION_in_this_file_] = !$rOpts->{'pass-version-line'};
918 $self->[_saw_END_or_DATA_] = 0;
919 $self->[_first_brace_tabbing_disagreement_] = undef;
920 $self->[_in_brace_tabbing_disagreement_] = undef;
922 # Hashes related to container welding...
923 $self->[_radjusted_levels_] = [];
925 # Weld data structures
926 $self->[_rK_weld_left_] = {};
927 $self->[_rK_weld_right_] = {};
928 $self->[_rweld_len_right_at_K_] = {};
931 $self->[_rseqno_controlling_my_ci_] = {};
932 $self->[_ris_seqno_controlling_ci_] = {};
934 $self->[_rspecial_side_comment_type_] = {};
935 $self->[_maximum_level_] = 0;
936 $self->[_maximum_level_at_line_] = 0;
937 $self->[_maximum_BLOCK_level_] = 0;
938 $self->[_maximum_BLOCK_level_at_line_] = 0;
940 $self->[_rKrange_code_without_comments_] = [];
941 $self->[_rbreak_before_Kfirst_] = {};
942 $self->[_rbreak_after_Klast_] = {};
943 $self->[_rwant_container_open_] = {};
944 $self->[_converged_] = 0;
947 $self->[_rstarting_multiline_qw_seqno_by_K_] = {};
948 $self->[_rending_multiline_qw_seqno_by_K_] = {};
949 $self->[_rKrange_multiline_qw_by_seqno_] = {};
950 $self->[_rmultiline_qw_has_extra_level_] = {};
952 $self->[_rcollapsed_length_by_seqno_] = {};
953 $self->[_rbreak_before_container_by_seqno_] = {};
954 $self->[_ris_essential_old_breakpoint_] = {};
955 $self->[_roverride_cab3_] = {};
956 $self->[_ris_assigned_structure_] = {};
957 $self->[_ris_short_broken_eval_block_] = {};
958 $self->[_ris_bare_trailing_comma_by_seqno_] = {};
960 $self->[_rseqno_non_indenting_brace_by_ix_] = {};
961 $self->[_rmax_vertical_tightness_] = {};
963 $self->[_no_vertical_tightness_flags_] = 0;
965 # This flag will be updated later by a call to get_save_logfile()
966 $self->[_save_logfile_] = defined($logger_object);
968 # Be sure all variables in $self have been initialized above. To find the
969 # correspondence of index numbers and array names, copy a list to a file
970 # and use the unix 'nl' command to number lines 1..
973 foreach ( 0 .. _LAST_SELF_INDEX_ ) {
974 if ( !exists( $self->[$_] ) ) {
975 push @non_existant, $_;
979 Fault("These indexes in self not initialized: (@non_existant)\n");
985 # Safety check..this is not a class yet
986 if ( _increment_count() > 1 ) {
988 "Attempt to create more than 1 object in $class, which is not a true class yet\n";
993 ######################################
994 # CODE SECTION 2: Some Basic Utilities
995 ######################################
999 # Verify that the rLL array has not been auto-vivified
1000 my ( $self, $msg ) = @_;
1001 my $rLL = $self->[_rLL_];
1002 my $Klimit = $self->[_Klimit_];
1004 if ( ( defined($Klimit) && $Klimit != $num - 1 )
1005 || ( !defined($Klimit) && $num > 0 ) )
1008 # This fault can occur if the array has been accessed for an index
1009 # greater than $Klimit, which is the last token index. Just accessing
1010 # the array above index $Klimit, not setting a value, can cause @rLL to
1011 # increase beyond $Klimit. If this occurs, the problem can be located
1012 # by making calls to this routine at different locations in
1013 # sub 'finish_formatting'.
1014 $Klimit = 'undef' if ( !defined($Klimit) );
1015 $msg = EMPTY_STRING unless $msg;
1016 Fault("$msg ERROR: rLL has num=$num but Klimit='$Klimit'\n");
1019 } ## end sub check_rLL
1022 my ( $rtest, $rvalid, $msg, $exact_match ) = @_;
1024 # Check the keys of a hash:
1025 # $rtest = ref to hash to test
1026 # $rvalid = ref to hash with valid keys
1028 # $msg = a message to write in case of error
1029 # $exact_match defines the type of check:
1030 # = false: test hash must not have unknown key
1031 # = true: test hash must have exactly same keys as known hash
1033 grep { !exists $rvalid->{$_} } keys %{$rtest};
1035 grep { !exists $rtest->{$_} } keys %{$rvalid};
1036 my $error = @unknown_keys;
1037 if ($exact_match) { $error ||= @missing_keys }
1039 local $LIST_SEPARATOR = ')(';
1040 my @expected_keys = sort keys %{$rvalid};
1041 @unknown_keys = sort @unknown_keys;
1043 ------------------------------------------------------------------------
1044 Program error detected checking hash keys
1046 Expected keys: (@expected_keys)
1047 Unknown key(s): (@unknown_keys)
1048 Missing key(s): (@missing_keys)
1049 ------------------------------------------------------------------------
1053 } ## end sub check_keys
1055 sub check_token_array {
1058 # Check for errors in the array of tokens. This is only called
1059 # when the DEVEL_MODE flag is set, so this Fault will only occur
1060 # during code development.
1061 my $rLL = $self->[_rLL_];
1062 foreach my $KK ( 0 .. @{$rLL} - 1 ) {
1063 my $nvars = @{ $rLL->[$KK] };
1064 if ( $nvars != _NVARS ) {
1066 my $type = $rLL->[$KK]->[_TYPE_];
1067 $type = '*' unless defined($type);
1069 # The number of variables per token node is _NVARS and was set when
1070 # the array indexes were generated. So if the number of variables
1071 # is different we have done something wrong, like not store all of
1072 # them in sub 'write_line' when they were received from the
1075 "number of vars for node $KK, type '$type', is $nvars but should be $NVARS"
1078 foreach my $var ( _TOKEN_, _TYPE_ ) {
1079 if ( !defined( $rLL->[$KK]->[$var] ) ) {
1080 my $iline = $rLL->[$KK]->[_LINE_INDEX_];
1082 # This is a simple check that each token has some basic
1083 # variables. In other words, that there are no holes in the
1084 # array of tokens. Sub 'write_line' pushes tokens into the
1085 # $rLL array, so this should guarantee no gaps.
1086 Fault("Undefined variable $var for K=$KK, line=$iline\n");
1091 } ## end sub check_token_array
1093 { ## begin closure check_line_hashes
1095 # This code checks that no autovivification occurs in the 'line' hash
1097 my %valid_line_hash;
1101 # These keys are defined for each line in the formatter
1102 # Each line must have exactly these quantities
1103 my @valid_line_keys = qw(
1106 _guessed_indentation_level
1113 _square_bracket_depth
1115 _ended_in_blank_token
1124 @valid_line_hash{@valid_line_keys} = (1) x scalar(@valid_line_keys);
1127 sub check_line_hashes {
1129 my $rlines = $self->[_rlines_];
1130 foreach my $rline ( @{$rlines} ) {
1131 my $iline = $rline->{_line_number};
1132 my $line_type = $rline->{_line_type};
1133 check_keys( $rline, \%valid_line_hash,
1134 "Checkpoint: line number =$iline, line_type=$line_type", 1 );
1137 } ## end sub check_line_hashes
1138 } ## end closure check_line_hashes
1140 { ## begin closure for logger routines
1143 # Called once per file to initialize the logger object
1144 sub set_logger_object {
1145 $logger_object = shift;
1149 sub get_logger_object {
1150 return $logger_object;
1153 sub get_input_stream_name {
1154 my $input_stream_name = EMPTY_STRING;
1155 if ($logger_object) {
1156 $input_stream_name = $logger_object->get_input_stream_name();
1158 return $input_stream_name;
1161 # interface to Perl::Tidy::Logger routines
1164 if ($logger_object) { $logger_object->warning($msg); }
1170 if ($logger_object) {
1171 $logger_object->complain($msg);
1176 sub write_logfile_entry {
1178 if ($logger_object) {
1179 $logger_object->write_logfile_entry(@msg);
1184 sub get_saw_brace_error {
1185 if ($logger_object) {
1186 return $logger_object->get_saw_brace_error();
1191 sub we_are_at_the_last_line {
1192 if ($logger_object) {
1193 $logger_object->we_are_at_the_last_line();
1198 } ## end closure for logger routines
1200 { ## begin closure for diagnostics routines
1201 my $diagnostics_object;
1203 # Called once per file to initialize the diagnostics object
1204 sub set_diagnostics_object {
1205 $diagnostics_object = shift;
1209 sub write_diagnostics {
1211 if ($diagnostics_object) {
1212 $diagnostics_object->write_diagnostics($msg);
1216 } ## end closure for diagnostics routines
1218 sub get_convergence_check {
1220 return $self->[_converged_];
1223 sub get_output_line_number {
1225 my $vao = $self->[_vertical_aligner_object_];
1226 return $vao->get_output_line_number();
1229 sub want_blank_line {
1232 my $file_writer_object = $self->[_file_writer_object_];
1233 $file_writer_object->want_blank_line();
1237 sub write_unindented_line {
1238 my ( $self, $line ) = @_;
1240 my $file_writer_object = $self->[_file_writer_object_];
1241 $file_writer_object->write_line($line);
1245 sub consecutive_nonblank_lines {
1247 my $file_writer_object = $self->[_file_writer_object_];
1248 my $vao = $self->[_vertical_aligner_object_];
1249 return $file_writer_object->get_consecutive_nonblank_lines() +
1250 $vao->get_cached_line_count();
1255 # given a string containing words separated by whitespace,
1256 # return the list of words
1261 return split( /\s+/, $str );
1262 } ## end sub split_words
1264 ###########################################
1265 # CODE SECTION 3: Check and process options
1266 ###########################################
1270 # This routine is called to check the user-supplied run parameters
1271 # and to configure the control hashes to them.
1274 initialize_whitespace_hashes();
1275 initialize_bond_strength_hashes();
1277 # This function must be called early to get hashes with grep initialized
1278 initialize_grep_and_friends( $rOpts->{'grep-alias-list'} );
1280 # Make needed regex patterns for matching text.
1281 # NOTE: sub_matching_patterns must be made first because later patterns use
1282 # them; see RT #133130.
1283 make_sub_matching_pattern();
1284 make_static_block_comment_pattern();
1285 make_static_side_comment_pattern();
1286 make_closing_side_comment_prefix();
1287 make_closing_side_comment_list_pattern();
1288 $format_skipping_pattern_begin =
1289 make_format_skipping_pattern( 'format-skipping-begin', '#<<<' );
1290 $format_skipping_pattern_end =
1291 make_format_skipping_pattern( 'format-skipping-end', '#>>>' );
1292 make_non_indenting_brace_pattern();
1294 # If closing side comments ARE selected, then we can safely
1295 # delete old closing side comments unless closing side comment
1296 # warnings are requested. This is a good idea because it will
1297 # eliminate any old csc's which fall below the line count threshold.
1298 # We cannot do this if warnings are turned on, though, because we
1299 # might delete some text which has been added. So that must
1300 # be handled when comments are created. And we cannot do this
1301 # with -io because -csc will be skipped altogether.
1302 if ( $rOpts->{'closing-side-comments'} ) {
1303 if ( !$rOpts->{'closing-side-comment-warnings'}
1304 && !$rOpts->{'indent-only'} )
1306 $rOpts->{'delete-closing-side-comments'} = 1;
1310 # If closing side comments ARE NOT selected, but warnings ARE
1311 # selected and we ARE DELETING csc's, then we will pretend to be
1312 # adding with a huge interval. This will force the comments to be
1313 # generated for comparison with the old comments, but not added.
1314 elsif ( $rOpts->{'closing-side-comment-warnings'} ) {
1315 if ( $rOpts->{'delete-closing-side-comments'} ) {
1316 $rOpts->{'delete-closing-side-comments'} = 0;
1317 $rOpts->{'closing-side-comments'} = 1;
1318 $rOpts->{'closing-side-comment-interval'} = 100_000_000;
1324 make_block_brace_vertical_tightness_pattern();
1325 make_blank_line_pattern();
1326 make_keyword_group_list_pattern();
1328 # Make initial list of desired one line block types
1329 # They will be modified by 'prepare_cuddled_block_types'
1330 # NOTE: this line must come after is_sort_map_grep_eval is
1331 # initialized in sub 'initialize_grep_and_friends'
1332 %want_one_line_block = %is_sort_map_grep_eval;
1334 prepare_cuddled_block_types();
1335 if ( $rOpts->{'dump-cuddled-block-list'} ) {
1336 dump_cuddled_block_list(*STDOUT);
1341 if ( $rOpts->{'extended-line-up-parentheses'} ) {
1342 $rOpts->{'line-up-parentheses'} ||= 1;
1345 if ( $rOpts->{'line-up-parentheses'} ) {
1347 if ( $rOpts->{'indent-only'}
1348 || !$rOpts->{'add-newlines'}
1349 || !$rOpts->{'delete-old-newlines'} )
1352 -----------------------------------------------------------------------
1353 Conflict: -lp conflicts with -io, -fnl, -nanl, or -ndnl; ignoring -lp
1355 The -lp indentation logic requires that perltidy be able to coordinate
1356 arbitrarily large numbers of line breakpoints. This isn't possible
1358 -----------------------------------------------------------------------
1360 $rOpts->{'line-up-parentheses'} = 0;
1361 $rOpts->{'extended-line-up-parentheses'} = 0;
1364 if ( $rOpts->{'whitespace-cycle'} ) {
1366 Conflict: -wc cannot currently be used with the -lp option; ignoring -wc
1368 $rOpts->{'whitespace-cycle'} = 0;
1372 # At present, tabs are not compatible with the line-up-parentheses style
1373 # (it would be possible to entab the total leading whitespace
1374 # just prior to writing the line, if desired).
1375 if ( $rOpts->{'line-up-parentheses'} && $rOpts->{'tabs'} ) {
1377 Conflict: -t (tabs) cannot be used with the -lp option; ignoring -t; see -et.
1379 $rOpts->{'tabs'} = 0;
1382 # Likewise, tabs are not compatible with outdenting..
1383 if ( $rOpts->{'outdent-keywords'} && $rOpts->{'tabs'} ) {
1385 Conflict: -t (tabs) cannot be used with the -okw options; ignoring -t; see -et.
1387 $rOpts->{'tabs'} = 0;
1390 if ( $rOpts->{'outdent-labels'} && $rOpts->{'tabs'} ) {
1392 Conflict: -t (tabs) cannot be used with the -ola option; ignoring -t; see -et.
1394 $rOpts->{'tabs'} = 0;
1397 if ( !$rOpts->{'space-for-semicolon'} ) {
1398 $want_left_space{'f'} = -1;
1401 if ( $rOpts->{'space-terminal-semicolon'} ) {
1402 $want_left_space{';'} = 1;
1405 # We should put an upper bound on any -sil=n value. Otherwise enormous
1406 # files could be created by mistake.
1407 for ( $rOpts->{'starting-indentation-level'} ) {
1408 if ( $_ && $_ > 100 ) {
1410 The value --starting-indentation-level=$_ is very large; a mistake? resetting to 0;
1416 # Require -msp > 0 to avoid future parsing problems (issue c147)
1417 for ( $rOpts->{'minimum-space-to-comment'} ) {
1418 if ( !$_ || $_ <= 0 ) { $_ = 1 }
1421 # implement outdenting preferences for keywords
1422 %outdent_keyword = ();
1423 my @okw = split_words( $rOpts->{'outdent-keyword-list'} );
1425 @okw = qw(next last redo goto return); # defaults
1428 # FUTURE: if not a keyword, assume that it is an identifier
1430 if ( $Perl::Tidy::Tokenizer::is_keyword{$_} ) {
1431 $outdent_keyword{$_} = 1;
1434 Warn("ignoring '$_' in -okwl list; not a perl keyword");
1438 # setup hash for -kpit option
1439 %keyword_paren_inner_tightness = ();
1440 my $kpit_value = $rOpts->{'keyword-paren-inner-tightness'};
1441 if ( defined($kpit_value) && $kpit_value != 1 ) {
1443 split_words( $rOpts->{'keyword-paren-inner-tightness-list'} );
1445 @kpit = qw(if elsif unless while until for foreach); # defaults
1448 # we will allow keywords and user-defined identifiers
1450 $keyword_paren_inner_tightness{$_} = $kpit_value;
1454 # implement user whitespace preferences
1455 if ( my @q = split_words( $rOpts->{'want-left-space'} ) ) {
1456 @want_left_space{@q} = (1) x scalar(@q);
1459 if ( my @q = split_words( $rOpts->{'want-right-space'} ) ) {
1460 @want_right_space{@q} = (1) x scalar(@q);
1463 if ( my @q = split_words( $rOpts->{'nowant-left-space'} ) ) {
1464 @want_left_space{@q} = (-1) x scalar(@q);
1467 if ( my @q = split_words( $rOpts->{'nowant-right-space'} ) ) {
1468 @want_right_space{@q} = (-1) x scalar(@q);
1470 if ( $rOpts->{'dump-want-left-space'} ) {
1471 dump_want_left_space(*STDOUT);
1475 if ( $rOpts->{'dump-want-right-space'} ) {
1476 dump_want_right_space(*STDOUT);
1480 # default keywords for which space is introduced before an opening paren
1481 # (at present, including them messes up vertical alignment)
1482 my @sak = qw(my local our and or xor err eq ne if else elsif until
1483 unless while for foreach return switch case given when catch);
1484 %space_after_keyword = map { $_ => 1 } @sak;
1486 # first remove any or all of these if desired
1487 if ( my @q = split_words( $rOpts->{'nospace-after-keyword'} ) ) {
1489 # -nsak='*' selects all the above keywords
1490 if ( @q == 1 && $q[0] eq '*' ) { @q = keys(%space_after_keyword) }
1491 @space_after_keyword{@q} = (0) x scalar(@q);
1494 # then allow user to add to these defaults
1495 if ( my @q = split_words( $rOpts->{'space-after-keyword'} ) ) {
1496 @space_after_keyword{@q} = (1) x scalar(@q);
1499 # implement user break preferences
1500 my $break_after = sub {
1502 foreach my $tok (@toks) {
1503 if ( $tok eq '?' ) { $tok = ':' } # patch to coordinate ?/:
1504 if ( $tok eq ',' ) { $controlled_comma_style = 1 }
1505 my $lbs = $left_bond_strength{$tok};
1506 my $rbs = $right_bond_strength{$tok};
1507 if ( defined($lbs) && defined($rbs) && $lbs < $rbs ) {
1508 ( $right_bond_strength{$tok}, $left_bond_strength{$tok} ) =
1515 my $break_before = sub {
1517 foreach my $tok (@toks) {
1518 if ( $tok eq ',' ) { $controlled_comma_style = 1 }
1519 my $lbs = $left_bond_strength{$tok};
1520 my $rbs = $right_bond_strength{$tok};
1521 if ( defined($lbs) && defined($rbs) && $rbs < $lbs ) {
1522 ( $right_bond_strength{$tok}, $left_bond_strength{$tok} ) =
1529 $break_after->(@all_operators) if ( $rOpts->{'break-after-all-operators'} );
1530 $break_before->(@all_operators)
1531 if ( $rOpts->{'break-before-all-operators'} );
1533 $break_after->( split_words( $rOpts->{'want-break-after'} ) );
1534 $break_before->( split_words( $rOpts->{'want-break-before'} ) );
1536 # make note if breaks are before certain key types
1537 %want_break_before = ();
1538 foreach my $tok ( @all_operators, ',' ) {
1539 $want_break_before{$tok} =
1540 $left_bond_strength{$tok} < $right_bond_strength{$tok};
1543 # Coordinate ?/: breaks, which must be similar
1544 # The small strength 0.01 which is added is 1% of the strength of one
1545 # indentation level and seems to work okay.
1546 if ( !$want_break_before{':'} ) {
1547 $want_break_before{'?'} = $want_break_before{':'};
1548 $right_bond_strength{'?'} = $right_bond_strength{':'} + 0.01;
1549 $left_bond_strength{'?'} = NO_BREAK;
1552 # Only make a hash entry for the next parameters if values are defined.
1553 # That allows a quick check to be made later.
1554 %break_before_container_types = ();
1555 for ( $rOpts->{'break-before-hash-brace'} ) {
1556 $break_before_container_types{'{'} = $_ if $_ && $_ > 0;
1558 for ( $rOpts->{'break-before-square-bracket'} ) {
1559 $break_before_container_types{'['} = $_ if $_ && $_ > 0;
1561 for ( $rOpts->{'break-before-paren'} ) {
1562 $break_before_container_types{'('} = $_ if $_ && $_ > 0;
1565 #--------------------------------------------------------------
1566 # The combination -lp -iob -vmll -bbx=2 can be unstable (b1266)
1567 #--------------------------------------------------------------
1568 # The -vmll and -lp parameters do not really work well together.
1569 # To avoid instabilities, we will change any -bbx=2 to -bbx=1 (stable).
1570 # NOTE: we could make this more precise by looking at any exclusion
1571 # flags for -lp, and allowing -bbx=2 for excluded types.
1572 if ( $rOpts->{'variable-maximum-line-length'}
1573 && $rOpts->{'ignore-old-breakpoints'}
1574 && $rOpts->{'line-up-parentheses'} )
1577 foreach my $key ( keys %break_before_container_types ) {
1578 if ( $break_before_container_types{$key} == 2 ) {
1579 $break_before_container_types{$key} = 1;
1580 push @changed, $key;
1585 # we could write a warning here
1589 #-----------------------------------------------------------
1590 # The combination -lp -vmll can be unstable if -ci<2 (b1267)
1591 #-----------------------------------------------------------
1592 # The -vmll and -lp parameters do not really work well together.
1593 # This is a very crude fix for an unusual parameter combination.
1594 if ( $rOpts->{'variable-maximum-line-length'}
1595 && $rOpts->{'line-up-parentheses'}
1596 && $rOpts->{'continuation-indentation'} < 2 )
1598 $rOpts->{'continuation-indentation'} = 2;
1599 ##Warn("Increased -ci=n to n=2 for stability with -lp and -vmll\n");
1602 #-----------------------------------------------------------
1603 # The combination -lp -vmll -atc -dtc -wtc=b can be unstable
1604 #-----------------------------------------------------------
1605 # This fixes b1386 b1387 b1388
1606 if ( $rOpts->{'variable-maximum-line-length'}
1607 && $rOpts->{'line-up-parentheses'}
1608 && $rOpts->{'add-trailing-commas'}
1609 && $rOpts->{'delete-trailing-commas'}
1610 && $rOpts->{'want-trailing-commas'}
1611 && $rOpts->{'want-trailing-commas'} =~ /b/ )
1613 $rOpts->{'delete-trailing-commas'} = 0;
1614 ## warning causes trouble with test cases and this combo is so rare that
1615 ## it is unlikely to not occur in practice.
1617 ##"The combination -vmll -lp -atc -dtc -wtc=b can be unstable; turning off -dtc\n"
1621 %container_indentation_options = ();
1623 [ 'break-before-hash-brace-and-indent', '{' ],
1624 [ 'break-before-square-bracket-and-indent', '[' ],
1625 [ 'break-before-paren-and-indent', '(' ],
1628 my ( $key, $tok ) = @{$pair};
1629 my $opt = $rOpts->{$key};
1630 if ( defined($opt) && $opt > 0 && $break_before_container_types{$tok} )
1633 # (1) -lp is not compatible with opt=2, silently set to opt=0
1634 # (2) opt=0 and 2 give same result if -i=-ci; but opt=0 is faster
1635 # (3) set opt=0 if -i < -ci (can be unstable, case b1355)
1638 $rOpts->{'line-up-parentheses'}
1639 || ( $rOpts->{'indent-columns'} <=
1640 $rOpts->{'continuation-indentation'} )
1646 $container_indentation_options{$tok} = $opt;
1650 # Define here tokens which may follow the closing brace of a do statement
1651 # on the same line, as in:
1652 # } while ( $something);
1653 my @dof = qw(until while unless if ; : );
1655 @is_do_follower{@dof} = (1) x scalar(@dof);
1657 # what can follow a multi-line anonymous sub definition closing curly:
1658 my @asf = qw# ; : => or and && || ~~ !~~ ) #;
1660 @is_anon_sub_brace_follower{@asf} = (1) x scalar(@asf);
1662 # what can follow a one-line anonymous sub closing curly:
1663 # one-line anonymous subs also have ']' here...
1664 # see tk3.t and PP.pm
1665 my @asf1 = qw# ; : => or and && || ) ] ~~ !~~ #;
1667 @is_anon_sub_1_brace_follower{@asf1} = (1) x scalar(@asf1);
1669 # What can follow a closing curly of a block
1670 # which is not an if/elsif/else/do/sort/map/grep/eval/sub
1671 # Testfiles: 'Toolbar.pm', 'Menubar.pm', bless.t, '3rules.pl'
1672 my @obf = qw# ; : => or and && || ) #;
1674 @is_other_brace_follower{@obf} = (1) x scalar(@obf);
1676 $right_bond_strength{'{'} = WEAK;
1677 $left_bond_strength{'{'} = VERY_STRONG;
1679 # make -l=0 equal to -l=infinite
1680 if ( !$rOpts->{'maximum-line-length'} ) {
1681 $rOpts->{'maximum-line-length'} = 1_000_000;
1684 # make -lbl=0 equal to -lbl=infinite
1685 if ( !$rOpts->{'long-block-line-count'} ) {
1686 $rOpts->{'long-block-line-count'} = 1_000_000;
1689 my $ole = $rOpts->{'output-line-ending'};
1698 # Patch for RT #99514, a memoization issue.
1699 # Normally, the user enters one of 'dos', 'win', etc, and we change the
1700 # value in the options parameter to be the corresponding line ending
1701 # character. But, if we are using memoization, on later passes through
1702 # here the option parameter will already have the desired ending
1703 # character rather than the keyword 'dos', 'win', etc. So
1704 # we must check to see if conversion has already been done and, if so,
1705 # bypass the conversion step.
1706 my %endings_inverted = (
1707 "\015\012" => 'dos',
1708 "\015\012" => 'win',
1713 if ( defined( $endings_inverted{$ole} ) ) {
1715 # we already have valid line ending, nothing more to do
1719 unless ( $rOpts->{'output-line-ending'} = $endings{$ole} ) {
1720 my $str = join SPACE, keys %endings;
1722 Unrecognized line ending '$ole'; expecting one of: $str
1725 if ( $rOpts->{'preserve-line-endings'} ) {
1726 Warn("Ignoring -ple; conflicts with -ole\n");
1727 $rOpts->{'preserve-line-endings'} = undef;
1732 # hashes used to simplify setting whitespace
1734 '{' => $rOpts->{'brace-tightness'},
1735 '}' => $rOpts->{'brace-tightness'},
1736 '(' => $rOpts->{'paren-tightness'},
1737 ')' => $rOpts->{'paren-tightness'},
1738 '[' => $rOpts->{'square-bracket-tightness'},
1739 ']' => $rOpts->{'square-bracket-tightness'},
1753 if ( $rOpts->{'ignore-old-breakpoints'} ) {
1756 if ( $rOpts->{'break-at-old-method-breakpoints'} ) {
1757 $rOpts->{'break-at-old-method-breakpoints'} = 0;
1758 push @conflicts, '--break-at-old-method-breakpoints (-bom)';
1760 if ( $rOpts->{'break-at-old-comma-breakpoints'} ) {
1761 $rOpts->{'break-at-old-comma-breakpoints'} = 0;
1762 push @conflicts, '--break-at-old-comma-breakpoints (-boc)';
1764 if ( $rOpts->{'break-at-old-semicolon-breakpoints'} ) {
1765 $rOpts->{'break-at-old-semicolon-breakpoints'} = 0;
1766 push @conflicts, '--break-at-old-semicolon-breakpoints (-bos)';
1768 if ( $rOpts->{'keep-old-breakpoints-before'} ) {
1769 $rOpts->{'keep-old-breakpoints-before'} = EMPTY_STRING;
1770 push @conflicts, '--keep-old-breakpoints-before (-kbb)';
1772 if ( $rOpts->{'keep-old-breakpoints-after'} ) {
1773 $rOpts->{'keep-old-breakpoints-after'} = EMPTY_STRING;
1774 push @conflicts, '--keep-old-breakpoints-after (-kba)';
1778 my $msg = join( "\n ",
1779 " Conflict: These conflicts with --ignore-old-breakponts (-iob) will be turned off:",
1785 # Note: These additional parameters are made inactive by -iob.
1786 # They are silently turned off here because they are on by default.
1787 # We would generate unexpected warnings if we issued a warning.
1788 $rOpts->{'break-at-old-keyword-breakpoints'} = 0;
1789 $rOpts->{'break-at-old-logical-breakpoints'} = 0;
1790 $rOpts->{'break-at-old-ternary-breakpoints'} = 0;
1791 $rOpts->{'break-at-old-attribute-breakpoints'} = 0;
1794 %keep_break_before_type = ();
1795 initialize_keep_old_breakpoints( $rOpts->{'keep-old-breakpoints-before'},
1796 'kbb', \%keep_break_before_type );
1798 %keep_break_after_type = ();
1799 initialize_keep_old_breakpoints( $rOpts->{'keep-old-breakpoints-after'},
1800 'kba', \%keep_break_after_type );
1802 $controlled_comma_style ||= $keep_break_before_type{','};
1803 $controlled_comma_style ||= $keep_break_after_type{','};
1805 #------------------------------------------------------------
1806 # Make global vars for frequently used options for efficiency
1807 #------------------------------------------------------------
1809 $rOpts_add_newlines = $rOpts->{'add-newlines'};
1810 $rOpts_add_trailing_commas = $rOpts->{'add-trailing-commas'};
1811 $rOpts_add_whitespace = $rOpts->{'add-whitespace'};
1812 $rOpts_blank_lines_after_opening_block =
1813 $rOpts->{'blank-lines-after-opening-block'};
1814 $rOpts_block_brace_tightness = $rOpts->{'block-brace-tightness'};
1815 $rOpts_block_brace_vertical_tightness =
1816 $rOpts->{'block-brace-vertical-tightness'};
1817 $rOpts_break_after_labels = $rOpts->{'break-after-labels'};
1818 $rOpts_break_at_old_attribute_breakpoints =
1819 $rOpts->{'break-at-old-attribute-breakpoints'};
1820 $rOpts_break_at_old_comma_breakpoints =
1821 $rOpts->{'break-at-old-comma-breakpoints'};
1822 $rOpts_break_at_old_keyword_breakpoints =
1823 $rOpts->{'break-at-old-keyword-breakpoints'};
1824 $rOpts_break_at_old_logical_breakpoints =
1825 $rOpts->{'break-at-old-logical-breakpoints'};
1826 $rOpts_break_at_old_semicolon_breakpoints =
1827 $rOpts->{'break-at-old-semicolon-breakpoints'};
1828 $rOpts_break_at_old_ternary_breakpoints =
1829 $rOpts->{'break-at-old-ternary-breakpoints'};
1830 $rOpts_break_open_compact_parens = $rOpts->{'break-open-compact-parens'};
1831 $rOpts_closing_side_comments = $rOpts->{'closing-side-comments'};
1832 $rOpts_closing_side_comment_else_flag =
1833 $rOpts->{'closing-side-comment-else-flag'};
1834 $rOpts_closing_side_comment_maximum_text =
1835 $rOpts->{'closing-side-comment-maximum-text'};
1836 $rOpts_comma_arrow_breakpoints = $rOpts->{'comma-arrow-breakpoints'};
1837 $rOpts_continuation_indentation = $rOpts->{'continuation-indentation'};
1838 $rOpts_delete_closing_side_comments =
1839 $rOpts->{'delete-closing-side-comments'};
1840 $rOpts_delete_old_whitespace = $rOpts->{'delete-old-whitespace'};
1841 $rOpts_extended_continuation_indentation =
1842 $rOpts->{'extended-continuation-indentation'};
1843 $rOpts_delete_side_comments = $rOpts->{'delete-side-comments'};
1844 $rOpts_delete_trailing_commas = $rOpts->{'delete-trailing-commas'};
1845 $rOpts_delete_weld_interfering_commas =
1846 $rOpts->{'delete-weld-interfering-commas'};
1847 $rOpts_format_skipping = $rOpts->{'format-skipping'};
1848 $rOpts_freeze_whitespace = $rOpts->{'freeze-whitespace'};
1849 $rOpts_function_paren_vertical_alignment =
1850 $rOpts->{'function-paren-vertical-alignment'};
1851 $rOpts_fuzzy_line_length = $rOpts->{'fuzzy-line-length'};
1852 $rOpts_ignore_old_breakpoints = $rOpts->{'ignore-old-breakpoints'};
1853 $rOpts_ignore_side_comment_lengths =
1854 $rOpts->{'ignore-side-comment-lengths'};
1855 $rOpts_indent_closing_brace = $rOpts->{'indent-closing-brace'};
1856 $rOpts_indent_columns = $rOpts->{'indent-columns'};
1857 $rOpts_indent_only = $rOpts->{'indent-only'};
1858 $rOpts_keep_interior_semicolons = $rOpts->{'keep-interior-semicolons'};
1859 $rOpts_line_up_parentheses = $rOpts->{'line-up-parentheses'};
1860 $rOpts_extended_line_up_parentheses =
1861 $rOpts->{'extended-line-up-parentheses'};
1862 $rOpts_logical_padding = $rOpts->{'logical-padding'};
1863 $rOpts_maximum_consecutive_blank_lines =
1864 $rOpts->{'maximum-consecutive-blank-lines'};
1865 $rOpts_maximum_fields_per_table = $rOpts->{'maximum-fields-per-table'};
1866 $rOpts_maximum_line_length = $rOpts->{'maximum-line-length'};
1867 $rOpts_one_line_block_semicolons = $rOpts->{'one-line-block-semicolons'};
1868 $rOpts_opening_brace_always_on_right =
1869 $rOpts->{'opening-brace-always-on-right'};
1870 $rOpts_outdent_keywords = $rOpts->{'outdent-keywords'};
1871 $rOpts_outdent_labels = $rOpts->{'outdent-labels'};
1872 $rOpts_outdent_long_comments = $rOpts->{'outdent-long-comments'};
1873 $rOpts_outdent_long_quotes = $rOpts->{'outdent-long-quotes'};
1874 $rOpts_outdent_static_block_comments =
1875 $rOpts->{'outdent-static-block-comments'};
1876 $rOpts_recombine = $rOpts->{'recombine'};
1877 $rOpts_short_concatenation_item_length =
1878 $rOpts->{'short-concatenation-item-length'};
1879 $rOpts_space_prototype_paren = $rOpts->{'space-prototype-paren'};
1880 $rOpts_stack_closing_block_brace = $rOpts->{'stack-closing-block-brace'};
1881 $rOpts_static_block_comments = $rOpts->{'static-block-comments'};
1882 $rOpts_sub_alias_list = $rOpts->{'sub-alias-list'};
1883 $rOpts_tee_block_comments = $rOpts->{'tee-block-comments'};
1884 $rOpts_tee_pod = $rOpts->{'tee-pod'};
1885 $rOpts_tee_side_comments = $rOpts->{'tee-side-comments'};
1886 $rOpts_valign = $rOpts->{'valign'};
1887 $rOpts_valign_code = $rOpts->{'valign-code'};
1888 $rOpts_valign_side_comments = $rOpts->{'valign-side-comments'};
1889 $rOpts_variable_maximum_line_length =
1890 $rOpts->{'variable-maximum-line-length'};
1892 # Note that both opening and closing tokens can access the opening
1893 # and closing flags of their container types.
1894 %opening_vertical_tightness = (
1895 '(' => $rOpts->{'paren-vertical-tightness'},
1896 '{' => $rOpts->{'brace-vertical-tightness'},
1897 '[' => $rOpts->{'square-bracket-vertical-tightness'},
1898 ')' => $rOpts->{'paren-vertical-tightness'},
1899 '}' => $rOpts->{'brace-vertical-tightness'},
1900 ']' => $rOpts->{'square-bracket-vertical-tightness'},
1903 %closing_vertical_tightness = (
1904 '(' => $rOpts->{'paren-vertical-tightness-closing'},
1905 '{' => $rOpts->{'brace-vertical-tightness-closing'},
1906 '[' => $rOpts->{'square-bracket-vertical-tightness-closing'},
1907 ')' => $rOpts->{'paren-vertical-tightness-closing'},
1908 '}' => $rOpts->{'brace-vertical-tightness-closing'},
1909 ']' => $rOpts->{'square-bracket-vertical-tightness-closing'},
1912 # assume flag for '>' same as ')' for closing qw quotes
1913 %closing_token_indentation = (
1914 ')' => $rOpts->{'closing-paren-indentation'},
1915 '}' => $rOpts->{'closing-brace-indentation'},
1916 ']' => $rOpts->{'closing-square-bracket-indentation'},
1917 '>' => $rOpts->{'closing-paren-indentation'},
1920 # flag indicating if any closing tokens are indented
1921 $some_closing_token_indentation =
1922 $rOpts->{'closing-paren-indentation'}
1923 || $rOpts->{'closing-brace-indentation'}
1924 || $rOpts->{'closing-square-bracket-indentation'}
1925 || $rOpts->{'indent-closing-brace'};
1927 %opening_token_right = (
1928 '(' => $rOpts->{'opening-paren-right'},
1929 '{' => $rOpts->{'opening-hash-brace-right'},
1930 '[' => $rOpts->{'opening-square-bracket-right'},
1933 %stack_opening_token = (
1934 '(' => $rOpts->{'stack-opening-paren'},
1935 '{' => $rOpts->{'stack-opening-hash-brace'},
1936 '[' => $rOpts->{'stack-opening-square-bracket'},
1939 %stack_closing_token = (
1940 ')' => $rOpts->{'stack-closing-paren'},
1941 '}' => $rOpts->{'stack-closing-hash-brace'},
1942 ']' => $rOpts->{'stack-closing-square-bracket'},
1945 # Create a table of maximum line length vs level for later efficient use.
1946 # We will make the tables very long to be sure it will not be exceeded.
1947 # But we have to choose a fixed length. A check will be made at the start
1948 # of sub 'finish_formatting' to be sure it is not exceeded. Note, some of
1949 # my standard test problems have indentation levels of about 150, so this
1950 # should be fairly large. If the choice of a maximum level ever becomes
1951 # an issue then these table values could be returned in a sub with a simple
1952 # memoization scheme.
1954 # Also create a table of the maximum spaces available for text due to the
1955 # level only. If a line has continuation indentation, then that space must
1956 # be subtracted from the table value. This table is used for preliminary
1957 # estimates in welding, extended_ci, BBX, and marking short blocks.
1958 use constant LEVEL_TABLE_MAX => 1000;
1961 foreach my $level ( 0 .. LEVEL_TABLE_MAX ) {
1962 my $indent = $level * $rOpts_indent_columns;
1963 $maximum_line_length_at_level[$level] = $rOpts_maximum_line_length;
1964 $maximum_text_length_at_level[$level] =
1965 $rOpts_maximum_line_length - $indent;
1968 # Correct the maximum_text_length table if the -wc=n flag is used
1969 $rOpts_whitespace_cycle = $rOpts->{'whitespace-cycle'};
1970 if ($rOpts_whitespace_cycle) {
1971 if ( $rOpts_whitespace_cycle > 0 ) {
1972 foreach my $level ( 0 .. LEVEL_TABLE_MAX ) {
1973 my $level_mod = $level % $rOpts_whitespace_cycle;
1974 my $indent = $level_mod * $rOpts_indent_columns;
1975 $maximum_text_length_at_level[$level] =
1976 $rOpts_maximum_line_length - $indent;
1980 $rOpts_whitespace_cycle = $rOpts->{'whitespace-cycle'} = 0;
1984 # Correct the tables if the -vmll flag is used. These values override the
1986 if ($rOpts_variable_maximum_line_length) {
1987 foreach my $level ( 0 .. LEVEL_TABLE_MAX ) {
1988 $maximum_text_length_at_level[$level] = $rOpts_maximum_line_length;
1989 $maximum_line_length_at_level[$level] =
1990 $rOpts_maximum_line_length + $level * $rOpts_indent_columns;
1994 # Define two measures of indentation level, alpha and beta, at which some
1995 # formatting features come under stress and need to start shutting down.
1996 # Some combination of the two will be used to shut down different
1997 # formatting features.
1998 # Put a reasonable upper limit on stress level (say 100) in case the
1999 # whitespace-cycle variable is used.
2000 my $stress_level_limit = min( 100, LEVEL_TABLE_MAX );
2002 # Find stress_level_alpha, targeted at very short maximum line lengths.
2003 $stress_level_alpha = $stress_level_limit + 1;
2004 foreach my $level_test ( 0 .. $stress_level_limit ) {
2005 my $max_len = $maximum_text_length_at_level[ $level_test + 1 ];
2006 my $excess_inside_space =
2008 $rOpts_continuation_indentation -
2009 $rOpts_indent_columns - 8;
2010 if ( $excess_inside_space <= 0 ) {
2011 $stress_level_alpha = $level_test;
2016 # Find stress level beta, a stress level targeted at formatting
2017 # at deep levels near the maximum line length. We start increasing
2018 # from zero and stop at the first level which shows no more space.
2020 # 'const' is a fixed number of spaces for a typical variable.
2021 # Cases b1197-b1204 work ok with const=12 but not with const=8
2023 my $denom = max( 1, $rOpts_indent_columns );
2024 $stress_level_beta = 0;
2025 foreach my $level ( 0 .. $stress_level_limit ) {
2026 my $remaining_cycles = max(
2029 $maximum_text_length_at_level[$level] -
2030 $rOpts_continuation_indentation - $const
2033 last if ( $remaining_cycles <= 3 ); # 2 does not work
2034 $stress_level_beta = $level;
2037 # This is a combined level which works well for turning off formatting
2038 # features in most cases:
2039 $high_stress_level = min( $stress_level_alpha, $stress_level_beta + 2 );
2041 %trailing_comma_rules = ();
2042 initialize_trailing_comma_rules();
2044 initialize_weld_nested_exclusion_rules();
2045 initialize_weld_fat_comma_rules();
2047 %line_up_parentheses_control_hash = ();
2048 $line_up_parentheses_control_is_lxpl = 1;
2049 my $lpxl = $rOpts->{'line-up-parentheses-exclusion-list'};
2050 my $lpil = $rOpts->{'line-up-parentheses-inclusion-list'};
2051 if ( $lpxl && $lpil ) {
2053 You entered values for both -lpxl=s and -lpil=s; the -lpil list will be ignored
2057 $line_up_parentheses_control_is_lxpl = 1;
2058 initialize_line_up_parentheses_control_hash(
2059 $rOpts->{'line-up-parentheses-exclusion-list'}, 'lpxl' );
2062 $line_up_parentheses_control_is_lxpl = 0;
2063 initialize_line_up_parentheses_control_hash(
2064 $rOpts->{'line-up-parentheses-inclusion-list'}, 'lpil' );
2068 } ## end sub check_options
2070 use constant ALIGN_GREP_ALIASES => 0;
2072 sub initialize_grep_and_friends {
2075 # Initialize or re-initialize hashes with 'grep' and grep aliases. This
2076 # must be done after each set of options because new grep aliases may be
2079 # re-initialize the hash ... this is critical!
2080 %is_sort_map_grep = ();
2082 my @q = qw(sort map grep);
2083 @is_sort_map_grep{@q} = (1) x scalar(@q);
2085 # Note that any 'grep-alias-list' string has been preprocessed to be a
2086 # trimmed, space-separated list.
2087 my @grep_aliases = split /\s+/, $str;
2088 @{is_sort_map_grep}{@grep_aliases} = (1) x scalar(@grep_aliases);
2090 ##@q = qw(sort map grep eval);
2091 %is_sort_map_grep_eval = %is_sort_map_grep;
2092 $is_sort_map_grep_eval{'eval'} = 1;
2094 ##@q = qw(sort map grep eval do);
2095 %is_sort_map_grep_eval_do = %is_sort_map_grep_eval;
2096 $is_sort_map_grep_eval_do{'do'} = 1;
2098 # These block types can take ci. This is used by the -xci option.
2099 # Note that the 'sub' in this list is an anonymous sub. To be more correct
2100 # we could remove sub and use ASUB pattern to also handle a
2101 # prototype/signature. But that would slow things down and would probably
2103 ##@q = qw( do sub eval sort map grep );
2104 %is_block_with_ci = %is_sort_map_grep_eval_do;
2105 $is_block_with_ci{'sub'} = 1;
2107 %is_keyword_returning_list = ();
2116 push @q, @grep_aliases;
2117 @is_keyword_returning_list{@q} = (1) x scalar(@q);
2119 # This code enables vertical alignment of grep aliases for testing. It has
2120 # not been found to be beneficial, so it is off by default. But it is
2121 # useful for precise testing of the grep alias coding.
2122 if (ALIGN_GREP_ALIASES) {
2134 $block_type_map{$_} = 'map' unless ( $_ eq 'map' );
2138 } ## end sub initialize_grep_and_friends
2140 sub initialize_weld_nested_exclusion_rules {
2141 %weld_nested_exclusion_rules = ();
2143 my $opt_name = 'weld-nested-exclusion-list';
2144 my $str = $rOpts->{$opt_name};
2145 return unless ($str);
2148 return unless ($str);
2150 # There are four container tokens.
2158 # We are parsing an exclusion list for nested welds. The list is a string
2159 # with spaces separating any number of items. Each item consists of three
2160 # pieces of information:
2161 # <optional position> <optional type> <type of container>
2162 # < ^ or . > < k or K > < ( [ { >
2164 # The last character is the required container type and must be one of:
2166 # [ = square bracket
2169 # An optional leading position indicator:
2170 # ^ means the leading token position in the weld
2171 # . means a secondary token position in the weld
2172 # no position indicator means all positions match
2174 # An optional alphanumeric character between the position and container
2175 # token selects to which the rule applies:
2177 # K = any non-keyword
2179 # F = not a function call
2180 # w = function or keyword
2181 # W = not a function or keyword
2182 # no letter means any preceding type matches
2185 # ^( - the weld must not start with a paren
2186 # .( - the second and later tokens may not be parens
2187 # ( - no parens in weld
2188 # ^K( - exclude a leading paren not preceded by a keyword
2189 # .k( - exclude a secondary paren preceded by a keyword
2190 # [ { - exclude all brackets and braces
2192 my @items = split /\s+/, $str;
2195 foreach my $item (@items) {
2196 my $item_save = $item;
2197 my $tok = chop($item);
2198 my $key = $token_keys{$tok};
2199 if ( !defined($key) ) {
2200 $msg1 .= " '$item_save'";
2203 if ( !defined( $weld_nested_exclusion_rules{$key} ) ) {
2204 $weld_nested_exclusion_rules{$key} = [];
2206 my $rflags = $weld_nested_exclusion_rules{$key};
2208 # A 'q' means do not weld quotes
2209 if ( $tok eq 'q' ) {
2218 if ( $item =~ /^([\^\.])?([kKfFwW])?$/ ) {
2220 $select = $2 if ($2);
2223 $msg1 .= " '$item_save'";
2229 if ( $pos eq '^' || $pos eq '*' ) {
2230 if ( defined( $rflags->[0] ) && $rflags->[0] ne $select ) {
2233 $rflags->[0] = $select;
2235 if ( $pos eq '.' || $pos eq '*' ) {
2236 if ( defined( $rflags->[1] ) && $rflags->[1] ne $select ) {
2239 $rflags->[1] = $select;
2241 if ($err) { $msg2 .= " '$item_save'"; }
2245 Unexpecting symbol(s) encountered in --$opt_name will be ignored:
2251 Multiple specifications were encountered in the --weld-nested-exclusion-list for:
2253 Only the last will be used.
2257 } ## end sub initialize_weld_nested_exclusion_rules
2259 sub initialize_weld_fat_comma_rules {
2261 # Initialize a hash controlling which opening token types can be
2262 # welded around a fat comma
2263 %weld_fat_comma_rules = ();
2265 # The -wfc flag turns on welding of '=>' after an opening paren
2266 if ( $rOpts->{'weld-fat-comma'} ) { $weld_fat_comma_rules{'('} = 1 }
2268 # This could be generalized in the future by introducing a parameter
2269 # -weld-fat-comma-after=str (-wfca=str), where str contains any of:
2271 # to indicate which opening parens may weld to a subsequent '=>'
2273 # The flag -wfc would then be equivalent to -wfca='('
2275 # This has not been done because it is not yet clear how useful
2276 # this generalization would be.
2278 } ## end sub initialize_weld_fat_comma_rules
2280 sub initialize_line_up_parentheses_control_hash {
2281 my ( $str, $opt_name ) = @_;
2282 return unless ($str);
2285 return unless ($str);
2287 # The format is space separated items, where each item must consist of a
2288 # string with a token type preceded by an optional text token and followed
2292 # = (flag1)(key)(flag2), where
2297 my @items = split /\s+/, $str;
2300 foreach my $item (@items) {
2301 my $item_save = $item;
2302 my ( $flag1, $key, $flag2 );
2303 if ( $item =~ /^([^\(\]\{]*)?([\(\{\[])(\d)?$/ ) {
2309 $msg1 .= " '$item_save'";
2313 if ( !defined($key) ) {
2314 $msg1 .= " '$item_save'";
2318 # Check for valid flag1
2319 if ( !defined($flag1) ) { $flag1 = '*' }
2320 elsif ( $flag1 !~ /^[kKfFwW\*]$/ ) {
2321 $msg1 .= " '$item_save'";
2325 # Check for valid flag2
2326 # 0 or blank: ignore container contents
2327 # 1 all containers with sublists match
2328 # 2 all containers with sublists, code blocks or ternary operators match
2329 # ... this could be extended in the future
2330 if ( !defined($flag2) ) { $flag2 = 0 }
2331 elsif ( $flag2 !~ /^[012]$/ ) {
2332 $msg1 .= " '$item_save'";
2336 if ( !defined( $line_up_parentheses_control_hash{$key} ) ) {
2337 $line_up_parentheses_control_hash{$key} = [ $flag1, $flag2 ];
2341 # check for multiple conflicting specifications
2342 my $rflags = $line_up_parentheses_control_hash{$key};
2344 if ( defined( $rflags->[0] ) && $rflags->[0] ne $flag1 ) {
2346 $rflags->[0] = $flag1;
2348 if ( defined( $rflags->[1] ) && $rflags->[1] ne $flag2 ) {
2350 $rflags->[1] = $flag2;
2352 $msg2 .= " '$item_save'" if ($err);
2357 Unexpecting symbol(s) encountered in --$opt_name will be ignored:
2363 Multiple specifications were encountered in the $opt_name at:
2365 Only the last will be used.
2369 # Speedup: we can turn off -lp if it is not actually used
2370 if ($line_up_parentheses_control_is_lxpl) {
2372 foreach my $key (qw# ( { [ #) {
2373 my $rflags = $line_up_parentheses_control_hash{$key};
2374 if ( defined($rflags) ) {
2375 my ( $flag1, $flag2 ) = @{$rflags};
2376 if ( $flag1 && $flag1 ne '*' ) { $all_off = 0; last }
2377 if ($flag2) { $all_off = 0; last }
2381 $rOpts->{'line-up-parentheses'} = EMPTY_STRING;
2386 } ## end sub initialize_line_up_parentheses_control_hash
2388 use constant DEBUG_KB => 0;
2390 sub initialize_keep_old_breakpoints {
2391 my ( $str, $short_name, $rkeep_break_hash ) = @_;
2395 my @list = split_words($str);
2396 if ( DEBUG_KB && @list ) {
2397 local $LIST_SEPARATOR = SPACE;
2399 DEBUG_KB entering for '$short_name' with str=$str\n";
2404 # Ignore kbb='(' and '[' and '{': can cause unstable math formatting
2405 # (issues b1346, b1347, b1348) and likewise ignore kba=')' and ']' and '}'
2406 if ( $short_name eq 'kbb' ) {
2407 @list = grep { !m/[\(\[\{]/ } @list;
2409 elsif ( $short_name eq 'kba' ) {
2410 @list = grep { !m/[\)\]\}]/ } @list;
2413 # pull out any any leading container code, like f( or *{
2414 # For example: 'f(' becomes flags hash entry '(' => 'f'
2415 foreach my $item (@list) {
2416 if ( $item =~ /^( [ \w\* ] )( [ \{\(\[\}\)\] ] )$/x ) {
2423 foreach my $type (@list) {
2424 if ( !Perl::Tidy::Tokenizer::is_valid_token_type($type) ) {
2425 push @unknown_types, $type;
2429 if (@unknown_types) {
2430 my $num = @unknown_types;
2431 local $LIST_SEPARATOR = SPACE;
2433 $num unrecognized token types were input with --$short_name :
2438 @{$rkeep_break_hash}{@list} = (1) x scalar(@list);
2440 foreach my $key ( keys %flags ) {
2441 my $flag = $flags{$key};
2443 if ( length($flag) != 1 ) {
2445 Multiple entries given for '$key' in '$short_name'
2448 elsif ( ( $key eq '(' || $key eq ')' ) && $flag !~ /^[kKfFwW\*]$/ ) {
2450 Unknown flag '$flag' given for '$key' in '$short_name'
2453 elsif ( ( $key eq '}' || $key eq '}' ) && $flag !~ /^[bB\*]$/ ) {
2455 Unknown flag '$flag' given for '$key' in '$short_name'
2459 $rkeep_break_hash->{$key} = $flag;
2462 if ( DEBUG_KB && @list ) {
2464 local $LIST_SEPARATOR = SPACE;
2467 DEBUG_KB -$short_name flag: $str
2476 } ## end sub initialize_keep_old_breakpoints
2478 sub initialize_trailing_comma_rules {
2480 # Setup control hash for trailing commas
2482 # -wtc=s defines desired trailing comma policy:
2485 # [ both -atc and -dtc ignored ]
2487 # [requires -dtc; -atc ignored]
2489 # [requires -atc; -dtc ignored]
2490 # =m : multiline lists require trailing comma
2491 # if -atc set => will add missing multiline trailing commas
2492 # if -dtc set => will delete trailing single line commas
2493 # =b or 'bare' (multiline) lists require trailing comma
2494 # if -atc set => will add missing bare trailing commas
2495 # if -dtc set => will delete non-bare trailing commas
2496 # =h or 'hash': single column stable bare lists require trailing comma
2497 # if -atc set will add these
2498 # if -dtc set will delete other trailing commas
2500 # This routine must be called after the alpha and beta stress levels
2501 # have been defined.
2503 my $rvalid_flags = [qw(0 1 * m b h i)];
2505 my $option = $rOpts->{'want-trailing-commas'};
2508 $option =~ s/^\s+//;
2509 $option =~ s/\s+$//;
2511 if ( defined($option) && length($option) ) {
2514 my @q = @{$rvalid_flags};
2516 @is_valid_flag{@q} = (1) x scalar(@q);
2518 # handle single character control, such as -wtc='b'
2519 if ( length($option) == 1 ) {
2520 foreach (qw< ) ] } >) {
2521 $rule_hash{$_} = [ $option, EMPTY_STRING ];
2525 # handle multi-character control(s), such as -wtc='[m' or -wtc='k(m'
2527 my @parts = split /\s+/, $option;
2528 foreach my $part (@parts) {
2529 if ( length($part) >= 2 && length($part) <= 3 ) {
2530 my $val = substr( $part, -1, 1 );
2531 my $key_o = substr( $part, -2, 1 );
2532 if ( $is_opening_token{$key_o} ) {
2533 my $paren_flag = EMPTY_STRING;
2534 if ( length($part) == 3 ) {
2535 $paren_flag = substr( $part, 0, 1 );
2537 my $key = $matching_token{$key_o};
2538 $rule_hash{$key} = [ $val, $paren_flag ];
2541 $error_message .= "Unrecognized term: '$part'\n";
2545 $error_message .= "Unrecognized term: '$part'\n";
2550 # check for valid control characters
2551 if ( !$error_message ) {
2552 foreach my $key ( keys %rule_hash ) {
2553 my $item = $rule_hash{$key};
2554 my ( $val, $paren_flag ) = @{$item};
2555 if ( $val && !$is_valid_flag{$val} ) {
2556 my $valid_str = join( SPACE, @{$rvalid_flags} );
2558 "Unexpected value '$val'; must be one of: $valid_str\n";
2562 if ( $paren_flag !~ /^[kKfFwW]$/ ) {
2564 "Unexpected paren flag '$paren_flag'; must be one of: k K f F w W\n";
2567 if ( $key ne ')' ) {
2569 "paren flag '$paren_flag' is only allowed before a '('\n";
2576 if ($error_message) {
2578 Error parsing --want-trailing-commas='$option':
2583 # Set the control hash if no errors
2585 %trailing_comma_rules = %rule_hash;
2589 # Both adding and deleting commas can lead to instability in extreme cases
2590 if ( $rOpts_add_trailing_commas && $rOpts_delete_trailing_commas ) {
2592 # If the possible instability is significant, then we can turn off
2593 # -dtc as a defensive measure to prevent it.
2595 # We must turn off -dtc for very small values of --whitespace-cycle
2596 # to avoid instability. A minimum value of -wc=3 fixes b1393, but a
2597 # value of 4 is used here for safety. This parameter is seldom used,
2598 # and much larger than this when used, so the cutoff value is not
2600 if ( $rOpts_whitespace_cycle && $rOpts_whitespace_cycle <= 4 ) {
2601 $rOpts_delete_trailing_commas = 0;
2608 sub initialize_whitespace_hashes {
2610 # This is called once before formatting begins to initialize these global
2611 # hashes, which control the use of whitespace around tokens:
2616 # %space_after_keyword
2618 # Many token types are identical to the tokens themselves.
2619 # See the tokenizer for a complete list. Here are some special types:
2621 # f = semicolon in for statement
2624 # Note that :: is excluded since it should be contained in an identifier
2625 # Note that '->' is excluded because it never gets space
2626 # parentheses and brackets are excluded since they are handled specially
2627 # curly braces are included but may be overridden by logic, such as
2630 # NEW_TOKENS: create a whitespace rule here. This can be as
2631 # simple as adding your new letter to @spaces_both_sides, for
2634 my @opening_type = qw< L { ( [ >;
2635 @is_opening_type{@opening_type} = (1) x scalar(@opening_type);
2637 my @closing_type = qw< R } ) ] >;
2638 @is_closing_type{@closing_type} = (1) x scalar(@closing_type);
2640 my @spaces_both_sides = qw#
2641 + - * / % ? = . : x < > | & ^ .. << >> ** && .. || // => += -=
2642 .= %= x= &= |= ^= *= <> <= >= == =~ !~ /= != ... <<= >>= ~~ !~~
2643 &&= ||= //= <=> A k f w F n C Y U G v
2646 my @spaces_left_side = qw<
2647 t ! ~ m p { \ h pp mm Z j
2649 push( @spaces_left_side, '#' ); # avoids warning message
2651 my @spaces_right_side = qw<
2652 ; } ) ] R J ++ -- **=
2654 push( @spaces_right_side, ',' ); # avoids warning message
2656 %want_left_space = ();
2657 %want_right_space = ();
2658 %binary_ws_rules = ();
2660 # Note that we setting defaults here. Later in processing
2661 # the values of %want_left_space and %want_right_space
2662 # may be overridden by any user settings specified by the
2663 # -wls and -wrs parameters. However the binary_whitespace_rules
2664 # are hardwired and have priority.
2665 @want_left_space{@spaces_both_sides} =
2666 (1) x scalar(@spaces_both_sides);
2667 @want_right_space{@spaces_both_sides} =
2668 (1) x scalar(@spaces_both_sides);
2669 @want_left_space{@spaces_left_side} =
2670 (1) x scalar(@spaces_left_side);
2671 @want_right_space{@spaces_left_side} =
2672 (-1) x scalar(@spaces_left_side);
2673 @want_left_space{@spaces_right_side} =
2674 (-1) x scalar(@spaces_right_side);
2675 @want_right_space{@spaces_right_side} =
2676 (1) x scalar(@spaces_right_side);
2677 $want_left_space{'->'} = WS_NO;
2678 $want_right_space{'->'} = WS_NO;
2679 $want_left_space{'**'} = WS_NO;
2680 $want_right_space{'**'} = WS_NO;
2681 $want_right_space{'CORE::'} = WS_NO;
2683 # These binary_ws_rules are hardwired and have priority over the above
2684 # settings. It would be nice to allow adjustment by the user,
2685 # but it would be complicated to specify.
2687 # hash type information must stay tightly bound
2689 $binary_ws_rules{'i'}{'L'} = WS_NO;
2690 $binary_ws_rules{'i'}{'{'} = WS_YES;
2691 $binary_ws_rules{'k'}{'{'} = WS_YES;
2692 $binary_ws_rules{'U'}{'{'} = WS_YES;
2693 $binary_ws_rules{'i'}{'['} = WS_NO;
2694 $binary_ws_rules{'R'}{'L'} = WS_NO;
2695 $binary_ws_rules{'R'}{'{'} = WS_NO;
2696 $binary_ws_rules{'t'}{'L'} = WS_NO;
2697 $binary_ws_rules{'t'}{'{'} = WS_NO;
2698 $binary_ws_rules{'t'}{'='} = WS_OPTIONAL; # for signatures; fixes b1123
2699 $binary_ws_rules{'}'}{'L'} = WS_NO;
2700 $binary_ws_rules{'}'}{'{'} = WS_OPTIONAL; # RT#129850; was WS_NO
2701 $binary_ws_rules{'$'}{'L'} = WS_NO;
2702 $binary_ws_rules{'$'}{'{'} = WS_NO;
2703 $binary_ws_rules{'@'}{'L'} = WS_NO;
2704 $binary_ws_rules{'@'}{'{'} = WS_NO;
2705 $binary_ws_rules{'='}{'L'} = WS_YES;
2706 $binary_ws_rules{'J'}{'J'} = WS_YES;
2708 # the following includes ') {'
2709 # as in : if ( xxx ) { yyy }
2710 $binary_ws_rules{']'}{'L'} = WS_NO;
2711 $binary_ws_rules{']'}{'{'} = WS_NO;
2712 $binary_ws_rules{')'}{'{'} = WS_YES;
2713 $binary_ws_rules{')'}{'['} = WS_NO;
2714 $binary_ws_rules{']'}{'['} = WS_NO;
2715 $binary_ws_rules{']'}{'{'} = WS_NO;
2716 $binary_ws_rules{'}'}{'['} = WS_NO;
2717 $binary_ws_rules{'R'}{'['} = WS_NO;
2719 $binary_ws_rules{']'}{'++'} = WS_NO;
2720 $binary_ws_rules{']'}{'--'} = WS_NO;
2721 $binary_ws_rules{')'}{'++'} = WS_NO;
2722 $binary_ws_rules{')'}{'--'} = WS_NO;
2724 $binary_ws_rules{'R'}{'++'} = WS_NO;
2725 $binary_ws_rules{'R'}{'--'} = WS_NO;
2727 $binary_ws_rules{'i'}{'Q'} = WS_YES;
2728 $binary_ws_rules{'n'}{'('} = WS_YES; # occurs in 'use package n ()'
2730 $binary_ws_rules{'i'}{'('} = WS_NO;
2732 $binary_ws_rules{'w'}{'('} = WS_NO;
2733 $binary_ws_rules{'w'}{'{'} = WS_YES;
2736 } ## end sub initialize_whitespace_hashes
2738 { #<<< begin closure set_whitespace_flags
2740 my %is_special_ws_type;
2746 # The following hash is used to skip over needless if tests.
2747 # Be sure to update it when adding new checks in its block.
2748 my @q = qw(k w C m - Q);
2750 @is_special_ws_type{@q} = (1) x scalar(@q);
2752 # These hashes replace slower regex tests
2754 @is_wCUG{@q} = (1) x scalar(@q);
2757 @is_wi{@q} = (1) x scalar(@q);
2760 use constant DEBUG_WHITE => 0;
2768 $j_tight_closing_paren,
2776 # Hashes to set spaces around container tokens according to their
2777 # sequence numbers. These are set as keywords are examined.
2778 # They are controlled by the -kpit and -kpitl flags.
2779 my %opening_container_inside_ws;
2780 my %closing_container_inside_ws;
2782 sub set_whitespace_flags {
2784 # This routine is called once per file to set whitespace flags for that
2785 # file. This routine examines each pair of nonblank tokens and sets a flag
2786 # indicating if white space is needed.
2788 # $rwhitespace_flags->[$j] is a flag indicating whether a white space
2789 # BEFORE token $j is needed, with the following values:
2791 # WS_NO = -1 do not want a space BEFORE token $j
2792 # WS_OPTIONAL= 0 optional space or $j is a whitespace
2793 # WS_YES = 1 want a space BEFORE token $j
2798 # initialize closure variables
2799 $rLL = $self->[_rLL_];
2800 $jmax = @{$rLL} - 1;
2802 $j_tight_closing_paren = -1;
2805 $last_token = EMPTY_STRING;
2807 %opening_container_inside_ws = ();
2808 %closing_container_inside_ws = ();
2810 my $rblock_type_of_seqno = $self->[_rblock_type_of_seqno_];
2812 my $rOpts_space_keyword_paren = $rOpts->{'space-keyword-paren'};
2813 my $rOpts_space_backslash_quote = $rOpts->{'space-backslash-quote'};
2814 my $rOpts_space_function_paren = $rOpts->{'space-function-paren'};
2816 my $rwhitespace_flags = [];
2817 my $ris_function_call_paren = {};
2819 return $rwhitespace_flags if ( $jmax < 0 );
2821 my %is_for_foreach = ( 'for' => 1, 'foreach' => 1 );
2824 my $rtokh_last = $rLL->[0];
2825 my $rtokh_last_last = $rtokh_last;
2827 my $last_type = EMPTY_STRING;
2829 $rtokh = [ @{ $rLL->[0] } ];
2831 $rtokh->[_TOKEN_] = $token;
2832 $rtokh->[_TYPE_] = $type;
2833 $rtokh->[_TYPE_SEQUENCE_] = EMPTY_STRING;
2834 $rtokh->[_LINE_INDEX_] = 0;
2836 my ( $ws_1, $ws_2, $ws_3, $ws_4 );
2838 # main loop over all tokens to define the whitespace flags
2839 foreach my $j ( 0 .. $jmax ) {
2841 if ( $rLL->[$j]->[_TYPE_] eq 'b' ) {
2842 $rwhitespace_flags->[$j] = WS_OPTIONAL;
2846 $last_token = $token;
2849 if ( $type ne '#' ) {
2850 $rtokh_last_last = $rtokh_last;
2851 $rtokh_last = $rtokh;
2854 $rtokh = $rLL->[$j];
2855 $token = $rtokh->[_TOKEN_];
2856 $type = $rtokh->[_TYPE_];
2860 #---------------------------------------------------------------
2861 # Whitespace Rules Section 1:
2862 # Handle space on the inside of opening braces.
2863 #---------------------------------------------------------------
2866 if ( $is_opening_type{$last_type} ) {
2868 my $seqno = $rtokh->[_TYPE_SEQUENCE_];
2869 my $block_type = $rblock_type_of_seqno->{$seqno};
2870 my $last_seqno = $rtokh_last->[_TYPE_SEQUENCE_];
2871 my $last_block_type = $rblock_type_of_seqno->{$last_seqno};
2873 $j_tight_closing_paren = -1;
2875 # let us keep empty matched braces together: () {} []
2877 if ( $token eq $matching_token{$last_token} ) {
2887 # we're considering the right of an opening brace
2888 # tightness = 0 means always pad inside with space
2889 # tightness = 1 means pad inside if "complex"
2890 # tightness = 2 means never pad inside with space
2893 if ( $last_type eq '{'
2894 && $last_token eq '{'
2895 && $last_block_type )
2897 $tightness = $rOpts_block_brace_tightness;
2899 else { $tightness = $tightness{$last_token} }
2901 #=============================================================
2902 # Patch for test problem <<snippets/fabrice_bug.in>>
2903 # We must always avoid spaces around a bare word beginning
2905 # my $before = ${^PREMATCH};
2906 # Because all of the following cause an error in perl:
2907 # my $before = ${ ^PREMATCH };
2908 # my $before = ${ ^PREMATCH};
2909 # my $before = ${^PREMATCH };
2910 # So if brace tightness flag is -bt=0 we must temporarily reset
2911 # to bt=1. Note that here we must set tightness=1 and not 2 so
2912 # that the closing space is also avoided
2913 # (via the $j_tight_closing_paren flag in coding)
2914 if ( $type eq 'w' && $token =~ /^\^/ ) { $tightness = 1 }
2916 #=============================================================
2918 if ( $tightness <= 0 ) {
2921 elsif ( $tightness > 1 ) {
2925 $ws = ws_in_container($j);
2929 # check for special cases which override the above rules
2930 if ( %opening_container_inside_ws && $last_seqno ) {
2931 my $ws_override = $opening_container_inside_ws{$last_seqno};
2932 if ($ws_override) { $ws = $ws_override }
2935 $ws_4 = $ws_3 = $ws_2 = $ws_1 = $ws
2938 } ## end setting space flag inside opening tokens
2940 #---------------------------------------------------------------
2941 # Whitespace Rules Section 2:
2942 # Special checks for certain types ...
2943 #---------------------------------------------------------------
2944 # The hash '%is_special_ws_type' significantly speeds up this routine,
2945 # but be sure to update it if a new check is added.
2946 # Currently has types: qw(k w C m - Q #)
2947 if ( $is_special_ws_type{$type} ) {
2949 if ( $type eq 'k' ) {
2951 # Keywords 'for', 'foreach' are special cases for -kpit since
2952 # the opening paren does not always immediately follow the
2953 # keyword. So we have to search forward for the paren in this
2954 # case. I have limited the search to 10 tokens ahead, just in
2955 # case somebody has a big file and no opening paren. This
2956 # should be enough for all normal code. Added the level check
2958 if ( $is_for_foreach{$token}
2959 && %keyword_paren_inner_tightness
2960 && defined( $keyword_paren_inner_tightness{$token} )
2963 my $level = $rLL->[$j]->[_LEVEL_];
2965 ## NOTE: we might use the KNEXT variable to avoid this loop
2966 ## but profiling shows that little would be saved
2967 foreach my $inc ( 1 .. 9 ) {
2969 last if ( $jp > $jmax );
2970 last if ( $rLL->[$jp]->[_LEVEL_] != $level ); # b1236
2971 next unless ( $rLL->[$jp]->[_TOKEN_] eq '(' );
2972 my $seqno_p = $rLL->[$jp]->[_TYPE_SEQUENCE_];
2973 set_container_ws_by_keyword( $token, $seqno_p );
2979 # retain any space between '-' and bare word
2980 elsif ( $type eq 'w' || $type eq 'C' ) {
2981 $ws = WS_OPTIONAL if $last_type eq '-';
2984 # retain any space between '-' and bare word; for example
2985 # avoid space between 'USER' and '-' here: <<snippets/space2.in>>
2986 # $myhash{USER-NAME}='steve';
2987 elsif ( $type eq 'm' || $type eq '-' ) {
2988 $ws = WS_OPTIONAL if ( $last_type eq 'w' );
2991 # always space before side comment
2992 elsif ( $type eq '#' ) { $ws = WS_YES if $j > 0 }
2994 # space_backslash_quote; RT #123774 <<snippets/rt123774.in>>
2995 # allow a space between a backslash and single or double quote
2996 # to avoid fooling html formatters
2997 elsif ( $last_type eq '\\' && $type eq 'Q' && $token =~ /^[\"\']/ )
2999 if ($rOpts_space_backslash_quote) {
3000 if ( $rOpts_space_backslash_quote == 1 ) {
3003 elsif ( $rOpts_space_backslash_quote == 2 ) { $ws = WS_YES }
3004 else { } # shouldnt happen
3010 } ## end elsif ( $is_special_ws_type{$type} ...
3012 #---------------------------------------------------------------
3013 # Whitespace Rules Section 3:
3014 # Handle space on inside of closing brace pairs.
3015 #---------------------------------------------------------------
3018 elsif ( $is_closing_type{$type} ) {
3020 my $seqno = $rtokh->[_TYPE_SEQUENCE_];
3021 if ( $j == $j_tight_closing_paren ) {
3023 $j_tight_closing_paren = -1;
3028 if ( !defined($ws) ) {
3031 my $block_type = $rblock_type_of_seqno->{$seqno};
3032 if ( $type eq '}' && $token eq '}' && $block_type ) {
3033 $tightness = $rOpts_block_brace_tightness;
3035 else { $tightness = $tightness{$token} }
3037 $ws = ( $tightness > 1 ) ? WS_NO : WS_YES;
3041 # check for special cases which override the above rules
3042 if ( %closing_container_inside_ws && $seqno ) {
3043 my $ws_override = $closing_container_inside_ws{$seqno};
3044 if ($ws_override) { $ws = $ws_override }
3047 $ws_4 = $ws_3 = $ws_2 = $ws
3049 } ## end setting space flag inside closing tokens
3051 #---------------------------------------------------------------
3052 # Whitespace Rules Section 4:
3053 #---------------------------------------------------------------
3055 elsif ( $is_opening_type{$type} ) {
3057 if ( $token eq '(' ) {
3059 my $seqno = $rtokh->[_TYPE_SEQUENCE_];
3061 # This will have to be tweaked as tokenization changes.
3062 # We usually want a space at '} (', for example:
3063 # <<snippets/space1.in>>
3064 # map { 1 * $_; } ( $y, $M, $w, $d, $h, $m, $s );
3067 # &{ $_->[1] }( delete $_[$#_]{ $_->[0] } );
3068 # At present, the above & block is marked as type L/R so this
3069 # case won't go through here.
3070 if ( $last_type eq '}' && $last_token ne ')' ) { $ws = WS_YES }
3072 # NOTE: some older versions of Perl had occasional problems if
3073 # spaces are introduced between keywords or functions and
3074 # opening parens. So the default is not to do this except is
3075 # certain cases. The current Perl seems to tolerate spaces.
3077 # Space between keyword and '('
3078 elsif ( $last_type eq 'k' ) {
3080 unless ( $rOpts_space_keyword_paren
3081 || $space_after_keyword{$last_token} );
3083 # Set inside space flag if requested
3084 set_container_ws_by_keyword( $last_token, $seqno );
3087 # Space between function and '('
3088 # -----------------------------------------------------
3089 # 'w' and 'i' checks for something like:
3090 # myfun( &myfun( ->myfun(
3091 # -----------------------------------------------------
3093 # Note that at this point an identifier may still have a
3094 # leading arrow, but the arrow will be split off during token
3095 # respacing. After that, the token may become a bare word
3096 # without leading arrow. The point is, it is best to mark
3097 # function call parens right here before that happens.
3098 # Patch: added 'C' to prevent blinker, case b934, i.e. 'pi()'
3099 # NOTE: this would be the place to allow spaces between
3100 # repeated parens, like () () (), as in case c017, but I
3101 # decided that would not be a good idea.
3103 # Updated to allow detached '->' from tokenizer (issue c140)
3107 $is_wCUG{$last_type}
3116 # with prefix '->' or '&'
3117 $last_token =~ /^([\&]|->)/
3119 # or preceding token '->' (see b1337; c140)
3120 || $rtokh_last_last->[_TYPE_] eq '->'
3122 # or preceding sub call operator token '&'
3123 || ( $rtokh_last_last->[_TYPE_] eq 't'
3124 && $rtokh_last_last->[_TOKEN_] =~ /^\&\s*$/ )
3129 $ws = $rOpts_space_function_paren ? WS_YES : WS_NO;
3130 set_container_ws_by_keyword( $last_token, $seqno );
3131 $ris_function_call_paren->{$seqno} = 1;
3134 # space between something like $i and ( in 'snippets/space2.in'
3135 # for $i ( 0 .. 20 ) {
3136 elsif ( $last_type eq 'i' && $last_token =~ /^[\$\%\@]/ ) {
3140 # allow constant function followed by '()' to retain no space
3141 elsif ($last_type eq 'C'
3142 && $rLL->[ $j + 1 ]->[_TOKEN_] eq ')' )
3148 # patch for SWITCH/CASE: make space at ']{' optional
3149 # since the '{' might begin a case or when block
3150 elsif ( ( $token eq '{' && $type ne 'L' ) && $last_token eq ']' ) {
3154 # keep space between 'sub' and '{' for anonymous sub definition,
3155 # be sure type = 'k' (added for c140)
3156 if ( $type eq '{' ) {
3157 if ( $last_token eq 'sub' && $last_type eq 'k' ) {
3161 # this is needed to avoid no space in '){'
3162 if ( $last_token eq ')' && $token eq '{' ) { $ws = WS_YES }
3164 # avoid any space before the brace or bracket in something like
3165 # @opts{'a','b',...}
3166 if ( $last_type eq 'i' && $last_token =~ /^\@/ ) {
3170 } ## end if ( $is_opening_type{$type} ) {
3172 # always preserver whatever space was used after a possible
3173 # filehandle (except _) or here doc operator
3176 && ( ( $last_type eq 'Z' && $last_token ne '_' )
3177 || $last_type eq 'h' )
3186 if ( !defined($ws) ) {
3188 #---------------------------------------------------------------
3189 # Whitespace Rules Section 4:
3190 # Use the binary rule table.
3191 #---------------------------------------------------------------
3192 $ws = $binary_ws_rules{$last_type}{$type};
3193 $ws_4 = $ws if DEBUG_WHITE;
3195 #---------------------------------------------------------------
3196 # Whitespace Rules Section 5:
3197 # Apply default rules not covered above.
3198 #---------------------------------------------------------------
3200 # If we fall through to here, look at the pre-defined hash tables
3201 # for the two tokens, and:
3202 # if (they are equal) use the common value
3203 # if (either is zero or undef) use the other
3204 # if (either is -1) use it
3218 if ( !defined($ws) ) {
3219 my $wl = $want_left_space{$type};
3220 my $wr = $want_right_space{$last_type};
3221 if ( !defined($wl) ) {
3222 $ws = defined($wr) ? $wr : 0;
3224 elsif ( !defined($wr) ) {
3229 ( ( $wl == $wr ) || ( $wl == -1 ) || !$wr ) ? $wl : $wr;
3234 # Treat newline as a whitespace. Otherwise, we might combine
3235 # 'Send' and '-recipients' here according to the above rules:
3236 # <<snippets/space3.in>>
3237 # my $msg = new Fax::Send
3238 # -recipients => $to,
3241 && $rtokh->[_LINE_INDEX_] != $rtokh_last->[_LINE_INDEX_] )
3246 $rwhitespace_flags->[$j] = $ws;
3248 next if ( !DEBUG_WHITE );
3250 my $str = substr( $last_token, 0, 15 );
3251 $str .= SPACE x ( 16 - length($str) );
3252 if ( !defined($ws_1) ) { $ws_1 = "*" }
3253 if ( !defined($ws_2) ) { $ws_2 = "*" }
3254 if ( !defined($ws_3) ) { $ws_3 = "*" }
3255 if ( !defined($ws_4) ) { $ws_4 = "*" }
3257 "NEW WHITE: i=$j $str $last_type $type $ws_1 : $ws_2 : $ws_3 : $ws_4 : $ws \n";
3259 # reset for next pass
3260 $ws_1 = $ws_2 = $ws_3 = $ws_4 = undef;
3264 if ( $rOpts->{'tight-secret-operators'} ) {
3265 new_secret_operator_whitespace( $rLL, $rwhitespace_flags );
3267 $self->[_ris_function_call_paren_] = $ris_function_call_paren;
3268 return $rwhitespace_flags;
3270 } ## end sub set_whitespace_flags
3272 sub set_container_ws_by_keyword {
3274 my ( $word, $sequence_number ) = @_;
3275 return unless (%keyword_paren_inner_tightness);
3277 # We just saw a keyword (or other function name) followed by an opening
3278 # paren. Now check to see if the following paren should have special
3279 # treatment for its inside space. If so we set a hash value using the
3280 # sequence number as key.
3281 if ( $word && $sequence_number ) {
3282 my $tightness = $keyword_paren_inner_tightness{$word};
3283 if ( defined($tightness) && $tightness != 1 ) {
3284 my $ws_flag = $tightness == 0 ? WS_YES : WS_NO;
3285 $opening_container_inside_ws{$sequence_number} = $ws_flag;
3286 $closing_container_inside_ws{$sequence_number} = $ws_flag;
3290 } ## end sub set_container_ws_by_keyword
3292 sub ws_in_container {
3295 if ( $j + 1 > $jmax ) { return (WS_NO) }
3297 # Patch to count '-foo' as single token so that
3298 # each of $a{-foo} and $a{foo} and $a{'foo'} do
3299 # not get spaces with default formatting.
3303 && $last_token eq '{'
3304 && $rLL->[ $j + 1 ]->[_TYPE_] eq 'w' );
3306 # Patch to count a sign separated from a number as a single token, as
3307 # in the following line. Otherwise, it takes two steps to converge:
3309 if ( ( $type eq 'm' || $type eq 'p' )
3311 && $rLL->[ $j + 1 ]->[_TYPE_] eq 'b'
3312 && $rLL->[ $j + 2 ]->[_TYPE_] eq 'n'
3313 && $rLL->[ $j + 2 ]->[_TOKEN_] =~ /^\d/ )
3318 # $j_next is where a closing token should be if
3319 # the container has a single token
3320 if ( $j_here + 1 > $jmax ) { return (WS_NO) }
3322 ( $rLL->[ $j_here + 1 ]->[_TYPE_] eq 'b' )
3326 if ( $j_next > $jmax ) { return WS_NO }
3327 my $tok_next = $rLL->[$j_next]->[_TOKEN_];
3328 my $type_next = $rLL->[$j_next]->[_TYPE_];
3330 # for tightness = 1, if there is just one token
3331 # within the matching pair, we will keep it tight
3333 $tok_next eq $matching_token{$last_token}
3335 # but watch out for this: [ [ ] (misc.t)
3336 && $last_token ne $token
3338 # double diamond is usually spaced
3344 # remember where to put the space for the closing paren
3345 $j_tight_closing_paren = $j_next;
3349 } ## end sub ws_in_container
3351 } ## end closure set_whitespace_flags
3353 sub dump_want_left_space {
3355 local $LIST_SEPARATOR = "\n";
3357 These values are the main control of whitespace to the left of a token type;
3358 They may be altered with the -wls parameter.
3359 For a list of token types, use perltidy --dump-token-types (-dtt)
3360 1 means the token wants a space to its left
3361 -1 means the token does not want a space to its left
3362 ------------------------------------------------------------------------
3364 foreach my $key ( sort keys %want_left_space ) {
3365 $fh->print("$key\t$want_left_space{$key}\n");
3368 } ## end sub dump_want_left_space
3370 sub dump_want_right_space {
3372 local $LIST_SEPARATOR = "\n";
3374 These values are the main control of whitespace to the right of a token type;
3375 They may be altered with the -wrs parameter.
3376 For a list of token types, use perltidy --dump-token-types (-dtt)
3377 1 means the token wants a space to its right
3378 -1 means the token does not want a space to its right
3379 ------------------------------------------------------------------------
3381 foreach my $key ( sort keys %want_right_space ) {
3382 $fh->print("$key\t$want_right_space{$key}\n");
3385 } ## end sub dump_want_right_space
3387 { ## begin closure is_essential_whitespace
3389 my %is_sort_grep_map;
3393 my %essential_whitespace_filter_l1;
3394 my %essential_whitespace_filter_r1;
3395 my %essential_whitespace_filter_l2;
3396 my %essential_whitespace_filter_r2;
3397 my %is_type_with_space_before_bareword;
3398 my %is_special_variable_char;
3404 # NOTE: This hash is like the global %is_sort_map_grep, but it ignores
3405 # grep aliases on purpose, since here we are looking parens, not braces
3406 @q = qw(sort grep map);
3407 @is_sort_grep_map{@q} = (1) x scalar(@q);
3409 @q = qw(for foreach);
3410 @is_for_foreach{@q} = (1) x scalar(@q);
3413 .. :: << >> ** && || // -> => += -= .= %= &= |= ^= *= <>
3414 <= >= == =~ !~ != ++ -- /= x= ~~ ~. |. &. ^.
3416 @is_digraph{@q} = (1) x scalar(@q);
3418 @q = qw( ... **= <<= >>= &&= ||= //= <=> !~~ &.= |.= ^.= <<~);
3419 @is_trigraph{@q} = (1) x scalar(@q);
3421 # These are used as a speedup filters for sub is_essential_whitespace.
3424 # These left side token types USUALLY do not require a space:
3425 @q = qw( ; { } [ ] L R );
3429 @essential_whitespace_filter_l1{@q} = (1) x scalar(@q);
3431 # BUT some might if followed by these right token types
3432 @q = qw( pp mm << <<= h );
3433 @essential_whitespace_filter_r1{@q} = (1) x scalar(@q);
3436 # These right side filters usually do not require a space
3440 @essential_whitespace_filter_r2{@q} = (1) x scalar(@q);
3442 # BUT some might if followed by these left token types
3444 @essential_whitespace_filter_l2{@q} = (1) x scalar(@q);
3446 # Keep a space between certain types and any bareword:
3447 # Q: keep a space between a quote and a bareword to prevent the
3448 # bareword from becoming a quote modifier.
3449 # &: do not remove space between an '&' and a bare word because
3450 # it may turn into a function evaluation, like here
3451 # between '&' and 'O_ACCMODE', producing a syntax error [File.pm]
3452 # $opts{rdonly} = (($opts{mode} & O_ACCMODE) == O_RDONLY);
3454 @is_type_with_space_before_bareword{@q} = (1) x scalar(@q);
3456 # These are the only characters which can (currently) form special
3457 # variables, like $^W: (issue c066, c068).
3459 qw{ ? A B C D E F G H I J K L M N O P Q R S T U V W X Y Z [ \ ] ^ _ };
3460 @{is_special_variable_char}{@q} = (1) x scalar(@q);
3464 sub is_essential_whitespace {
3466 # Essential whitespace means whitespace which cannot be safely deleted
3467 # without risking the introduction of a syntax error.
3468 # We are given three tokens and their types:
3469 # ($tokenl, $typel) is the token to the left of the space in question
3470 # ($tokenr, $typer) is the token to the right of the space in question
3471 # ($tokenll, $typell) is previous nonblank token to the left of $tokenl
3473 # Note1: This routine should almost never need to be changed. It is
3474 # for avoiding syntax problems rather than for formatting.
3476 # Note2: The -mangle option causes large numbers of calls to this
3477 # routine and therefore is a good test. So if a change is made, be sure
3478 # to use nytprof to profile with both old and reviesed coding using the
3479 # -mangle option and check differences.
3481 my ( $tokenll, $typell, $tokenl, $typel, $tokenr, $typer ) = @_;
3483 # This is potentially a very slow routine but the following quick
3484 # filters typically catch and handle over 90% of the calls.
3486 # Filter 1: usually no space required after common types ; , [ ] { } ( )
3488 if ( $essential_whitespace_filter_l1{$typel}
3489 && !$essential_whitespace_filter_r1{$typer} );
3491 # Filter 2: usually no space before common types ; ,
3493 if ( $essential_whitespace_filter_r2{$typer}
3494 && !$essential_whitespace_filter_l2{$typel} );
3496 # Filter 3: Handle side comments: a space is only essential if the left
3497 # token ends in '$' For example, we do not want to create $#foo below:
3506 # Also, I prefer not to put a ? and # together because ? used to be
3507 # a pattern delimiter and spacing was used if guessing was needed.
3509 if ( $typer eq '#' ) {
3513 && ( $typel eq '?' || substr( $tokenl, -1 ) eq '$' ) );
3517 my $tokenr_is_bareword = $tokenr =~ /^\w/ && $tokenr !~ /^\d/;
3518 my $tokenr_is_open_paren = $tokenr eq '(';
3519 my $token_joined = $tokenl . $tokenr;
3520 my $tokenl_is_dash = $tokenl eq '-';
3524 # never combine two bare words or numbers
3525 # examples: and ::ok(1)
3527 # for bla::bla:: abc
3528 # example is "%overload:: and" in files Dumpvalue.pm or colonbug.pl
3529 # $input eq"quit" to make $inputeq"quit"
3530 # my $size=-s::SINK if $file; <==OK but we won't do it
3531 # don't join something like: for bla::bla:: abc
3532 # example is "%overload:: and" in files Dumpvalue.pm or colonbug.pl
3533 ( ( $tokenl =~ /([\'\w]|\:\:)$/ && $typel ne 'CORE::' )
3534 && ( $tokenr =~ /^([\'\w]|\:\:)/ ) )
3536 # do not combine a number with a concatenation dot
3537 # example: pom.caputo:
3538 # $vt100_compatible ? "\e[0;0H" : ('-' x 78 . "\n");
3539 || $typel eq 'n' && $tokenr eq '.'
3540 || $typer eq 'n' && $tokenl eq '.'
3542 # cases of a space before a bareword...
3544 $tokenr_is_bareword && (
3546 # do not join a minus with a bare word, because you might form
3547 # a file test operator. Example from Complex.pm:
3548 # if (CORE::abs($z - i) < $eps);
3549 # "z-i" would be taken as a file test.
3550 $tokenl_is_dash && length($tokenr) == 1
3552 # and something like this could become ambiguous without space
3554 # use constant III=>1;
3558 || $tokenl_is_dash && $typer =~ /^[wC]$/
3560 # keep space between types Q & and a bareword
3561 || $is_type_with_space_before_bareword{$typel}
3563 # +-: binary plus and minus before a bareword could get
3564 # converted into unary plus and minus on next pass through the
3565 # tokenizer. This can lead to blinkers: cases b660 b670 b780
3566 # b781 b787 b788 b790 So we keep a space unless the +/- clearly
3567 # follows an operator
3568 || ( ( $typel eq '+' || $typel eq '-' )
3569 && $typell !~ /^[niC\)\}\]R]$/ )
3571 # keep a space between a token ending in '$' and any word;
3572 # this caused trouble: "die @$ if $@"
3573 || $typel eq 'i' && substr( $tokenl, -1, 1 ) eq '$'
3575 # don't combine $$ or $# with any alphanumeric
3576 # (testfile mangle.t with --mangle)
3581 ) ## end $tokenr_is_bareword
3584 # '= -' should not become =- or you will get a warning
3586 # || ($tokenr eq '-')
3588 # do not join a bare word with a minus, like between 'Send' and
3589 # '-recipients' here <<snippets/space3.in>>
3590 # my $msg = new Fax::Send
3591 # -recipients => $to,
3593 # This is the safest thing to do. If we had the token to the right of
3594 # the minus we could do a better check.
3596 # And do not combine a bareword and a quote, like this:
3597 # oops "Your login, $Bad_Login, is not valid";
3598 # It can cause a syntax error if oops is a sub
3599 || $typel eq 'w' && ( $tokenr eq '-' || $typer eq 'Q' )
3601 # perl is very fussy about spaces before <<
3602 || substr( $tokenr, 0, 2 ) eq '<<'
3604 # avoid combining tokens to create new meanings. Example:
3605 # $a+ +$b must not become $a++$b
3606 || ( $is_digraph{$token_joined} )
3607 || $is_trigraph{$token_joined}
3609 # another example: do not combine these two &'s:
3610 # allow_options & &OPT_EXECCGI
3611 || $is_digraph{ $tokenl . substr( $tokenr, 0, 1 ) }
3613 # retain any space after possible filehandle
3614 # (testfiles prnterr1.t with --extrude and mangle.t with --mangle)
3617 # Added 'Y' here 16 Jan 2021 to prevent -mangle option from removing
3618 # space after type Y. Otherwise, it will get parsed as type 'Z' later
3619 # and any space would have to be added back manually if desired.
3622 # Perl is sensitive to whitespace after the + here:
3623 # $b = xvals $a + 0.1 * yvals $a;
3624 || $typell eq 'Z' && $typel =~ /^[\/\?\+\-\*]$/
3627 $tokenr_is_open_paren && (
3629 # keep paren separate in 'use Foo::Bar ()'
3630 ( $typel eq 'w' && $typell eq 'k' && $tokenll eq 'use' )
3632 # OLD: keep any space between filehandle and paren:
3633 # file mangle.t with --mangle:
3634 # NEW: this test is no longer necessary here (moved above)
3637 # must have space between grep and left paren; "grep(" will fail
3638 || $is_sort_grep_map{$tokenl}
3640 # don't stick numbers next to left parens, as in:
3641 #use Mail::Internet 1.28 (); (see Entity.pm, Head.pm, Test.pm)
3644 ) ## end $tokenr_is_open_paren
3646 # retain any space after here doc operator ( hereerr.t)
3649 # be careful with a space around ++ and --, to avoid ambiguity as to
3650 # which token it applies
3651 || ( $typer eq 'pp' || $typer eq 'mm' ) && $tokenl !~ /^[\;\{\(\[]/
3652 || ( $typel eq '++' || $typel eq '--' )
3653 && $tokenr !~ /^[\;\}\)\]]/
3655 # need space after foreach my; for example, this will fail in
3656 # older versions of Perl:
3657 # foreach my$ft(@filetypes)...
3661 && substr( $tokenr, 0, 1 ) eq '$'
3664 && $is_for_foreach{$tokenll}
3667 # Keep space after like $^ if needed to avoid forming a different
3668 # special variable (issue c068). For example:
3669 # my $aa = $^ ? "none" : "ok";
3671 && length($tokenl) == 2
3672 && substr( $tokenl, 1, 1 ) eq '^'
3673 && $is_special_variable_char{ substr( $tokenr, 0, 1 ) } )
3675 # We must be sure that a space between a ? and a quoted string
3676 # remains if the space before the ? remains. [Loca.pm, lockarea]
3678 # $b=join $comma ? ',' : ':', @_; # ok
3679 # $b=join $comma?',' : ':', @_; # ok!
3680 # $b=join $comma ?',' : ':', @_; # error!
3681 # Not really required:
3682 ## || ( ( $typel eq '?' ) && ( $typer eq 'Q' ) )
3684 # Space stacked labels...
3685 # Not really required: Perl seems to accept non-spaced labels.
3686 ## || $typel eq 'J' && $typer eq 'J'
3688 ; # the value of this long logic sequence is the result we want
3690 } ## end sub is_essential_whitespace
3691 } ## end closure is_essential_whitespace
3693 { ## begin closure new_secret_operator_whitespace
3695 my %secret_operators;
3696 my %is_leading_secret_token;
3700 # token lists for perl secret operators as compiled by Philippe Bruhat
3701 # at: https://metacpan.org/module/perlsecret
3702 %secret_operators = (
3703 'Goatse' => [qw#= ( ) =#], #=( )=
3704 'Venus1' => [qw#0 +#], # 0+
3705 'Venus2' => [qw#+ 0#], # +0
3706 'Enterprise' => [qw#) x ! !#], # ()x!!
3707 'Kite1' => [qw#~ ~ <>#], # ~~<>
3708 'Kite2' => [qw#~~ <>#], # ~~<>
3709 'Winking Fat Comma' => [ ( ',', '=>' ) ], # ,=>
3710 'Bang bang ' => [qw#! !#], # !!
3713 # The following operators and constants are not included because they
3714 # are normally kept tight by perltidy:
3718 # Make a lookup table indexed by the first token of each operator:
3719 # first token => [list, list, ...]
3720 foreach my $value ( values(%secret_operators) ) {
3721 my $tok = $value->[0];
3722 push @{ $is_leading_secret_token{$tok} }, $value;
3726 sub new_secret_operator_whitespace {
3728 my ( $rlong_array, $rwhitespace_flags ) = @_;
3730 # Loop over all tokens in this line
3731 my ( $token, $type );
3732 my $jmax = @{$rlong_array} - 1;
3733 foreach my $j ( 0 .. $jmax ) {
3735 $token = $rlong_array->[$j]->[_TOKEN_];
3736 $type = $rlong_array->[$j]->[_TYPE_];
3738 # Skip unless this token might start a secret operator
3739 next if ( $type eq 'b' );
3740 next unless ( $is_leading_secret_token{$token} );
3742 # Loop over all secret operators with this leading token
3743 foreach my $rpattern ( @{ $is_leading_secret_token{$token} } ) {
3745 foreach my $tok ( @{$rpattern} ) {
3750 && $rlong_array->[$jend]->[_TYPE_] eq 'b' );
3752 || $tok ne $rlong_array->[$jend]->[_TOKEN_] )
3761 # set flags to prevent spaces within this operator
3762 foreach my $jj ( $j + 1 .. $jend ) {
3763 $rwhitespace_flags->[$jj] = WS_NO;
3768 } ## End Loop over all operators
3769 } ## End loop over all tokens
3772 } ## end closure new_secret_operator_whitespace
3774 { ## begin closure set_bond_strengths
3776 # These routines and variables are involved in deciding where to break very
3779 my %is_good_keyword_breakpoint;
3781 my %is_container_token;
3783 my %binary_bond_strength_nospace;
3784 my %binary_bond_strength;
3793 sub initialize_bond_strength_hashes {
3796 @q = qw(if unless while until for foreach);
3797 @is_good_keyword_breakpoint{@q} = (1) x scalar(@q);
3799 @q = qw(lt gt le ge);
3800 @is_lt_gt_le_ge{@q} = (1) x scalar(@q);
3802 @q = qw/ ( [ { } ] ) /;
3803 @is_container_token{@q} = (1) x scalar(@q);
3805 # The decision about where to break a line depends upon a "bond
3806 # strength" between tokens. The LOWER the bond strength, the MORE
3807 # likely a break. A bond strength may be any value but to simplify
3808 # things there are several pre-defined strength levels:
3810 # NO_BREAK => 10000;
3811 # VERY_STRONG => 100;
3815 # VERY_WEAK => 0.55;
3817 # The strength values are based on trial-and-error, and need to be
3818 # tweaked occasionally to get desired results. Some comments:
3820 # 1. Only relative strengths are important. small differences
3821 # in strengths can make big formatting differences.
3822 # 2. Each indentation level adds one unit of bond strength.
3823 # 3. A value of NO_BREAK makes an unbreakable bond
3824 # 4. A value of VERY_WEAK is the strength of a ','
3825 # 5. Values below NOMINAL are considered ok break points.
3826 # 6. Values above NOMINAL are considered poor break points.
3828 # The bond strengths should roughly follow precedence order where
3829 # possible. If you make changes, please check the results very
3830 # carefully on a variety of scripts. Testing with the -extrude
3831 # options is particularly helpful in exercising all of the rules.
3833 # Wherever possible, bond strengths are defined in the following
3834 # tables. There are two main stages to setting bond strengths and
3835 # two types of tables:
3837 # The first stage involves looking at each token individually and
3838 # defining left and right bond strengths, according to if we want
3839 # to break to the left or right side, and how good a break point it
3840 # is. For example tokens like =, ||, && make good break points and
3841 # will have low strengths, but one might want to break on either
3842 # side to put them at the end of one line or beginning of the next.
3844 # The second stage involves looking at certain pairs of tokens and
3845 # defining a bond strength for that particular pair. This second
3846 # stage has priority.
3848 #---------------------------------------------------------------
3849 # Bond Strength BEGIN Section 1.
3850 # Set left and right bond strengths of individual tokens.
3851 #---------------------------------------------------------------
3853 # NOTE: NO_BREAK's set in this section first are HINTS which will
3854 # probably not be honored. Essential NO_BREAKS's should be set in
3855 # BEGIN Section 2 or hardwired in the NO_BREAK coding near the end
3856 # of this subroutine.
3858 # Note that we are setting defaults in this section. The user
3859 # cannot change bond strengths but can cause the left and right
3860 # bond strengths of any token type to be swapped through the use of
3861 # the -wba and -wbb flags. In this way the user can determine if a
3862 # breakpoint token should appear at the end of one line or the
3863 # beginning of the next line.
3865 %right_bond_strength = ();
3866 %left_bond_strength = ();
3867 %binary_bond_strength_nospace = ();
3868 %binary_bond_strength = ();
3872 # The hash keys in this section are token types, plus the text of
3873 # certain keywords like 'or', 'and'.
3875 # no break around possible filehandle
3876 $left_bond_strength{'Z'} = NO_BREAK;
3877 $right_bond_strength{'Z'} = NO_BREAK;
3879 # never put a bare word on a new line:
3880 # example print (STDERR, "bla"); will fail with break after (
3881 $left_bond_strength{'w'} = NO_BREAK;
3883 # blanks always have infinite strength to force breaks after
3885 $right_bond_strength{'b'} = NO_BREAK;
3887 # try not to break on exponentiation
3888 @q = qw# ** .. ... <=> #;
3889 @left_bond_strength{@q} = (STRONG) x scalar(@q);
3890 @right_bond_strength{@q} = (STRONG) x scalar(@q);
3892 # The comma-arrow has very low precedence but not a good break point
3893 $left_bond_strength{'=>'} = NO_BREAK;
3894 $right_bond_strength{'=>'} = NOMINAL;
3896 # ok to break after label
3897 $left_bond_strength{'J'} = NO_BREAK;
3898 $right_bond_strength{'J'} = NOMINAL;
3899 $left_bond_strength{'j'} = STRONG;
3900 $right_bond_strength{'j'} = STRONG;
3901 $left_bond_strength{'A'} = STRONG;
3902 $right_bond_strength{'A'} = STRONG;
3904 $left_bond_strength{'->'} = STRONG;
3905 $right_bond_strength{'->'} = VERY_STRONG;
3907 $left_bond_strength{'CORE::'} = NOMINAL;
3908 $right_bond_strength{'CORE::'} = NO_BREAK;
3910 # breaking AFTER modulus operator is ok:
3912 @left_bond_strength{@q} = (STRONG) x scalar(@q);
3913 @right_bond_strength{@q} =
3914 ( 0.1 * NOMINAL + 0.9 * STRONG ) x scalar(@q);
3916 # Break AFTER math operators * and /
3918 @left_bond_strength{@q} = (STRONG) x scalar(@q);
3919 @right_bond_strength{@q} = (NOMINAL) x scalar(@q);
3921 # Break AFTER weakest math operators + and -
3922 # Make them weaker than * but a bit stronger than '.'
3924 @left_bond_strength{@q} = (STRONG) x scalar(@q);
3925 @right_bond_strength{@q} =
3926 ( 0.91 * NOMINAL + 0.09 * WEAK ) x scalar(@q);
3928 # Define left strength of unary plus and minus (fixes case b511)
3929 $left_bond_strength{p} = $left_bond_strength{'+'};
3930 $left_bond_strength{m} = $left_bond_strength{'-'};
3932 # And make right strength of unary plus and minus very high.
3933 # Fixes cases b670 b790
3934 $right_bond_strength{p} = NO_BREAK;
3935 $right_bond_strength{m} = NO_BREAK;
3937 # breaking BEFORE these is just ok:
3939 @right_bond_strength{@q} = (STRONG) x scalar(@q);
3940 @left_bond_strength{@q} = (NOMINAL) x scalar(@q);
3942 # breaking before the string concatenation operator seems best
3943 # because it can be hard to see at the end of a line
3944 $right_bond_strength{'.'} = STRONG;
3945 $left_bond_strength{'.'} = 0.9 * NOMINAL + 0.1 * WEAK;
3948 @left_bond_strength{@q} = (STRONG) x scalar(@q);
3949 @right_bond_strength{@q} = (NOMINAL) x scalar(@q);
3951 # make these a little weaker than nominal so that they get
3952 # favored for end-of-line characters
3953 @q = qw< != == =~ !~ ~~ !~~ >;
3954 @left_bond_strength{@q} = (STRONG) x scalar(@q);
3955 @right_bond_strength{@q} =
3956 ( 0.9 * NOMINAL + 0.1 * WEAK ) x scalar(@q);
3959 @q = qw# < > | & >= <= #;
3960 @left_bond_strength{@q} = (VERY_STRONG) x scalar(@q);
3961 @right_bond_strength{@q} =
3962 ( 0.8 * NOMINAL + 0.2 * WEAK ) x scalar(@q);
3964 # breaking either before or after a quote is ok
3965 # but bias for breaking before a quote
3966 $left_bond_strength{'Q'} = NOMINAL;
3967 $right_bond_strength{'Q'} = NOMINAL + 0.02;
3968 $left_bond_strength{'q'} = NOMINAL;
3969 $right_bond_strength{'q'} = NOMINAL;
3971 # starting a line with a keyword is usually ok
3972 $left_bond_strength{'k'} = NOMINAL;
3974 # we usually want to bond a keyword strongly to what immediately
3975 # follows, rather than leaving it stranded at the end of a line
3976 $right_bond_strength{'k'} = STRONG;
3978 $left_bond_strength{'G'} = NOMINAL;
3979 $right_bond_strength{'G'} = STRONG;
3981 # assignment operators
3983 = **= += *= &= <<= &&=
3984 -= /= |= >>= ||= //=
3989 # Default is to break AFTER various assignment operators
3990 @left_bond_strength{@q} = (STRONG) x scalar(@q);
3991 @right_bond_strength{@q} =
3992 ( 0.4 * WEAK + 0.6 * VERY_WEAK ) x scalar(@q);
3994 # Default is to break BEFORE '&&' and '||' and '//'
3995 # set strength of '||' to same as '=' so that chains like
3996 # $a = $b || $c || $d will break before the first '||'
3997 $right_bond_strength{'||'} = NOMINAL;
3998 $left_bond_strength{'||'} = $right_bond_strength{'='};
4000 # same thing for '//'
4001 $right_bond_strength{'//'} = NOMINAL;
4002 $left_bond_strength{'//'} = $right_bond_strength{'='};
4004 # set strength of && a little higher than ||
4005 $right_bond_strength{'&&'} = NOMINAL;
4006 $left_bond_strength{'&&'} = $left_bond_strength{'||'} + 0.1;
4008 $left_bond_strength{';'} = VERY_STRONG;
4009 $right_bond_strength{';'} = VERY_WEAK;
4010 $left_bond_strength{'f'} = VERY_STRONG;
4012 # make right strength of for ';' a little less than '='
4013 # to make for contents break after the ';' to avoid this:
4014 # for ( $j = $number_of_fields - 1 ; $j < $item_count ; $j +=
4015 # $number_of_fields )
4016 # and make it weaker than ',' and 'and' too
4017 $right_bond_strength{'f'} = VERY_WEAK - 0.03;
4019 # The strengths of ?/: should be somewhere between
4020 # an '=' and a quote (NOMINAL),
4021 # make strength of ':' slightly less than '?' to help
4022 # break long chains of ? : after the colons
4023 $left_bond_strength{':'} = 0.4 * WEAK + 0.6 * NOMINAL;
4024 $right_bond_strength{':'} = NO_BREAK;
4025 $left_bond_strength{'?'} = $left_bond_strength{':'} + 0.01;
4026 $right_bond_strength{'?'} = NO_BREAK;
4028 $left_bond_strength{','} = VERY_STRONG;
4029 $right_bond_strength{','} = VERY_WEAK;
4031 # remaining digraphs and trigraphs not defined above
4032 @q = qw( :: <> ++ --);
4033 @left_bond_strength{@q} = (WEAK) x scalar(@q);
4034 @right_bond_strength{@q} = (STRONG) x scalar(@q);
4036 # Set bond strengths of certain keywords
4037 # make 'or', 'err', 'and' slightly weaker than a ','
4038 $left_bond_strength{'and'} = VERY_WEAK - 0.01;
4039 $left_bond_strength{'or'} = VERY_WEAK - 0.02;
4040 $left_bond_strength{'err'} = VERY_WEAK - 0.02;
4041 $left_bond_strength{'xor'} = VERY_WEAK - 0.01;
4042 $right_bond_strength{'and'} = NOMINAL;
4043 $right_bond_strength{'or'} = NOMINAL;
4044 $right_bond_strength{'err'} = NOMINAL;
4045 $right_bond_strength{'xor'} = NOMINAL;
4047 #---------------------------------------------------------------
4048 # Bond Strength BEGIN Section 2.
4049 # Set binary rules for bond strengths between certain token types.
4050 #---------------------------------------------------------------
4052 # We have a little problem making tables which apply to the
4053 # container tokens. Here is a list of container tokens and
4056 # type tokens // meaning
4057 # { {, [, ( // indent
4058 # } }, ], ) // outdent
4059 # [ [ // left non-structural [ (enclosing an array index)
4060 # ] ] // right non-structural square bracket
4061 # ( ( // left non-structural paren
4062 # ) ) // right non-structural paren
4063 # L { // left non-structural curly brace (enclosing a key)
4064 # R } // right non-structural curly brace
4066 # Some rules apply to token types and some to just the token
4067 # itself. We solve the problem by combining type and token into a
4068 # new hash key for the container types.
4070 # If a rule applies to a token 'type' then we need to make rules
4071 # for each of these 'type.token' combinations:
4082 # If a rule applies to a token then we need to make rules for
4083 # these 'type.token' combinations:
4092 # allow long lines before final { in an if statement, as in:
4097 # Otherwise, the line before the { tends to be too short.
4099 $binary_bond_strength{'))'}{'{{'} = VERY_WEAK + 0.03;
4100 $binary_bond_strength{'(('}{'{{'} = NOMINAL;
4102 # break on something like '} (', but keep this stronger than a ','
4103 # example is in 'howe.pl'
4104 $binary_bond_strength{'R}'}{'(('} = 0.8 * VERY_WEAK + 0.2 * WEAK;
4105 $binary_bond_strength{'}}'}{'(('} = 0.8 * VERY_WEAK + 0.2 * WEAK;
4107 # keep matrix and hash indices together
4108 # but make them a little below STRONG to allow breaking open
4109 # something like {'some-word'}{'some-very-long-word'} at the }{
4111 $binary_bond_strength{']]'}{'[['} = 0.9 * STRONG + 0.1 * NOMINAL;
4112 $binary_bond_strength{']]'}{'L{'} = 0.9 * STRONG + 0.1 * NOMINAL;
4113 $binary_bond_strength{'R}'}{'[['} = 0.9 * STRONG + 0.1 * NOMINAL;
4114 $binary_bond_strength{'R}'}{'L{'} = 0.9 * STRONG + 0.1 * NOMINAL;
4116 # increase strength to the point where a break in the following
4117 # will be after the opening paren rather than at the arrow:
4119 $binary_bond_strength{'i'}{'->'} = 1.45 * STRONG;
4121 # Added for c140 to make 'w ->' and 'i ->' behave the same
4122 $binary_bond_strength{'w'}{'->'} = 1.45 * STRONG;
4124 # Note that the following alternative strength would make the break at the
4125 # '->' rather than opening the '('. Both have advantages and disadvantages.
4126 # $binary_bond_strength{'i'}{'->'} = 0.5*STRONG + 0.5 * NOMINAL; #
4128 $binary_bond_strength{'))'}{'->'} = 0.1 * STRONG + 0.9 * NOMINAL;
4129 $binary_bond_strength{']]'}{'->'} = 0.1 * STRONG + 0.9 * NOMINAL;
4130 $binary_bond_strength{'})'}{'->'} = 0.1 * STRONG + 0.9 * NOMINAL;
4131 $binary_bond_strength{'}]'}{'->'} = 0.1 * STRONG + 0.9 * NOMINAL;
4132 $binary_bond_strength{'}}'}{'->'} = 0.1 * STRONG + 0.9 * NOMINAL;
4133 $binary_bond_strength{'R}'}{'->'} = 0.1 * STRONG + 0.9 * NOMINAL;
4135 $binary_bond_strength{'))'}{'[['} = 0.2 * STRONG + 0.8 * NOMINAL;
4136 $binary_bond_strength{'})'}{'[['} = 0.2 * STRONG + 0.8 * NOMINAL;
4137 $binary_bond_strength{'))'}{'{['} = 0.2 * STRONG + 0.8 * NOMINAL;
4138 $binary_bond_strength{'})'}{'{['} = 0.2 * STRONG + 0.8 * NOMINAL;
4140 #---------------------------------------------------------------
4141 # Binary NO_BREAK rules
4142 #---------------------------------------------------------------
4144 # use strict requires that bare word and => not be separated
4145 $binary_bond_strength{'C'}{'=>'} = NO_BREAK;
4146 $binary_bond_strength{'U'}{'=>'} = NO_BREAK;
4148 # Never break between a bareword and a following paren because
4149 # perl may give an error. For example, if a break is placed
4150 # between 'to_filehandle' and its '(' the following line will
4151 # give a syntax error [Carp.pm]: my( $no) =fileno(
4152 # to_filehandle( $in)) ;
4153 $binary_bond_strength{'C'}{'(('} = NO_BREAK;
4154 $binary_bond_strength{'C'}{'{('} = NO_BREAK;
4155 $binary_bond_strength{'U'}{'(('} = NO_BREAK;
4156 $binary_bond_strength{'U'}{'{('} = NO_BREAK;
4158 # use strict requires that bare word within braces not start new
4160 $binary_bond_strength{'L{'}{'w'} = NO_BREAK;
4162 $binary_bond_strength{'w'}{'R}'} = NO_BREAK;
4164 # The following two rules prevent a syntax error caused by breaking up
4165 # a construction like '{-y}'. The '-' quotes the 'y' and prevents
4166 # it from being taken as a transliteration. We have to keep
4167 # token types 'L m w' together to prevent this error.
4168 $binary_bond_strength{'L{'}{'m'} = NO_BREAK;
4169 $binary_bond_strength_nospace{'m'}{'w'} = NO_BREAK;
4171 # keep 'bareword-' together, but only if there is no space between
4172 # the word and dash. Do not keep together if there is a space.
4173 # example 'use perl6-alpha'
4174 $binary_bond_strength_nospace{'w'}{'m'} = NO_BREAK;
4176 # use strict requires that bare word and => not be separated
4177 $binary_bond_strength{'w'}{'=>'} = NO_BREAK;
4179 # use strict does not allow separating type info from trailing { }
4180 # testfile is readmail.pl
4181 $binary_bond_strength{'t'}{'L{'} = NO_BREAK;
4182 $binary_bond_strength{'i'}{'L{'} = NO_BREAK;
4184 # As a defensive measure, do not break between a '(' and a
4185 # filehandle. In some cases, this can cause an error. For
4186 # example, the following program works:
4193 # But this program fails:
4201 # This is normally only a problem with the 'extrude' option
4202 $binary_bond_strength{'(('}{'Y'} = NO_BREAK;
4203 $binary_bond_strength{'{('}{'Y'} = NO_BREAK;
4205 # never break between sub name and opening paren
4206 $binary_bond_strength{'w'}{'(('} = NO_BREAK;
4207 $binary_bond_strength{'w'}{'{('} = NO_BREAK;
4209 # keep '}' together with ';'
4210 $binary_bond_strength{'}}'}{';'} = NO_BREAK;
4212 # Breaking before a ++ can cause perl to guess wrong. For
4213 # example the following line will cause a syntax error
4214 # with -extrude if we break between '$i' and '++' [fixstyle2]
4215 # print( ( $i++ & 1 ) ? $_ : ( $change{$_} || $_ ) );
4216 $nobreak_lhs{'++'} = NO_BREAK;
4218 # Do not break before a possible file handle
4219 $nobreak_lhs{'Z'} = NO_BREAK;
4221 # use strict hates bare words on any new line. For
4222 # example, a break before the underscore here provokes the
4223 # wrath of use strict:
4224 # if ( -r $fn && ( -s _ || $AllowZeroFilesize)) {
4225 $nobreak_rhs{'F'} = NO_BREAK;
4226 $nobreak_rhs{'CORE::'} = NO_BREAK;
4228 # To prevent the tokenizer from switching between types 'w' and 'G' we
4229 # need to avoid breaking between type 'G' and the following code block
4230 # brace. Fixes case b929.
4231 $nobreak_rhs{G} = NO_BREAK;
4233 #---------------------------------------------------------------
4234 # Bond Strength BEGIN Section 3.
4235 # Define tables and values for applying a small bias to the above
4237 #---------------------------------------------------------------
4238 # Adding a small 'bias' to strengths is a simple way to make a line
4239 # break at the first of a sequence of identical terms. For
4240 # example, to force long string of conditional operators to break
4241 # with each line ending in a ':', we can add a small number to the
4242 # bond strength of each ':' (colon.t)
4243 @bias_tokens = qw( : && || f and or . ); # tokens which get bias
4244 %bias_hash = map { $_ => 0 } @bias_tokens;
4245 $delta_bias = 0.0001; # a very small strength level
4248 } ## end sub initialize_bond_strength_hashes
4250 use constant DEBUG_BOND => 0;
4252 sub set_bond_strengths {
4256 #-----------------------------------------------------------------
4257 # Define a 'bond strength' for each token pair in an output batch.
4258 # See comments above for definition of bond strength.
4259 #-----------------------------------------------------------------
4261 my $rbond_strength_to_go = [];
4263 my $rLL = $self->[_rLL_];
4264 my $rK_weld_right = $self->[_rK_weld_right_];
4265 my $rK_weld_left = $self->[_rK_weld_left_];
4266 my $ris_list_by_seqno = $self->[_ris_list_by_seqno_];
4268 # patch-its always ok to break at end of line
4269 $nobreak_to_go[$max_index_to_go] = 0;
4271 # we start a new set of bias values for each line
4274 my $code_bias = -.01; # bias for closing block braces
4278 my $token_length = 1;
4280 my $last_nonblank_type = $type;
4281 my $last_nonblank_token = $token;
4282 my $list_str = $left_bond_strength{'?'};
4284 my ( $bond_str_1, $bond_str_2, $bond_str_3, $bond_str_4 );
4286 my ( $block_type, $i_next, $i_next_nonblank, $next_nonblank_token,
4287 $next_nonblank_type, $next_token, $next_type,
4288 $total_nesting_depth, );
4290 # main loop to compute bond strengths between each pair of tokens
4291 foreach my $i ( 0 .. $max_index_to_go ) {
4293 if ( $type ne 'b' ) {
4294 $last_nonblank_type = $type;
4295 $last_nonblank_token = $token;
4297 $type = $types_to_go[$i];
4299 # strength on both sides of a blank is the same
4300 if ( $type eq 'b' && $last_type ne 'b' ) {
4301 $rbond_strength_to_go->[$i] = $rbond_strength_to_go->[ $i - 1 ];
4302 $nobreak_to_go[$i] ||= $nobreak_to_go[ $i - 1 ]; # fix for b1257
4306 $token = $tokens_to_go[$i];
4307 $token_length = $token_lengths_to_go[$i];
4308 $block_type = $block_type_to_go[$i];
4310 $next_type = $types_to_go[$i_next];
4311 $next_token = $tokens_to_go[$i_next];
4312 $total_nesting_depth = $nesting_depth_to_go[$i_next];
4313 $i_next_nonblank = ( ( $next_type eq 'b' ) ? $i + 2 : $i + 1 );
4314 $next_nonblank_type = $types_to_go[$i_next_nonblank];
4315 $next_nonblank_token = $tokens_to_go[$i_next_nonblank];
4317 my $seqno = $type_sequence_to_go[$i];
4318 my $next_nonblank_seqno = $type_sequence_to_go[$i_next_nonblank];
4320 # We are computing the strength of the bond between the current
4321 # token and the NEXT token.
4323 #---------------------------------------------------------------
4324 # Bond Strength Section 1:
4325 # First Approximation.
4326 # Use minimum of individual left and right tabulated bond
4328 #---------------------------------------------------------------
4329 my $bsr = $right_bond_strength{$type};
4330 my $bsl = $left_bond_strength{$next_nonblank_type};
4332 # define right bond strengths of certain keywords
4333 if ( $type eq 'k' && defined( $right_bond_strength{$token} ) ) {
4334 $bsr = $right_bond_strength{$token};
4336 elsif ( $token eq 'ne' or $token eq 'eq' ) {
4340 # set terminal bond strength to the nominal value
4341 # this will cause good preceding breaks to be retained
4342 if ( $i_next_nonblank > $max_index_to_go ) {
4345 # But weaken the bond at a 'missing terminal comma'. If an
4346 # optional comma is missing at the end of a broken list, use
4347 # the strength of a comma anyway to make formatting the same as
4348 # if it were there. Fixes issue c133.
4349 if ( !defined($bsr) || $bsr > VERY_WEAK ) {
4350 my $seqno_px = $parent_seqno_to_go[$max_index_to_go];
4351 if ( $ris_list_by_seqno->{$seqno_px} ) {
4352 my $KK = $K_to_go[$max_index_to_go];
4353 my $Kn = $self->K_next_nonblank($KK);
4354 my $seqno_n = $rLL->[$Kn]->[_TYPE_SEQUENCE_];
4355 if ( $seqno_n && $seqno_n eq $seqno_px ) {
4362 # define right bond strengths of certain keywords
4363 if ( $next_nonblank_type eq 'k'
4364 && defined( $left_bond_strength{$next_nonblank_token} ) )
4366 $bsl = $left_bond_strength{$next_nonblank_token};
4368 elsif ($next_nonblank_token eq 'ne'
4369 or $next_nonblank_token eq 'eq' )
4373 elsif ( $is_lt_gt_le_ge{$next_nonblank_token} ) {
4374 $bsl = 0.9 * NOMINAL + 0.1 * STRONG;
4377 # Use the minimum of the left and right strengths. Note: it might
4378 # seem that we would want to keep a NO_BREAK if either token has
4379 # this value. This didn't work, for example because in an arrow
4380 # list, it prevents the comma from separating from the following
4381 # bare word (which is probably quoted by its arrow). So necessary
4382 # NO_BREAK's have to be handled as special cases in the final
4384 if ( !defined($bsr) ) { $bsr = VERY_STRONG }
4385 if ( !defined($bsl) ) { $bsl = VERY_STRONG }
4386 my $bond_str = ( $bsr < $bsl ) ? $bsr : $bsl;
4387 $bond_str_1 = $bond_str if (DEBUG_BOND);
4389 #---------------------------------------------------------------
4390 # Bond Strength Section 2:
4391 # Apply hardwired rules..
4392 #---------------------------------------------------------------
4394 # Patch to put terminal or clauses on a new line: Weaken the bond
4395 # at an || followed by die or similar keyword to make the terminal
4396 # or clause fall on a new line, like this:
4399 # || die "Cannot add broadcast: No class identifier found";
4401 # Otherwise the break will be at the previous '=' since the || and
4402 # = have the same starting strength and the or is biased, like
4406 # shift || die "Cannot add broadcast: No class identifier found";
4408 # In any case if the user places a break at either the = or the ||
4409 # it should remain there.
4410 if ( $type eq '||' || $type eq 'k' && $token eq 'or' ) {
4412 # /^(die|confess|croak|warn)$/
4413 if ( $is_die_confess_croak_warn{$next_nonblank_token} ) {
4414 if ( $want_break_before{$token} && $i > 0 ) {
4415 $rbond_strength_to_go->[ $i - 1 ] -= $delta_bias;
4417 # keep bond strength of a token and its following blank
4419 if ( $types_to_go[ $i - 1 ] eq 'b' && $i > 2 ) {
4420 $rbond_strength_to_go->[ $i - 2 ] -= $delta_bias;
4424 $bond_str -= $delta_bias;
4429 # good to break after end of code blocks
4430 if ( $type eq '}' && $block_type && $next_nonblank_type ne ';' ) {
4432 $bond_str = 0.5 * WEAK + 0.5 * VERY_WEAK + $code_bias;
4433 $code_bias += $delta_bias;
4436 if ( $type eq 'k' ) {
4438 # allow certain control keywords to stand out
4439 if ( $next_nonblank_type eq 'k'
4440 && $is_last_next_redo_return{$token} )
4442 $bond_str = 0.45 * WEAK + 0.55 * VERY_WEAK;
4445 # Don't break after keyword my. This is a quick fix for a
4446 # rare problem with perl. An example is this line from file
4449 # foreach my $question( Debian::DebConf::ConfigDb::gettree(
4450 # $this->{'question'} ) )
4452 if ( $token eq 'my' ) {
4453 $bond_str = NO_BREAK;
4458 if ( $next_nonblank_type eq 'k' && $type ne 'CORE::' ) {
4460 if ( $is_keyword_returning_list{$next_nonblank_token} ) {
4461 $bond_str = $list_str if ( $bond_str > $list_str );
4464 # keywords like 'unless', 'if', etc, within statements
4466 if ( $is_good_keyword_breakpoint{$next_nonblank_token} ) {
4467 $bond_str = VERY_WEAK / 1.05;
4471 # try not to break before a comma-arrow
4472 elsif ( $next_nonblank_type eq '=>' ) {
4473 if ( $bond_str < STRONG ) { $bond_str = STRONG }
4476 #---------------------------------------------------------------
4477 # Additional hardwired NOBREAK rules
4478 #---------------------------------------------------------------
4480 # map1.t -- correct for a quirk in perl
4482 && $next_nonblank_type eq 'i'
4483 && $last_nonblank_type eq 'k'
4484 && $is_sort_map_grep{$last_nonblank_token} )
4486 # /^(sort|map|grep)$/ )
4488 $bond_str = NO_BREAK;
4491 # extrude.t: do not break before paren at:
4493 if ( $last_nonblank_type eq 'F' && $next_nonblank_token eq '(' ) {
4494 $bond_str = NO_BREAK;
4497 # OLD COMMENT: In older version of perl, use strict can cause
4498 # problems with breaks before bare words following opening parens.
4499 # For example, this will fail under older versions if a break is
4500 # made between '(' and 'MAIL':
4502 # use strict; open( MAIL, "a long filename or command"); close MAIL;
4504 # NEW COMMENT: Third fix for b1213:
4505 # This option does not seem to be needed any longer, and it can
4506 # cause instabilities. It can be turned off, but to minimize
4507 # changes to existing formatting it is retained only in the case
4508 # where the previous token was 'open' and there was no line break.
4509 # Even this could eventually be removed if it causes instability.
4510 if ( $type eq '{' ) {
4513 && $next_nonblank_type eq 'w'
4514 && $last_nonblank_type eq 'k'
4515 && $last_nonblank_token eq 'open'
4516 && !$old_breakpoint_to_go[$i] )
4518 $bond_str = NO_BREAK;
4522 # Do not break between a possible filehandle and a ? or / and do
4523 # not introduce a break after it if there is no blank
4525 elsif ( $type eq 'Z' ) {
4530 # if there is no blank and we do not want one. Examples:
4531 # print $x++ # do not break after $x
4532 # print HTML"HELLO" # break ok after HTML
4535 && defined( $want_left_space{$next_type} )
4536 && $want_left_space{$next_type} == WS_NO
4539 # or we might be followed by the start of a quote,
4540 # and this is not an existing breakpoint; fixes c039.
4541 || !$old_breakpoint_to_go[$i]
4542 && substr( $next_nonblank_token, 0, 1 ) eq '/'
4546 $bond_str = NO_BREAK;
4550 # Breaking before a ? before a quote can cause trouble if
4551 # they are not separated by a blank.
4552 # Example: a syntax error occurs if you break before the ? here
4553 # my$logic=join$all?' && ':' || ',@regexps;
4554 # From: Professional_Perl_Programming_Code/multifind.pl
4555 if ( $next_nonblank_type eq '?' ) {
4556 $bond_str = NO_BREAK
4557 if ( $types_to_go[ $i_next_nonblank + 1 ] eq 'Q' );
4560 # Breaking before a . followed by a number
4561 # can cause trouble if there is no intervening space
4562 # Example: a syntax error occurs if you break before the .2 here
4563 # $str .= pack($endian.2, ensurrogate($ord));
4564 # From: perl58/Unicode.pm
4565 elsif ( $next_nonblank_type eq '.' ) {
4566 $bond_str = NO_BREAK
4567 if ( $types_to_go[ $i_next_nonblank + 1 ] eq 'n' );
4571 elsif ( $type eq 'w' ) {
4572 $bond_str = NO_BREAK
4573 if ( !$old_breakpoint_to_go[$i]
4574 && substr( $next_nonblank_token, 0, 1 ) eq '/'
4575 && $next_nonblank_type ne '//' );
4578 $bond_str_2 = $bond_str if (DEBUG_BOND);
4580 #---------------------------------------------------------------
4581 # End of hardwired rules
4582 #---------------------------------------------------------------
4584 #---------------------------------------------------------------
4585 # Bond Strength Section 3:
4586 # Apply table rules. These have priority over the above
4588 #---------------------------------------------------------------
4590 my $tabulated_bond_str;
4592 my $rtype = $next_nonblank_type;
4593 if ( $seqno && $is_container_token{$token} ) {
4594 $ltype = $type . $token;
4597 if ( $next_nonblank_seqno
4598 && $is_container_token{$next_nonblank_token} )
4600 $rtype = $next_nonblank_type . $next_nonblank_token;
4602 # Alternate Fix #1 for issue b1299. This version makes the
4603 # decision as soon as possible. See Alternate Fix #2 also.
4604 # Do not separate a bareword identifier from its paren: b1299
4605 # This is currently needed for stability because if the bareword
4606 # gets separated from a preceding '->' and following '(' then
4607 # the tokenizer may switch from type 'i' to type 'w'. This
4608 # patch will prevent this by keeping it adjacent to its '('.
4609 ## if ( $next_nonblank_token eq '('
4611 ## && substr( $token, 0, 1 ) =~ /^\w$/ )
4617 # apply binary rules which apply regardless of space between tokens
4618 if ( $binary_bond_strength{$ltype}{$rtype} ) {
4619 $bond_str = $binary_bond_strength{$ltype}{$rtype};
4620 $tabulated_bond_str = $bond_str;
4623 # apply binary rules which apply only if no space between tokens
4624 if ( $binary_bond_strength_nospace{$ltype}{$next_type} ) {
4625 $bond_str = $binary_bond_strength{$ltype}{$next_type};
4626 $tabulated_bond_str = $bond_str;
4629 if ( $nobreak_rhs{$ltype} || $nobreak_lhs{$rtype} ) {
4630 $bond_str = NO_BREAK;
4631 $tabulated_bond_str = $bond_str;
4634 $bond_str_3 = $bond_str if (DEBUG_BOND);
4636 # If the hardwired rules conflict with the tabulated bond
4637 # strength then there is an inconsistency that should be fixed
4639 && $tabulated_bond_str
4641 && $bond_str_1 != $bond_str_2
4642 && $bond_str_2 != $tabulated_bond_str
4645 "BOND_TABLES: ltype=$ltype rtype=$rtype $bond_str_1->$bond_str_2->$bond_str_3\n";
4648 #-----------------------------------------------------------------
4649 # Bond Strength Section 4:
4650 # Modify strengths of certain tokens which often occur in sequence
4651 # by adding a small bias to each one in turn so that the breaks
4652 # occur from left to right.
4654 # Note that we only changing strengths by small amounts here,
4655 # and usually increasing, so we should not be altering any NO_BREAKs.
4656 # Other routines which check for NO_BREAKs will use a tolerance
4657 # of one to avoid any problem.
4658 #-----------------------------------------------------------------
4660 # The bias tables use special keys:
4661 # $type - if not keyword
4662 # $token - if keyword, but map some keywords together
4664 $type eq 'k' ? $token eq 'err' ? 'or' : $token : $type;
4666 $next_nonblank_type eq 'k'
4667 ? $next_nonblank_token eq 'err'
4669 : $next_nonblank_token
4670 : $next_nonblank_type;
4673 if ( defined( $bias{$left_key} ) ) {
4674 if ( !$want_break_before{$left_key} ) {
4675 $bias{$left_key} += $delta_bias;
4676 $bond_str += $bias{$left_key};
4681 if ( defined( $bias{$right_key} ) ) {
4682 if ( $want_break_before{$right_key} ) {
4684 # for leading '.' align all but 'short' quotes; the idea
4685 # is to not place something like "\n" on a single line.
4686 if ( $right_key eq '.' ) {
4688 $last_nonblank_type eq '.'
4689 && ( $token_length <=
4690 $rOpts_short_concatenation_item_length )
4691 && ( !$is_closing_token{$token} )
4694 $bias{$right_key} += $delta_bias;
4698 $bias{$right_key} += $delta_bias;
4700 $bond_str += $bias{$right_key};
4704 $bond_str_4 = $bond_str if (DEBUG_BOND);
4706 #---------------------------------------------------------------
4707 # Bond Strength Section 5:
4708 # Fifth Approximation.
4709 # Take nesting depth into account by adding the nesting depth
4710 # to the bond strength.
4711 #---------------------------------------------------------------
4714 if ( defined($bond_str) && !$nobreak_to_go[$i] ) {
4715 if ( $total_nesting_depth > 0 ) {
4716 $strength = $bond_str + $total_nesting_depth;
4719 $strength = $bond_str;
4723 $strength = NO_BREAK;
4725 # For critical code such as lines with here targets we must
4726 # be absolutely sure that we do not allow a break. So for
4727 # these the nobreak flag exceeds 1 as a signal. Otherwise we
4728 # can run into trouble when small tolerances are added.
4729 $strength += 1 if ( $nobreak_to_go[$i] > 1 );
4732 #---------------------------------------------------------------
4733 # Bond Strength Section 6:
4734 # Sixth Approximation. Welds.
4735 #---------------------------------------------------------------
4737 # Do not allow a break within welds
4738 if ( $total_weld_count && $seqno ) {
4739 my $KK = $K_to_go[$i];
4740 if ( $rK_weld_right->{$KK} ) {
4741 $strength = NO_BREAK;
4744 # But encourage breaking after opening welded tokens
4745 elsif ($rK_weld_left->{$KK}
4746 && $is_opening_token{$token} )
4752 # always break after side comment
4753 if ( $type eq '#' ) { $strength = 0 }
4755 $rbond_strength_to_go->[$i] = $strength;
4757 # Fix for case c001: be sure NO_BREAK's are enforced by later
4758 # routines, except at a '?' because '?' as quote delimiter is
4760 if ( $strength >= NO_BREAK && $next_nonblank_type ne '?' ) {
4761 $nobreak_to_go[$i] ||= 1;
4765 my $str = substr( $token, 0, 15 );
4766 $str .= SPACE x ( 16 - length($str) );
4768 "BOND: i=$i $str $type $next_nonblank_type depth=$total_nesting_depth strength=$bond_str_1 -> $bond_str_2 -> $bond_str_3 -> $bond_str_4 $bond_str -> $strength \n";
4770 # reset for next pass
4771 $bond_str_1 = $bond_str_2 = $bond_str_3 = $bond_str_4 = undef;
4775 return $rbond_strength_to_go;
4776 } ## end sub set_bond_strengths
4777 } ## end closure set_bond_strengths
4781 # See if a pattern will compile. We have to use a string eval here,
4782 # but it should be safe because the pattern has been constructed
4785 my $ok = eval "'##'=~/$pattern/";
4786 return !defined($ok) || $EVAL_ERROR;
4789 { ## begin closure prepare_cuddled_block_types
4793 # Add keywords here which really should not be cuddled
4795 my @q = qw(if unless for foreach while);
4796 @no_cuddle{@q} = (1) x scalar(@q);
4799 sub prepare_cuddled_block_types {
4801 # the cuddled-else style, if used, is controlled by a hash that
4804 # Include keywords here which should not be cuddled
4806 my $cuddled_string = EMPTY_STRING;
4807 if ( $rOpts->{'cuddled-else'} ) {
4810 $cuddled_string = 'elsif else continue catch finally'
4811 unless ( $rOpts->{'cuddled-block-list-exclusive'} );
4813 # This is the old equivalent but more complex version
4814 # $cuddled_string = 'if-elsif-else unless-elsif-else -continue ';
4816 # Add users other blocks to be cuddled
4817 my $cuddled_block_list = $rOpts->{'cuddled-block-list'};
4818 if ($cuddled_block_list) {
4819 $cuddled_string .= SPACE . $cuddled_block_list;
4824 # If we have a cuddled string of the form
4825 # 'try-catch-finally'
4827 # we want to prepare a hash of the form
4829 # $rcuddled_block_types = {
4836 # use -dcbl to dump this hash
4838 # Multiple such strings are input as a space or comma separated list
4840 # If we get two lists with the same leading type, such as
4841 # -cbl = "-try-catch-finally -try-catch-otherwise"
4842 # then they will get merged as follows:
4843 # $rcuddled_block_types = {
4850 # This will allow either type of chain to be followed.
4852 $cuddled_string =~ s/,/ /g; # allow space or comma separated lists
4853 my @cuddled_strings = split /\s+/, $cuddled_string;
4855 $rcuddled_block_types = {};
4857 # process each dash-separated string...
4858 my $string_count = 0;
4859 foreach my $string (@cuddled_strings) {
4860 next unless $string;
4861 my @words = split /-+/, $string; # allow multiple dashes
4863 # we could look for and report possible errors here...
4864 next unless ( @words > 0 );
4866 # allow either '-continue' or *-continue' for arbitrary starting type
4869 # a single word without dashes is a secondary block type
4871 $start = shift @words;
4874 # always make an entry for the leading word. If none follow, this
4875 # will still prevent a wildcard from matching this word.
4876 if ( !defined( $rcuddled_block_types->{$start} ) ) {
4877 $rcuddled_block_types->{$start} = {};
4880 # The count gives the original word order in case we ever want it.
4883 foreach my $word (@words) {
4885 if ( $no_cuddle{$word} ) {
4887 "## Ignoring keyword '$word' in -cbl; does not seem right\n"
4892 $rcuddled_block_types->{$start}->{$word} =
4893 1; #"$string_count.$word_count";
4895 # git#9: Remove this word from the list of desired one-line
4897 $want_one_line_block{$word} = 0;
4901 } ## end sub prepare_cuddled_block_types
4902 } ## end closure prepare_cuddled_block_types
4904 sub dump_cuddled_block_list {
4907 # ORIGINAL METHOD: Here is the format of the cuddled block type hash
4908 # which controls this routine
4909 # my $rcuddled_block_types = {
4920 # SIMPLIFIED METHOD: the simplified method uses a wildcard for
4921 # the starting block type and puts all cuddled blocks together:
4922 # my $rcuddled_block_types = {
4931 # Both methods work, but the simplified method has proven to be adequate and
4934 my $cuddled_string = $rOpts->{'cuddled-block-list'};
4935 $cuddled_string = EMPTY_STRING unless $cuddled_string;
4937 my $flags = EMPTY_STRING;
4938 $flags .= "-ce" if ( $rOpts->{'cuddled-else'} );
4939 $flags .= " -cbl='$cuddled_string'";
4941 unless ( $rOpts->{'cuddled-else'} ) {
4942 $flags .= "\nNote: You must specify -ce to generate a cuddled hash";
4946 ------------------------------------------------------------------------
4947 Hash of cuddled block types prepared for a run with these parameters:
4949 ------------------------------------------------------------------------
4953 $fh->print( Dumper($rcuddled_block_types) );
4956 ------------------------------------------------------------------------
4959 } ## end sub dump_cuddled_block_list
4961 sub make_static_block_comment_pattern {
4963 # create the pattern used to identify static block comments
4964 $static_block_comment_pattern = '^\s*##';
4966 # allow the user to change it
4967 if ( $rOpts->{'static-block-comment-prefix'} ) {
4968 my $prefix = $rOpts->{'static-block-comment-prefix'};
4969 $prefix =~ s/^\s*//;
4970 my $pattern = $prefix;
4972 # user may give leading caret to force matching left comments only
4973 if ( $prefix !~ /^\^#/ ) {
4974 if ( $prefix !~ /^#/ ) {
4976 "ERROR: the -sbcp prefix is '$prefix' but must begin with '#' or '^#'\n"
4979 $pattern = '^\s*' . $prefix;
4981 if ( bad_pattern($pattern) ) {
4983 "ERROR: the -sbc prefix '$prefix' causes the invalid regex '$pattern'\n"
4986 $static_block_comment_pattern = $pattern;
4989 } ## end sub make_static_block_comment_pattern
4991 sub make_format_skipping_pattern {
4992 my ( $opt_name, $default ) = @_;
4993 my $param = $rOpts->{$opt_name};
4994 unless ($param) { $param = $default }
4996 if ( $param !~ /^#/ ) {
4997 Die("ERROR: the $opt_name parameter '$param' must begin with '#'\n");
4999 my $pattern = '^' . $param . '\s';
5000 if ( bad_pattern($pattern) ) {
5002 "ERROR: the $opt_name parameter '$param' causes the invalid regex '$pattern'\n"
5006 } ## end sub make_format_skipping_pattern
5008 sub make_non_indenting_brace_pattern {
5010 # Create the pattern used to identify static side comments.
5011 # Note that we are ending the pattern in a \s. This will allow
5012 # the pattern to be followed by a space and some text, or a newline.
5013 # The pattern is used in sub 'non_indenting_braces'
5014 $non_indenting_brace_pattern = '^#<<<\s';
5016 # allow the user to change it
5017 if ( $rOpts->{'non-indenting-brace-prefix'} ) {
5018 my $prefix = $rOpts->{'non-indenting-brace-prefix'};
5019 $prefix =~ s/^\s*//;
5020 if ( $prefix !~ /^#/ ) {
5021 Die("ERROR: the -nibp parameter '$prefix' must begin with '#'\n");
5023 my $pattern = '^' . $prefix . '\s';
5024 if ( bad_pattern($pattern) ) {
5026 "ERROR: the -nibp prefix '$prefix' causes the invalid regex '$pattern'\n"
5029 $non_indenting_brace_pattern = $pattern;
5032 } ## end sub make_non_indenting_brace_pattern
5034 sub make_closing_side_comment_list_pattern {
5036 # turn any input list into a regex for recognizing selected block types
5037 $closing_side_comment_list_pattern = '^\w+';
5038 if ( defined( $rOpts->{'closing-side-comment-list'} )
5039 && $rOpts->{'closing-side-comment-list'} )
5041 $closing_side_comment_list_pattern =
5042 make_block_pattern( '-cscl', $rOpts->{'closing-side-comment-list'} );
5045 } ## end sub make_closing_side_comment_list_pattern
5047 sub make_sub_matching_pattern {
5049 # Patterns for standardizing matches to block types for regular subs and
5050 # anonymous subs. Examples
5051 # 'sub process' is a named sub
5052 # 'sub ::m' is a named sub
5053 # 'sub' is an anonymous sub
5054 # 'sub:' is a label, not a sub
5055 # 'sub :' is a label, not a sub ( block type will be <sub:> )
5056 # sub'_ is a named sub ( block type will be <sub '_> )
5057 # 'substr' is a keyword
5058 # So note that named subs always have a space after 'sub'
5059 $SUB_PATTERN = '^sub\s'; # match normal sub
5060 $ASUB_PATTERN = '^sub$'; # match anonymous sub
5062 # Note (see also RT #133130): These patterns are used by
5063 # sub make_block_pattern, which is used for making most patterns.
5064 # So this sub needs to be called before other pattern-making routines.
5066 if ( $rOpts->{'sub-alias-list'} ) {
5068 # Note that any 'sub-alias-list' has been preprocessed to
5069 # be a trimmed, space-separated list which includes 'sub'
5070 # for example, it might be 'sub method fun'
5071 my $sub_alias_list = $rOpts->{'sub-alias-list'};
5072 $sub_alias_list =~ s/\s+/\|/g;
5073 $SUB_PATTERN =~ s/sub/\($sub_alias_list\)/;
5074 $ASUB_PATTERN =~ s/sub/\($sub_alias_list\)/;
5077 } ## end sub make_sub_matching_pattern
5079 sub make_bl_pattern {
5081 # Set defaults lists to retain historical default behavior for -bl:
5082 my $bl_list_string = '*';
5083 my $bl_exclusion_list_string = 'sort map grep eval asub';
5085 if ( defined( $rOpts->{'brace-left-list'} )
5086 && $rOpts->{'brace-left-list'} )
5088 $bl_list_string = $rOpts->{'brace-left-list'};
5090 if ( $bl_list_string =~ /\bsub\b/ ) {
5091 $rOpts->{'opening-sub-brace-on-new-line'} ||=
5092 $rOpts->{'opening-brace-on-new-line'};
5094 if ( $bl_list_string =~ /\basub\b/ ) {
5095 $rOpts->{'opening-anonymous-sub-brace-on-new-line'} ||=
5096 $rOpts->{'opening-brace-on-new-line'};
5099 $bl_pattern = make_block_pattern( '-bll', $bl_list_string );
5101 # for -bl, a list with '*' turns on -sbl and -asbl
5102 if ( $bl_pattern =~ /\.\*/ ) {
5103 $rOpts->{'opening-sub-brace-on-new-line'} ||=
5104 $rOpts->{'opening-brace-on-new-line'};
5105 $rOpts->{'opening-anonymous-sub-brace-on-new-line'} ||=
5106 $rOpts->{'opening-anonymous-brace-on-new-line'};
5109 if ( defined( $rOpts->{'brace-left-exclusion-list'} )
5110 && $rOpts->{'brace-left-exclusion-list'} )
5112 $bl_exclusion_list_string = $rOpts->{'brace-left-exclusion-list'};
5113 if ( $bl_exclusion_list_string =~ /\bsub\b/ ) {
5114 $rOpts->{'opening-sub-brace-on-new-line'} = 0;
5116 if ( $bl_exclusion_list_string =~ /\basub\b/ ) {
5117 $rOpts->{'opening-anonymous-sub-brace-on-new-line'} = 0;
5121 $bl_exclusion_pattern =
5122 make_block_pattern( '-blxl', $bl_exclusion_list_string );
5124 } ## end sub make_bl_pattern
5126 sub make_bli_pattern {
5128 # default list of block types for which -bli would apply
5129 my $bli_list_string = 'if else elsif unless while for foreach do : sub';
5130 my $bli_exclusion_list_string = SPACE;
5132 if ( defined( $rOpts->{'brace-left-and-indent-list'} )
5133 && $rOpts->{'brace-left-and-indent-list'} )
5135 $bli_list_string = $rOpts->{'brace-left-and-indent-list'};
5138 $bli_pattern = make_block_pattern( '-blil', $bli_list_string );
5140 if ( defined( $rOpts->{'brace-left-and-indent-exclusion-list'} )
5141 && $rOpts->{'brace-left-and-indent-exclusion-list'} )
5143 $bli_exclusion_list_string =
5144 $rOpts->{'brace-left-and-indent-exclusion-list'};
5146 $bli_exclusion_pattern =
5147 make_block_pattern( '-blixl', $bli_exclusion_list_string );
5149 } ## end sub make_bli_pattern
5151 sub make_keyword_group_list_pattern {
5153 # turn any input list into a regex for recognizing selected block types.
5154 # Here are the defaults:
5155 $keyword_group_list_pattern = '^(our|local|my|use|require|)$';
5156 $keyword_group_list_comment_pattern = EMPTY_STRING;
5157 if ( defined( $rOpts->{'keyword-group-blanks-list'} )
5158 && $rOpts->{'keyword-group-blanks-list'} )
5160 my @words = split /\s+/, $rOpts->{'keyword-group-blanks-list'};
5163 foreach my $word (@words) {
5164 if ( $word eq 'BC' || $word eq 'SBC' ) {
5165 push @comment_list, $word;
5166 if ( $word eq 'SBC' ) { push @comment_list, 'SBCX' }
5169 push @keyword_list, $word;
5172 $keyword_group_list_pattern =
5173 make_block_pattern( '-kgbl', $rOpts->{'keyword-group-blanks-list'} );
5174 $keyword_group_list_comment_pattern =
5175 make_block_pattern( '-kgbl', join( SPACE, @comment_list ) );
5178 } ## end sub make_keyword_group_list_pattern
5180 sub make_block_brace_vertical_tightness_pattern {
5182 # turn any input list into a regex for recognizing selected block types
5183 $block_brace_vertical_tightness_pattern =
5184 '^((if|else|elsif|unless|while|for|foreach|do|\w+:)$|sub)';
5185 if ( defined( $rOpts->{'block-brace-vertical-tightness-list'} )
5186 && $rOpts->{'block-brace-vertical-tightness-list'} )
5188 $block_brace_vertical_tightness_pattern =
5189 make_block_pattern( '-bbvtl',
5190 $rOpts->{'block-brace-vertical-tightness-list'} );
5193 } ## end sub make_block_brace_vertical_tightness_pattern
5195 sub make_blank_line_pattern {
5197 $blank_lines_before_closing_block_pattern = $SUB_PATTERN;
5198 my $key = 'blank-lines-before-closing-block-list';
5199 if ( defined( $rOpts->{$key} ) && $rOpts->{$key} ) {
5200 $blank_lines_before_closing_block_pattern =
5201 make_block_pattern( '-blbcl', $rOpts->{$key} );
5204 $blank_lines_after_opening_block_pattern = $SUB_PATTERN;
5205 $key = 'blank-lines-after-opening-block-list';
5206 if ( defined( $rOpts->{$key} ) && $rOpts->{$key} ) {
5207 $blank_lines_after_opening_block_pattern =
5208 make_block_pattern( '-blaol', $rOpts->{$key} );
5211 } ## end sub make_blank_line_pattern
5213 sub make_block_pattern {
5215 # given a string of block-type keywords, return a regex to match them
5216 # The only tricky part is that labels are indicated with a single ':'
5217 # and the 'sub' token text may have additional text after it (name of
5222 # input string: "if else elsif unless while for foreach do : sub";
5223 # pattern: '^((if|else|elsif|unless|while|for|foreach|do|\w+:)$|sub)';
5227 # To distinguish between anonymous subs and named subs, use 'sub' to
5228 # indicate a named sub, and 'asub' to indicate an anonymous sub
5230 my ( $abbrev, $string ) = @_;
5231 my @list = split_words($string);
5235 if ( $i eq '*' ) { my $pattern = '^.*'; return $pattern }
5238 if ( $i eq 'sub' ) {
5240 elsif ( $i eq 'asub' ) {
5242 elsif ( $i eq ';' ) {
5245 elsif ( $i eq '{' ) {
5248 elsif ( $i eq ':' ) {
5249 push @words, '\w+:';
5251 elsif ( $i =~ /^\w/ ) {
5255 Warn("unrecognized block type $i after $abbrev, ignoring\n");
5259 # Fix 2 for c091, prevent the pattern from matching an empty string
5260 # '1 ' is an impossible block name.
5261 if ( !@words ) { push @words, "1 " }
5263 my $pattern = '(' . join( '|', @words ) . ')$';
5264 my $sub_patterns = EMPTY_STRING;
5265 if ( $seen{'sub'} ) {
5266 $sub_patterns .= '|' . $SUB_PATTERN;
5268 if ( $seen{'asub'} ) {
5269 $sub_patterns .= '|' . $ASUB_PATTERN;
5271 if ($sub_patterns) {
5272 $pattern = '(' . $pattern . $sub_patterns . ')';
5274 $pattern = '^' . $pattern;
5276 } ## end sub make_block_pattern
5278 sub make_static_side_comment_pattern {
5280 # create the pattern used to identify static side comments
5281 $static_side_comment_pattern = '^##';
5283 # allow the user to change it
5284 if ( $rOpts->{'static-side-comment-prefix'} ) {
5285 my $prefix = $rOpts->{'static-side-comment-prefix'};
5286 $prefix =~ s/^\s*//;
5287 my $pattern = '^' . $prefix;
5288 if ( bad_pattern($pattern) ) {
5290 "ERROR: the -sscp prefix '$prefix' causes the invalid regex '$pattern'\n"
5293 $static_side_comment_pattern = $pattern;
5296 } ## end sub make_static_side_comment_pattern
5298 sub make_closing_side_comment_prefix {
5300 # Be sure we have a valid closing side comment prefix
5301 my $csc_prefix = $rOpts->{'closing-side-comment-prefix'};
5302 my $csc_prefix_pattern;
5303 if ( !defined($csc_prefix) ) {
5304 $csc_prefix = '## end';
5305 $csc_prefix_pattern = '^##\s+end';
5308 my $test_csc_prefix = $csc_prefix;
5309 if ( $test_csc_prefix !~ /^#/ ) {
5310 $test_csc_prefix = '#' . $test_csc_prefix;
5313 # make a regex to recognize the prefix
5314 my $test_csc_prefix_pattern = $test_csc_prefix;
5316 # escape any special characters
5317 $test_csc_prefix_pattern =~ s/([^#\s\w])/\\$1/g;
5319 $test_csc_prefix_pattern = '^' . $test_csc_prefix_pattern;
5321 # allow exact number of intermediate spaces to vary
5322 $test_csc_prefix_pattern =~ s/\s+/\\s\+/g;
5324 # make sure we have a good pattern
5325 # if we fail this we probably have an error in escaping
5328 if ( bad_pattern($test_csc_prefix_pattern) ) {
5330 # shouldn't happen..must have screwed up escaping, above
5333 Program Error: the -cscp prefix '$csc_prefix' caused the invalid regex '$csc_prefix_pattern'
5337 # just warn and keep going with defaults
5339 "Error: the -cscp prefix '$csc_prefix' caused the invalid regex '$csc_prefix_pattern'\n"
5341 Warn("Please consider using a simpler -cscp prefix\n");
5342 Warn("Using default -cscp instead; please check output\n");
5345 $csc_prefix = $test_csc_prefix;
5346 $csc_prefix_pattern = $test_csc_prefix_pattern;
5349 $rOpts->{'closing-side-comment-prefix'} = $csc_prefix;
5350 $closing_side_comment_prefix_pattern = $csc_prefix_pattern;
5352 } ## end sub make_closing_side_comment_prefix
5354 ##################################################
5355 # CODE SECTION 4: receive lines from the tokenizer
5356 ##################################################
5358 { ## begin closure write_line
5362 # Variables used by sub check_sequence_numbers:
5364 my %saw_opening_seqno;
5365 my %saw_closing_seqno;
5368 sub initialize_write_line {
5370 $nesting_depth = undef;
5372 $last_seqno = SEQ_ROOT;
5373 %saw_opening_seqno = ();
5374 %saw_closing_seqno = ();
5377 } ## end sub initialize_write_line
5379 sub check_sequence_numbers {
5381 # Routine for checking sequence numbers. This only needs to be
5382 # done occasionally in DEVEL_MODE to be sure everything is working
5384 my ( $rtokens, $rtoken_type, $rtype_sequence, $input_line_no ) = @_;
5385 my $jmax = @{$rtokens} - 1;
5386 return unless ( $jmax >= 0 );
5387 foreach my $j ( 0 .. $jmax ) {
5388 my $seqno = $rtype_sequence->[$j];
5389 my $token = $rtokens->[$j];
5390 my $type = $rtoken_type->[$j];
5391 $seqno = EMPTY_STRING unless ( defined($seqno) );
5393 "Error at j=$j, line number $input_line_no, seqno='$seqno', type='$type', tok='$token':\n";
5397 # Sequence numbers are generated for opening tokens, so every opening
5398 # token should be sequenced. Closing tokens will be unsequenced
5399 # if they do not have a matching opening token.
5400 if ( $is_opening_sequence_token{$token}
5406 $err_msg Unexpected opening token without sequence number
5413 # Save starting seqno to identify sequence method:
5414 # New method starts with 2 and has continuous numbering
5415 # Old method starts with >2 and may have gaps
5416 if ( !defined($initial_seqno) ) { $initial_seqno = $seqno }
5418 if ( $is_opening_sequence_token{$token} ) {
5420 # New method should have continuous numbering
5421 if ( $initial_seqno == 2 && $seqno != $last_seqno + 1 ) {
5424 $err_msg Unexpected opening sequence number: previous seqno=$last_seqno, but seqno= $seqno
5428 $last_seqno = $seqno;
5430 # Numbers must be unique
5431 if ( $saw_opening_seqno{$seqno} ) {
5432 my $lno = $saw_opening_seqno{$seqno};
5435 $err_msg Already saw an opening tokens at line $lno with this sequence number
5439 $saw_opening_seqno{$seqno} = $input_line_no;
5442 # only one closing item per seqno
5443 elsif ( $is_closing_sequence_token{$token} ) {
5444 if ( $saw_closing_seqno{$seqno} ) {
5445 my $lno = $saw_closing_seqno{$seqno};
5448 $err_msg Already saw a closing token with this seqno at line $lno
5452 $saw_closing_seqno{$seqno} = $input_line_no;
5454 # Every closing seqno must have an opening seqno
5455 if ( !$saw_opening_seqno{$seqno} ) {
5458 $err_msg Saw a closing token but no opening token with this seqno
5464 # Sequenced items must be opening or closing
5468 $err_msg Unexpected token type with a sequence number
5475 } ## end sub check_sequence_numbers
5477 sub store_block_type {
5478 my ( $self, $block_type, $seqno ) = @_;
5480 return if ( !$block_type );
5482 $self->[_rblock_type_of_seqno_]->{$seqno} = $block_type;
5484 if ( substr( $block_type, 0, 3 ) eq 'sub'
5485 || $rOpts_sub_alias_list )
5487 if ( $block_type =~ /$ASUB_PATTERN/ ) {
5488 $self->[_ris_asub_block_]->{$seqno} = 1;
5490 elsif ( $block_type =~ /$SUB_PATTERN/ ) {
5491 $self->[_ris_sub_block_]->{$seqno} = 1;
5499 # This routine receives lines one-by-one from the tokenizer and stores
5500 # them in a format suitable for further processing. After the last
5501 # line has been sent, the tokenizer will call sub 'finish_formatting'
5502 # to do the actual formatting.
5504 my ( $self, $line_of_tokens_old ) = @_;
5506 my $rLL = $self->[_rLL_];
5507 my $line_of_tokens = {};
5512 _guessed_indentation_level
5518 _square_bracket_depth
5523 $line_of_tokens->{$_} = $line_of_tokens_old->{$_};
5526 my $line_type = $line_of_tokens_old->{_line_type};
5529 my $Klimit = $self->[_Klimit_];
5532 # Handle line of non-code
5533 if ( $line_type ne 'CODE' ) {
5534 $tee_output ||= $rOpts_tee_pod
5535 && substr( $line_type, 0, 3 ) eq 'POD';
5537 $line_of_tokens->{_level_0} = 0;
5538 $line_of_tokens->{_ci_level_0} = 0;
5539 $line_of_tokens->{_nesting_blocks_0} = EMPTY_STRING;
5540 $line_of_tokens->{_nesting_tokens_0} = EMPTY_STRING;
5541 $line_of_tokens->{_ended_in_blank_token} = undef;
5545 # Handle line of code
5548 my $rtokens = $line_of_tokens_old->{_rtokens};
5549 my $jmax = @{$rtokens} - 1;
5553 $Kfirst = defined($Klimit) ? $Klimit + 1 : 0;
5555 #----------------------------
5556 # get the tokens on this line
5557 #----------------------------
5558 $self->write_line_inner_loop( $line_of_tokens_old,
5561 # update Klimit for added tokens
5562 $Klimit = @{$rLL} - 1;
5564 } ## end if ( $jmax >= 0 )
5568 $line_of_tokens->{_level_0} = 0;
5569 $line_of_tokens->{_ci_level_0} = 0;
5570 $line_of_tokens->{_nesting_blocks_0} = EMPTY_STRING;
5571 $line_of_tokens->{_nesting_tokens_0} = EMPTY_STRING;
5572 $line_of_tokens->{_ended_in_blank_token} = undef;
5577 $rOpts_tee_block_comments
5579 && $rLL->[$Kfirst]->[_TYPE_] eq '#';
5582 $rOpts_tee_side_comments
5584 && $Klimit > $Kfirst
5585 && $rLL->[$Klimit]->[_TYPE_] eq '#';
5587 } ## end if ( $line_type eq 'CODE')
5589 # Finish storing line variables
5590 $line_of_tokens->{_rK_range} = [ $Kfirst, $Klimit ];
5591 $self->[_Klimit_] = $Klimit;
5592 my $rlines = $self->[_rlines_];
5593 push @{$rlines}, $line_of_tokens;
5596 my $fh_tee = $self->[_fh_tee_];
5597 my $line_text = $line_of_tokens_old->{_line_text};
5598 $fh_tee->print($line_text) if ($fh_tee);
5602 } ## end sub write_line
5604 sub write_line_inner_loop {
5605 my ( $self, $line_of_tokens_old, $line_of_tokens ) = @_;
5607 #---------------------------------------------------------------------
5608 # Copy the tokens on one line received from the tokenizer to their new
5609 # storage locations.
5610 #---------------------------------------------------------------------
5613 # $line_of_tokens_old = line received from tokenizer
5614 # $line_of_tokens = line of tokens being formed for formatter
5616 my $rtokens = $line_of_tokens_old->{_rtokens};
5617 my $jmax = @{$rtokens} - 1;
5620 # safety check; shouldn't happen
5621 DEVEL_MODE && Fault("unexpected jmax=$jmax\n");
5625 my $line_number = $line_of_tokens_old->{_line_number};
5626 my $rtoken_type = $line_of_tokens_old->{_rtoken_type};
5627 my $rblock_type = $line_of_tokens_old->{_rblock_type};
5628 my $rtype_sequence = $line_of_tokens_old->{_rtype_sequence};
5629 my $rlevels = $line_of_tokens_old->{_rlevels};
5630 my $rci_levels = $line_of_tokens_old->{_rci_levels};
5632 my $rLL = $self->[_rLL_];
5633 my $rSS = $self->[_rSS_];
5634 my $rdepth_of_opening_seqno = $self->[_rdepth_of_opening_seqno_];
5637 && check_sequence_numbers( $rtokens, $rtoken_type,
5638 $rtype_sequence, $line_number );
5640 # Find the starting nesting depth ...
5641 # It must be the value of variable 'level' of the first token
5642 # because the nesting depth is used as a token tag in the
5643 # vertical aligner and is compared to actual levels.
5644 # So vertical alignment problems will occur with any other
5646 if ( !defined($nesting_depth) ) {
5647 $nesting_depth = $rlevels->[0];
5648 $nesting_depth = 0 if ( $nesting_depth < 0 );
5649 $rdepth_of_opening_seqno->[SEQ_ROOT] = $nesting_depth - 1;
5652 foreach my $j ( 0 .. $jmax ) {
5654 # Do not clip the 'level' variable yet. We will do this
5655 # later, in sub 'store_token_to_go'. The reason is that in
5656 # files with level errors, the logic in 'weld_cuddled_else'
5657 # uses a stack logic that will give bad welds if we clip
5659 ## if ( $rlevels->[$j] < 0 ) { $rlevels->[$j] = 0 }
5661 # Handle tokens with sequence numbers ...
5662 my $seqno = $rtype_sequence->[$j];
5664 my $token = $rtokens->[$j];
5666 if ( $is_opening_token{$token} ) {
5667 $self->[_K_opening_container_]->{$seqno} = @{$rLL};
5668 $rdepth_of_opening_seqno->[$seqno] = $nesting_depth;
5671 # Save a sequenced block type at its opening token.
5672 # Note that unsequenced block types can occur in
5673 # unbalanced code with errors but are ignored here.
5674 $self->store_block_type( $rblock_type->[$j], $seqno )
5675 if ( $rblock_type->[$j] );
5677 elsif ( $is_closing_token{$token} ) {
5679 # The opening depth should always be defined, and
5680 # it should equal $nesting_depth-1. To protect
5681 # against unforseen error conditions, however, we
5682 # will check this and fix things if necessary. For
5683 # a test case see issue c055.
5684 my $opening_depth = $rdepth_of_opening_seqno->[$seqno];
5685 if ( !defined($opening_depth) ) {
5686 $opening_depth = $nesting_depth - 1;
5687 $opening_depth = 0 if ( $opening_depth < 0 );
5688 $rdepth_of_opening_seqno->[$seqno] = $opening_depth;
5690 # This is not fatal but should not happen. The
5691 # tokenizer generates sequence numbers
5692 # incrementally upon encountering each new
5693 # opening token, so every positive sequence
5694 # number should correspond to an opening token.
5695 DEVEL_MODE && Fault(<<EOM);
5696 No opening token seen for closing token = '$token' at seq=$seqno at depth=$opening_depth
5699 $self->[_K_closing_container_]->{$seqno} = @{$rLL};
5700 $nesting_depth = $opening_depth;
5703 elsif ( $token eq '?' ) {
5705 elsif ( $token eq ':' ) {
5709 # The only sequenced types output by the tokenizer are
5710 # the opening & closing containers and the ternary
5711 # types. So we would only get here if the tokenizer has
5712 # been changed to mark some other tokens with sequence
5713 # numbers, or if an error has been introduced in a
5714 # hash such as %is_opening_container
5716 DEVEL_MODE && Fault(<<EOM);
5717 Unexpected sequenced token '$token' of type '$rtoken_type->[$j]', sequence=$seqno arrived from tokenizer.
5718 Expecting only opening or closing container tokens or ternary tokens with sequence numbers.
5723 $self->[_Iss_opening_]->[$seqno] = @{$rSS};
5725 # For efficiency, we find the maximum level of
5726 # opening tokens of any type. The actual maximum
5727 # level will be that of their contents which is 1
5728 # greater. That will be fixed in sub
5729 # 'finish_formatting'.
5730 my $level = $rlevels->[$j];
5731 if ( $level > $self->[_maximum_level_] ) {
5732 $self->[_maximum_level_] = $level;
5733 $self->[_maximum_level_at_line_] = $line_number;
5736 else { $self->[_Iss_closing_]->[$seqno] = @{$rSS} }
5737 push @{$rSS}, $sign * $seqno;
5741 $seqno = EMPTY_STRING unless ( defined($seqno) );
5746 _TOKEN_, _TYPE_, _TYPE_SEQUENCE_,
5747 _LEVEL_, _CI_LEVEL_, _LINE_INDEX_,
5750 $rtokens->[$j], $rtoken_type->[$j], $seqno, $rlevels->[$j],
5751 $rci_levels->[$j], $line_number - 1,
5753 push @{$rLL}, \@tokary;
5754 } ## end foreach my $j ( 0 .. $jmax )
5756 # Need to remember if we can trim the input line
5757 $line_of_tokens->{_ended_in_blank_token} = $rtoken_type->[$jmax] eq 'b';
5759 # Values needed by Logger
5760 $line_of_tokens->{_level_0} = $rlevels->[0];
5761 $line_of_tokens->{_ci_level_0} = $rci_levels->[0];
5762 $line_of_tokens->{_nesting_blocks_0} =
5763 $line_of_tokens_old->{_nesting_blocks_0};
5764 $line_of_tokens->{_nesting_tokens_0} =
5765 $line_of_tokens_old->{_nesting_tokens_0};
5769 } ## end sub write_line_inner_loop
5771 } ## end closure write_line
5773 #############################################
5774 # CODE SECTION 5: Pre-process the entire file
5775 #############################################
5777 sub finish_formatting {
5779 my ( $self, $severe_error ) = @_;
5781 # The file has been tokenized and is ready to be formatted.
5782 # All of the relevant data is stored in $self, ready to go.
5784 # Some of the code in sub break_lists is not robust enough to process code
5785 # with arbitrary brace errors. The simplest fix is to just return the file
5786 # verbatim if there are brace errors. This fixes issue c160.
5787 $severe_error ||= get_saw_brace_error();
5789 # Check the maximum level. If it is extremely large we will give up and
5790 # output the file verbatim. Note that the actual maximum level is 1
5791 # greater than the saved value, so we fix that here.
5792 $self->[_maximum_level_] += 1;
5793 my $maximum_level = $self->[_maximum_level_];
5794 my $maximum_table_index = $#maximum_line_length_at_level;
5795 if ( !$severe_error && $maximum_level >= $maximum_table_index ) {
5796 $severe_error ||= 1;
5798 The maximum indentation level, $maximum_level, exceeds the builtin limit of $maximum_table_index.
5799 Something may be wrong; formatting will be skipped.
5803 # output file verbatim if severe error or no formatting requested
5804 if ( $severe_error || $rOpts->{notidy} ) {
5805 $self->dump_verbatim();
5806 $self->wrapup($severe_error);
5810 # Update the 'save_logfile' flag based to include any tokenization errors.
5811 # We can save time by skipping logfile calls if it is not going to be saved.
5812 my $logger_object = $self->[_logger_object_];
5813 if ($logger_object) {
5814 $self->[_save_logfile_] = $logger_object->get_save_logfile();
5818 my $rix_side_comments = $self->set_CODE_type();
5820 $self->find_non_indenting_braces($rix_side_comments);
5822 # Handle any requested side comment deletions. It is easier to get
5823 # this done here rather than farther down the pipeline because IO
5824 # lines take a different route, and because lines with deleted HSC
5825 # become BL lines. We have already handled any tee requests in sub
5826 # getline, so it is safe to delete side comments now.
5827 $self->delete_side_comments($rix_side_comments)
5828 if ( $rOpts_delete_side_comments
5829 || $rOpts_delete_closing_side_comments );
5832 # Verify that the line hash does not have any unknown keys.
5833 $self->check_line_hashes() if (DEVEL_MODE);
5836 # Make a pass through all tokens, adding or deleting any whitespace as
5837 # required. Also make any other changes, such as adding semicolons.
5838 # All token changes must be made here so that the token data structure
5839 # remains fixed for the rest of this iteration.
5840 my ( $error, $rqw_lines ) = $self->respace_tokens();
5842 $self->dump_verbatim();
5847 $self->find_multiline_qw($rqw_lines);
5850 $self->examine_vertical_tightness_flags();
5852 $self->set_excluded_lp_containers();
5854 $self->keep_old_line_breaks();
5856 # Implement any welding needed for the -wn or -cb options
5857 $self->weld_containers();
5859 # Collect info needed to implement the -xlp style
5860 $self->xlp_collapsed_lengths()
5861 if ( $rOpts_line_up_parentheses && $rOpts_extended_line_up_parentheses );
5863 # Locate small nested blocks which should not be broken
5864 $self->mark_short_nested_blocks();
5866 $self->special_indentation_adjustments();
5868 # Verify that the main token array looks OK. If this ever causes a fault
5869 # then place similar checks before the sub calls above to localize the
5871 $self->check_rLL("Before 'process_all_lines'") if (DEVEL_MODE);
5873 # Finishes formatting and write the result to the line sink.
5874 # Eventually this call should just change the 'rlines' data according to the
5875 # new line breaks and then return so that we can do an internal iteration
5876 # before continuing with the next stages of formatting.
5877 $self->process_all_lines();
5879 # A final routine to tie up any loose ends
5882 } ## end sub finish_formatting
5887 # Examine each line of code and set a flag '$CODE_type' to describe it.
5888 # Also return a list of lines with side comments.
5890 my $rLL = $self->[_rLL_];
5891 my $Klimit = $self->[_Klimit_];
5892 my $rlines = $self->[_rlines_];
5893 my $rblock_type_of_seqno = $self->[_rblock_type_of_seqno_];
5895 my $rOpts_format_skipping_begin = $rOpts->{'format-skipping-begin'};
5896 my $rOpts_format_skipping_end = $rOpts->{'format-skipping-end'};
5897 my $rOpts_static_block_comment_prefix =
5898 $rOpts->{'static-block-comment-prefix'};
5900 # Remember indexes of lines with side comments
5901 my @ix_side_comments;
5903 my $In_format_skipping_section = 0;
5904 my $Saw_VERSION_in_this_file = 0;
5905 my $has_side_comment = 0;
5906 my ( $Kfirst, $Klast );
5909 # Loop to set CODE_type
5911 # Possible CODE_types
5912 # 'VB' = Verbatim - line goes out verbatim (a quote)
5913 # 'FS' = Format Skipping - line goes out verbatim
5915 # 'HSC' = Hanging Side Comment - fix this hanging side comment
5916 # 'SBCX'= Static Block Comment Without Leading Space
5917 # 'SBC' = Static Block Comment
5918 # 'BC' = Block Comment - an ordinary full line comment
5919 # 'IO' = Indent Only - line goes out unchanged except for indentation
5920 # 'NIN' = No Internal Newlines - line does not get broken
5921 # 'VER' = VERSION statement
5922 # '' = ordinary line of code with no restrictions
5925 foreach my $line_of_tokens ( @{$rlines} ) {
5927 my $line_type = $line_of_tokens->{_line_type};
5929 my $Last_line_had_side_comment = $has_side_comment;
5930 if ($has_side_comment) {
5931 push @ix_side_comments, $ix_line - 1;
5932 $has_side_comment = 0;
5935 my $last_CODE_type = $CODE_type;
5936 $CODE_type = EMPTY_STRING;
5938 if ( $line_type ne 'CODE' ) {
5942 my $Klast_prev = $Klast;
5944 my $rK_range = $line_of_tokens->{_rK_range};
5945 ( $Kfirst, $Klast ) = @{$rK_range};
5947 my $input_line = $line_of_tokens->{_line_text};
5948 my $jmax = defined($Kfirst) ? $Klast - $Kfirst : -1;
5950 my $is_block_comment = 0;
5951 if ( $jmax >= 0 && $rLL->[$Klast]->[_TYPE_] eq '#' ) {
5952 if ( $jmax == 0 ) { $is_block_comment = 1; }
5953 else { $has_side_comment = 1 }
5956 # Write line verbatim if we are in a formatting skip section
5957 if ($In_format_skipping_section) {
5959 # Note: extra space appended to comment simplifies pattern matching
5963 # optional fast pre-check
5964 && ( substr( $rLL->[$Kfirst]->[_TOKEN_], 0, 4 ) eq '#>>>'
5965 || $rOpts_format_skipping_end )
5967 && ( $rLL->[$Kfirst]->[_TOKEN_] . SPACE ) =~
5968 /$format_skipping_pattern_end/
5971 $In_format_skipping_section = 0;
5972 my $input_line_no = $line_of_tokens->{_line_number};
5973 write_logfile_entry(
5974 "Line $input_line_no: Exiting format-skipping section\n");
5980 # Check for a continued quote..
5981 if ( $line_of_tokens->{_starting_in_quote} ) {
5983 # A line which is entirely a quote or pattern must go out
5984 # verbatim. Note: the \n is contained in $input_line.
5986 if ( $self->[_save_logfile_] && $input_line =~ /\t/ ) {
5987 my $input_line_number = $line_of_tokens->{_line_number};
5988 $self->note_embedded_tab($input_line_number);
5995 # See if we are entering a formatting skip section
5999 # optional fast pre-check
6000 && ( substr( $rLL->[$Kfirst]->[_TOKEN_], 0, 4 ) eq '#<<<'
6001 || $rOpts_format_skipping_begin )
6003 && $rOpts_format_skipping
6004 && ( $rLL->[$Kfirst]->[_TOKEN_] . SPACE ) =~
6005 /$format_skipping_pattern_begin/
6008 $In_format_skipping_section = 1;
6009 my $input_line_no = $line_of_tokens->{_line_number};
6010 write_logfile_entry(
6011 "Line $input_line_no: Entering format-skipping section\n");
6016 # ignore trailing blank tokens (they will get deleted later)
6017 if ( $jmax > 0 && $rLL->[$Klast]->[_TYPE_] eq 'b' ) {
6028 if ($is_block_comment) {
6030 # see if this is a static block comment (starts with ## by default)
6031 my $is_static_block_comment = 0;
6032 my $no_leading_space = substr( $input_line, 0, 1 ) eq '#';
6035 # optional fast pre-check
6037 substr( $rLL->[$Kfirst]->[_TOKEN_], 0, 2 ) eq '##'
6038 || $rOpts_static_block_comment_prefix
6041 && $rOpts_static_block_comments
6042 && $input_line =~ /$static_block_comment_pattern/
6045 $is_static_block_comment = 1;
6048 # Check for comments which are line directives
6049 # Treat exactly as static block comments without leading space
6050 # reference: perlsyn, near end, section Plain Old Comments (Not!)
6051 # example: '# line 42 "new_filename.plx"'
6054 && $input_line =~ /^\# \s*
6056 (?:\s("?)([^"]+)\2)? \s*
6060 $is_static_block_comment = 1;
6063 # look for hanging side comment ...
6065 $Last_line_had_side_comment # last line had side comment
6066 && !$no_leading_space # there is some leading space
6068 $is_static_block_comment # do not make static comment hanging
6072 # continuing an existing HSC chain?
6073 if ( $last_CODE_type eq 'HSC' ) {
6074 $has_side_comment = 1;
6079 # starting a new HSC chain?
6082 $rOpts->{'hanging-side-comments'} # user is allowing
6083 # hanging side comments
6086 && ( defined($Klast_prev) && $Klast_prev > 1 )
6088 # and the previous side comment was not static (issue c070)
6090 $rOpts->{'static-side-comments'}
6091 && $rLL->[$Klast_prev]->[_TOKEN_] =~
6092 /$static_side_comment_pattern/
6098 # and it is not a closing side comment (issue c070).
6099 my $K_penult = $Klast_prev - 1;
6100 $K_penult -= 1 if ( $rLL->[$K_penult]->[_TYPE_] eq 'b' );
6102 ( $rLL->[$K_penult]->[_TOKEN_] eq '}'
6103 && $rLL->[$K_penult]->[_TYPE_] eq '}'
6104 && $rLL->[$Klast_prev]->[_TOKEN_] =~
6105 /$closing_side_comment_prefix_pattern/ );
6107 if ( !$follows_csc ) {
6108 $has_side_comment = 1;
6115 if ($is_static_block_comment) {
6116 $CODE_type = $no_leading_space ? 'SBCX' : 'SBC';
6119 elsif ($Last_line_had_side_comment
6120 && !$rOpts_maximum_consecutive_blank_lines
6121 && $rLL->[$Kfirst]->[_LEVEL_] > 0 )
6123 # Emergency fix to keep a block comment from becoming a hanging
6124 # side comment. This fix is for the case that blank lines
6125 # cannot be inserted. There is related code in sub
6126 # 'process_line_of_CODE'
6127 $CODE_type = 'SBCX';
6136 # End of comments. Handle a line of normal code:
6138 if ($rOpts_indent_only) {
6143 if ( !$rOpts_add_newlines ) {
6148 # Patch needed for MakeMaker. Do not break a statement
6149 # in which $VERSION may be calculated. See MakeMaker.pm;
6150 # this is based on the coding in it.
6151 # The first line of a file that matches this will be eval'd:
6152 # /([\$*])(([\w\:\']*)\bVERSION)\b.*\=/
6154 # *VERSION = \'1.01';
6155 # ( $VERSION ) = '$Revision: 1.74 $ ' =~ /\$Revision:\s+([^\s]+)/;
6156 # We will pass such a line straight through without breaking
6157 # it unless -npvl is used.
6159 # Patch for problem reported in RT #81866, where files
6160 # had been flattened into a single line and couldn't be
6161 # tidied without -npvl. There are two parts to this patch:
6162 # First, it is not done for a really long line (80 tokens for now).
6163 # Second, we will only allow up to one semicolon
6164 # before the VERSION. We need to allow at least one semicolon
6165 # for statements like this:
6166 # require Exporter; our $VERSION = $Exporter::VERSION;
6167 # where both statements must be on a single line for MakeMaker
6169 if ( !$Saw_VERSION_in_this_file
6172 /^[^;]*;?[^;]*([\$*])(([\w\:\']*)\bVERSION)\b.*\=/ )
6174 $Saw_VERSION_in_this_file = 1;
6175 write_logfile_entry("passing VERSION line; -npvl deactivates\n");
6177 # This code type has lower priority than others
6183 $line_of_tokens->{_code_type} = $CODE_type;
6186 if ($has_side_comment) {
6187 push @ix_side_comments, $ix_line;
6190 return \@ix_side_comments;
6191 } ## end sub set_CODE_type
6193 sub find_non_indenting_braces {
6195 my ( $self, $rix_side_comments ) = @_;
6196 return unless ( $rOpts->{'non-indenting-braces'} );
6197 my $rLL = $self->[_rLL_];
6198 my $Klimit = $self->[_Klimit_];
6199 return unless ( defined($rLL) && @{$rLL} );
6200 my $rlines = $self->[_rlines_];
6201 my $rblock_type_of_seqno = $self->[_rblock_type_of_seqno_];
6202 my $rseqno_non_indenting_brace_by_ix =
6203 $self->[_rseqno_non_indenting_brace_by_ix_];
6205 foreach my $ix ( @{$rix_side_comments} ) {
6206 my $line_of_tokens = $rlines->[$ix];
6207 my $line_type = $line_of_tokens->{_line_type};
6208 if ( $line_type ne 'CODE' ) {
6211 DEVEL_MODE && Fault("unexpected line_type=$line_type\n");
6214 my $CODE_type = $line_of_tokens->{_code_type};
6215 my $rK_range = $line_of_tokens->{_rK_range};
6216 my ( $Kfirst, $Klast ) = @{$rK_range};
6217 unless ( defined($Kfirst) && $rLL->[$Klast]->[_TYPE_] eq '#' ) {
6220 DEVEL_MODE && Fault("did not get a comment\n");
6223 next unless ( $Klast > $Kfirst ); # maybe HSC
6224 my $token_sc = $rLL->[$Klast]->[_TOKEN_];
6225 my $K_m = $Klast - 1;
6226 my $type_m = $rLL->[$K_m]->[_TYPE_];
6227 if ( $type_m eq 'b' && $K_m > $Kfirst ) {
6229 $type_m = $rLL->[$K_m]->[_TYPE_];
6231 my $seqno_m = $rLL->[$K_m]->[_TYPE_SEQUENCE_];
6233 my $block_type_m = $rblock_type_of_seqno->{$seqno_m};
6235 # The pattern ends in \s but we have removed the newline, so
6236 # we added it back for the match. That way we require an exact
6237 # match to the special string and also allow additional text.
6240 && $is_opening_type{$type_m}
6241 && $token_sc =~ /$non_indenting_brace_pattern/ )
6243 $rseqno_non_indenting_brace_by_ix->{$ix} = $seqno_m;
6248 } ## end sub find_non_indenting_braces
6250 sub delete_side_comments {
6251 my ( $self, $rix_side_comments ) = @_;
6253 # Given a list of indexes of lines with side comments, handle any
6254 # requested side comment deletions.
6256 my $rLL = $self->[_rLL_];
6257 my $rlines = $self->[_rlines_];
6258 my $rblock_type_of_seqno = $self->[_rblock_type_of_seqno_];
6259 my $rseqno_non_indenting_brace_by_ix =
6260 $self->[_rseqno_non_indenting_brace_by_ix_];
6262 foreach my $ix ( @{$rix_side_comments} ) {
6263 my $line_of_tokens = $rlines->[$ix];
6264 my $line_type = $line_of_tokens->{_line_type};
6266 # This fault shouldn't happen because we only saved CODE lines with
6267 # side comments in the TASK 1 loop above.
6268 if ( $line_type ne 'CODE' ) {
6272 Hit unexpected line_type = '$line_type' near line $lno while deleting side comments, should be 'CODE'
6278 my $CODE_type = $line_of_tokens->{_code_type};
6279 my $rK_range = $line_of_tokens->{_rK_range};
6280 my ( $Kfirst, $Klast ) = @{$rK_range};
6282 unless ( defined($Kfirst) && $rLL->[$Klast]->[_TYPE_] eq '#' ) {
6286 Did not find side comment near line $lno while deleting side comments
6292 my $delete_side_comment =
6293 $rOpts_delete_side_comments
6294 && ( $Klast > $Kfirst || $CODE_type eq 'HSC' )
6296 || $CODE_type eq 'HSC'
6297 || $CODE_type eq 'IO'
6298 || $CODE_type eq 'NIN' );
6300 # Do not delete special control side comments
6301 if ( $rseqno_non_indenting_brace_by_ix->{$ix} ) {
6302 $delete_side_comment = 0;
6306 $rOpts_delete_closing_side_comments
6307 && !$delete_side_comment
6310 || $CODE_type eq 'HSC'
6311 || $CODE_type eq 'IO'
6312 || $CODE_type eq 'NIN' )
6315 my $token = $rLL->[$Klast]->[_TOKEN_];
6316 my $K_m = $Klast - 1;
6317 my $type_m = $rLL->[$K_m]->[_TYPE_];
6318 if ( $type_m eq 'b' && $K_m > $Kfirst ) { $K_m-- }
6319 my $seqno_m = $rLL->[$K_m]->[_TYPE_SEQUENCE_];
6321 my $block_type_m = $rblock_type_of_seqno->{$seqno_m};
6323 && $token =~ /$closing_side_comment_prefix_pattern/
6324 && $block_type_m =~ /$closing_side_comment_list_pattern/ )
6326 $delete_side_comment = 1;
6329 } ## end if ( $rOpts_delete_closing_side_comments...)
6331 if ($delete_side_comment) {
6333 # We are actually just changing the side comment to a blank.
6334 # This may produce multiple blanks in a row, but sub respace_tokens
6335 # will check for this and fix it.
6336 $rLL->[$Klast]->[_TYPE_] = 'b';
6337 $rLL->[$Klast]->[_TOKEN_] = SPACE;
6339 # The -io option outputs the line text, so we have to update
6340 # the line text so that the comment does not reappear.
6341 if ( $CODE_type eq 'IO' ) {
6342 my $line = EMPTY_STRING;
6343 foreach my $KK ( $Kfirst .. $Klast - 1 ) {
6344 $line .= $rLL->[$KK]->[_TOKEN_];
6347 $line_of_tokens->{_line_text} = $line . "\n";
6350 # If we delete a hanging side comment the line becomes blank.
6351 if ( $CODE_type eq 'HSC' ) { $line_of_tokens->{_code_type} = 'BL' }
6355 } ## end sub delete_side_comments
6359 my $rlines = $self->[_rlines_];
6360 foreach my $line ( @{$rlines} ) {
6361 my $input_line = $line->{_line_text};
6362 $self->write_unindented_line($input_line);
6371 my %is_nonlist_keyword;
6372 my %is_nonlist_type;
6374 my %is_unexpected_equals;
6378 # added 'U' to fix cases b1125 b1126 b1127
6380 @{wU}{@q} = (1) x scalar(@q);
6382 @q = qw(w i q Q G C Z);
6383 @{wiq}{@q} = (1) x scalar(@q);
6386 @{is_wit}{@q} = (1) x scalar(@q);
6389 @{is_sigil}{@q} = (1) x scalar(@q);
6391 # Parens following these keywords will not be marked as lists. Note that
6392 # 'for' is not included and is handled separately, by including 'f' in the
6393 # hash %is_counted_type, since it may or may not be a c-style for loop.
6394 @q = qw( if elsif unless and or );
6395 @is_nonlist_keyword{@q} = (1) x scalar(@q);
6397 # Parens following these types will not be marked as lists
6399 @is_nonlist_type{@q} = (1) x scalar(@q);
6402 @is_s_y_m_slash{@q} = (1) x scalar(@q);
6405 @is_unexpected_equals{@q} = (1) x scalar(@q);
6409 { #<<< begin clousure respace_tokens
6411 my $rLL_new; # This will be the new array of tokens
6413 # These are variables in $self
6415 my $length_function;
6416 my $is_encoded_data;
6418 my $K_closing_ternary;
6419 my $K_opening_ternary;
6420 my $rchildren_of_seqno;
6421 my $rhas_broken_code_block;
6422 my $rhas_broken_list;
6423 my $rhas_broken_list_with_lec;
6424 my $rhas_code_block;
6427 my $ris_assigned_structure;
6428 my $ris_broken_container;
6429 my $ris_excluded_lp_container;
6430 my $ris_list_by_seqno;
6431 my $ris_permanently_broken;
6432 my $rlec_count_by_seqno;
6434 my $rparent_of_seqno;
6435 my $rtype_count_by_seqno;
6436 my $rblock_type_of_seqno;
6438 my $K_opening_container;
6439 my $K_closing_container;
6441 my %K_first_here_doc_by_seqno;
6443 my $last_nonblank_code_type;
6444 my $last_nonblank_code_token;
6445 my $last_nonblank_block_type;
6446 my $last_last_nonblank_code_type;
6447 my $last_last_nonblank_code_token;
6450 my %K_old_opening_by_seqno;
6454 my $cumulative_length;
6456 # Variables holding the current line info
6463 my $rwhitespace_flags;
6465 sub initialize_respace_tokens_closure {
6469 $rLL_new = []; # This is the new array
6471 $rLL = $self->[_rLL_];
6472 $length_function = $self->[_length_function_];
6473 $is_encoded_data = $self->[_is_encoded_data_];
6475 $K_closing_ternary = $self->[_K_closing_ternary_];
6476 $K_opening_ternary = $self->[_K_opening_ternary_];
6477 $rchildren_of_seqno = $self->[_rchildren_of_seqno_];
6478 $rhas_broken_code_block = $self->[_rhas_broken_code_block_];
6479 $rhas_broken_list = $self->[_rhas_broken_list_];
6480 $rhas_broken_list_with_lec = $self->[_rhas_broken_list_with_lec_];
6481 $rhas_code_block = $self->[_rhas_code_block_];
6482 $rhas_list = $self->[_rhas_list_];
6483 $rhas_ternary = $self->[_rhas_ternary_];
6484 $ris_assigned_structure = $self->[_ris_assigned_structure_];
6485 $ris_broken_container = $self->[_ris_broken_container_];
6486 $ris_excluded_lp_container = $self->[_ris_excluded_lp_container_];
6487 $ris_list_by_seqno = $self->[_ris_list_by_seqno_];
6488 $ris_permanently_broken = $self->[_ris_permanently_broken_];
6489 $rlec_count_by_seqno = $self->[_rlec_count_by_seqno_];
6490 $roverride_cab3 = $self->[_roverride_cab3_];
6491 $rparent_of_seqno = $self->[_rparent_of_seqno_];
6492 $rtype_count_by_seqno = $self->[_rtype_count_by_seqno_];
6493 $rblock_type_of_seqno = $self->[_rblock_type_of_seqno_];
6495 # Note that $K_opening_container and $K_closing_container have values
6496 # defined in sub get_line() for the previous K indexes. They were needed
6497 # in case option 'indent-only' was set, and we didn't get here. We no longer
6498 # need those and will eliminate them now to avoid any possible mixing of
6499 # old and new values.
6500 $K_opening_container = $self->[_K_opening_container_] = {};
6501 $K_closing_container = $self->[_K_closing_container_] = {};
6503 %K_first_here_doc_by_seqno = ();
6505 $last_nonblank_code_type = ';';
6506 $last_nonblank_code_token = ';';
6507 $last_nonblank_block_type = EMPTY_STRING;
6508 $last_last_nonblank_code_type = ';';
6509 $last_last_nonblank_code_token = ';';
6512 %K_old_opening_by_seqno = (); # Note: old K index
6514 $depth_next_max = 0;
6516 # we will be setting token lengths as we go
6517 $cumulative_length = 0;
6519 $Ktoken_vars = undef; # the old K value of $rtoken_vars
6520 $Kfirst_old = undef; # min K of old line
6521 $Klast_old = undef; # max K of old line
6522 $Klast_old_code = undef; # K of last token if side comment
6523 $CODE_type = EMPTY_STRING;
6525 # Set the whitespace flags, which indicate the token spacing preference.
6526 $rwhitespace_flags = $self->set_whitespace_flags();
6530 } ## end sub initialize_respace_tokens_closure
6532 sub respace_tokens {
6536 #--------------------------------------------------------------------------
6537 # This routine is called once per file to do as much formatting as possible
6538 # before new line breaks are set.
6539 #--------------------------------------------------------------------------
6541 # Return parameters:
6542 # Set $severe_error=true if processing must terminate immediately
6543 my ( $severe_error, $rqw_lines );
6545 # We change any spaces in --indent-only mode
6546 if ( $rOpts->{'indent-only'} ) {
6547 return ( $severe_error, $rqw_lines );
6550 # This routine makes all necessary and possible changes to the tokenization
6551 # after the initial tokenization of the file. This is a tedious routine,
6552 # but basically it consists of inserting and deleting whitespace between
6553 # nonblank tokens according to the selected parameters. In a few cases
6554 # non-space characters are added, deleted or modified.
6556 # The goal of this routine is to create a new token array which only needs
6557 # the definition of new line breaks and padding to complete formatting. In
6558 # a few cases we have to cheat a little to achieve this goal. In
6559 # particular, we may not know if a semicolon will be needed, because it
6560 # depends on how the line breaks go. To handle this, we include the
6561 # semicolon as a 'phantom' which can be displayed as normal or as an empty
6564 # Method: The old tokens are copied one-by-one, with changes, from the old
6565 # linear storage array $rLL to a new array $rLL_new.
6567 # (re-)initialize closure variables for this problem
6568 $self->initialize_respace_tokens_closure();
6570 #--------------------------------
6571 # Main over all lines of the file
6572 #--------------------------------
6573 my $rlines = $self->[_rlines_];
6574 my $line_type = EMPTY_STRING;
6577 foreach my $line_of_tokens ( @{$rlines} ) {
6579 my $input_line_number = $line_of_tokens->{_line_number};
6580 my $last_line_type = $line_type;
6581 $line_type = $line_of_tokens->{_line_type};
6582 next unless ( $line_type eq 'CODE' );
6583 my $last_CODE_type = $CODE_type;
6584 $CODE_type = $line_of_tokens->{_code_type};
6586 if ( $CODE_type eq 'BL' ) {
6587 my $seqno = $seqno_stack{ $depth_next - 1 };
6588 if ( defined($seqno) ) {
6589 $self->[_rblank_and_comment_count_]->{$seqno} += 1;
6590 $self->set_permanently_broken($seqno)
6591 if (!$ris_permanently_broken->{$seqno}
6592 && $rOpts_maximum_consecutive_blank_lines );
6596 my $rK_range = $line_of_tokens->{_rK_range};
6597 my ( $Kfirst, $Klast ) = @{$rK_range};
6598 next unless defined($Kfirst);
6599 ( $Kfirst_old, $Klast_old ) = ( $Kfirst, $Klast );
6600 $Klast_old_code = $Klast_old;
6602 # Be sure an old K value is defined for sub store_token
6603 $Ktoken_vars = $Kfirst;
6605 # Check for correct sequence of token indexes...
6606 # An error here means that sub write_line() did not correctly
6607 # package the tokenized lines as it received them. If we
6608 # get a fault here it has not output a continuous sequence
6609 # of K values. Or a line of CODE may have been mis-marked as
6610 # something else. There is no good way to continue after such an
6612 if ( defined($last_K_out) ) {
6613 if ( $Kfirst != $last_K_out + 1 ) {
6615 "Program Bug: last K out was $last_K_out but Kfirst=$Kfirst"
6618 return ( $severe_error, $rqw_lines );
6623 # The first token should always have been given index 0 by sub
6625 if ( $Kfirst != 0 ) {
6626 Fault("Program Bug: first K is $Kfirst but should be 0");
6629 $last_K_out = $Klast;
6631 # Handle special lines of code
6632 if ( $CODE_type && $CODE_type ne 'NIN' && $CODE_type ne 'VER' ) {
6634 # CODE_types are as follows.
6636 # 'VB' = Verbatim - line goes out verbatim
6637 # 'FS' = Format Skipping - line goes out verbatim, no blanks
6638 # 'IO' = Indent Only - only indentation may be changed
6639 # 'NIN' = No Internal Newlines - line does not get broken
6640 # 'HSC'=Hanging Side Comment - fix this hanging side comment
6641 # 'BC'=Block Comment - an ordinary full line comment
6642 # 'SBC'=Static Block Comment - a block comment which does not get
6644 # 'SBCX'=Static Block Comment Without Leading Space
6645 # 'VER'=VERSION statement
6646 # '' or (undefined) - no restructions
6648 # For a hanging side comment we insert an empty quote before
6649 # the comment so that it becomes a normal side comment and
6650 # will be aligned by the vertical aligner
6651 if ( $CODE_type eq 'HSC' ) {
6653 # Safety Check: This must be a line with one token (a comment)
6654 my $rvars_Kfirst = $rLL->[$Kfirst];
6655 if ( $Kfirst == $Klast && $rvars_Kfirst->[_TYPE_] eq '#' ) {
6657 # Note that even if the flag 'noadd-whitespace' is set, we
6658 # will make an exception here and allow a blank to be
6659 # inserted to push the comment to the right. We can think
6660 # of this as an adjustment of indentation rather than
6661 # whitespace between tokens. This will also prevent the
6662 # hanging side comment from getting converted to a block
6663 # comment if whitespace gets deleted, as for example with
6664 # the -extrude and -mangle options.
6666 copy_token_as_type( $rvars_Kfirst, 'q', EMPTY_STRING );
6667 $self->store_token($rcopy);
6668 $rcopy = copy_token_as_type( $rvars_Kfirst, 'b', SPACE );
6669 $self->store_token($rcopy);
6670 $self->store_token($rvars_Kfirst);
6675 # This line was mis-marked by sub scan_comment. Catch in
6676 # DEVEL_MODE, otherwise try to repair and keep going.
6678 "Program bug. A hanging side comment has been mismarked"
6681 $CODE_type = EMPTY_STRING;
6682 $line_of_tokens->{_code_type} = $CODE_type;
6686 # Copy tokens unchanged
6687 foreach my $KK ( $Kfirst .. $Klast ) {
6689 $self->store_token( $rLL->[$KK] );
6694 # Handle normal line..
6696 # Define index of last token before any side comment for comma counts
6697 my $type_end = $rLL->[$Klast_old_code]->[_TYPE_];
6698 if ( ( $type_end eq '#' || $type_end eq 'b' )
6699 && $Klast_old_code > $Kfirst_old )
6702 if ( $rLL->[$Klast_old_code]->[_TYPE_] eq 'b'
6703 && $Klast_old_code > $Kfirst_old )
6709 # Insert any essential whitespace between lines
6710 # if last line was normal CODE.
6711 # Patch for rt #125012: use K_previous_code rather than '_nonblank'
6712 # because comments may disappear.
6713 if ( $last_line_type eq 'CODE' ) {
6714 my $type_next = $rLL->[$Kfirst]->[_TYPE_];
6715 my $token_next = $rLL->[$Kfirst]->[_TOKEN_];
6717 is_essential_whitespace(
6718 $last_last_nonblank_code_token,
6719 $last_last_nonblank_code_type,
6720 $last_nonblank_code_token,
6721 $last_nonblank_code_type,
6728 # Copy this first token as blank, but use previous line number
6729 my $rcopy = copy_token_as_type( $rLL->[$Kfirst], 'b', SPACE );
6730 $rcopy->[_LINE_INDEX_] =
6731 $rLL_new->[-1]->[_LINE_INDEX_];
6733 # The level and ci_level of newly created spaces should be the
6734 # same as the previous token. Otherwise blinking states can
6735 # be created if the -lp mode is used. See similar coding in
6736 # sub 'store_space_and_token'. Fixes cases b1109 b1110.
6738 $rLL_new->[-1]->[_LEVEL_];
6739 $rcopy->[_CI_LEVEL_] =
6740 $rLL_new->[-1]->[_CI_LEVEL_];
6742 $self->store_token($rcopy);
6746 #-----------------------------------------------
6747 # Inner loop to respace tokens on a line of code
6748 #-----------------------------------------------
6750 # The inner loop is in a separate sub for clarity
6751 $self->respace_tokens_inner_loop( $Kfirst, $Klast, $input_line_number );
6755 # finalize data structures
6756 $self->respace_post_loop_ops();
6758 # Reset memory to be the new array
6759 $self->[_rLL_] = $rLL_new;
6761 if ( @{$rLL_new} ) { $Klimit = @{$rLL_new} - 1 }
6762 $self->[_Klimit_] = $Klimit;
6764 # During development, verify that the new array still looks okay.
6765 DEVEL_MODE && $self->check_token_array();
6767 # update the token limits of each line
6768 ( $severe_error, $rqw_lines ) = $self->resync_lines_and_tokens();
6770 return ( $severe_error, $rqw_lines );
6771 } ## end sub respace_tokens
6773 sub respace_tokens_inner_loop {
6775 my ( $self, $Kfirst, $Klast, $input_line_number ) = @_;
6777 #-----------------------------------------------------------------
6778 # Loop to copy all tokens on one line, making any spacing changes,
6779 # while also collecting information needed by later subs.
6780 #-----------------------------------------------------------------
6781 foreach my $KK ( $Kfirst .. $Klast ) {
6783 # TODO: consider eliminating this closure var by passing directly to
6784 # store_token following pattern of store_tokens_to_go.
6787 my $rtoken_vars = $rLL->[$KK];
6788 my $type = $rtoken_vars->[_TYPE_];
6790 # Handle a blank space ...
6791 if ( $type eq 'b' ) {
6793 # Delete it if not wanted by whitespace rules
6794 # or we are deleting all whitespace
6795 # Note that whitespace flag is a flag indicating whether a
6796 # white space BEFORE the token is needed
6797 next if ( $KK >= $Klast ); # skip terminal blank
6798 my $Knext = $KK + 1;
6800 if ($rOpts_freeze_whitespace) {
6801 $self->store_token($rtoken_vars);
6805 my $ws = $rwhitespace_flags->[$Knext];
6807 || $rOpts_delete_old_whitespace )
6810 my $token_next = $rLL->[$Knext]->[_TOKEN_];
6811 my $type_next = $rLL->[$Knext]->[_TYPE_];
6813 my $do_not_delete = is_essential_whitespace(
6814 $last_last_nonblank_code_token,
6815 $last_last_nonblank_code_type,
6816 $last_nonblank_code_token,
6817 $last_nonblank_code_type,
6822 # Note that repeated blanks will get filtered out here
6823 next unless ($do_not_delete);
6826 # make it just one character
6827 $rtoken_vars->[_TOKEN_] = SPACE;
6828 $self->store_token($rtoken_vars);
6832 my $token = $rtoken_vars->[_TOKEN_];
6834 # Handle a sequenced token ... i.e. one of ( ) { } [ ] ? :
6835 if ( $rtoken_vars->[_TYPE_SEQUENCE_] ) {
6838 if ( $is_closing_token{$token} ) {
6840 my $type_sequence = $rtoken_vars->[_TYPE_SEQUENCE_];
6841 my $block_type = $rblock_type_of_seqno->{$type_sequence};
6843 #---------------------------------------------
6844 # check for semicolon addition in a code block
6845 #---------------------------------------------
6848 # if not preceded by a ';' ..
6849 if ( $last_nonblank_code_type ne ';' ) {
6851 # tentatively insert a semicolon if appropriate
6852 $self->add_phantom_semicolon($KK)
6853 if $rOpts->{'add-semicolons'};
6857 #----------------------------------------------------------
6858 # check for addition/deletion of a trailing comma in a list
6859 #----------------------------------------------------------
6862 # if this is a list ..
6863 my $rtype_count = $rtype_count_by_seqno->{$type_sequence};
6865 && $rtype_count->{','}
6866 && !$rtype_count->{';'}
6867 && !$rtype_count->{'f'} )
6870 # if NOT preceded by a comma..
6871 if ( $last_nonblank_code_type ne ',' ) {
6873 # insert a comma if requested
6874 if ( $rOpts_add_trailing_commas
6875 && %trailing_comma_rules )
6877 $self->add_trailing_comma( $KK, $Kfirst,
6878 $trailing_comma_rules{$token} );
6882 # if preceded by a comma ..
6885 # delete a trailing comma if requested
6887 if ( $rOpts_delete_trailing_commas
6888 && %trailing_comma_rules )
6891 $self->delete_trailing_comma( $KK, $Kfirst,
6892 $trailing_comma_rules{$token} );
6895 # delete a weld-interfering comma if requested
6897 && $rOpts_delete_weld_interfering_commas
6898 && $is_closing_type{
6899 $last_last_nonblank_code_type} )
6901 $self->delete_weld_interfering_comma($KK);
6909 # Modify certain tokens here for whitespace
6910 # The following is not yet done, but could be:
6912 # ( $type =~ /^[wit]$/ )
6913 elsif ( $is_wit{$type} ) {
6915 # change '$ var' to '$var' etc
6916 # change '@ ' to '@'
6917 # Examples: <<snippets/space1.in>>
6918 my $ord = ord( substr( $token, 1, 1 ) );
6921 # quick test for possible blank at second char
6922 $ord > 0 && ( $ord < ORD_PRINTABLE_MIN
6923 || $ord > ORD_PRINTABLE_MAX )
6926 my ( $sigil, $word ) = split /\s+/, $token, 2;
6928 # $sigil =~ /^[\$\&\%\*\@]$/ )
6929 if ( $is_sigil{$sigil} ) {
6931 $token .= $word if ( defined($word) ); # fix c104
6932 $rtoken_vars->[_TOKEN_] = $token;
6936 # Trim certain spaces in identifiers
6937 if ( $type eq 'i' ) {
6941 substr( $token, 0, 3 ) eq 'sub'
6942 || $rOpts_sub_alias_list
6944 && $token =~ /$SUB_PATTERN/
6948 # -spp = 0 : no space before opening prototype paren
6949 # -spp = 1 : stable (follow input spacing)
6950 # -spp = 2 : always space before opening prototype paren
6951 if ( !defined($rOpts_space_prototype_paren)
6952 || $rOpts_space_prototype_paren == 1 )
6956 elsif ( $rOpts_space_prototype_paren == 0 ) {
6957 $token =~ s/\s+\(/\(/;
6959 elsif ( $rOpts_space_prototype_paren == 2 ) {
6963 # one space max, and no tabs
6964 $token =~ s/\s+/ /g;
6965 $rtoken_vars->[_TOKEN_] = $token;
6968 # clean up spaces in package identifiers, like
6969 # "package Bob::Dog;"
6970 elsif ( substr( $token, 0, 7 ) eq 'package'
6971 && $token =~ /^package\s/ )
6973 $token =~ s/\s+/ /g;
6974 $rtoken_vars->[_TOKEN_] = $token;
6977 # trim identifiers of trailing blanks which can occur
6978 # under some unusual circumstances, such as if the
6979 # identifier 'witch' has trailing blanks on input here:
6983 # () # prototype may be on new line ...
6985 my $ord_ch = ord( substr( $token, -1, 1 ) );
6988 # quick check for possible ending space
6989 $ord_ch > 0 && ( $ord_ch < ORD_PRINTABLE_MIN
6990 || $ord_ch > ORD_PRINTABLE_MAX )
6993 $token =~ s/\s+$//g;
6994 $rtoken_vars->[_TOKEN_] = $token;
7000 elsif ( $type eq ';' ) {
7002 # Remove unnecessary semicolons, but not after bare
7003 # blocks, where it could be unsafe if the brace is
7006 $rOpts->{'delete-semicolons'}
7009 $last_nonblank_block_type
7010 && $last_nonblank_code_type eq '}'
7012 $is_block_without_semicolon{
7013 $last_nonblank_block_type}
7014 || $last_nonblank_block_type =~ /$SUB_PATTERN/
7015 || $last_nonblank_block_type =~ /^\w+:$/
7018 || $last_nonblank_code_type eq ';'
7023 # This looks like a deletable semicolon, but even if a
7024 # semicolon can be deleted it is not necessarily best to do
7025 # so. We apply these additional rules for deletion:
7026 # - Always ok to delete a ';' at the end of a line
7027 # - Never delete a ';' before a '#' because it would
7028 # promote it to a block comment.
7029 # - If a semicolon is not at the end of line, then only
7030 # delete if it is followed by another semicolon or closing
7031 # token. This includes the comment rule. It may take
7032 # two passes to get to a final state, but it is a little
7033 # safer. For example, keep the first semicolon here:
7034 # eval { sub bubba { ok(0) }; ok(0) } || ok(1);
7035 # It is not required but adds some clarity.
7036 my $ok_to_delete = 1;
7037 if ( $KK < $Klast ) {
7038 my $Kn = $self->K_next_nonblank($KK);
7039 if ( defined($Kn) && $Kn <= $Klast ) {
7040 my $next_nonblank_token_type = $rLL->[$Kn]->[_TYPE_];
7041 $ok_to_delete = $next_nonblank_token_type eq ';'
7042 || $next_nonblank_token_type eq '}';
7046 # do not delete only nonblank token in a file
7048 my $Kp = $self->K_previous_code( undef, $rLL_new );
7049 my $Kn = $self->K_next_nonblank($KK);
7050 $ok_to_delete = defined($Kn) || defined($Kp);
7053 if ($ok_to_delete) {
7054 $self->note_deleted_semicolon($input_line_number);
7058 write_logfile_entry("Extra ';'\n");
7063 # Old patch to add space to something like "x10".
7064 # Note: This is now done in the Tokenizer, but this code remains
7066 elsif ( $type eq 'n' ) {
7067 if ( substr( $token, 0, 1 ) eq 'x' && $token =~ /^x\d+/ ) {
7069 $rtoken_vars->[_TOKEN_] = $token;
7072 Near line $input_line_number, Unexpected need to split a token '$token' - this should now be done by the Tokenizer
7078 # check for a qw quote
7079 elsif ( $type eq 'q' ) {
7081 # trim blanks from right of qw quotes
7082 # (To avoid trimming qw quotes use -ntqw; the tokenizer handles
7085 $rtoken_vars->[_TOKEN_] = $token;
7086 if ( $self->[_save_logfile_] && $token =~ /\t/ ) {
7087 $self->note_embedded_tab($input_line_number);
7089 if ( $rwhitespace_flags->[$KK] == WS_YES ) {
7090 $self->store_space_and_token($rtoken_vars);
7093 $self->store_token($rtoken_vars);
7096 } ## end if ( $type eq 'q' )
7098 # delete repeated commas if requested
7099 elsif ( $type eq ',' ) {
7100 if ( $last_nonblank_code_type eq ','
7101 && $rOpts->{'delete-repeated-commas'} )
7103 # Could note this deletion as a possible future update:
7104 ## $self->note_deleted_comma($input_line_number);
7108 # remember input line index of first comma if -wtc is used
7109 if (%trailing_comma_rules) {
7110 my $seqno = $seqno_stack{ $depth_next - 1 };
7111 if ( defined($seqno)
7112 && !defined( $self->[_rfirst_comma_line_index_]->{$seqno} )
7115 $self->[_rfirst_comma_line_index_]->{$seqno} =
7116 $rtoken_vars->[_LINE_INDEX_];
7121 # change 'LABEL :' to 'LABEL:'
7122 elsif ( $type eq 'J' ) {
7124 $rtoken_vars->[_TOKEN_] = $token;
7127 # check a quote for problems
7128 elsif ( $type eq 'Q' ) {
7129 $self->check_Q( $KK, $Kfirst, $input_line_number )
7130 if ( $self->[_save_logfile_] );
7133 # Store this token with possible previous blank
7134 if ( $rwhitespace_flags->[$KK] == WS_YES ) {
7135 $self->store_space_and_token($rtoken_vars);
7138 $self->store_token($rtoken_vars);
7143 } ## end sub respace_tokens_inner_loop
7145 sub respace_post_loop_ops {
7149 # Walk backwards through the tokens, making forward links to sequence items.
7150 if ( @{$rLL_new} ) {
7152 foreach my $KK ( reverse( 0 .. @{$rLL_new} - 1 ) ) {
7153 $rLL_new->[$KK]->[_KNEXT_SEQ_ITEM_] = $KNEXT;
7154 if ( $rLL_new->[$KK]->[_TYPE_SEQUENCE_] ) { $KNEXT = $KK }
7156 $self->[_K_first_seq_item_] = $KNEXT;
7159 # Find and remember lists by sequence number
7161 foreach my $seqno ( keys %{$K_opening_container} ) {
7162 my $K_opening = $K_opening_container->{$seqno};
7163 next unless defined($K_opening);
7165 # code errors may leave undefined closing tokens
7166 my $K_closing = $K_closing_container->{$seqno};
7167 next unless defined($K_closing);
7169 my $lx_open = $rLL_new->[$K_opening]->[_LINE_INDEX_];
7170 my $lx_close = $rLL_new->[$K_closing]->[_LINE_INDEX_];
7171 my $line_diff = $lx_close - $lx_open;
7172 $ris_broken_container->{$seqno} = $line_diff;
7174 # See if this is a list
7176 my $rtype_count = $rtype_count_by_seqno->{$seqno};
7178 my $comma_count = $rtype_count->{','};
7179 my $fat_comma_count = $rtype_count->{'=>'};
7180 my $semicolon_count = $rtype_count->{';'};
7181 if ( $rtype_count->{'f'} ) {
7182 $semicolon_count += $rtype_count->{'f'};
7183 $is_C_style_for{$seqno} = 1;
7186 # We will define a list to be a container with one or more commas
7187 # and no semicolons. Note that we have included the semicolons
7188 # in a 'for' container in the semicolon count to keep c-style for
7189 # statements from being formatted as lists.
7190 if ( ( $comma_count || $fat_comma_count ) && !$semicolon_count ) {
7193 # We need to do one more check for a parenthesized list:
7194 # At an opening paren following certain tokens, such as 'if',
7195 # we do not want to format the contents as a list.
7196 if ( $rLL_new->[$K_opening]->[_TOKEN_] eq '(' ) {
7197 my $Kp = $self->K_previous_code( $K_opening, $rLL_new );
7198 if ( defined($Kp) ) {
7199 my $type_p = $rLL_new->[$Kp]->[_TYPE_];
7200 my $token_p = $rLL_new->[$Kp]->[_TOKEN_];
7203 ? !$is_nonlist_keyword{$token_p}
7204 : !$is_nonlist_type{$type_p};
7210 # Look for a block brace marked as uncertain. If the tokenizer thinks
7211 # its guess is uncertain for the type of a brace following an unknown
7212 # bareword then it adds a trailing space as a signal. We can fix the
7213 # type here now that we have had a better look at the contents of the
7214 # container. This fixes case b1085. To find the corresponding code in
7215 # Tokenizer.pm search for 'b1085' with an editor.
7216 my $block_type = $rblock_type_of_seqno->{$seqno};
7217 if ( $block_type && substr( $block_type, -1, 1 ) eq SPACE ) {
7219 # Always remove the trailing space
7220 $block_type =~ s/\s+$//;
7222 # Try to filter out parenless sub calls
7223 my $Knn1 = $self->K_next_nonblank( $K_opening, $rLL_new );
7225 if ( defined($Knn1) ) {
7226 $Knn2 = $self->K_next_nonblank( $Knn1, $rLL_new );
7228 my $type_nn1 = defined($Knn1) ? $rLL_new->[$Knn1]->[_TYPE_] : 'b';
7229 my $type_nn2 = defined($Knn2) ? $rLL_new->[$Knn2]->[_TYPE_] : 'b';
7231 # if ( $type_nn1 =~ /^[wU]$/ && $type_nn2 =~ /^[wiqQGCZ]$/ ) {
7232 if ( $wU{$type_nn1} && $wiq{$type_nn2} ) {
7236 # Convert to a hash brace if it looks like it holds a list
7239 $block_type = EMPTY_STRING;
7241 $rLL_new->[$K_opening]->[_CI_LEVEL_] = 1;
7242 $rLL_new->[$K_closing]->[_CI_LEVEL_] = 1;
7245 $rblock_type_of_seqno->{$seqno} = $block_type;
7248 # Handle a list container
7249 if ( $is_list && !$block_type ) {
7250 $ris_list_by_seqno->{$seqno} = $seqno;
7251 my $seqno_parent = $rparent_of_seqno->{$seqno};
7253 while ( defined($seqno_parent) && $seqno_parent ne SEQ_ROOT ) {
7256 # for $rhas_list we need to save the minimum depth
7257 if ( !$rhas_list->{$seqno_parent}
7258 || $rhas_list->{$seqno_parent} > $depth )
7260 $rhas_list->{$seqno_parent} = $depth;
7264 $rhas_broken_list->{$seqno_parent} = 1;
7266 # Patch1: We need to mark broken lists with non-terminal
7267 # line-ending commas for the -bbx=2 parameter. This insures
7268 # that the list will stay broken. Otherwise the flag
7269 # -bbx=2 can be unstable. This fixes case b789 and b938.
7271 # Patch2: Updated to also require either one fat comma or
7272 # one more line-ending comma. Fixes cases b1069 b1070
7275 $rlec_count_by_seqno->{$seqno}
7276 && ( $rlec_count_by_seqno->{$seqno} > 1
7277 || $rtype_count_by_seqno->{$seqno}->{'=>'} )
7280 $rhas_broken_list_with_lec->{$seqno_parent} = 1;
7283 $seqno_parent = $rparent_of_seqno->{$seqno_parent};
7287 # Handle code blocks ...
7288 # The -lp option needs to know if a container holds a code block
7289 elsif ( $block_type && $rOpts_line_up_parentheses ) {
7290 my $seqno_parent = $rparent_of_seqno->{$seqno};
7291 while ( defined($seqno_parent) && $seqno_parent ne SEQ_ROOT ) {
7292 $rhas_code_block->{$seqno_parent} = 1;
7293 $rhas_broken_code_block->{$seqno_parent} = $line_diff;
7294 $seqno_parent = $rparent_of_seqno->{$seqno_parent};
7299 # Find containers with ternaries, needed for -lp formatting.
7300 foreach my $seqno ( keys %{$K_opening_ternary} ) {
7301 my $seqno_parent = $rparent_of_seqno->{$seqno};
7302 while ( defined($seqno_parent) && $seqno_parent ne SEQ_ROOT ) {
7303 $rhas_ternary->{$seqno_parent} = 1;
7304 $seqno_parent = $rparent_of_seqno->{$seqno_parent};
7308 # Turn off -lp for containers with here-docs with text within a container,
7309 # since they have their own fixed indentation. Fixes case b1081.
7310 if ($rOpts_line_up_parentheses) {
7311 foreach my $seqno ( keys %K_first_here_doc_by_seqno ) {
7312 my $Kh = $K_first_here_doc_by_seqno{$seqno};
7313 my $Kc = $K_closing_container->{$seqno};
7314 my $line_Kh = $rLL_new->[$Kh]->[_LINE_INDEX_];
7315 my $line_Kc = $rLL_new->[$Kc]->[_LINE_INDEX_];
7316 next if ( $line_Kh == $line_Kc );
7317 $ris_excluded_lp_container->{$seqno} = 1;
7321 # Set a flag to turn off -cab=3 in complex structures. Otherwise,
7322 # instability can occur. When it is overridden the behavior of the closest
7323 # match, -cab=2, will be used instead. This fixes cases b1096 b1113.
7324 if ( $rOpts_comma_arrow_breakpoints == 3 ) {
7325 foreach my $seqno ( keys %{$K_opening_container} ) {
7327 my $rtype_count = $rtype_count_by_seqno->{$seqno};
7328 next unless ( $rtype_count && $rtype_count->{'=>'} );
7330 # override -cab=3 if this contains a sub-list
7331 if ( $rhas_list->{$seqno} ) {
7332 $roverride_cab3->{$seqno} = 1;
7335 # or if this is a sub-list of its parent container
7337 my $seqno_parent = $rparent_of_seqno->{$seqno};
7338 if ( defined($seqno_parent)
7339 && $ris_list_by_seqno->{$seqno_parent} )
7341 $roverride_cab3->{$seqno} = 1;
7347 # Add -ci to C-style for loops (issue c154)
7348 # This is much easier to do here than in the tokenizer.
7349 foreach my $seqno ( keys %is_C_style_for ) {
7350 my $K_opening = $K_opening_container->{$seqno};
7351 my $K_closing = $K_closing_container->{$seqno};
7352 my $type_last = 'f';
7353 for my $KK ( $K_opening + 1 .. $K_closing - 1 ) {
7354 $rLL_new->[$KK]->[_CI_LEVEL_] = $type_last eq 'f' ? 0 : 1;
7355 my $type = $rLL_new->[$KK]->[_TYPE_];
7356 if ( $type ne 'b' && $type ne '#' ) { $type_last = $type }
7361 } ## end sub respace_post_loop_ops
7363 sub set_permanently_broken {
7364 my ( $self, $seqno ) = @_;
7365 while ( defined($seqno) ) {
7366 $ris_permanently_broken->{$seqno} = 1;
7367 $seqno = $rparent_of_seqno->{$seqno};
7370 } ## end sub set_permanently_broken
7374 my ( $self, $item ) = @_;
7376 #------------------------------------------
7377 # Store one token during respace operations
7378 #------------------------------------------
7381 # $item = ref to a token
7383 # NOTE: this sub is called once per token so coding efficiency is critical.
7385 # The next multiple assignment statements are significantly faster than
7386 # doing them one-by-one.
7401 # Set the token length. Later it may be adjusted again if phantom or
7402 # ignoring side comment lengths.
7404 $is_encoded_data ? $length_function->($token) : length($token);
7407 if ( $type eq 'b' ) {
7409 # Do not output consecutive blanks. This situation should have been
7410 # prevented earlier, but it is worth checking because later routines
7411 # make this assumption.
7412 if ( @{$rLL_new} && $rLL_new->[-1]->[_TYPE_] eq 'b' ) {
7418 elsif ( $type eq '#' ) {
7420 # trim comments if necessary
7421 my $ord = ord( substr( $token, -1, 1 ) );
7424 && ( $ord < ORD_PRINTABLE_MIN
7425 || $ord > ORD_PRINTABLE_MAX )
7426 && $token =~ s/\s+$//
7429 $token_length = $length_function->($token);
7430 $item->[_TOKEN_] = $token;
7433 # Mark length of side comments as just 1 if sc lengths are ignored
7434 if ( $rOpts_ignore_side_comment_lengths
7435 && ( !$CODE_type || $CODE_type eq 'HSC' ) )
7439 my $seqno = $seqno_stack{ $depth_next - 1 };
7440 if ( defined($seqno) ) {
7441 $self->[_rblank_and_comment_count_]->{$seqno} += 1
7442 if ( $CODE_type eq 'BC' );
7443 $self->set_permanently_broken($seqno)
7444 if !$ris_permanently_broken->{$seqno};
7448 # handle non-blanks and non-comments
7453 # check for a sequenced item (i.e., container or ?/:)
7454 if ($type_sequence) {
7456 # This will be the index of this item in the new array
7457 my $KK_new = @{$rLL_new};
7459 if ( $is_opening_token{$token} ) {
7461 $K_opening_container->{$type_sequence} = $KK_new;
7462 $block_type = $rblock_type_of_seqno->{$type_sequence};
7464 # Fix for case b1100: Count a line ending in ', [' as having
7465 # a line-ending comma. Otherwise, these commas can be hidden
7466 # with something like --opening-square-bracket-right
7467 if ( $last_nonblank_code_type eq ','
7468 && $Ktoken_vars == $Klast_old_code
7469 && $Ktoken_vars > $Kfirst_old )
7471 $rlec_count_by_seqno->{$type_sequence}++;
7474 if ( $last_nonblank_code_type eq '='
7475 || $last_nonblank_code_type eq '=>' )
7477 $ris_assigned_structure->{$type_sequence} =
7478 $last_nonblank_code_type;
7481 my $seqno_parent = $seqno_stack{ $depth_next - 1 };
7482 $seqno_parent = SEQ_ROOT unless defined($seqno_parent);
7483 push @{ $rchildren_of_seqno->{$seqno_parent} }, $type_sequence;
7484 $rparent_of_seqno->{$type_sequence} = $seqno_parent;
7485 $seqno_stack{$depth_next} = $type_sequence;
7486 $K_old_opening_by_seqno{$type_sequence} = $Ktoken_vars;
7489 if ( $depth_next > $depth_next_max ) {
7490 $depth_next_max = $depth_next;
7493 elsif ( $is_closing_token{$token} ) {
7495 $K_closing_container->{$type_sequence} = $KK_new;
7496 $block_type = $rblock_type_of_seqno->{$type_sequence};
7498 # Do not include terminal commas in counts
7499 if ( $last_nonblank_code_type eq ','
7500 || $last_nonblank_code_type eq '=>' )
7502 $rtype_count_by_seqno->{$type_sequence}
7503 ->{$last_nonblank_code_type}--;
7505 if ( $Ktoken_vars == $Kfirst_old
7506 && $last_nonblank_code_type eq ','
7507 && $rlec_count_by_seqno->{$type_sequence} )
7509 $rlec_count_by_seqno->{$type_sequence}--;
7513 # Update the stack...
7518 # For ternary, note parent but do not include as child
7519 my $seqno_parent = $seqno_stack{ $depth_next - 1 };
7520 $seqno_parent = SEQ_ROOT unless defined($seqno_parent);
7521 $rparent_of_seqno->{$type_sequence} = $seqno_parent;
7523 # These are not yet used but could be useful
7524 if ( $token eq '?' ) {
7525 $K_opening_ternary->{$type_sequence} = $KK_new;
7527 elsif ( $token eq ':' ) {
7528 $K_closing_ternary->{$type_sequence} = $KK_new;
7532 # We really shouldn't arrive here, just being cautious:
7533 # The only sequenced types output by the tokenizer are the
7534 # opening & closing containers and the ternary types. Each
7535 # of those was checked above. So we would only get here
7536 # if the tokenizer has been changed to mark some other
7537 # tokens with sequence numbers.
7540 "Unexpected token type with sequence number: type='$type', seqno='$type_sequence'"
7547 # Remember the most recent two non-blank, non-comment tokens.
7548 # NOTE: the phantom semicolon code may change the output stack
7549 # without updating these values. Phantom semicolons are considered
7550 # the same as blanks for now, but future needs might change that.
7551 # See the related note in sub 'add_phantom_semicolon'.
7552 $last_last_nonblank_code_type = $last_nonblank_code_type;
7553 $last_last_nonblank_code_token = $last_nonblank_code_token;
7555 $last_nonblank_code_type = $type;
7556 $last_nonblank_code_token = $token;
7557 $last_nonblank_block_type = $block_type;
7559 # count selected types
7560 if ( $is_counted_type{$type} ) {
7561 my $seqno = $seqno_stack{ $depth_next - 1 };
7562 if ( defined($seqno) ) {
7563 $rtype_count_by_seqno->{$seqno}->{$type}++;
7565 # Count line-ending commas for -bbx
7566 if ( $type eq ',' && $Ktoken_vars == $Klast_old_code ) {
7567 $rlec_count_by_seqno->{$seqno}++;
7570 # Remember index of first here doc target
7571 if ( $type eq 'h' && !$K_first_here_doc_by_seqno{$seqno} ) {
7572 my $KK_new = @{$rLL_new};
7573 $K_first_here_doc_by_seqno{$seqno} = $KK_new;
7579 # cumulative length is the length sum including this token
7580 $cumulative_length += $token_length;
7582 $item->[_CUMULATIVE_LENGTH_] = $cumulative_length;
7583 $item->[_TOKEN_LENGTH_] = $token_length;
7585 # For reference, here is how to get the parent sequence number.
7586 # This is not used because it is slower than finding it on the fly
7587 # in sub parent_seqno_by_K:
7589 # my $seqno_parent =
7590 # $type_sequence && $is_opening_token{$token}
7591 # ? $seqno_stack{ $depth_next - 2 }
7592 # : $seqno_stack{ $depth_next - 1 };
7593 # my $KK = @{$rLL_new};
7594 # $rseqno_of_parent_by_K->{$KK} = $seqno_parent;
7596 # and finally, add this item to the new array
7597 push @{$rLL_new}, $item;
7599 } ## end sub store_token
7601 sub store_space_and_token {
7602 my ( $self, $item ) = @_;
7604 # store a token with preceding space if requested and needed
7606 # First store the space
7608 && $rLL_new->[-1]->[_TYPE_] ne 'b'
7609 && $rOpts_add_whitespace )
7611 my $rcopy = [ @{$item} ];
7612 $rcopy->[_TYPE_] = 'b';
7613 $rcopy->[_TOKEN_] = SPACE;
7614 $rcopy->[_TYPE_SEQUENCE_] = EMPTY_STRING;
7616 $rcopy->[_LINE_INDEX_] =
7617 $rLL_new->[-1]->[_LINE_INDEX_];
7619 # Patch 23-Jan-2021 to fix -lp blinkers:
7620 # The level and ci_level of newly created spaces should be the same
7621 # as the previous token. Otherwise the coding for the -lp option
7622 # can create a blinking state in some rare cases.
7624 $rLL_new->[-1]->[_LEVEL_];
7625 $rcopy->[_CI_LEVEL_] =
7626 $rLL_new->[-1]->[_CI_LEVEL_];
7628 $self->store_token($rcopy);
7632 $self->store_token($item);
7634 } ## end sub store_space_and_token
7636 sub add_phantom_semicolon {
7638 my ( $self, $KK ) = @_;
7640 my $Kp = $self->K_previous_nonblank( undef, $rLL_new );
7641 return unless ( defined($Kp) );
7643 # we are only adding semicolons for certain block types
7644 my $type_sequence = $rLL->[$KK]->[_TYPE_SEQUENCE_];
7645 return unless ($type_sequence);
7646 my $block_type = $rblock_type_of_seqno->{$type_sequence};
7647 return unless ($block_type);
7649 unless ( $ok_to_add_semicolon_for_block_type{$block_type}
7650 || $block_type =~ /^(sub|package)/
7651 || $block_type =~ /^\w+\:$/ );
7653 my $type_p = $rLL_new->[$Kp]->[_TYPE_];
7654 my $token_p = $rLL_new->[$Kp]->[_TOKEN_];
7655 my $type_sequence_p = $rLL_new->[$Kp]->[_TYPE_SEQUENCE_];
7657 # Do not add a semicolon if...
7661 # it would follow a comment (and be isolated)
7664 # it follows a code block ( because they are not always wanted
7665 # there and may add clutter)
7666 || $type_sequence_p && $rblock_type_of_seqno->{$type_sequence_p}
7668 # it would follow a label
7671 # it would be inside a 'format' statement (and cause syntax error)
7673 && $token_p =~ /format/ )
7677 # Do not add a semicolon if it would impede a weld with an immediately
7678 # following closing token...like this
7680 # ^--No semicolon can go here
7682 # look at the previous token... note use of the _NEW rLL array here,
7683 # but sequence numbers are invariant.
7684 my $seqno_inner = $rLL_new->[$Kp]->[_TYPE_SEQUENCE_];
7686 # If it is also a CLOSING token we have to look closer...
7689 && $is_closing_token{$token_p}
7691 # we only need to look if there is just one inner container..
7692 && defined( $rchildren_of_seqno->{$type_sequence} )
7693 && @{ $rchildren_of_seqno->{$type_sequence} } == 1
7697 # Go back and see if the corresponding two OPENING tokens are also
7698 # together. Note that we are using the OLD K indexing here:
7699 my $K_outer_opening = $K_old_opening_by_seqno{$type_sequence};
7700 if ( defined($K_outer_opening) ) {
7701 my $K_nxt = $self->K_next_nonblank($K_outer_opening);
7702 if ( defined($K_nxt) ) {
7703 my $seqno_nxt = $rLL->[$K_nxt]->[_TYPE_SEQUENCE_];
7705 # Is the next token after the outer opening the same as
7706 # our inner closing (i.e. same sequence number)?
7707 # If so, do not insert a semicolon here.
7708 return if ( $seqno_nxt && $seqno_nxt == $seqno_inner );
7713 # We will insert an empty semicolon here as a placeholder. Later, if
7714 # it becomes the last token on a line, we will bring it to life. The
7715 # advantage of doing this is that (1) we just have to check line
7716 # endings, and (2) the phantom semicolon has zero width and therefore
7717 # won't cause needless breaks of one-line blocks.
7719 if ( $rLL_new->[$Ktop]->[_TYPE_] eq 'b'
7720 && $want_left_space{';'} == WS_NO )
7723 # convert the blank into a semicolon..
7724 # be careful: we are working on the new stack top
7725 # on a token which has been stored.
7726 my $rcopy = copy_token_as_type( $rLL_new->[$Ktop], 'b', SPACE );
7728 # Convert the existing blank to:
7729 # a phantom semicolon for one_line_block option = 0 or 1
7730 # a real semicolon for one_line_block option = 2
7731 my $tok = EMPTY_STRING;
7733 if ( $rOpts_one_line_block_semicolons == 2 ) {
7738 $rLL_new->[$Ktop]->[_TOKEN_] = $tok;
7739 $rLL_new->[$Ktop]->[_TOKEN_LENGTH_] = $len_tok;
7740 $rLL_new->[$Ktop]->[_TYPE_] = ';';
7742 $self->[_rtype_count_by_seqno_]->{$type_sequence}->{';'}++;
7744 # NOTE: we are changing the output stack without updating variables
7745 # $last_nonblank_code_type, etc. Future needs might require that
7746 # those variables be updated here. For now, it seems ok to skip
7749 # Then store a new blank
7750 $self->store_token($rcopy);
7754 # Patch for issue c078: keep line indexes in order. If the top
7755 # token is a space that we are keeping (due to '-wls=';') then
7756 # we have to check that old line indexes stay in order.
7758 # instances in which side comments have been deleted and converted
7759 # into blanks, we may have filtered down multiple blanks into just
7760 # one. In that case the top blank may have a higher line number
7761 # than the previous nonblank token. Although the line indexes of
7762 # blanks are not really significant, we need to keep them in order
7763 # in order to pass error checks.
7764 if ( $rLL_new->[$Ktop]->[_TYPE_] eq 'b' ) {
7765 my $old_top_ix = $rLL_new->[$Ktop]->[_LINE_INDEX_];
7766 my $new_top_ix = $rLL_new->[$Kp]->[_LINE_INDEX_];
7767 if ( $new_top_ix < $old_top_ix ) {
7768 $rLL_new->[$Ktop]->[_LINE_INDEX_] = $new_top_ix;
7772 my $rcopy = copy_token_as_type( $rLL_new->[$Kp], ';', EMPTY_STRING );
7773 $self->store_token($rcopy);
7776 } ## end sub add_phantom_semicolon
7778 sub add_trailing_comma {
7780 # Implement the --add-trailing-commas flag to the line end before index $KK:
7782 my ( $self, $KK, $Kfirst, $trailing_comma_rule ) = @_;
7785 # $KK = index of closing token in old ($rLL) token list
7786 # which starts a new line and is not preceded by a comma
7787 # $Kfirst = index of first token on the current line of input tokens
7788 # $add_flags = user control flags
7790 # For example, we might want to add a comma here:
7795 # _rebate => $rebate <------ location of possible bare comma
7797 # ^-------------------closing token at index $KK on new line
7799 # Do not add a comma if it would follow a comment
7800 my $Kp = $self->K_previous_nonblank( undef, $rLL_new );
7801 return unless ( defined($Kp) );
7802 my $type_p = $rLL_new->[$Kp]->[_TYPE_];
7803 return if ( $type_p eq '#' );
7805 # see if the user wants a trailing comma here
7807 $self->match_trailing_comma_rule( $KK, $Kfirst, $Kp,
7808 $trailing_comma_rule, 1 );
7810 # if so, add a comma
7812 my $Knew = $self->store_new_token( ',', ',', $Kp );
7817 } ## end sub add_trailing_comma
7819 sub delete_trailing_comma {
7821 my ( $self, $KK, $Kfirst, $trailing_comma_rule ) = @_;
7823 # Apply the --delete-trailing-commas flag to the comma before index $KK
7826 # $KK = index of a closing token in OLD ($rLL) token list
7827 # which is preceded by a comma on the same line.
7828 # $Kfirst = index of first token on the current line of input tokens
7829 # $delete_option = user control flag
7831 # Returns true if the comma was deleted
7833 # For example, we might want to delete this comma:
7834 # my @asset = ("FASMX", "FASGX", "FASIX",);
7835 # | |^--------token at index $KK
7836 # | ^------comma of interest
7837 # ^-------------token at $Kfirst
7839 # Verify that the previous token is a comma. Note that we are working in
7840 # the new token list $rLL_new.
7841 my $Kp = $self->K_previous_nonblank( undef, $rLL_new );
7842 return unless ( defined($Kp) );
7843 if ( $rLL_new->[$Kp]->[_TYPE_] ne ',' ) {
7845 # there must be a '#' between the ',' and closing token; give up.
7849 # Do not delete commas when formatting under stress to avoid instability.
7850 # This fixes b1389, b1390, b1391, b1392. The $high_stress_level has
7851 # been found to work well for trailing commas.
7852 if ( $rLL_new->[$Kp]->[_LEVEL_] >= $high_stress_level ) {
7856 # See if the user wants this trailing comma
7858 $self->match_trailing_comma_rule( $KK, $Kfirst, $Kp,
7859 $trailing_comma_rule, 0 );
7861 # Patch: the --noadd-whitespace flag can cause instability in complex
7862 # structures. In this case do not delete the comma. Fixes b1409.
7863 if ( !$match && !$rOpts_add_whitespace ) {
7864 my $Kn = $self->K_next_nonblank($KK);
7865 if ( defined($Kn) ) {
7866 my $type_n = $rLL->[$Kn]->[_TYPE_];
7867 if ( $type_n ne ';' && $type_n ne '#' ) { return }
7871 # If no match, delete it
7874 return $self->unstore_last_nonblank_token(',');
7878 } ## end sub delete_trailing_comma
7880 sub delete_weld_interfering_comma {
7882 my ( $self, $KK ) = @_;
7884 # Apply the flag '--delete-weld-interfering-commas' to the comma
7888 # $KK = index of a closing token in OLD ($rLL) token list
7889 # which is preceded by a comma on the same line.
7891 # Returns true if the comma was deleted
7893 # For example, we might want to delete this comma:
7895 # my $tmpl = { foo => {no_override => 1, default => 42}, };
7901 # index $KK is in the old $rLL array, but
7902 # indexes $Kp and $Kpp are in the new $rLL_new array.
7904 my $type_sequence = $rLL->[$KK]->[_TYPE_SEQUENCE_];
7905 return unless ($type_sequence);
7907 # Find the previous token and verify that it is a comma.
7908 my $Kp = $self->K_previous_nonblank( undef, $rLL_new );
7909 return unless ( defined($Kp) );
7910 if ( $rLL_new->[$Kp]->[_TYPE_] ne ',' ) {
7912 # it is not a comma, so give up ( it is probably a '#' )
7916 # This must be the only comma in this list
7917 my $rtype_count = $self->[_rtype_count_by_seqno_]->{$type_sequence};
7919 unless ( defined($rtype_count)
7920 && $rtype_count->{','}
7921 && $rtype_count->{','} == 1 );
7923 # Back up to the previous closing token
7924 my $Kpp = $self->K_previous_nonblank( $Kp, $rLL_new );
7925 return unless ( defined($Kpp) );
7926 my $seqno_pp = $rLL_new->[$Kpp]->[_TYPE_SEQUENCE_];
7927 my $type_pp = $rLL_new->[$Kpp]->[_TYPE_];
7929 # The containers must be nesting (i.e., sequence numbers must differ by 1 )
7930 if ( $seqno_pp && $is_closing_type{$type_pp} ) {
7931 if ( $seqno_pp == $type_sequence + 1 ) {
7933 # remove the ',' from the top of the new token list
7934 return $self->unstore_last_nonblank_token(',');
7939 } ## end sub delete_trailing_comma
7941 sub unstore_last_nonblank_token {
7943 my ( $self, $type ) = @_;
7945 # remove the most recent nonblank token from the new token list
7947 # $type = type to be removed (for safety check)
7949 # Returns true if success
7952 # This was written and is used for removing commas, but might
7953 # be useful for other tokens. If it is ever used for other tokens
7954 # then the issue of what to do about the other variables, such
7955 # as token counts and the '$last...' vars needs to be considered.
7957 # Safety check, shouldn't happen
7958 if ( @{$rLL_new} < 3 ) {
7959 DEVEL_MODE && Fault("not enough tokens on stack to remove '$type'\n");
7963 my ( $rcomma, $rblank );
7965 # case 1: pop comma from top of stack
7966 if ( $rLL_new->[-1]->[_TYPE_] eq $type ) {
7967 $rcomma = pop @{$rLL_new};
7970 # case 2: pop blank and then comma from top of stack
7971 elsif ($rLL_new->[-1]->[_TYPE_] eq 'b'
7972 && $rLL_new->[-2]->[_TYPE_] eq $type )
7974 $rblank = pop @{$rLL_new};
7975 $rcomma = pop @{$rLL_new};
7978 # case 3: error, shouldn't happen unless bad call
7980 DEVEL_MODE && Fault("Could not find token type '$type' to remove\n");
7984 # A note on updating vars set by sub store_token for this comma: If we
7985 # reduce the comma count by 1 then we also have to change the variable
7986 # $last_nonblank_code_type to be $last_last_nonblank_code_type because
7987 # otherwise sub store_token is going to ALSO reduce the comma count.
7988 # Alternatively, we can leave the count alone and the
7989 # $last_nonblank_code_type alone. Then sub store_token will produce
7990 # the correct result. This is simpler and is done here.
7992 # Now add a blank space after the comma if appropriate.
7993 # Some unusual spacing controls might need another iteration to
7994 # reach a final state.
7995 if ( $rLL_new->[-1]->[_TYPE_] ne 'b' ) {
7996 if ( defined($rblank) ) {
7997 $rblank->[_CUMULATIVE_LENGTH_] -= 1; # fix for deleted comma
7998 push @{$rLL_new}, $rblank;
8004 sub match_trailing_comma_rule {
8006 my ( $self, $KK, $Kfirst, $Kp, $trailing_comma_rule, $if_add ) = @_;
8008 # Decide if a trailing comma rule is matched.
8011 # $KK = index of closing token in old ($rLL) token list which follows
8012 # the location of a possible trailing comma. See diagram below.
8013 # $Kfirst = (old) index of first token on the current line of input tokens
8014 # $Kp = index of previous nonblank token in new ($rLL_new) array
8015 # $trailing_comma_rule = packed user control flags
8016 # $if_add = true if adding comma, false if deleteing comma
8022 # For example, we might be checking for addition of a comma here:
8027 # _rebate => $rebate <------ location of possible trailing comma
8029 # ^-------------------closing token at index $KK
8031 return unless ($trailing_comma_rule);
8032 my ( $trailing_comma_style, $paren_flag ) = @{$trailing_comma_rule};
8034 # List of $trailing_comma_style values:
8035 # undef stable: do not change
8036 # '0' : no list should have a trailing comma
8037 # '1' or '*' : every list should have a trailing comma
8038 # 'm' a multi-line list should have a trailing commas
8039 # 'b' trailing commas should be 'bare' (comma followed by newline)
8040 # 'h' lists of key=>value pairs with a bare trailing comma
8041 # 'i' same as s=h but also include any list with no more than about one
8043 # ' ' or -wtc not defined : leave trailing commas unchanged [DEFAULT].
8045 # Note: an interesting generalization would be to let an upper case
8046 # letter denote the negation of styles 'm', 'b', 'h', 'i'. This might
8047 # be useful for undoing operations. It would be implemented as a wrapper
8048 # around this routine.
8050 #-----------------------------------------
8051 # No style defined : do not add or delete
8052 #-----------------------------------------
8053 if ( !defined($trailing_comma_style) ) { return !$if_add }
8055 #----------------------------------------
8056 # Set some flags describing this location
8057 #----------------------------------------
8058 my $type_sequence = $rLL->[$KK]->[_TYPE_SEQUENCE_];
8059 return unless ($type_sequence);
8060 my $closing_token = $rLL->[$KK]->[_TOKEN_];
8061 my $rtype_count = $self->[_rtype_count_by_seqno_]->{$type_sequence};
8062 return unless ( defined($rtype_count) && $rtype_count->{','} );
8063 my $is_permanently_broken =
8064 $self->[_ris_permanently_broken_]->{$type_sequence};
8066 # Note that _ris_broken_container_ also stores the line diff
8067 # but it is not available at this early stage.
8068 my $K_opening = $self->[_K_opening_container_]->{$type_sequence};
8069 return if ( !defined($K_opening) );
8071 # multiline definition 1: opening and closing tokens on different lines
8072 my $iline_o = $rLL_new->[$K_opening]->[_LINE_INDEX_];
8073 my $iline_c = $rLL->[$KK]->[_LINE_INDEX_];
8074 my $line_diff_containers = $iline_c - $iline_o;
8075 my $has_multiline_containers = $line_diff_containers > 0;
8077 # multiline definition 2: first and last commas on different lines
8078 my $iline_first = $self->[_rfirst_comma_line_index_]->{$type_sequence};
8079 my $iline_last = $rLL_new->[$Kp]->[_LINE_INDEX_];
8080 my $has_multiline_commas;
8081 my $line_diff_commas = 0;
8082 if ( !defined($iline_first) ) {
8084 # shouldn't happen if caller checked comma count
8085 my $type_kp = $rLL_new->[$Kp]->[_TYPE_];
8087 "at line $iline_last but line of first comma not defined, at Kp=$Kp, type=$type_kp\n"
8091 $line_diff_commas = $iline_last - $iline_first;
8092 $has_multiline_commas = $line_diff_commas > 0;
8095 # To avoid instability in edge cases, when adding commas we uses the
8096 # multiline_commas definition, but when deleting we use multiline
8097 # containers. This fixes b1384, b1396, b1397, b1398, b1400.
8099 $if_add ? $has_multiline_commas : $has_multiline_containers;
8101 my $is_bare_multiline_comma = $is_multiline && $KK == $Kfirst;
8105 #----------------------------
8106 # 0 : does not match any list
8107 #----------------------------
8108 if ( $trailing_comma_style eq '0' ) {
8112 #------------------------------
8113 # '*' or '1' : matches any list
8114 #------------------------------
8115 elsif ( $trailing_comma_style eq '*' || $trailing_comma_style eq '1' ) {
8119 #-----------------------------
8120 # 'm' matches a Multiline list
8121 #-----------------------------
8122 elsif ( $trailing_comma_style eq 'm' ) {
8123 $match = $is_multiline;
8126 #----------------------------------
8127 # 'b' matches a Bare trailing comma
8128 #----------------------------------
8129 elsif ( $trailing_comma_style eq 'b' ) {
8130 $match = $is_bare_multiline_comma;
8133 #--------------------------------------------------------------------------
8134 # 'h' matches a bare hash list with about 1 comma and 1 fat comma per line.
8135 # 'i' matches a bare stable list with about 1 comma per line.
8136 #--------------------------------------------------------------------------
8137 elsif ( $trailing_comma_style eq 'h' || $trailing_comma_style eq 'i' ) {
8139 # We can treat these together because they are similar.
8140 # The set of 'i' matches includes the set of 'h' matches.
8142 # the trailing comma must be bare for both 'h' and 'i'
8143 return if ( !$is_bare_multiline_comma );
8145 # there must be no more than one comma per line for both 'h' and 'i'
8146 my $new_comma_count = $rtype_count->{','};
8147 $new_comma_count += 1 if ($if_add);
8148 return if ( $new_comma_count > $line_diff_commas + 1 );
8150 # a list of key=>value pairs with at least 2 fat commas is a match
8151 # for both 'h' and 'i'
8152 my $fat_comma_count = $rtype_count->{'=>'};
8153 if ( $fat_comma_count && $fat_comma_count >= 2 ) {
8155 # comma count (including trailer) and fat comma count must differ by
8156 # by no more than 1. This allows for some small variations.
8157 my $comma_diff = $new_comma_count - $fat_comma_count;
8158 $match = ( $comma_diff >= -1 && $comma_diff <= 1 );
8161 # For 'i' only, a list that can be shown to be stable is a match
8162 if ( $trailing_comma_style eq 'i' ) {
8164 $is_permanently_broken
8165 || ( $rOpts_break_at_old_comma_breakpoints
8166 && !$rOpts_ignore_old_breakpoints )
8171 #-------------------------------------------------------------------------
8172 # Unrecognized parameter. This should have been caught in the input check.
8173 #-------------------------------------------------------------------------
8176 DEVEL_MODE && Fault("Unrecognized parameter '$trailing_comma_style'\n");
8178 # do not add or delete
8182 # Now do any special paren check
8185 && $paren_flag ne '1'
8186 && $paren_flag ne '*'
8187 && $closing_token eq ')' )
8190 $self->match_paren_control_flag( $type_sequence, $paren_flag,
8194 # Fix for b1379, b1380, b1381, b1382, b1384 part 1. Mark trailing commas
8195 # for use by -vtc logic to avoid instability when -dtc and -atc are both
8198 if ( $if_add && $rOpts_delete_trailing_commas
8199 || !$if_add && $rOpts_add_trailing_commas )
8201 $self->[_ris_bare_trailing_comma_by_seqno_]->{$type_sequence} = 1;
8203 # The combination of -atc and -dtc and -cab=3 can be unstable
8204 # (b1394). So we deactivate -cab=3 in this case.
8205 if ( $rOpts_comma_arrow_breakpoints == 3 ) {
8206 $self->[_roverride_cab3_]->{$type_sequence} = 1;
8213 sub store_new_token {
8215 my ( $self, $type, $token, $Kp ) = @_;
8217 # Create and insert a completely new token into the output stream
8220 # $type = the token type
8221 # $token = the token text
8222 # $Kp = index of the previous token in the new list, $rLL_new
8225 # $Knew = index in $rLL_new of the new token
8227 # This operation is a little tricky because we are creating a new token and
8228 # we have to take care to follow the requested whitespace rules.
8230 my $Ktop = @{$rLL_new} - 1;
8231 my $top_is_space = $Ktop >= 0 && $rLL_new->[$Ktop]->[_TYPE_] eq 'b';
8233 if ( $top_is_space && $want_left_space{$type} == WS_NO ) {
8235 #----------------------------------------------------
8236 # Method 1: Convert the top blank into the new token.
8237 #----------------------------------------------------
8239 # Be Careful: we are working on the top of the new stack, on a token
8240 # which has been stored.
8242 my $rcopy = copy_token_as_type( $rLL_new->[$Ktop], 'b', SPACE );
8245 $rLL_new->[$Knew]->[_TOKEN_] = $token;
8246 $rLL_new->[$Knew]->[_TOKEN_LENGTH_] = length($token);
8247 $rLL_new->[$Knew]->[_TYPE_] = $type;
8249 # NOTE: we are changing the output stack without updating variables
8250 # $last_nonblank_code_type, etc. Future needs might require that
8251 # those variables be updated here. For now, we just update the
8252 # type counts as necessary.
8254 if ( $is_counted_type{$type} ) {
8255 my $seqno = $seqno_stack{ $depth_next - 1 };
8257 $self->[_rtype_count_by_seqno_]->{$seqno}->{$type}++;
8261 # Then store a new blank
8262 $self->store_token($rcopy);
8266 #----------------------------------------
8267 # Method 2: Use the normal storage method
8268 #----------------------------------------
8270 # Patch for issue c078: keep line indexes in order. If the top
8271 # token is a space that we are keeping (due to '-wls=...) then
8272 # we have to check that old line indexes stay in order.
8274 # instances in which side comments have been deleted and converted
8275 # into blanks, we may have filtered down multiple blanks into just
8276 # one. In that case the top blank may have a higher line number
8277 # than the previous nonblank token. Although the line indexes of
8278 # blanks are not really significant, we need to keep them in order
8279 # in order to pass error checks.
8280 if ($top_is_space) {
8281 my $old_top_ix = $rLL_new->[$Ktop]->[_LINE_INDEX_];
8282 my $new_top_ix = $rLL_new->[$Kp]->[_LINE_INDEX_];
8283 if ( $new_top_ix < $old_top_ix ) {
8284 $rLL_new->[$Ktop]->[_LINE_INDEX_] = $new_top_ix;
8288 my $rcopy = copy_token_as_type( $rLL_new->[$Kp], $type, $token );
8289 $self->store_token($rcopy);
8290 $Knew = @{$rLL_new} - 1;
8293 } ## end sub store_new_token
8297 # Check that a quote looks okay, and report possible problems
8300 my ( $self, $KK, $Kfirst, $line_number ) = @_;
8301 my $token = $rLL->[$KK]->[_TOKEN_];
8302 if ( $token =~ /\t/ ) {
8303 $self->note_embedded_tab($line_number);
8306 # The remainder of this routine looks for something like
8307 # '$var = s/xxx/yyy/;'
8308 # in case it should have been '$var =~ s/xxx/yyy/;'
8310 # Start by looking for a token beginning with one of: s y m / tr
8312 unless ( $is_s_y_m_slash{ substr( $token, 0, 1 ) }
8313 || substr( $token, 0, 2 ) eq 'tr' );
8315 # ... and preceded by one of: = == !=
8316 my $Kp = $self->K_previous_nonblank( undef, $rLL_new );
8317 return unless ( defined($Kp) );
8318 my $previous_nonblank_type = $rLL_new->[$Kp]->[_TYPE_];
8319 return unless ( $is_unexpected_equals{$previous_nonblank_type} );
8320 my $previous_nonblank_token = $rLL_new->[$Kp]->[_TOKEN_];
8322 my $previous_nonblank_type_2 = 'b';
8323 my $previous_nonblank_token_2 = EMPTY_STRING;
8324 my $Kpp = $self->K_previous_nonblank( $Kp, $rLL_new );
8325 if ( defined($Kpp) ) {
8326 $previous_nonblank_type_2 = $rLL_new->[$Kpp]->[_TYPE_];
8327 $previous_nonblank_token_2 = $rLL_new->[$Kpp]->[_TOKEN_];
8330 my $next_nonblank_token = EMPTY_STRING;
8332 my $Kmax = @{$rLL} - 1;
8333 if ( $Kn <= $Kmax && $rLL->[$Kn]->[_TYPE_] eq 'b' ) { $Kn += 1 }
8334 if ( $Kn <= $Kmax ) {
8335 $next_nonblank_token = $rLL->[$Kn]->[_TOKEN_];
8338 my $token_0 = $rLL->[$Kfirst]->[_TOKEN_];
8339 my $type_0 = $rLL->[$Kfirst]->[_TYPE_];
8343 # preceded by simple scalar
8344 $previous_nonblank_type_2 eq 'i'
8345 && $previous_nonblank_token_2 =~ /^\$/
8347 # followed by some kind of termination
8348 # (but give complaint if we can not see far enough ahead)
8349 && $next_nonblank_token =~ /^[; \)\}]$/
8351 # scalar is not declared
8352 ## =~ /^(my|our|local)$/
8353 && !( $type_0 eq 'k' && $is_my_our_local{$token_0} )
8356 my $lno = 1 + $rLL_new->[$Kp]->[_LINE_INDEX_];
8357 my $guess = substr( $previous_nonblank_token, 0, 1 ) . '~';
8359 "Line $lno: Note: be sure you want '$previous_nonblank_token' instead of '$guess' here\n"
8363 } ## end sub check_Q
8365 } ## end closure respace_tokens
8367 sub copy_token_as_type {
8369 # This provides a quick way to create a new token by
8370 # slightly modifying an existing token.
8371 my ( $rold_token, $type, $token ) = @_;
8372 if ( !defined($token) ) {
8373 if ( $type eq 'b' ) {
8376 elsif ( $type eq 'q' ) {
8377 $token = EMPTY_STRING;
8379 elsif ( $type eq '->' ) {
8382 elsif ( $type eq ';' ) {
8385 elsif ( $type eq ',' ) {
8390 # Unexpected type ... this sub will work as long as both $token and
8391 # $type are defined, but we should catch any unexpected types during
8395 sub 'copy_token_as_type' received token type '$type' but expects just one of: 'b' 'q' '->' or ';'
8399 # Shouldn't get here
8404 my @rnew_token = @{$rold_token};
8405 $rnew_token[_TYPE_] = $type;
8406 $rnew_token[_TOKEN_] = $token;
8407 $rnew_token[_TYPE_SEQUENCE_] = EMPTY_STRING;
8408 return \@rnew_token;
8409 } ## end sub copy_token_as_type
8412 my ( $self, $KK, $rLL ) = @_;
8414 # return the index K of the next nonblank, non-comment token
8415 return unless ( defined($KK) && $KK >= 0 );
8417 # use the standard array unless given otherwise
8418 $rLL = $self->[_rLL_] unless ( defined($rLL) );
8421 while ( $Knnb < $Num ) {
8422 if ( !defined( $rLL->[$Knnb] ) ) {
8424 # We seem to have encountered a gap in our array.
8425 # This shouldn't happen because sub write_line() pushed
8426 # items into the $rLL array.
8427 Fault("Undefined entry for k=$Knnb") if (DEVEL_MODE);
8430 if ( $rLL->[$Knnb]->[_TYPE_] ne 'b'
8431 && $rLL->[$Knnb]->[_TYPE_] ne '#' )
8438 } ## end sub K_next_code
8440 sub K_next_nonblank {
8441 my ( $self, $KK, $rLL ) = @_;
8443 # return the index K of the next nonblank token, or
8444 # return undef if none
8445 return unless ( defined($KK) && $KK >= 0 );
8447 # The third arg allows this routine to be used on any array. This is
8448 # useful in sub respace_tokens when we are copying tokens from an old $rLL
8449 # to a new $rLL array. But usually the third arg will not be given and we
8450 # will just use the $rLL array in $self.
8451 $rLL = $self->[_rLL_] unless ( defined($rLL) );
8454 return unless ( $Knnb < $Num );
8455 return $Knnb if ( $rLL->[$Knnb]->[_TYPE_] ne 'b' );
8456 return unless ( ++$Knnb < $Num );
8457 return $Knnb if ( $rLL->[$Knnb]->[_TYPE_] ne 'b' );
8459 # Backup loop. Very unlikely to get here; it means we have neighboring
8460 # blanks in the token stream.
8462 while ( $Knnb < $Num ) {
8464 # Safety check, this fault shouldn't happen: The $rLL array is the
8465 # main array of tokens, so all entries should be used. It is
8466 # initialized in sub write_line, and then re-initialized by sub
8467 # store_token() within sub respace_tokens. Tokens are pushed on
8468 # so there shouldn't be any gaps.
8469 if ( !defined( $rLL->[$Knnb] ) ) {
8470 Fault("Undefined entry for k=$Knnb") if (DEVEL_MODE);
8473 if ( $rLL->[$Knnb]->[_TYPE_] ne 'b' ) { return $Knnb }
8477 } ## end sub K_next_nonblank
8479 sub K_previous_code {
8481 # return the index K of the previous nonblank, non-comment token
8482 # Call with $KK=undef to start search at the top of the array
8483 my ( $self, $KK, $rLL ) = @_;
8485 # use the standard array unless given otherwise
8486 $rLL = $self->[_rLL_] unless ( defined($rLL) );
8488 if ( !defined($KK) ) { $KK = $Num }
8489 elsif ( $KK > $Num ) {
8491 # This fault can be caused by a programming error in which a bad $KK is
8492 # given. The caller should make the first call with KK_new=undef to
8495 "Program Bug: K_previous_nonblank_new called with K=$KK which exceeds $Num"
8500 while ( $Kpnb >= 0 ) {
8501 if ( $rLL->[$Kpnb]->[_TYPE_] ne 'b'
8502 && $rLL->[$Kpnb]->[_TYPE_] ne '#' )
8509 } ## end sub K_previous_code
8511 sub K_previous_nonblank {
8513 # return index of previous nonblank token before item K;
8514 # Call with $KK=undef to start search at the top of the array
8515 my ( $self, $KK, $rLL ) = @_;
8517 # use the standard array unless given otherwise
8518 $rLL = $self->[_rLL_] unless ( defined($rLL) );
8520 if ( !defined($KK) ) { $KK = $Num }
8521 elsif ( $KK > $Num ) {
8523 # This fault can be caused by a programming error in which a bad $KK is
8524 # given. The caller should make the first call with KK_new=undef to
8527 "Program Bug: K_previous_nonblank_new called with K=$KK which exceeds $Num"
8532 return unless ( $Kpnb >= 0 );
8533 return $Kpnb if ( $rLL->[$Kpnb]->[_TYPE_] ne 'b' );
8534 return unless ( --$Kpnb >= 0 );
8535 return $Kpnb if ( $rLL->[$Kpnb]->[_TYPE_] ne 'b' );
8537 # Backup loop. We should not get here unless some routine
8538 # slipped repeated blanks into the token stream.
8539 return unless ( --$Kpnb >= 0 );
8540 while ( $Kpnb >= 0 ) {
8541 if ( $rLL->[$Kpnb]->[_TYPE_] ne 'b' ) { return $Kpnb }
8545 } ## end sub K_previous_nonblank
8547 sub parent_seqno_by_K {
8549 # Return the sequence number of the parent container of token K, if any.
8551 my ( $self, $KK ) = @_;
8552 my $rLL = $self->[_rLL_];
8554 # The task is to jump forward to the next container token
8555 # and use the sequence number of either it or its parent.
8557 # For example, consider the following with seqno=5 of the '[' and ']'
8558 # being called with index K of the first token of each line:
8563 # sub { 99 }, 'do {&{%s} for 1,2}', # 5
8564 # '(&{})(&{})', undef, # 5
8565 # [ 2, 2, 0 ], 0 # 5
8568 # NOTE: The ending parent will be SEQ_ROOT for a balanced file. For
8569 # unbalanced files, last sequence number will either be undefined or it may
8570 # be at a deeper level. In either case we will just return SEQ_ROOT to
8571 # have a defined value and allow formatting to proceed.
8572 my $parent_seqno = SEQ_ROOT;
8573 my $type_sequence = $rLL->[$KK]->[_TYPE_SEQUENCE_];
8574 if ($type_sequence) {
8575 $parent_seqno = $self->[_rparent_of_seqno_]->{$type_sequence};
8578 my $Kt = $rLL->[$KK]->[_KNEXT_SEQ_ITEM_];
8579 if ( defined($Kt) ) {
8580 $type_sequence = $rLL->[$Kt]->[_TYPE_SEQUENCE_];
8581 my $type = $rLL->[$Kt]->[_TYPE_];
8583 # if next container token is closing, it is the parent seqno
8584 if ( $is_closing_type{$type} ) {
8585 $parent_seqno = $type_sequence;
8588 # otherwise we want its parent container
8590 $parent_seqno = $self->[_rparent_of_seqno_]->{$type_sequence};
8594 $parent_seqno = SEQ_ROOT unless ( defined($parent_seqno) );
8595 return $parent_seqno;
8596 } ## end sub parent_seqno_by_K
8598 sub is_in_block_by_i {
8599 my ( $self, $i ) = @_;
8602 # token at i is contained in a BLOCK
8603 # or is at root level
8604 # or there is some kind of error (i.e. unbalanced file)
8605 # returns false otherwise
8608 DEVEL_MODE && Fault("Bad call, i='$i'\n");
8612 my $seqno = $parent_seqno_to_go[$i];
8613 return 1 if ( !$seqno || $seqno eq SEQ_ROOT );
8614 return 1 if ( $self->[_rblock_type_of_seqno_]->{$seqno} );
8616 } ## end sub is_in_block_by_i
8618 sub is_in_list_by_i {
8619 my ( $self, $i ) = @_;
8621 # returns true if token at i is contained in a LIST
8622 # returns false otherwise
8623 my $seqno = $parent_seqno_to_go[$i];
8624 return unless ( $seqno && $seqno ne SEQ_ROOT );
8625 if ( $self->[_ris_list_by_seqno_]->{$seqno} ) {
8629 } ## end sub is_in_list_by_i
8633 # Return true if token K is in a list
8634 my ( $self, $KK ) = @_;
8636 my $parent_seqno = $self->parent_seqno_by_K($KK);
8637 return unless defined($parent_seqno);
8638 return $self->[_ris_list_by_seqno_]->{$parent_seqno};
8641 sub is_list_by_seqno {
8643 # Return true if the immediate contents of a container appears to be a
8645 my ( $self, $seqno ) = @_;
8646 return unless defined($seqno);
8647 return $self->[_ris_list_by_seqno_]->{$seqno};
8650 sub resync_lines_and_tokens {
8654 # Re-construct the arrays of tokens associated with the original input
8655 # lines since they have probably changed due to inserting and deleting
8656 # blanks and a few other tokens.
8658 # Return paremeters:
8659 # set severe_error = true if processing needs to terminate
8663 my $rLL = $self->[_rLL_];
8664 my $Klimit = $self->[_Klimit_];
8665 my $rlines = $self->[_rlines_];
8666 my @Krange_code_without_comments;
8667 my @Klast_valign_code;
8669 # This is the next token and its line index:
8671 my $Kmax = defined($Klimit) ? $Klimit : -1;
8673 # Verify that old line indexes are in still order. If this error occurs,
8674 # check locations where sub 'respace_tokens' creates new tokens (like
8675 # blank spaces). It must have set a bad old line index.
8676 if ( DEVEL_MODE && defined($Klimit) ) {
8677 my $iline = $rLL->[0]->[_LINE_INDEX_];
8678 foreach my $KK ( 1 .. $Klimit ) {
8679 my $iline_last = $iline;
8680 $iline = $rLL->[$KK]->[_LINE_INDEX_];
8681 if ( $iline < $iline_last ) {
8683 my $token_m = $rLL->[$KK_m]->[_TOKEN_];
8684 my $token = $rLL->[$KK]->[_TOKEN_];
8685 my $type_m = $rLL->[$KK_m]->[_TYPE_];
8686 my $type = $rLL->[$KK]->[_TYPE_];
8688 Line indexes out of order at index K=$KK:
8689 at KK-1 =$KK_m: old line=$iline_last, type='$type_m', token='$token_m'
8690 at KK =$KK: old line=$iline, type='$type', token='$token',
8697 foreach my $line_of_tokens ( @{$rlines} ) {
8699 my $line_type = $line_of_tokens->{_line_type};
8700 if ( $line_type eq 'CODE' ) {
8702 # Get the old number of tokens on this line
8703 my $rK_range_old = $line_of_tokens->{_rK_range};
8704 my ( $Kfirst_old, $Klast_old ) = @{$rK_range_old};
8706 if ( defined($Kfirst_old) ) {
8707 $Kdiff_old = $Klast_old - $Kfirst_old;
8710 # Find the range of NEW K indexes for the line:
8711 # $Kfirst = index of first token on line
8712 # $Klast = index of last token on line
8713 my ( $Kfirst, $Klast );
8715 my $Knext_beg = $Knext; # this will be $Kfirst if we find tokens
8717 # Optimization: Although the actual K indexes may be completely
8718 # changed after respacing, the number of tokens on any given line
8719 # will often be nearly unchanged. So we will see if we can start
8720 # our search by guessing that the new line has the same number
8721 # of tokens as the old line.
8722 my $Knext_guess = $Knext + $Kdiff_old;
8723 if ( $Knext_guess > $Knext
8724 && $Knext_guess < $Kmax
8725 && $rLL->[$Knext_guess]->[_LINE_INDEX_] <= $iline )
8728 # the guess is good, so we can start our search here
8729 $Knext = $Knext_guess + 1;
8732 while ($Knext <= $Kmax
8733 && $rLL->[$Knext]->[_LINE_INDEX_] <= $iline )
8738 if ( $Knext > $Knext_beg ) {
8740 $Klast = $Knext - 1;
8742 # Delete any terminal blank token
8743 if ( $rLL->[$Klast]->[_TYPE_] eq 'b' ) { $Klast -= 1 }
8745 if ( $Klast < $Knext_beg ) {
8750 $Kfirst = $Knext_beg;
8752 # Save ranges of non-comment code. This will be used by
8753 # sub keep_old_line_breaks.
8754 if ( $rLL->[$Kfirst]->[_TYPE_] ne '#' ) {
8755 push @Krange_code_without_comments, [ $Kfirst, $Klast ];
8758 # Only save ending K indexes of code types which are blank
8759 # or 'VER'. These will be used for a convergence check.
8760 # See related code in sub 'convey_batch_to_vertical_aligner'
8761 my $CODE_type = $line_of_tokens->{_code_type};
8763 || $CODE_type eq 'VER' )
8765 push @Klast_valign_code, $Klast;
8770 # It is only safe to trim the actual line text if the input
8771 # line had a terminal blank token. Otherwise, we may be
8773 if ( $line_of_tokens->{_ended_in_blank_token} ) {
8774 $line_of_tokens->{_line_text} =~ s/\s+$//;
8776 $line_of_tokens->{_rK_range} = [ $Kfirst, $Klast ];
8778 # Deleting semicolons can create new empty code lines
8779 # which should be marked as blank
8780 if ( !defined($Kfirst) ) {
8781 my $CODE_type = $line_of_tokens->{_code_type};
8782 if ( !$CODE_type ) {
8783 $line_of_tokens->{_code_type} = 'BL';
8788 #---------------------------------------------------
8789 # save indexes of all lines with a 'q' at either end
8790 # for later use by sub find_multiline_qw
8791 #---------------------------------------------------
8792 if ( $rLL->[$Kfirst]->[_TYPE_] eq 'q'
8793 || $rLL->[$Klast]->[_TYPE_] eq 'q' )
8795 push @{$rqw_lines}, $iline;
8801 # There shouldn't be any nodes beyond the last one. This routine is
8802 # relinking lines and tokens after the tokens have been respaced. A fault
8803 # here indicates some kind of bug has been introduced into the above loops.
8804 # There is not good way to keep going; we better stop here.
8805 if ( $Knext <= $Kmax ) {
8807 "unexpected tokens at end of file when reconstructing lines");
8809 return ( $severe_error, $rqw_lines );
8811 $self->[_rKrange_code_without_comments_] = \@Krange_code_without_comments;
8813 # Setup the convergence test in the FileWriter based on line-ending indexes
8814 my $file_writer_object = $self->[_file_writer_object_];
8815 $file_writer_object->setup_convergence_test( \@Klast_valign_code );
8817 # Mark essential old breakpoints if combination -iob -lp is used. These
8818 # two options do not work well together, but we can avoid turning -iob off
8819 # by ignoring -iob at certain essential line breaks.
8820 # Fixes cases b1021 b1023 b1034 b1048 b1049 b1050 b1056 b1058
8821 if ( $rOpts_ignore_old_breakpoints && $rOpts_line_up_parentheses ) {
8822 my %is_assignment_or_fat_comma = %is_assignment;
8823 $is_assignment_or_fat_comma{'=>'} = 1;
8824 my $ris_essential_old_breakpoint =
8825 $self->[_ris_essential_old_breakpoint_];
8826 my ( $Kfirst, $Klast );
8827 foreach my $line_of_tokens ( @{$rlines} ) {
8828 my $line_type = $line_of_tokens->{_line_type};
8829 if ( $line_type ne 'CODE' ) {
8830 ( $Kfirst, $Klast ) = ( undef, undef );
8833 my ( $Kfirst_prev, $Klast_prev ) = ( $Kfirst, $Klast );
8834 ( $Kfirst, $Klast ) = @{ $line_of_tokens->{_rK_range} };
8836 next unless defined($Klast_prev);
8837 next unless defined($Kfirst);
8838 my $type_last = $rLL->[$Klast_prev]->[_TOKEN_];
8839 my $type_first = $rLL->[$Kfirst]->[_TOKEN_];
8841 unless ( $is_assignment_or_fat_comma{$type_last}
8842 || $is_assignment_or_fat_comma{$type_first} );
8843 $ris_essential_old_breakpoint->{$Klast_prev} = 1;
8846 return ( $severe_error, $rqw_lines );
8847 } ## end sub resync_lines_and_tokens
8849 sub keep_old_line_breaks {
8851 # Called once per file to find and mark any old line breaks which
8852 # should be kept. We will be translating the input hashes into
8855 # A flag is set as follows:
8856 # = 1 make a hard break (flush the current batch)
8857 # best for something like leading commas (-kbb=',')
8858 # = 2 make a soft break (keep building current batch)
8859 # best for something like leading ->
8863 my $rLL = $self->[_rLL_];
8864 my $rKrange_code_without_comments =
8865 $self->[_rKrange_code_without_comments_];
8866 my $rbreak_before_Kfirst = $self->[_rbreak_before_Kfirst_];
8867 my $rbreak_after_Klast = $self->[_rbreak_after_Klast_];
8868 my $rwant_container_open = $self->[_rwant_container_open_];
8869 my $K_opening_container = $self->[_K_opening_container_];
8870 my $ris_broken_container = $self->[_ris_broken_container_];
8871 my $ris_list_by_seqno = $self->[_ris_list_by_seqno_];
8873 # This code moved here from sub break_lists to fix b1120
8874 if ( $rOpts->{'break-at-old-method-breakpoints'} ) {
8875 foreach my $item ( @{$rKrange_code_without_comments} ) {
8876 my ( $Kfirst, $Klast ) = @{$item};
8877 my $type = $rLL->[$Kfirst]->[_TYPE_];
8878 my $token = $rLL->[$Kfirst]->[_TOKEN_];
8880 # leading '->' use a value of 2 which causes a soft
8881 # break rather than a hard break
8882 if ( $type eq '->' ) {
8883 $rbreak_before_Kfirst->{$Kfirst} = 2;
8886 # leading ')->' use a special flag to insure that both
8887 # opening and closing parens get opened
8888 # Fix for b1120: only for parens, not braces
8889 elsif ( $token eq ')' ) {
8890 my $Kn = $self->K_next_nonblank($Kfirst);
8892 unless ( defined($Kn)
8894 && $rLL->[$Kn]->[_TYPE_] eq '->' );
8895 my $seqno = $rLL->[$Kfirst]->[_TYPE_SEQUENCE_];
8896 next unless ($seqno);
8898 # Note: in previous versions there was a fix here to avoid
8899 # instability between conflicting -bom and -pvt or -pvtc flags.
8900 # The fix skipped -bom for a small line difference. But this
8901 # was troublesome, and instead the fix has been moved to
8902 # sub set_vertical_tightness_flags where priority is given to
8903 # the -bom flag over -pvt and -pvtc flags. Both opening and
8904 # closing paren flags are involved because even though -bom only
8905 # requests breaking before the closing paren, automated logic
8906 # opens the opening paren when the closing paren opens.
8907 # Relevant cases are b977, b1215, b1270, b1303
8909 $rwant_container_open->{$seqno} = 1;
8914 return unless ( %keep_break_before_type || %keep_break_after_type );
8916 my $check_for_break = sub {
8917 my ( $KK, $rkeep_break_hash, $rbreak_hash ) = @_;
8918 my $seqno = $rLL->[$KK]->[_TYPE_SEQUENCE_];
8920 # non-container tokens use the type as the key
8922 my $type = $rLL->[$KK]->[_TYPE_];
8923 if ( $rkeep_break_hash->{$type} ) {
8924 $rbreak_hash->{$KK} = 1;
8928 # container tokens use the token as the key
8930 my $token = $rLL->[$KK]->[_TOKEN_];
8931 my $flag = $rkeep_break_hash->{$token};
8934 my $match = $flag eq '1' || $flag eq '*';
8936 # check for special matching codes
8938 if ( $token eq '(' || $token eq ')' ) {
8940 $self->match_paren_control_flag( $seqno, $flag );
8942 elsif ( $token eq '{' || $token eq '}' ) {
8944 # These tentative codes 'b' and 'B' for brace types are
8945 # placeholders for possible future brace types. They
8946 # are not documented and may be changed.
8948 $self->[_rblock_type_of_seqno_]->{$seqno};
8949 if ( $flag eq 'b' ) { $match = $block_type }
8950 elsif ( $flag eq 'B' ) { $match = !$block_type }
8952 # unknown code - no match
8956 $rbreak_hash->{$KK} = 1 if ($match);
8961 foreach my $item ( @{$rKrange_code_without_comments} ) {
8962 my ( $Kfirst, $Klast ) = @{$item};
8964 $Kfirst, \%keep_break_before_type, $rbreak_before_Kfirst
8967 $Klast, \%keep_break_after_type, $rbreak_after_Klast
8971 } ## end sub keep_old_line_breaks
8973 sub weld_containers {
8975 # Called once per file to do any welding operations requested by --weld*
8979 # This count is used to eliminate needless calls for weld checks elsewhere
8980 $total_weld_count = 0;
8982 return if ( $rOpts->{'indent-only'} );
8983 return unless ($rOpts_add_newlines);
8985 # Important: sub 'weld_cuddled_blocks' must be called before
8986 # sub 'weld_nested_containers'. This is because the cuddled option needs to
8987 # use the original _LEVEL_ values of containers, but the weld nested
8988 # containers changes _LEVEL_ of welded containers.
8990 # Here is a good test case to be sure that both cuddling and welding
8991 # are working and not interfering with each other: <<snippets/ce_wn1.in>>
8995 # if ($BOLD_MATH) { (
8996 # $labels, $comment,
8997 # join( '', '<B>', &make_math( $mode, '', '', $_ ), '</B>' )
8999 # &process_math_in_latex( $mode, $math_style, $slevel, "\\mbox{$text}" ),
9003 $self->weld_cuddled_blocks() if ( %{$rcuddled_block_types} );
9005 if ( $rOpts->{'weld-nested-containers'} ) {
9007 $self->weld_nested_containers();
9009 $self->weld_nested_quotes();
9012 #-------------------------------------------------------------
9013 # All welding is done. Finish setting up weld data structures.
9014 #-------------------------------------------------------------
9016 my $rLL = $self->[_rLL_];
9017 my $rK_weld_left = $self->[_rK_weld_left_];
9018 my $rK_weld_right = $self->[_rK_weld_right_];
9019 my $rweld_len_right_at_K = $self->[_rweld_len_right_at_K_];
9022 my @keys = keys %{$rK_weld_right};
9023 $total_weld_count = @keys;
9025 # First pass to process binary welds.
9026 # This loop is processed in unsorted order for efficiency.
9027 foreach my $Kstart (@keys) {
9028 my $Kend = $rK_weld_right->{$Kstart};
9030 # An error here would be due to an incorrect initialization introduced
9031 # in one of the above weld routines, like sub weld_nested.
9032 if ( $Kend <= $Kstart ) {
9033 Fault("Bad weld link: Kend=$Kend <= Kstart=$Kstart\n")
9038 # Set weld values for all tokens this welded pair
9039 foreach ( $Kstart + 1 .. $Kend ) {
9040 $rK_weld_left->{$_} = $Kstart;
9042 foreach my $Kx ( $Kstart .. $Kend - 1 ) {
9043 $rK_weld_right->{$Kx} = $Kend;
9044 $rweld_len_right_at_K->{$Kx} =
9045 $rLL->[$Kend]->[_CUMULATIVE_LENGTH_] -
9046 $rLL->[$Kx]->[_CUMULATIVE_LENGTH_];
9049 # Remember the leftmost index of welds which continue to the right
9050 if ( defined( $rK_weld_right->{$Kend} )
9051 && !defined( $rK_weld_left->{$Kstart} ) )
9053 push @K_multi_weld, $Kstart;
9057 # Second pass to process chains of welds (these are rare).
9058 # This has to be processed in sorted order.
9059 if (@K_multi_weld) {
9061 foreach my $Kstart ( sort { $a <=> $b } @K_multi_weld ) {
9063 # Skip any interior K which was originally missing a left link
9064 next if ( $Kstart <= $Kend );
9066 # Find the end of this chain
9067 $Kend = $rK_weld_right->{$Kstart};
9068 my $Knext = $rK_weld_right->{$Kend};
9069 while ( defined($Knext) ) {
9071 $Knext = $rK_weld_right->{$Kend};
9074 # Set weld values this chain
9075 foreach ( $Kstart + 1 .. $Kend ) {
9076 $rK_weld_left->{$_} = $Kstart;
9078 foreach my $Kx ( $Kstart .. $Kend - 1 ) {
9079 $rK_weld_right->{$Kx} = $Kend;
9080 $rweld_len_right_at_K->{$Kx} =
9081 $rLL->[$Kend]->[_CUMULATIVE_LENGTH_] -
9082 $rLL->[$Kx]->[_CUMULATIVE_LENGTH_];
9088 } ## end sub weld_containers
9090 sub cumulative_length_before_K {
9091 my ( $self, $KK ) = @_;
9092 my $rLL = $self->[_rLL_];
9093 return ( $KK <= 0 ) ? 0 : $rLL->[ $KK - 1 ]->[_CUMULATIVE_LENGTH_];
9096 sub weld_cuddled_blocks {
9099 # Called once per file to handle cuddled formatting
9101 my $rK_weld_left = $self->[_rK_weld_left_];
9102 my $rK_weld_right = $self->[_rK_weld_right_];
9103 my $rblock_type_of_seqno = $self->[_rblock_type_of_seqno_];
9105 # This routine implements the -cb flag by finding the appropriate
9106 # closing and opening block braces and welding them together.
9107 return unless ( %{$rcuddled_block_types} );
9109 my $rLL = $self->[_rLL_];
9110 return unless ( defined($rLL) && @{$rLL} );
9112 my $rbreak_container = $self->[_rbreak_container_];
9113 my $ris_cuddled_closing_brace = $self->[_ris_cuddled_closing_brace_];
9114 my $K_opening_container = $self->[_K_opening_container_];
9115 my $K_closing_container = $self->[_K_closing_container_];
9117 my $is_broken_block = sub {
9119 # a block is broken if the input line numbers of the braces differ
9120 # we can only cuddle between broken blocks
9122 my $K_opening = $K_opening_container->{$seqno};
9123 return unless ( defined($K_opening) );
9124 my $K_closing = $K_closing_container->{$seqno};
9125 return unless ( defined($K_closing) );
9126 return $rbreak_container->{$seqno}
9127 || $rLL->[$K_closing]->[_LINE_INDEX_] !=
9128 $rLL->[$K_opening]->[_LINE_INDEX_];
9131 # A stack to remember open chains at all levels: This is a hash rather than
9132 # an array for safety because negative levels can occur in files with
9133 # errors. This allows us to keep processing with negative levels.
9134 # $in_chain{$level} = [$chain_type, $type_sequence];
9136 my $CBO = $rOpts->{'cuddled-break-option'};
9138 # loop over structure items to find cuddled pairs
9140 my $KNEXT = $self->[_K_first_seq_item_];
9141 while ( defined($KNEXT) ) {
9143 $KNEXT = $rLL->[$KNEXT]->[_KNEXT_SEQ_ITEM_];
9144 my $rtoken_vars = $rLL->[$KK];
9145 my $type_sequence = $rtoken_vars->[_TYPE_SEQUENCE_];
9146 if ( !$type_sequence ) {
9147 next if ( $KK == 0 ); # first token in file may not be container
9149 # A fault here implies that an error was made in the little loop at
9150 # the bottom of sub 'respace_tokens' which set the values of
9151 # _KNEXT_SEQ_ITEM_. Or an error has been introduced in the
9152 # loop control lines above.
9153 Fault("sequence = $type_sequence not defined at K=$KK")
9158 # NOTE: we must use the original levels here. They can get changed
9159 # by sub 'weld_nested_containers', so this routine must be called
9160 # before sub 'weld_nested_containers'.
9161 my $last_level = $level;
9162 $level = $rtoken_vars->[_LEVEL_];
9164 if ( $level < $last_level ) { $in_chain{$last_level} = undef }
9165 elsif ( $level > $last_level ) { $in_chain{$level} = undef }
9167 # We are only looking at code blocks
9168 my $token = $rtoken_vars->[_TOKEN_];
9169 my $type = $rtoken_vars->[_TYPE_];
9170 next unless ( $type eq $token );
9172 if ( $token eq '{' ) {
9174 my $block_type = $rblock_type_of_seqno->{$type_sequence};
9175 if ( !$block_type ) {
9177 # patch for unrecognized block types which may not be labeled
9178 my $Kp = $self->K_previous_nonblank($KK);
9179 while ( $Kp && $rLL->[$Kp]->[_TYPE_] eq '#' ) {
9180 $Kp = $self->K_previous_nonblank($Kp);
9183 $block_type = $rLL->[$Kp]->[_TOKEN_];
9185 if ( $in_chain{$level} ) {
9187 # we are in a chain and are at an opening block brace.
9188 # See if we are welding this opening brace with the previous
9189 # block brace. Get their identification numbers:
9190 my $closing_seqno = $in_chain{$level}->[1];
9191 my $opening_seqno = $type_sequence;
9193 # The preceding block must be on multiple lines so that its
9194 # closing brace will start a new line.
9195 if ( !$is_broken_block->($closing_seqno) ) {
9196 next unless ( $CBO == 2 );
9197 $rbreak_container->{$closing_seqno} = 1;
9200 # We can weld the closing brace to its following word ..
9201 my $Ko = $K_closing_container->{$closing_seqno};
9203 if ( defined($Ko) ) {
9204 $Kon = $self->K_next_nonblank($Ko);
9207 # ..unless it is a comment
9208 if ( defined($Kon) && $rLL->[$Kon]->[_TYPE_] ne '#' ) {
9210 # OK to weld these two tokens...
9211 $rK_weld_right->{$Ko} = $Kon;
9212 $rK_weld_left->{$Kon} = $Ko;
9214 # Set flag that we want to break the next container
9215 # so that the cuddled line is balanced.
9216 $rbreak_container->{$opening_seqno} = 1
9219 # Remember which braces are cuddled.
9220 # The closing brace is used to set adjusted indentations.
9221 # The opening brace is not yet used but might eventually
9222 # be needed in setting adjusted indentation.
9223 $ris_cuddled_closing_brace->{$closing_seqno} = 1;
9230 # We are not in a chain. Start a new chain if we see the
9231 # starting block type.
9232 if ( $rcuddled_block_types->{$block_type} ) {
9233 $in_chain{$level} = [ $block_type, $type_sequence ];
9237 $in_chain{$level} = [ $block_type, $type_sequence ];
9241 elsif ( $token eq '}' ) {
9242 if ( $in_chain{$level} ) {
9244 # We are in a chain at a closing brace. See if this chain
9246 my $Knn = $self->K_next_code($KK);
9249 my $chain_type = $in_chain{$level}->[0];
9250 my $next_nonblank_token = $rLL->[$Knn]->[_TOKEN_];
9252 $rcuddled_block_types->{$chain_type}->{$next_nonblank_token}
9256 # Note that we do not weld yet because we must wait until
9257 # we we are sure that an opening brace for this follows.
9258 $in_chain{$level}->[1] = $type_sequence;
9260 else { $in_chain{$level} = undef }
9265 } ## end sub weld_cuddled_blocks
9267 sub find_nested_pairs {
9270 # This routine is called once per file to do preliminary work needed for
9271 # the --weld-nested option. This information is also needed for adding
9274 my $rLL = $self->[_rLL_];
9275 return unless ( defined($rLL) && @{$rLL} );
9278 my $K_opening_container = $self->[_K_opening_container_];
9279 my $K_closing_container = $self->[_K_closing_container_];
9280 my $rblock_type_of_seqno = $self->[_rblock_type_of_seqno_];
9282 # We define an array of pairs of nested containers
9285 # Names of calling routines can either be marked as 'i' or 'w',
9286 # and they may invoke a sub call with an '->'. We will consider
9287 # any consecutive string of such types as a single unit when making
9288 # weld decisions. We also allow a leading !
9289 my $is_name_type = {
9297 # Loop over all closing container tokens
9298 foreach my $inner_seqno ( keys %{$K_closing_container} ) {
9299 my $K_inner_closing = $K_closing_container->{$inner_seqno};
9301 # See if it is immediately followed by another, outer closing token
9302 my $K_outer_closing = $K_inner_closing + 1;
9303 $K_outer_closing += 1
9304 if ( $K_outer_closing < $Num
9305 && $rLL->[$K_outer_closing]->[_TYPE_] eq 'b' );
9307 next unless ( $K_outer_closing < $Num );
9308 my $outer_seqno = $rLL->[$K_outer_closing]->[_TYPE_SEQUENCE_];
9309 next unless ($outer_seqno);
9310 my $token_outer_closing = $rLL->[$K_outer_closing]->[_TOKEN_];
9311 next unless ( $is_closing_token{$token_outer_closing} );
9313 # Simple filter: No commas or semicolons in the outer container
9314 my $rtype_count = $self->[_rtype_count_by_seqno_]->{$outer_seqno};
9316 next if ( $rtype_count->{','} || $rtype_count->{';'} );
9319 # Now we have to check the opening tokens.
9320 my $K_outer_opening = $K_opening_container->{$outer_seqno};
9321 my $K_inner_opening = $K_opening_container->{$inner_seqno};
9322 next unless defined($K_outer_opening) && defined($K_inner_opening);
9324 my $inner_blocktype = $rblock_type_of_seqno->{$inner_seqno};
9325 my $outer_blocktype = $rblock_type_of_seqno->{$outer_seqno};
9327 # Verify that the inner opening token is the next container after the
9328 # outer opening token.
9329 my $K_io_check = $rLL->[$K_outer_opening]->[_KNEXT_SEQ_ITEM_];
9330 next unless defined($K_io_check);
9331 if ( $K_io_check != $K_inner_opening ) {
9333 # The inner opening container does not immediately follow the outer
9334 # opening container, but we may still allow a weld if they are
9335 # separated by a sub signature. For example, we may have something
9336 # like this, where $K_io_check may be at the first 'x' instead of
9337 # 'io'. So we need to hop over the signature and see if we arrive
9342 # $obj->then( sub ( $code ) {
9344 # return $c->render(text => '', status => $code);
9349 next if ( !$inner_blocktype || $inner_blocktype ne 'sub' );
9350 next if $rLL->[$K_io_check]->[_TOKEN_] ne '(';
9351 my $seqno_signature = $rLL->[$K_io_check]->[_TYPE_SEQUENCE_];
9352 next unless defined($seqno_signature);
9353 my $K_signature_closing = $K_closing_container->{$seqno_signature};
9354 next unless defined($K_signature_closing);
9355 my $K_test = $rLL->[$K_signature_closing]->[_KNEXT_SEQ_ITEM_];
9357 unless ( defined($K_test) && $K_test == $K_inner_opening );
9359 # OK, we have arrived at 'io' in the above diagram. We should put
9360 # a limit on the length or complexity of the signature here. There
9361 # is no perfect way to do this, one way is to put a limit on token
9362 # count. For consistency with older versions, we should allow a
9363 # signature with a single variable to weld, but not with
9364 # multiple variables. A single variable as in 'sub ($code) {' can
9365 # have a $Kdiff of 2 to 4, depending on spacing.
9367 # But two variables like 'sub ($v1,$v2) {' can have a diff of 4 to
9368 # 7, depending on spacing. So to keep formatting consistent with
9369 # previous versions, we will also avoid welding if there is a comma
9372 my $Kdiff = $K_signature_closing - $K_io_check;
9373 next if ( $Kdiff > 4 );
9375 # backup comma count test; but we cannot get here with Kdiff<=4
9376 my $rtc = $self->[_rtype_count_by_seqno_]->{$seqno_signature};
9377 next if ( $rtc && $rtc->{','} );
9380 # Yes .. this is a possible nesting pair.
9381 # They can be separated by a small amount.
9382 my $K_diff = $K_inner_opening - $K_outer_opening;
9384 # Count the number of nonblank characters separating them.
9385 # Note: the $nonblank_count includes the inner opening container
9386 # but not the outer opening container, so it will be >= 1.
9387 if ( $K_diff < 0 ) { next } # Shouldn't happen
9388 my $nonblank_count = 0;
9392 # Here is an example of a long identifier chain which counts as a
9393 # single nonblank here (this spans about 10 K indexes):
9394 # if ( !Boucherot::SetOfConnections->new->handler->execute(
9397 my $Kn_first = $K_outer_opening;
9398 my $Kn_last_nonblank;
9401 foreach my $Kn ( $K_outer_opening + 1 .. $K_inner_opening ) {
9402 next if ( $rLL->[$Kn]->[_TYPE_] eq 'b' );
9403 if ( !$nonblank_count ) { $Kn_first = $Kn }
9404 if ( $Kn eq $K_inner_opening ) { $nonblank_count++; last; }
9405 $Kn_last_nonblank = $Kn;
9407 # skip chain of identifier tokens
9408 my $last_type = $type;
9409 my $last_is_name = $is_name;
9410 $type = $rLL->[$Kn]->[_TYPE_];
9411 if ( $type eq '#' ) { $saw_comment = 1; last }
9412 $is_name = $is_name_type->{$type};
9413 next if ( $is_name && $last_is_name );
9415 # do not count a possible leading - of bareword hash key
9416 next if ( $type eq 'm' && !$last_type );
9419 last if ( $nonblank_count > 2 );
9422 # Do not weld across a comment .. fix for c058.
9423 next if ($saw_comment);
9425 # Patch for b1104: do not weld to a paren preceded by sort/map/grep
9426 # because the special line break rules may cause a blinking state
9427 if ( defined($Kn_last_nonblank)
9428 && $rLL->[$K_inner_opening]->[_TOKEN_] eq '('
9429 && $rLL->[$Kn_last_nonblank]->[_TYPE_] eq 'k' )
9431 my $token = $rLL->[$Kn_last_nonblank]->[_TOKEN_];
9433 # Turn off welding at sort/map/grep (
9434 if ( $is_sort_map_grep{$token} ) { $nonblank_count = 10 }
9437 my $token_oo = $rLL->[$K_outer_opening]->[_TOKEN_];
9441 # 1: adjacent opening containers, like: do {{
9442 $nonblank_count == 1
9444 # 2. anonymous sub + prototype or sig: )->then( sub ($code) {
9445 # ... but it seems best not to stack two structural blocks, like
9447 # sub make_anon_with_my_sub { sub {
9448 # because it probably hides the structure a little too much.
9449 || ( $inner_blocktype
9450 && $inner_blocktype eq 'sub'
9451 && $rLL->[$Kn_first]->[_TOKEN_] eq 'sub'
9452 && !$outer_blocktype )
9454 # 3. short item following opening paren, like: fun( yyy (
9455 || $nonblank_count == 2 && $token_oo eq '('
9457 # 4. weld around fat commas, if requested (git #108), such as
9458 # elf->call_method( method_name_foo => {
9460 && $nonblank_count <= 3
9461 && %weld_fat_comma_rules
9462 && $weld_fat_comma_rules{$token_oo} )
9466 [ $inner_seqno, $outer_seqno, $K_inner_closing ];
9471 # The weld routine expects the pairs in order in the form
9472 # [$seqno_inner, $seqno_outer]
9473 # And they must be in the same order as the inner closing tokens
9474 # (otherwise, welds of three or more adjacent tokens will not work). The K
9475 # value of this inner closing token has temporarily been stored for
9479 # Drop the K index after sorting (it would cause trouble downstream)
9480 map { [ $_->[0], $_->[1] ] }
9482 # Sort on the K values
9483 sort { $a->[2] <=> $b->[2] } @nested_pairs;
9485 return \@nested_pairs;
9486 } ## end sub find_nested_pairs
9488 sub match_paren_control_flag {
9490 # Decide if this paren is excluded by user request:
9491 # undef matches no parens
9492 # '*' matches all parens
9493 # 'k' matches only if the previous nonblank token is a perl builtin
9494 # keyword (such as 'if', 'while'),
9495 # 'K' matches if 'k' does not, meaning if the previous token is not a
9497 # 'f' matches if the previous token is a function other than a keyword.
9498 # 'F' matches if 'f' does not.
9499 # 'w' matches if either 'k' or 'f' match.
9500 # 'W' matches if 'w' does not.
9501 my ( $self, $seqno, $flag, $rLL ) = @_;
9504 # $seqno = sequence number of the container (should be paren)
9505 # $flag = the flag which defines what matches
9506 # $rLL = an optional alternate token list needed for respace operations
9507 $rLL = $self->[_rLL_] unless ( defined($rLL) );
9509 return 0 unless ( defined($flag) );
9510 return 0 if $flag eq '0';
9511 return 1 if $flag eq '1';
9512 return 1 if $flag eq '*';
9513 return 0 unless ($seqno);
9514 my $K_opening = $self->[_K_opening_container_]->{$seqno};
9515 return unless ( defined($K_opening) );
9517 my ( $is_f, $is_k, $is_w );
9518 my $Kp = $self->K_previous_nonblank( $K_opening, $rLL );
9519 if ( defined($Kp) ) {
9520 my $type_p = $rLL->[$Kp]->[_TYPE_];
9523 $is_k = $type_p eq 'k';
9526 $is_f = $self->[_ris_function_call_paren_]->{$seqno};
9528 # either keyword or function call?
9529 $is_w = $is_k || $is_f;
9532 if ( $flag eq 'k' ) { $match = $is_k }
9533 elsif ( $flag eq 'K' ) { $match = !$is_k }
9534 elsif ( $flag eq 'f' ) { $match = $is_f }
9535 elsif ( $flag eq 'F' ) { $match = !$is_f }
9536 elsif ( $flag eq 'w' ) { $match = $is_w }
9537 elsif ( $flag eq 'W' ) { $match = !$is_w }
9539 } ## end sub match_paren_control_flag
9541 sub is_excluded_weld {
9543 # decide if this weld is excluded by user request
9544 my ( $self, $KK, $is_leading ) = @_;
9545 my $rLL = $self->[_rLL_];
9546 my $rtoken_vars = $rLL->[$KK];
9547 my $token = $rtoken_vars->[_TOKEN_];
9548 my $rflags = $weld_nested_exclusion_rules{$token};
9549 return 0 unless ( defined($rflags) );
9550 my $flag = $is_leading ? $rflags->[0] : $rflags->[1];
9551 return 0 unless ( defined($flag) );
9552 return 1 if $flag eq '*';
9553 my $seqno = $rtoken_vars->[_TYPE_SEQUENCE_];
9554 return $self->match_paren_control_flag( $seqno, $flag );
9555 } ## end sub is_excluded_weld
9557 # hashes to simplify welding logic
9558 my %type_ok_after_bareword;
9559 my %has_tight_paren;
9563 # types needed for welding RULE 6
9564 my @q = qw# => -> { ( [ #;
9565 @type_ok_after_bareword{@q} = (1) x scalar(@q);
9567 # these types do not 'like' to be separated from a following paren
9568 @q = qw(w i q Q G C Z U);
9569 @{has_tight_paren}{@q} = (1) x scalar(@q);
9572 use constant DEBUG_WELD => 0;
9574 sub setup_new_weld_measurements {
9576 # Define quantities to check for excess line lengths when welded.
9577 # Called by sub 'weld_nested_containers' and sub 'weld_nested_quotes'
9579 my ( $self, $Kouter_opening, $Kinner_opening ) = @_;
9581 # Given indexes of outer and inner opening containers to be welded:
9582 # $Kouter_opening, $Kinner_opening
9584 # Returns these variables:
9585 # $new_weld_ok = true (new weld ok) or false (do not start new weld)
9586 # $starting_indent = starting indentation
9587 # $starting_lentot = starting cumulative length
9588 # $msg = diagnostic message for debugging
9590 my $rLL = $self->[_rLL_];
9591 my $rlines = $self->[_rlines_];
9595 my $starting_lentot;
9596 my $maximum_text_length;
9597 my $msg = EMPTY_STRING;
9599 my $iline_oo = $rLL->[$Kouter_opening]->[_LINE_INDEX_];
9600 my $rK_range = $rlines->[$iline_oo]->{_rK_range};
9601 my ( $Kfirst, $Klast ) = @{$rK_range};
9603 #-------------------------------------------------------------------------
9604 # We now define a reference index, '$Kref', from which to start measuring
9605 # This choice turns out to be critical for keeping welds stable during
9606 # iterations, so we go through a number of STEPS...
9607 #-------------------------------------------------------------------------
9609 # STEP 1: Our starting guess is to use measure from the first token of the
9610 # current line. This is usually a good guess.
9613 # STEP 2: See if we should go back a little farther
9614 my $Kprev = $self->K_previous_nonblank($Kfirst);
9615 if ( defined($Kprev) ) {
9617 # Avoid measuring from between an opening paren and a previous token
9618 # which should stay close to it ... fixes b1185
9619 my $token_oo = $rLL->[$Kouter_opening]->[_TOKEN_];
9620 my $type_prev = $rLL->[$Kprev]->[_TYPE_];
9621 if ( $Kouter_opening == $Kfirst
9623 && $has_tight_paren{$type_prev} )
9628 # Back up and count length from a token like '=' or '=>' if -lp
9629 # is used (this fixes b520)
9630 # ...or if a break is wanted before there
9631 elsif ($rOpts_line_up_parentheses
9632 || $want_break_before{$type_prev} )
9635 # If there are other sequence items between the start of this line
9636 # and the opening token in question, then do not include tokens on
9637 # the previous line in length calculations. This check added to
9638 # fix case b1174 which had a '?' on the line
9639 my $no_previous_seq_item = $Kref == $Kouter_opening
9640 || $rLL->[$Kref]->[_KNEXT_SEQ_ITEM_] == $Kouter_opening;
9642 if ( $no_previous_seq_item
9643 && substr( $type_prev, 0, 1 ) eq '=' )
9647 # Fix for b1144 and b1112: backup to the first nonblank
9648 # character before the =>, or to the start of its line.
9649 if ( $type_prev eq '=>' ) {
9650 my $iline_prev = $rLL->[$Kprev]->[_LINE_INDEX_];
9651 my $rK_range_prev = $rlines->[$iline_prev]->{_rK_range};
9652 my ( $Kfirst_prev, $Klast_prev ) = @{$rK_range_prev};
9653 foreach my $KK ( reverse( $Kfirst_prev .. $Kref - 1 ) ) {
9654 next if ( $rLL->[$KK]->[_TYPE_] eq 'b' );
9663 # STEP 3: Now look ahead for a ternary and, if found, use it.
9664 # This fixes case b1182.
9665 # Also look for a ')' at the same level and, if found, use it.
9666 # This fixes case b1224.
9667 if ( $Kref < $Kouter_opening ) {
9668 my $Knext = $rLL->[$Kref]->[_KNEXT_SEQ_ITEM_];
9669 my $level_oo = $rLL->[$Kouter_opening]->[_LEVEL_];
9670 while ( $Knext < $Kouter_opening ) {
9671 if ( $rLL->[$Knext]->[_LEVEL_] == $level_oo ) {
9672 if ( $is_ternary{ $rLL->[$Knext]->[_TYPE_] }
9673 || $rLL->[$Knext]->[_TOKEN_] eq ')' )
9679 $Knext = $rLL->[$Knext]->[_KNEXT_SEQ_ITEM_];
9683 # Define the starting measurements we will need
9685 $Kref <= 0 ? 0 : $rLL->[ $Kref - 1 ]->[_CUMULATIVE_LENGTH_];
9686 $starting_level = $rLL->[$Kref]->[_LEVEL_];
9687 $starting_ci = $rLL->[$Kref]->[_CI_LEVEL_];
9689 $maximum_text_length = $maximum_text_length_at_level[$starting_level] -
9690 $starting_ci * $rOpts_continuation_indentation;
9692 # STEP 4: Switch to using the outer opening token as the reference
9693 # point if a line break before it would make a longer line.
9694 # Fixes case b1055 and is also an alternate fix for b1065.
9695 my $starting_level_oo = $rLL->[$Kouter_opening]->[_LEVEL_];
9696 if ( $Kref < $Kouter_opening ) {
9697 my $starting_ci_oo = $rLL->[$Kouter_opening]->[_CI_LEVEL_];
9698 my $lentot_oo = $rLL->[ $Kouter_opening - 1 ]->[_CUMULATIVE_LENGTH_];
9699 my $maximum_text_length_oo =
9700 $maximum_text_length_at_level[$starting_level_oo] -
9701 $starting_ci_oo * $rOpts_continuation_indentation;
9703 # The excess length to any cumulative length K = lenK is either
9704 # $excess = $lenk - ($lentot + $maximum_text_length), or
9705 # $excess = $lenk - ($lentot_oo + $maximum_text_length_oo),
9706 # so the worst case (maximum excess) corresponds to the configuration
9707 # with minimum value of the sum: $lentot + $maximum_text_length
9708 if ( $lentot_oo + $maximum_text_length_oo <
9709 $starting_lentot + $maximum_text_length )
9711 $Kref = $Kouter_opening;
9712 $starting_level = $starting_level_oo;
9713 $starting_ci = $starting_ci_oo;
9714 $starting_lentot = $lentot_oo;
9715 $maximum_text_length = $maximum_text_length_oo;
9719 my $new_weld_ok = 1;
9721 # STEP 5, fix b1020: Avoid problem areas with the -wn -lp combination. The
9722 # combination -wn -lp -dws -naws does not work well and can cause blinkers.
9723 # It will probably only occur in stress testing. For this situation we
9724 # will only start a new weld if we start at a 'good' location.
9725 # - Added 'if' to fix case b1032.
9726 # - Require blank before certain previous characters to fix b1111.
9727 # - Add ';' to fix case b1139
9728 # - Convert from '$ok_to_weld' to '$new_weld_ok' to fix b1162.
9729 # - relaxed constraints for b1227
9730 # - added skip if type is 'q' for b1349 and b1350 b1351 b1352 b1353
9732 && $rOpts_line_up_parentheses
9733 && $rOpts_delete_old_whitespace
9734 && !$rOpts_add_whitespace
9735 && $rLL->[$Kinner_opening]->[_TYPE_] ne 'q'
9736 && defined($Kprev) )
9738 my $type_first = $rLL->[$Kfirst]->[_TYPE_];
9739 my $token_first = $rLL->[$Kfirst]->[_TOKEN_];
9740 my $type_prev = $rLL->[$Kprev]->[_TYPE_];
9742 if ( $Kprev >= 0 ) { $type_pp = $rLL->[ $Kprev - 1 ]->[_TYPE_] }
9744 $type_prev =~ /^[\,\.\;]/
9745 || $type_prev =~ /^[=\{\[\(\L]/
9746 && ( $type_pp eq 'b' || $type_pp eq '}' || $type_first eq 'k' )
9747 || $type_first =~ /^[=\,\.\;\{\[\(\L]/
9748 || $type_first eq '||'
9751 && ( $token_first eq 'if'
9752 || $token_first eq 'or' )
9757 "Skipping weld: poor break with -lp and ci at type_first='$type_first' type_prev='$type_prev' type_pp=$type_pp\n";
9761 return ( $new_weld_ok, $maximum_text_length, $starting_lentot, $msg );
9762 } ## end sub setup_new_weld_measurements
9764 sub excess_line_length_for_Krange {
9765 my ( $self, $Kfirst, $Klast ) = @_;
9767 # returns $excess_length =
9768 # by how many characters a line composed of tokens $Kfirst .. $Klast will
9769 # exceed the allowed line length
9771 my $rLL = $self->[_rLL_];
9772 my $length_before_Kfirst =
9775 : $rLL->[ $Kfirst - 1 ]->[_CUMULATIVE_LENGTH_];
9777 # backup before a side comment if necessary
9779 if ( $rOpts_ignore_side_comment_lengths
9780 && $rLL->[$Klast]->[_TYPE_] eq '#' )
9782 my $Kprev = $self->K_previous_nonblank($Klast);
9783 if ( defined($Kprev) && $Kprev >= $Kfirst ) { $Kend = $Kprev }
9786 # get the length of the text
9787 my $length = $rLL->[$Kend]->[_CUMULATIVE_LENGTH_] - $length_before_Kfirst;
9789 # get the size of the text window
9790 my $level = $rLL->[$Kfirst]->[_LEVEL_];
9791 my $ci_level = $rLL->[$Kfirst]->[_CI_LEVEL_];
9792 my $max_text_length = $maximum_text_length_at_level[$level] -
9793 $ci_level * $rOpts_continuation_indentation;
9795 my $excess_length = $length - $max_text_length;
9799 "Kfirst=$Kfirst, Klast=$Klast, Kend=$Kend, level=$level, ci=$ci_level, max_text_length=$max_text_length, length=$length\n";
9800 return ($excess_length);
9801 } ## end sub excess_line_length_for_Krange
9803 sub weld_nested_containers {
9806 # Called once per file for option '--weld-nested-containers'
9808 my $rK_weld_left = $self->[_rK_weld_left_];
9809 my $rK_weld_right = $self->[_rK_weld_right_];
9811 # This routine implements the -wn flag by "welding together"
9812 # the nested closing and opening tokens which were previously
9813 # identified by sub 'find_nested_pairs'. "welding" simply
9814 # involves setting certain hash values which will be checked
9815 # later during formatting.
9817 my $rLL = $self->[_rLL_];
9818 my $rlines = $self->[_rlines_];
9819 my $K_opening_container = $self->[_K_opening_container_];
9820 my $K_closing_container = $self->[_K_closing_container_];
9821 my $rblock_type_of_seqno = $self->[_rblock_type_of_seqno_];
9822 my $ris_excluded_lp_container = $self->[_ris_excluded_lp_container_];
9823 my $ris_asub_block = $self->[_ris_asub_block_];
9824 my $rmax_vertical_tightness = $self->[_rmax_vertical_tightness_];
9826 my $rOpts_asbl = $rOpts->{'opening-anonymous-sub-brace-on-new-line'};
9828 # Find nested pairs of container tokens for any welding.
9829 my $rnested_pairs = $self->find_nested_pairs();
9831 # Return unless there are nested pairs to weld
9832 return unless defined($rnested_pairs) && @{$rnested_pairs};
9834 # NOTE: It would be nice to apply RULE 5 right here by deleting unwanted
9835 # pairs. But it isn't clear if this is possible because we don't know
9836 # which sequences might actually start a weld.
9838 my $rOpts_break_at_old_method_breakpoints =
9839 $rOpts->{'break-at-old-method-breakpoints'};
9841 # This array will hold the sequence numbers of the tokens to be welded.
9844 # Variables needed for estimating line lengths
9845 my $maximum_text_length; # maximum spaces available for text
9846 my $starting_lentot; # cumulative text to start of current line
9848 my $iline_outer_opening = -1;
9849 my $weld_count_this_start = 0;
9851 # OLD: $single_line_tol added to fix cases b1180 b1181
9852 # = $rOpts_continuation_indentation > $rOpts_indent_columns ? 1 : 0;
9853 # NEW: $single_line_tol=0; fixes b1212 and b1180-1181 work now
9854 my $single_line_tol = 0;
9856 my $multiline_tol = $single_line_tol + 1 +
9857 max( $rOpts_indent_columns, $rOpts_continuation_indentation );
9859 # Define a welding cutoff level: do not start a weld if the inside
9860 # container level equals or exceeds this level.
9862 # We use the minimum of two criteria, either of which may be more
9863 # restrictive. The 'alpha' value is more restrictive in (b1206, b1252) and
9864 # the 'beta' value is more restrictive in other cases (b1243).
9865 # Reduced beta term from beta+3 to beta+2 to fix b1401. Previously:
9866 # my $weld_cutoff_level = min($stress_level_alpha, $stress_level_beta + 2);
9867 # This is now '$high_stress_level'.
9869 # The vertical tightness flags can throw off line length calculations.
9870 # This patch was added to fix instability issue b1284.
9871 # It works to always use a tol of 1 for 1 line block length tests, but
9872 # this restricted value keeps test case wn6.wn working as before.
9873 # It may be necessary to include '[' and '{' here in the future.
9874 my $one_line_tol = $opening_vertical_tightness{'('} ? 1 : 0;
9877 # _oo=outer opening, i.e. first of { {
9878 # _io=inner opening, i.e. second of { {
9879 # _oc=outer closing, i.e. second of } {
9880 # _ic=inner closing, i.e. first of } }
9884 # Main loop over nested pairs...
9885 # We are working from outermost to innermost pairs so that
9886 # level changes will be complete when we arrive at the inner pairs.
9887 while ( my $item = pop( @{$rnested_pairs} ) ) {
9888 my ( $inner_seqno, $outer_seqno ) = @{$item};
9890 my $Kouter_opening = $K_opening_container->{$outer_seqno};
9891 my $Kinner_opening = $K_opening_container->{$inner_seqno};
9892 my $Kouter_closing = $K_closing_container->{$outer_seqno};
9893 my $Kinner_closing = $K_closing_container->{$inner_seqno};
9895 # RULE: do not weld if inner container has <= 3 tokens unless the next
9896 # token is a heredoc (so we know there will be multiple lines)
9897 if ( $Kinner_closing - $Kinner_opening <= 4 ) {
9898 my $Knext_nonblank = $self->K_next_nonblank($Kinner_opening);
9899 next unless defined($Knext_nonblank);
9900 my $type = $rLL->[$Knext_nonblank]->[_TYPE_];
9901 next unless ( $type eq 'h' );
9904 my $outer_opening = $rLL->[$Kouter_opening];
9905 my $inner_opening = $rLL->[$Kinner_opening];
9906 my $outer_closing = $rLL->[$Kouter_closing];
9907 my $inner_closing = $rLL->[$Kinner_closing];
9909 # RULE: do not weld to a hash brace. The reason is that it has a very
9910 # strong bond strength to the next token, so a line break after it
9911 # may not work. Previously we allowed welding to something like @{
9912 # but that caused blinking states (cases b751, b779).
9913 if ( $inner_opening->[_TYPE_] eq 'L' ) {
9917 # RULE: do not weld to a square bracket which does not contain commas
9918 if ( $inner_opening->[_TYPE_] eq '[' ) {
9919 my $rtype_count = $self->[_rtype_count_by_seqno_]->{$inner_seqno};
9920 next unless ( $rtype_count && $rtype_count->{','} );
9922 # Do not weld if there is text before a '[' such as here:
9923 # curr_opt ( @beg [2,5] )
9924 # It will not break into the desired sandwich structure.
9925 # This fixes case b109, 110.
9926 my $Kdiff = $Kinner_opening - $Kouter_opening;
9927 next if ( $Kdiff > 2 );
9930 && $rLL->[ $Kouter_opening + 1 ]->[_TYPE_] ne 'b' );
9934 # RULE: Avoid welding under stress. The idea is that we need to have a
9935 # little space* within a welded container to avoid instability. Note
9936 # that after each weld the level values are reduced, so long multiple
9937 # welds can still be made. This rule will seldom be a limiting factor
9938 # in actual working code. Fixes b1206, b1243.
9939 my $inner_level = $inner_opening->[_LEVEL_];
9940 if ( $inner_level >= $high_stress_level ) { next }
9942 # Set flag saying if this pair starts a new weld
9943 my $starting_new_weld = !( @welds && $outer_seqno == $welds[-1]->[0] );
9945 # Set flag saying if this pair is adjacent to the previous nesting pair
9946 # (even if previous pair was rejected as a weld)
9947 my $touch_previous_pair =
9948 defined($previous_pair) && $outer_seqno == $previous_pair->[0];
9949 $previous_pair = $item;
9951 my $do_not_weld_rule = 0;
9952 my $Msg = EMPTY_STRING;
9953 my $is_one_line_weld;
9955 my $iline_oo = $outer_opening->[_LINE_INDEX_];
9956 my $iline_io = $inner_opening->[_LINE_INDEX_];
9957 my $iline_ic = $inner_closing->[_LINE_INDEX_];
9958 my $iline_oc = $outer_closing->[_LINE_INDEX_];
9959 my $token_oo = $outer_opening->[_TOKEN_];
9960 my $token_io = $inner_opening->[_TOKEN_];
9962 # DO-NOT-WELD RULE 7: Do not weld if this conflicts with -bom
9963 # Added for case b973. Moved here from below to fix b1423.
9964 if ( !$do_not_weld_rule
9965 && $rOpts_break_at_old_method_breakpoints
9966 && $iline_io > $iline_oo )
9969 foreach my $iline ( $iline_oo + 1 .. $iline_io ) {
9970 my $rK_range = $rlines->[$iline]->{_rK_range};
9971 next unless defined($rK_range);
9972 my ( $Kfirst, $Klast ) = @{$rK_range};
9973 next unless defined($Kfirst);
9974 if ( $rLL->[$Kfirst]->[_TYPE_] eq '->' ) {
9975 $do_not_weld_rule = 7;
9980 next if ($do_not_weld_rule);
9982 # Turn off vertical tightness at possible one-line welds. Fixes b1402,
9983 # b1419, b1421, b1424, b1425. This also fixes issues b1338, b1339,
9984 # b1340, b1341, b1342, b1343, which previously used a separate fix.
9985 # Issue c161 is the latest and simplest check, using
9986 # $iline_ic==$iline_io as the test.
9987 if ( %opening_vertical_tightness
9988 && $iline_ic == $iline_io
9989 && $opening_vertical_tightness{$token_oo} )
9991 $rmax_vertical_tightness->{$outer_seqno} = 0;
9994 my $is_multiline_weld =
9995 $iline_oo == $iline_io
9996 && $iline_ic == $iline_oc
9997 && $iline_io != $iline_ic;
10000 my $len_oo = $rLL->[$Kouter_opening]->[_CUMULATIVE_LENGTH_];
10001 my $len_io = $rLL->[$Kinner_opening]->[_CUMULATIVE_LENGTH_];
10003 Pair seqo=$outer_seqno seqi=$inner_seqno lines: loo=$iline_oo lio=$iline_io lic=$iline_ic loc=$iline_oc
10004 Koo=$Kouter_opening Kio=$Kinner_opening Kic=$Kinner_closing Koc=$Kouter_closing lenoo=$len_oo lenio=$len_io
10005 tokens '$token_oo' .. '$token_io'
10009 # DO-NOT-WELD RULE 0:
10010 # Avoid a new paren-paren weld if inner parens are 'sheared' (separated
10011 # by one line). This can produce instabilities (fixes b1250 b1251
10013 if ( !$is_multiline_weld
10014 && $iline_ic == $iline_io + 1
10015 && $token_oo eq '('
10016 && $token_io eq '(' )
10019 $Msg .= "RULE 0: Not welding due to sheared inner parens\n";
10025 # If this pair is not adjacent to the previous pair (skipped or not),
10026 # then measure lengths from the start of line of oo.
10028 !$touch_previous_pair
10030 # Also do this if restarting at a new line; fixes case b965, s001
10031 || ( !$weld_count_this_start && $iline_oo > $iline_outer_opening )
10035 # Remember the line we are using as a reference
10036 $iline_outer_opening = $iline_oo;
10037 $weld_count_this_start = 0;
10039 ( my $new_weld_ok, $maximum_text_length, $starting_lentot, my $msg )
10040 = $self->setup_new_weld_measurements( $Kouter_opening,
10045 && ( $iline_oo != $iline_io
10046 || $iline_ic != $iline_oc )
10049 if (DEBUG_WELD) { print $msg}
10053 my $rK_range = $rlines->[$iline_oo]->{_rK_range};
10054 my ( $Kfirst, $Klast ) = @{$rK_range};
10056 # An existing one-line weld is a line in which
10057 # (1) the containers are all on one line, and
10058 # (2) the line does not exceed the allowable length
10059 if ( $iline_oo == $iline_oc ) {
10061 # All the tokens are on one line, now check their length.
10062 # Start with the full line index range. We will reduce this
10063 # in the coding below in some cases.
10064 my $Kstart = $Kfirst;
10065 my $Kstop = $Klast;
10067 # Note that the following minimal choice for measuring will
10068 # work and will not cause any instabilities because it is
10071 ## my $Kstart = $Kouter_opening;
10072 ## my $Kstop = $Kouter_closing;
10074 # But that can lead to some undesirable welds. So a little
10075 # more complicated method has been developed.
10077 # We are trying to avoid creating bad two-line welds when we are
10078 # working on long, previously un-welded input text, such as
10080 # INPUT (example of a long input line weld candidate):
10081 ## $mutation->transpos( $self->RNA->position($mutation->label, $atg_label));
10083 # GOOD two-line break: (not welded; result marked too long):
10084 ## $mutation->transpos(
10085 ## $self->RNA->position($mutation->label, $atg_label));
10087 # BAD two-line break: (welded; result if we weld):
10088 ## $mutation->transpos($self->RNA->position(
10089 ## $mutation->label, $atg_label));
10091 # We can only get an approximate estimate of the final length,
10092 # since the line breaks may change, and for -lp mode because
10093 # even the indentation is not yet known.
10095 my $level_first = $rLL->[$Kfirst]->[_LEVEL_];
10096 my $level_last = $rLL->[$Klast]->[_LEVEL_];
10097 my $level_oo = $rLL->[$Kouter_opening]->[_LEVEL_];
10098 my $level_oc = $rLL->[$Kouter_closing]->[_LEVEL_];
10100 # - measure to the end of the original line if balanced
10101 # - measure to the closing container if unbalanced (fixes b1230)
10102 #if ( $level_first != $level_last ) { $Kstop = $Kouter_closing }
10103 if ( $level_oc > $level_last ) { $Kstop = $Kouter_closing }
10105 # - measure from the start of the original line if balanced
10106 # - measure from the most previous token with same level
10107 # if unbalanced (b1232)
10108 if ( $Kouter_opening > $Kfirst && $level_oo > $level_first ) {
10109 $Kstart = $Kouter_opening;
10112 my $KK ( reverse( $Kfirst + 1 .. $Kouter_opening - 1 ) )
10114 next if ( $rLL->[$KK]->[_TYPE_] eq 'b' );
10115 last if ( $rLL->[$KK]->[_LEVEL_] < $level_oo );
10121 $self->excess_line_length_for_Krange( $Kstart, $Kstop );
10123 # Coding simplified here for case b1219.
10124 # Increased tol from 0 to 1 when pvt>0 to fix b1284.
10125 $is_one_line_weld = $excess <= $one_line_tol;
10128 # DO-NOT-WELD RULE 1:
10129 # Do not weld something that looks like the start of a two-line
10130 # function call, like this: <<snippets/wn6.in>>
10131 # $trans->add_transformation(
10132 # PDL::Graphics::TriD::Scale->new( $sx, $sy, $sz ) );
10133 # We will look for a semicolon after the closing paren.
10135 # We want to weld something complex, like this though
10136 # my $compass = uc( opposite_direction( line_to_canvas_direction(
10137 # @{ $coords[0] }, @{ $coords[1] } ) ) );
10138 # Otherwise we will get a 'blinker'. For example, the following
10139 # would become a blinker without this rule:
10140 # $Self->_Add( $SortOrderDisplay{ $Field
10141 # ->GenerateFieldForSelectSQL() } );
10142 # But it is okay to weld a two-line statement if it looks like
10143 # it was already welded, meaning that the two opening containers are
10144 # on a different line that the two closing containers. This is
10145 # necessary to prevent blinking of something like this with
10146 # perltidy -wn -pbp (starting indentation two levels deep):
10148 # $top_label->set_text( gettext(
10149 # "Unable to create personal directory - check permissions.") );
10150 if ( $iline_oc == $iline_oo + 1
10151 && $iline_io == $iline_ic
10152 && $token_oo eq '(' )
10155 # Look for following semicolon...
10156 my $Knext_nonblank = $self->K_next_nonblank($Kouter_closing);
10157 my $next_nonblank_type =
10158 defined($Knext_nonblank)
10159 ? $rLL->[$Knext_nonblank]->[_TYPE_]
10161 if ( $next_nonblank_type eq ';' ) {
10163 # Then do not weld if no other containers between inner
10164 # opening and closing.
10165 my $Knext_seq_item = $inner_opening->[_KNEXT_SEQ_ITEM_];
10166 if ( $Knext_seq_item == $Kinner_closing ) {
10167 $do_not_weld_rule = 1;
10171 } ## end starting new weld sequence
10175 # set the 1-line flag if continuing a weld sequence; fixes b1239
10176 $is_one_line_weld = ( $iline_oo == $iline_oc );
10179 # DO-NOT-WELD RULE 2:
10180 # Do not weld an opening paren to an inner one line brace block
10181 # We will just use old line numbers for this test and require
10182 # iterations if necessary for convergence
10184 # For example, otherwise we could cause the opening paren
10185 # in the following example to separate from the caller name
10188 # $_[0]->code_handler
10189 # ( sub { $more .= $_[1] . ":" . $_[0] . "\n" } );
10191 # Here is another example where we do not want to weld:
10192 # $wrapped->add_around_modifier(
10193 # sub { push @tracelog => 'around 1'; $_[0]->(); } );
10195 # If the one line sub block gets broken due to length or by the
10196 # user, then we can weld. The result will then be:
10197 # $wrapped->add_around_modifier( sub {
10198 # push @tracelog => 'around 1';
10202 # Updated to fix cases b1082 b1102 b1106 b1115:
10203 # Also, do not weld to an intact inner block if the outer opening token
10204 # is on a different line. For example, this prevents oscillation
10205 # between these two states in case b1106:
10208 # ($_,[$self->$_(@_[1..$#_])])
10212 # $_, [ $self->$_( @_[ 1 .. $#_ ] ) ]
10215 # The effect of this change on typical code is very minimal. Sometimes
10216 # it may take a second iteration to converge, but this gives protection
10217 # against blinking.
10218 if ( !$do_not_weld_rule
10219 && !$is_one_line_weld
10220 && $iline_ic == $iline_io )
10222 $do_not_weld_rule = 2
10223 if ( $token_oo eq '(' || $iline_oo != $iline_io );
10226 # DO-NOT-WELD RULE 2A:
10227 # Do not weld an opening asub brace in -lp mode if -asbl is set. This
10228 # helps avoid instabilities in one-line block formation, and fixes
10229 # b1241. Previously, the '$is_one_line_weld' flag was tested here
10230 # instead of -asbl, and this fixed most cases. But it turns out that
10231 # the real problem was the -asbl flag, and switching to this was
10232 # necessary to fixe b1268. This also fixes b1269, b1277, b1278.
10233 if ( !$do_not_weld_rule
10234 && $rOpts_line_up_parentheses
10236 && $ris_asub_block->{$outer_seqno} )
10238 $do_not_weld_rule = '2A';
10241 # DO-NOT-WELD RULE 3:
10242 # Do not weld if this makes our line too long.
10243 # Use a tolerance which depends on if the old tokens were welded
10244 # (fixes cases b746 b748 b749 b750 b752 b753 b754 b755 b756 b758 b759)
10245 if ( !$do_not_weld_rule ) {
10247 # Measure to a little beyond the inner opening token if it is
10248 # followed by a bare word, which may have unusual line break rules.
10250 # NOTE: Originally this was OLD RULE 6: do not weld to a container
10251 # which is followed on the same line by an unknown bareword token.
10252 # This can cause blinkers (cases b626, b611). But OK to weld one
10253 # line welds to fix cases b1057 b1064. For generality, OLD RULE 6
10254 # has been merged into RULE 3 here to also fix cases b1078 b1091.
10256 my $K_for_length = $Kinner_opening;
10257 my $Knext_io = $self->K_next_nonblank($Kinner_opening);
10258 next unless ( defined($Knext_io) ); # shouldn't happen
10259 my $type_io_next = $rLL->[$Knext_io]->[_TYPE_];
10261 # Note: may need to eventually also include other types here,
10262 # such as 'Z' and 'Y': if ($type_io_next =~ /^[ZYw]$/) {
10263 if ( $type_io_next eq 'w' ) {
10264 my $Knext_io2 = $self->K_next_nonblank($Knext_io);
10265 next unless ( defined($Knext_io2) );
10266 my $type_io_next2 = $rLL->[$Knext_io2]->[_TYPE_];
10267 if ( !$type_ok_after_bareword{$type_io_next2} ) {
10268 $K_for_length = $Knext_io2;
10272 # Use a tolerance for welds over multiple lines to avoid blinkers.
10273 # We can use zero tolerance if it looks like we are working on an
10276 $is_one_line_weld || $is_multiline_weld
10280 # By how many characters does this exceed the text window?
10282 $self->cumulative_length_before_K($K_for_length) -
10283 $starting_lentot + 1 + $tol -
10284 $maximum_text_length;
10286 # Old patch: Use '>=0' instead of '> 0' here to fix cases b995 b998
10287 # b1000 b1001 b1007 b1008 b1009 b1010 b1011 b1012 b1016 b1017 b1018
10288 # Revised patch: New tolerance definition allows going back to '> 0'
10289 # here. This fixes case b1124. See also cases b1087 and b1087a.
10290 if ( $excess > 0 ) { $do_not_weld_rule = 3 }
10294 "RULE 3 test: excess length to K=$Kinner_opening is $excess > 0 with tol= $tol ?) \n";
10298 # DO-NOT-WELD RULE 4; implemented for git#10:
10299 # Do not weld an opening -ce brace if the next container is on a single
10300 # line, different from the opening brace. (This is very rare). For
10301 # example, given the following with -ce, we will avoid joining the {
10305 # [ $_, length($_) ]
10308 # because this would produce a terminal one-line block:
10310 # } else { [ $_, length($_) ] }
10312 # which may not be what is desired. But given this input:
10314 # } else { [ $_, length($_) ] }
10316 # then we will do the weld and retain the one-line block
10317 if ( !$do_not_weld_rule && $rOpts->{'cuddled-else'} ) {
10318 my $block_type = $rblock_type_of_seqno->{$outer_seqno};
10319 if ( $block_type && $rcuddled_block_types->{'*'}->{$block_type} ) {
10320 my $io_line = $inner_opening->[_LINE_INDEX_];
10321 my $ic_line = $inner_closing->[_LINE_INDEX_];
10322 my $oo_line = $outer_opening->[_LINE_INDEX_];
10323 if ( $oo_line < $io_line && $ic_line == $io_line ) {
10324 $do_not_weld_rule = 4;
10329 # DO-NOT-WELD RULE 5: do not include welds excluded by user
10332 && %weld_nested_exclusion_rules
10333 && ( $self->is_excluded_weld( $Kouter_opening, $starting_new_weld )
10334 || $self->is_excluded_weld( $Kinner_opening, 0 ) )
10337 $do_not_weld_rule = 5;
10340 # DO-NOT-WELD RULE 6: This has been merged into RULE 3 above.
10342 if ($do_not_weld_rule) {
10344 # After neglecting a pair, we start measuring from start of point
10345 # io ... but not if previous type does not like to be separated
10346 # from its container (fixes case b1184)
10347 my $Kprev = $self->K_previous_nonblank($Kinner_opening);
10348 my $type_prev = defined($Kprev) ? $rLL->[$Kprev]->[_TYPE_] : 'w';
10349 if ( !$has_tight_paren{$type_prev} ) {
10350 my $starting_level = $inner_opening->[_LEVEL_];
10351 my $starting_ci_level = $inner_opening->[_CI_LEVEL_];
10353 $self->cumulative_length_before_K($Kinner_opening);
10354 $maximum_text_length =
10355 $maximum_text_length_at_level[$starting_level] -
10356 $starting_ci_level * $rOpts_continuation_indentation;
10360 $Msg .= "Not welding due to RULE $do_not_weld_rule\n";
10364 # Normally, a broken pair should not decrease indentation of
10365 # intermediate tokens:
10366 ## if ( $last_pair_broken ) { next }
10367 # However, for long strings of welded tokens, such as '{{{{{{...'
10368 # we will allow broken pairs to also remove indentation.
10369 # This will keep very long strings of opening and closing
10370 # braces from marching off to the right. We will do this if the
10371 # number of tokens in a weld before the broken weld is 4 or more.
10372 # This rule will mainly be needed for test scripts, since typical
10373 # welds have fewer than about 4 welded tokens.
10374 if ( !@welds || @{ $welds[-1] } < 4 ) { next }
10377 # otherwise start new weld ...
10378 elsif ($starting_new_weld) {
10379 $weld_count_this_start++;
10381 $Msg .= "Starting new weld\n";
10384 push @welds, $item;
10386 $rK_weld_right->{$Kouter_opening} = $Kinner_opening;
10387 $rK_weld_left->{$Kinner_opening} = $Kouter_opening;
10389 $rK_weld_right->{$Kinner_closing} = $Kouter_closing;
10390 $rK_weld_left->{$Kouter_closing} = $Kinner_closing;
10393 # ... or extend current weld
10395 $weld_count_this_start++;
10397 $Msg .= "Extending current weld\n";
10400 unshift @{ $welds[-1] }, $inner_seqno;
10401 $rK_weld_right->{$Kouter_opening} = $Kinner_opening;
10402 $rK_weld_left->{$Kinner_opening} = $Kouter_opening;
10404 $rK_weld_right->{$Kinner_closing} = $Kouter_closing;
10405 $rK_weld_left->{$Kouter_closing} = $Kinner_closing;
10408 # After welding, reduce the indentation level if all intermediate tokens
10409 my $dlevel = $outer_opening->[_LEVEL_] - $inner_opening->[_LEVEL_];
10410 if ( $dlevel != 0 ) {
10411 my $Kstart = $Kinner_opening;
10412 my $Kstop = $Kinner_closing;
10413 foreach my $KK ( $Kstart .. $Kstop ) {
10414 $rLL->[$KK]->[_LEVEL_] += $dlevel;
10417 # Copy opening ci level to help break at = for -lp mode (case b1124)
10418 $rLL->[$Kinner_opening]->[_CI_LEVEL_] =
10419 $rLL->[$Kouter_opening]->[_CI_LEVEL_];
10421 # But do not copy the closing ci level ... it can give poor results
10422 ## $rLL->[$Kinner_closing]->[_CI_LEVEL_] =
10423 ## $rLL->[$Kouter_closing]->[_CI_LEVEL_];
10428 } ## end sub weld_nested_containers
10430 sub weld_nested_quotes {
10432 # Called once per file for option '--weld-nested-containers'. This
10433 # does welding on qw quotes.
10437 # See if quotes are excluded from welding
10438 my $rflags = $weld_nested_exclusion_rules{'q'};
10439 return if ( defined($rflags) && defined( $rflags->[1] ) );
10441 my $rK_weld_left = $self->[_rK_weld_left_];
10442 my $rK_weld_right = $self->[_rK_weld_right_];
10444 my $rLL = $self->[_rLL_];
10445 return unless ( defined($rLL) && @{$rLL} );
10448 my $K_opening_container = $self->[_K_opening_container_];
10449 my $K_closing_container = $self->[_K_closing_container_];
10450 my $rlines = $self->[_rlines_];
10452 my $starting_lentot;
10453 my $maximum_text_length;
10455 my $is_single_quote = sub {
10456 my ( $Kbeg, $Kend, $quote_type ) = @_;
10457 foreach my $K ( $Kbeg .. $Kend ) {
10458 my $test_type = $rLL->[$K]->[_TYPE_];
10459 next if ( $test_type eq 'b' );
10460 return if ( $test_type ne $quote_type );
10465 # Length tolerance - same as previously used for sub weld_nested
10466 my $multiline_tol =
10467 1 + max( $rOpts_indent_columns, $rOpts_continuation_indentation );
10469 # look for single qw quotes nested in containers
10470 my $KNEXT = $self->[_K_first_seq_item_];
10471 while ( defined($KNEXT) ) {
10473 $KNEXT = $rLL->[$KNEXT]->[_KNEXT_SEQ_ITEM_];
10474 my $rtoken_vars = $rLL->[$KK];
10475 my $outer_seqno = $rtoken_vars->[_TYPE_SEQUENCE_];
10476 if ( !$outer_seqno ) {
10477 next if ( $KK == 0 ); # first token in file may not be container
10479 # A fault here implies that an error was made in the little loop at
10480 # the bottom of sub 'respace_tokens' which set the values of
10481 # _KNEXT_SEQ_ITEM_. Or an error has been introduced in the
10482 # loop control lines above.
10483 Fault("sequence = $outer_seqno not defined at K=$KK")
10488 my $token = $rtoken_vars->[_TOKEN_];
10489 if ( $is_opening_token{$token} ) {
10491 # see if the next token is a quote of some type
10494 if ( $Kn < $Num && $rLL->[$Kn]->[_TYPE_] eq 'b' );
10495 next unless ( $Kn < $Num );
10497 my $next_token = $rLL->[$Kn]->[_TOKEN_];
10498 my $next_type = $rLL->[$Kn]->[_TYPE_];
10500 unless ( ( $next_type eq 'q' || $next_type eq 'Q' )
10501 && substr( $next_token, 0, 1 ) eq 'q' );
10503 # The token before the closing container must also be a quote
10504 my $Kouter_closing = $K_closing_container->{$outer_seqno};
10505 my $Kinner_closing = $self->K_previous_nonblank($Kouter_closing);
10506 next unless $rLL->[$Kinner_closing]->[_TYPE_] eq $next_type;
10508 # This is an inner opening container
10509 my $Kinner_opening = $Kn;
10511 # Do not weld to single-line quotes. Nothing is gained, and it may
10513 next if ( $Kinner_closing == $Kinner_opening );
10515 # Only weld to quotes delimited with container tokens. This is
10516 # because welding to arbitrary quote delimiters can produce code
10517 # which is less readable than without welding.
10518 my $closing_delimiter =
10519 substr( $rLL->[$Kinner_closing]->[_TOKEN_], -1, 1 );
10521 unless ( $is_closing_token{$closing_delimiter}
10522 || $closing_delimiter eq '>' );
10524 # Now make sure that there is just a single quote in the container
10527 $is_single_quote->(
10528 $Kinner_opening + 1,
10529 $Kinner_closing - 1,
10534 # OK: This is a candidate for welding
10535 my $Msg = EMPTY_STRING;
10538 my $Kouter_opening = $K_opening_container->{$outer_seqno};
10539 my $iline_oo = $rLL->[$Kouter_opening]->[_LINE_INDEX_];
10540 my $iline_io = $rLL->[$Kinner_opening]->[_LINE_INDEX_];
10541 my $iline_oc = $rLL->[$Kouter_closing]->[_LINE_INDEX_];
10542 my $iline_ic = $rLL->[$Kinner_closing]->[_LINE_INDEX_];
10544 ( $iline_oo == $iline_io && $iline_ic == $iline_oc );
10546 # Fix for case b1189. If quote is marked as type 'Q' then only weld
10547 # if the two closing tokens are on the same input line. Otherwise,
10548 # the closing line will be output earlier in the pipeline than
10549 # other CODE lines and welding will not actually occur. This will
10550 # leave a half-welded structure with potential formatting
10551 # instability. This might be fixed by adding a check for a weld on
10552 # a closing Q token and sending it down the normal channel, but it
10553 # would complicate the code and is potentially risky.
10556 && $next_type eq 'Q'
10557 && $iline_ic != $iline_oc );
10559 # If welded, the line must not exceed allowed line length
10560 ( my $ok_to_weld, $maximum_text_length, $starting_lentot, my $msg )
10561 = $self->setup_new_weld_measurements( $Kouter_opening,
10563 if ( !$ok_to_weld ) {
10564 if (DEBUG_WELD) { print $msg}
10569 $rLL->[$Kinner_opening]->[_CUMULATIVE_LENGTH_] - $starting_lentot;
10570 my $excess = $length + $multiline_tol - $maximum_text_length;
10572 my $excess_max = ( $is_old_weld ? $multiline_tol : 0 );
10573 if ( $excess >= $excess_max ) {
10578 if ( !$is_old_weld ) { $is_old_weld = EMPTY_STRING }
10580 "excess=$excess>=$excess_max, multiline_tol=$multiline_tol, is_old_weld='$is_old_weld'\n";
10583 # Check weld exclusion rules for outer container
10584 if ( !$do_not_weld ) {
10585 my $is_leading = !defined( $rK_weld_left->{$Kouter_opening} );
10586 if ( $self->is_excluded_weld( $KK, $is_leading ) ) {
10589 "No qw weld due to weld exclusion rules for outer container\n";
10595 # Check the length of the last line (fixes case b1039)
10596 if ( !$do_not_weld ) {
10597 my $rK_range_ic = $rlines->[$iline_ic]->{_rK_range};
10598 my ( $Kfirst_ic, $Klast_ic ) = @{$rK_range_ic};
10600 $self->excess_line_length_for_Krange( $Kfirst_ic,
10603 # Allow extra space for additional welded closing container(s)
10604 # and a space and comma or semicolon.
10605 # NOTE: weld len has not been computed yet. Use 2 spaces
10606 # for now, correct for a single weld. This estimate could
10607 # be made more accurate if necessary.
10609 defined( $rK_weld_right->{$Kouter_closing} ) ? 2 : 0;
10610 if ( $excess_ic + $weld_len + 2 > 0 ) {
10613 "No qw weld due to excess ending line length=$excess_ic + $weld_len + 2 > 0\n";
10619 if ($do_not_weld) {
10621 $Msg .= "Not Welding QW\n";
10629 $Msg .= "Welding QW\n";
10633 $rK_weld_right->{$Kouter_opening} = $Kinner_opening;
10634 $rK_weld_left->{$Kinner_opening} = $Kouter_opening;
10636 $rK_weld_right->{$Kinner_closing} = $Kouter_closing;
10637 $rK_weld_left->{$Kouter_closing} = $Kinner_closing;
10639 # Undo one indentation level if an extra level was added to this
10642 $self->[_rstarting_multiline_qw_seqno_by_K_]->{$Kinner_opening};
10644 && $self->[_rmultiline_qw_has_extra_level_]->{$qw_seqno} )
10646 foreach my $K ( $Kinner_opening + 1 .. $Kinner_closing - 1 ) {
10647 $rLL->[$K]->[_LEVEL_] -= 1;
10649 $rLL->[$Kinner_opening]->[_CI_LEVEL_] = 0;
10650 $rLL->[$Kinner_closing]->[_CI_LEVEL_] = 0;
10653 # undo CI for other welded quotes
10656 foreach my $K ( $Kinner_opening .. $Kinner_closing ) {
10657 $rLL->[$K]->[_CI_LEVEL_] = 0;
10661 # Change the level of a closing qw token to be that of the outer
10662 # containing token. This will allow -lp indentation to function
10663 # correctly in the vertical aligner.
10664 # Patch to fix c002: but not if it contains text
10665 if ( length( $rLL->[$Kinner_closing]->[_TOKEN_] ) == 1 ) {
10666 $rLL->[$Kinner_closing]->[_LEVEL_] =
10667 $rLL->[$Kouter_closing]->[_LEVEL_];
10672 } ## end sub weld_nested_quotes
10674 sub is_welded_at_seqno {
10676 my ( $self, $seqno ) = @_;
10678 # given a sequence number:
10679 # return true if it is welded either left or right
10680 # return false otherwise
10681 return unless ( $total_weld_count && defined($seqno) );
10682 my $KK_o = $self->[_K_opening_container_]->{$seqno};
10683 return unless defined($KK_o);
10684 return defined( $self->[_rK_weld_left_]->{$KK_o} )
10685 || defined( $self->[_rK_weld_right_]->{$KK_o} );
10686 } ## end sub is_welded_at_seqno
10688 sub mark_short_nested_blocks {
10690 # This routine looks at the entire file and marks any short nested blocks
10691 # which should not be broken. The results are stored in the hash
10692 # $rshort_nested->{$type_sequence}
10693 # which will be true if the container should remain intact.
10695 # For example, consider the following line:
10697 # sub cxt_two { sort { $a <=> $b } test_if_list() }
10699 # The 'sort' block is short and nested within an outer sub block.
10700 # Normally, the existence of the 'sort' block will force the sub block to
10701 # break open, but this is not always desirable. Here we will set a flag for
10702 # the sort block to prevent this. To give the user control, we will
10703 # follow the input file formatting. If either of the blocks is broken in
10704 # the input file then we will allow it to remain broken. Otherwise we will
10705 # set a flag to keep it together in later formatting steps.
10707 # The flag which is set here will be checked in two places:
10708 # 'sub process_line_of_CODE' and 'sub starting_one_line_block'
10711 return if $rOpts->{'indent-only'};
10713 my $rLL = $self->[_rLL_];
10714 return unless ( defined($rLL) && @{$rLL} );
10716 return unless ( $rOpts->{'one-line-block-nesting'} );
10718 my $K_opening_container = $self->[_K_opening_container_];
10719 my $K_closing_container = $self->[_K_closing_container_];
10720 my $rbreak_container = $self->[_rbreak_container_];
10721 my $rshort_nested = $self->[_rshort_nested_];
10722 my $rlines = $self->[_rlines_];
10723 my $rblock_type_of_seqno = $self->[_rblock_type_of_seqno_];
10725 # Variables needed for estimating line lengths
10726 my $maximum_text_length;
10727 my $starting_lentot;
10728 my $length_tol = 1;
10730 my $excess_length_to_K = sub {
10733 # Estimate the length from the line start to a given token
10734 my $length = $self->cumulative_length_before_K($K) - $starting_lentot;
10735 my $excess_length = $length + $length_tol - $maximum_text_length;
10736 return ($excess_length);
10739 my $is_broken_block = sub {
10741 # a block is broken if the input line numbers of the braces differ
10743 my $K_opening = $K_opening_container->{$seqno};
10744 return unless ( defined($K_opening) );
10745 my $K_closing = $K_closing_container->{$seqno};
10746 return unless ( defined($K_closing) );
10747 return $rbreak_container->{$seqno}
10748 || $rLL->[$K_closing]->[_LINE_INDEX_] !=
10749 $rLL->[$K_opening]->[_LINE_INDEX_];
10752 # loop over all containers
10753 my @open_block_stack;
10755 my $KNEXT = $self->[_K_first_seq_item_];
10756 while ( defined($KNEXT) ) {
10758 $KNEXT = $rLL->[$KNEXT]->[_KNEXT_SEQ_ITEM_];
10759 my $rtoken_vars = $rLL->[$KK];
10760 my $type_sequence = $rtoken_vars->[_TYPE_SEQUENCE_];
10761 if ( !$type_sequence ) {
10762 next if ( $KK == 0 ); # first token in file may not be container
10764 # A fault here implies that an error was made in the little loop at
10765 # the bottom of sub 'respace_tokens' which set the values of
10766 # _KNEXT_SEQ_ITEM_. Or an error has been introduced in the
10767 # loop control lines above.
10768 Fault("sequence = $type_sequence not defined at K=$KK")
10773 # Patch: do not mark short blocks with welds.
10774 # In some cases blinkers can form (case b690).
10775 if ( $total_weld_count && $self->is_welded_at_seqno($type_sequence) ) {
10779 # We are just looking at code blocks
10780 my $token = $rtoken_vars->[_TOKEN_];
10781 my $type = $rtoken_vars->[_TYPE_];
10782 next unless ( $type eq $token );
10783 next unless ( $rblock_type_of_seqno->{$type_sequence} );
10785 # Keep a stack of all acceptable block braces seen.
10786 # Only consider blocks entirely on one line so dump the stack when line
10788 my $iline_last = $iline;
10789 $iline = $rLL->[$KK]->[_LINE_INDEX_];
10790 if ( $iline != $iline_last ) { @open_block_stack = () }
10792 if ( $token eq '}' ) {
10793 if (@open_block_stack) { pop @open_block_stack }
10795 next unless ( $token eq '{' );
10797 # block must be balanced (bad scripts may be unbalanced)
10798 my $K_opening = $K_opening_container->{$type_sequence};
10799 my $K_closing = $K_closing_container->{$type_sequence};
10800 next unless ( defined($K_opening) && defined($K_closing) );
10802 # require that this block be entirely on one line
10803 next if ( $is_broken_block->($type_sequence) );
10805 # See if this block fits on one line of allowed length (which may
10806 # be different from the input script)
10808 $KK <= 0 ? 0 : $rLL->[ $KK - 1 ]->[_CUMULATIVE_LENGTH_];
10809 my $level = $rLL->[$KK]->[_LEVEL_];
10810 my $ci_level = $rLL->[$KK]->[_CI_LEVEL_];
10811 $maximum_text_length =
10812 $maximum_text_length_at_level[$level] -
10813 $ci_level * $rOpts_continuation_indentation;
10815 # Dump the stack if block is too long and skip this block
10816 if ( $excess_length_to_K->($K_closing) > 0 ) {
10817 @open_block_stack = ();
10821 # OK, Block passes tests, remember it
10822 push @open_block_stack, $type_sequence;
10824 # We are only marking nested code blocks,
10825 # so check for a previous block on the stack
10826 next unless ( @open_block_stack > 1 );
10828 # Looks OK, mark this as a short nested block
10829 $rshort_nested->{$type_sequence} = 1;
10833 } ## end sub mark_short_nested_blocks
10835 sub special_indentation_adjustments {
10839 # Called once per file to do special indentation adjustments.
10840 # These routines adjust levels either by changing _CI_LEVEL_ directly or
10841 # by setting modified levels in the array $self->[_radjusted_levels_].
10843 # Initialize the adjusted levels. These will be the levels actually used
10844 # for computing indentation.
10846 # NOTE: This routine is called after the weld routines, which may have
10847 # already adjusted _LEVEL_, so we are making adjustments on top of those
10848 # levels. It would be much nicer to have the weld routines also use this
10849 # adjustment, but that gets complicated when we combine -gnu -wn and have
10850 # some welded quotes.
10851 my $Klimit = $self->[_Klimit_];
10852 my $rLL = $self->[_rLL_];
10853 my $radjusted_levels = $self->[_radjusted_levels_];
10855 return unless ( defined($Klimit) );
10857 foreach my $KK ( 0 .. $Klimit ) {
10858 $radjusted_levels->[$KK] = $rLL->[$KK]->[_LEVEL_];
10861 # First set adjusted levels for any non-indenting braces.
10862 $self->do_non_indenting_braces();
10864 # Adjust breaks and indentation list containers
10865 $self->break_before_list_opening_containers();
10867 # Set adjusted levels for the whitespace cycle option.
10868 $self->whitespace_cycle_adjustment();
10870 $self->braces_left_setup();
10872 # Adjust continuation indentation if -bli is set
10873 $self->bli_adjustment();
10875 $self->extended_ci()
10876 if ($rOpts_extended_continuation_indentation);
10878 # Now clip any adjusted levels to be non-negative
10879 $self->clip_adjusted_levels();
10882 } ## end sub special_indentation_adjustments
10884 sub clip_adjusted_levels {
10886 # Replace any negative adjusted levels with zero.
10887 # Negative levels can occur in files with brace errors.
10889 my $radjusted_levels = $self->[_radjusted_levels_];
10890 return unless defined($radjusted_levels) && @{$radjusted_levels};
10891 my $min = min( @{$radjusted_levels} ); # fast check for min
10894 # slow loop, but rarely needed
10895 foreach ( @{$radjusted_levels} ) { $_ = 0 if ( $_ < 0 ) }
10898 } ## end sub clip_adjusted_levels
10900 sub do_non_indenting_braces {
10902 # Called once per file to handle the --non-indenting-braces parameter.
10903 # Remove indentation within marked braces if requested
10906 # Any non-indenting braces have been found by sub find_non_indenting_braces
10907 # and are defined by the following hash:
10908 my $rseqno_non_indenting_brace_by_ix =
10909 $self->[_rseqno_non_indenting_brace_by_ix_];
10910 return unless ( %{$rseqno_non_indenting_brace_by_ix} );
10912 my $rLL = $self->[_rLL_];
10913 my $rlines = $self->[_rlines_];
10914 my $K_opening_container = $self->[_K_opening_container_];
10915 my $K_closing_container = $self->[_K_closing_container_];
10916 my $rspecial_side_comment_type = $self->[_rspecial_side_comment_type_];
10917 my $radjusted_levels = $self->[_radjusted_levels_];
10919 # First locate all of the marked blocks
10921 foreach my $ix ( keys %{$rseqno_non_indenting_brace_by_ix} ) {
10922 my $seqno = $rseqno_non_indenting_brace_by_ix->{$ix};
10923 my $KK = $K_opening_container->{$seqno};
10924 my $line_of_tokens = $rlines->[$ix];
10925 my $rK_range = $line_of_tokens->{_rK_range};
10926 my ( $Kfirst, $Klast ) = @{$rK_range};
10927 $rspecial_side_comment_type->{$Klast} = 'NIB';
10928 push @K_stack, [ $KK, 1 ];
10929 my $Kc = $K_closing_container->{$seqno};
10930 push @K_stack, [ $Kc, -1 ] if ( defined($Kc) );
10932 return unless (@K_stack);
10933 @K_stack = sort { $a->[0] <=> $b->[0] } @K_stack;
10935 # Then loop to remove indentation within marked blocks
10938 foreach my $item (@K_stack) {
10939 my ( $KK, $inc ) = @{$item};
10940 if ( $ndeep > 0 ) {
10942 foreach ( $KK_last + 1 .. $KK ) {
10943 $radjusted_levels->[$_] -= $ndeep;
10946 # We just subtracted the old $ndeep value, which only applies to a
10947 # '{'. The new $ndeep applies to a '}', so we undo the error.
10948 if ( $inc < 0 ) { $radjusted_levels->[$KK] += 1 }
10955 } ## end sub do_non_indenting_braces
10957 sub whitespace_cycle_adjustment {
10961 # Called once per file to implement the --whitespace-cycle option
10962 my $rLL = $self->[_rLL_];
10963 return unless ( defined($rLL) && @{$rLL} );
10964 my $radjusted_levels = $self->[_radjusted_levels_];
10965 my $maximum_level = $self->[_maximum_level_];
10967 if ( $rOpts_whitespace_cycle
10968 && $rOpts_whitespace_cycle > 0
10969 && $rOpts_whitespace_cycle < $maximum_level )
10972 my $Kmax = @{$rLL} - 1;
10974 my $whitespace_last_level = -1;
10975 my @whitespace_level_stack = ();
10976 my $last_nonblank_type = 'b';
10977 my $last_nonblank_token = EMPTY_STRING;
10978 foreach my $KK ( 0 .. $Kmax ) {
10979 my $level_abs = $radjusted_levels->[$KK];
10980 my $level = $level_abs;
10981 if ( $level_abs < $whitespace_last_level ) {
10982 pop(@whitespace_level_stack);
10984 if ( !@whitespace_level_stack ) {
10985 push @whitespace_level_stack, $level_abs;
10987 elsif ( $level_abs > $whitespace_last_level ) {
10988 $level = $whitespace_level_stack[-1] +
10989 ( $level_abs - $whitespace_last_level );
10992 # 1 Try to break at a block brace
10994 $level > $rOpts_whitespace_cycle
10995 && $last_nonblank_type eq '{'
10996 && $last_nonblank_token eq '{'
10999 # 2 Then either a brace or bracket
11000 || ( $level > $rOpts_whitespace_cycle + 1
11001 && $last_nonblank_token =~ /^[\{\[]$/ )
11003 # 3 Then a paren too
11004 || $level > $rOpts_whitespace_cycle + 2
11009 push @whitespace_level_stack, $level;
11011 $level = $whitespace_level_stack[-1];
11012 $radjusted_levels->[$KK] = $level;
11014 $whitespace_last_level = $level_abs;
11015 my $type = $rLL->[$KK]->[_TYPE_];
11016 my $token = $rLL->[$KK]->[_TOKEN_];
11017 if ( $type ne 'b' ) {
11018 $last_nonblank_type = $type;
11019 $last_nonblank_token = $token;
11024 } ## end sub whitespace_cycle_adjustment
11026 use constant DEBUG_BBX => 0;
11028 sub break_before_list_opening_containers {
11032 # This routine is called once per batch to implement parameters
11033 # --break-before-hash-brace=n and similar -bbx=n flags
11034 # and their associated indentation flags:
11035 # --break-before-hash-brace-and-indent and similar -bbxi=n
11037 # Nothing to do if none of the -bbx=n parameters has been set
11038 return unless %break_before_container_types;
11040 my $rLL = $self->[_rLL_];
11041 return unless ( defined($rLL) && @{$rLL} );
11043 # Loop over all opening container tokens
11044 my $K_opening_container = $self->[_K_opening_container_];
11045 my $K_closing_container = $self->[_K_closing_container_];
11046 my $ris_broken_container = $self->[_ris_broken_container_];
11047 my $ris_permanently_broken = $self->[_ris_permanently_broken_];
11048 my $rhas_list = $self->[_rhas_list_];
11049 my $rhas_broken_list = $self->[_rhas_broken_list_];
11050 my $rhas_broken_list_with_lec = $self->[_rhas_broken_list_with_lec_];
11051 my $radjusted_levels = $self->[_radjusted_levels_];
11052 my $rparent_of_seqno = $self->[_rparent_of_seqno_];
11053 my $rlines = $self->[_rlines_];
11054 my $rtype_count_by_seqno = $self->[_rtype_count_by_seqno_];
11055 my $rlec_count_by_seqno = $self->[_rlec_count_by_seqno_];
11056 my $rno_xci_by_seqno = $self->[_rno_xci_by_seqno_];
11057 my $rK_weld_right = $self->[_rK_weld_right_];
11058 my $rblock_type_of_seqno = $self->[_rblock_type_of_seqno_];
11061 max( 1, $rOpts_continuation_indentation, $rOpts_indent_columns );
11062 if ($rOpts_ignore_old_breakpoints) {
11064 # Patch suggested by b1231; the old tol was excessive.
11065 ## $length_tol += $rOpts_maximum_line_length;
11069 my $rbreak_before_container_by_seqno = {};
11070 my $rwant_reduced_ci = {};
11071 foreach my $seqno ( keys %{$K_opening_container} ) {
11073 #----------------------------------------------------------------
11074 # Part 1: Examine any -bbx=n flags
11075 #----------------------------------------------------------------
11077 next if ( $rblock_type_of_seqno->{$seqno} );
11078 my $KK = $K_opening_container->{$seqno};
11080 # This must be a list or contain a list.
11081 # Note1: switched from 'has_broken_list' to 'has_list' to fix b1024.
11082 # Note2: 'has_list' holds the depth to the sub-list. We will require
11083 # a depth of just 1
11084 my $is_list = $self->is_list_by_seqno($seqno);
11085 my $has_list = $rhas_list->{$seqno};
11087 # Fix for b1173: if welded opening container, use flag of innermost
11088 # seqno. Otherwise, the restriction $has_list==1 prevents triple and
11089 # higher welds from following the -BBX parameters.
11090 if ($total_weld_count) {
11091 my $KK_test = $rK_weld_right->{$KK};
11092 if ( defined($KK_test) ) {
11093 my $seqno_inner = $rLL->[$KK_test]->[_TYPE_SEQUENCE_];
11094 $is_list ||= $self->is_list_by_seqno($seqno_inner);
11095 $has_list = $rhas_list->{$seqno_inner};
11099 next unless ( $is_list || $has_list && $has_list == 1 );
11101 my $has_broken_list = $rhas_broken_list->{$seqno};
11102 my $has_list_with_lec = $rhas_broken_list_with_lec->{$seqno};
11104 # Only for types of container tokens with a non-default break option
11105 my $token = $rLL->[$KK]->[_TOKEN_];
11106 my $break_option = $break_before_container_types{$token};
11107 next unless ($break_option);
11109 # Do not use -bbx under stress for stability ... fixes b1300
11110 # TODO: review this; do we also need to look at stress_level_lalpha?
11111 my $level = $rLL->[$KK]->[_LEVEL_];
11112 if ( $level >= $stress_level_beta ) {
11115 "BBX: Switching off at $seqno: level=$level exceeds beta stress level=$stress_level_beta\n";
11119 # Require previous nonblank to be '=' or '=>'
11120 my $Kprev = $KK - 1;
11121 next if ( $Kprev < 0 );
11122 my $prev_type = $rLL->[$Kprev]->[_TYPE_];
11123 if ( $prev_type eq 'b' ) {
11125 next if ( $Kprev < 0 );
11126 $prev_type = $rLL->[$Kprev]->[_TYPE_];
11128 next unless ( $is_equal_or_fat_comma{$prev_type} );
11130 my $ci = $rLL->[$KK]->[_CI_LEVEL_];
11132 #--------------------------------------------
11133 # New coding for option 2 (break if complex).
11134 #--------------------------------------------
11135 # This new coding uses clues which are invariant under formatting to
11136 # decide if a list is complex. For now it is only applied when -lp
11137 # and -vmll are used, but eventually it may become the standard method.
11138 # Fixes b1274, b1275, and others, including b1099.
11139 if ( $break_option == 2 ) {
11141 if ( $rOpts_line_up_parentheses
11142 || $rOpts_variable_maximum_line_length )
11145 # Start with the basic definition of a complex list...
11146 my $is_complex = $is_list && $has_list;
11148 # and it is also complex if the parent is a list
11149 if ( !$is_complex ) {
11150 my $parent = $rparent_of_seqno->{$seqno};
11151 if ( $self->is_list_by_seqno($parent) ) {
11156 # finally, we will call it complex if there are inner opening
11157 # and closing container tokens, not parens, within the outer
11158 # container tokens.
11159 if ( !$is_complex ) {
11160 my $Kp = $self->K_next_nonblank($KK);
11161 my $token_p = defined($Kp) ? $rLL->[$Kp]->[_TOKEN_] : 'b';
11162 if ( $is_opening_token{$token_p} && $token_p ne '(' ) {
11164 my $Kc = $K_closing_container->{$seqno};
11165 my $Km = $self->K_previous_nonblank($Kc);
11167 defined($Km) ? $rLL->[$Km]->[_TOKEN_] : 'b';
11169 # ignore any optional ending comma
11170 if ( $token_m eq ',' ) {
11171 $Km = $self->K_previous_nonblank($Km);
11173 defined($Km) ? $rLL->[$Km]->[_TOKEN_] : 'b';
11177 $is_closing_token{$token_m} && $token_m ne ')';
11181 # Convert to option 3 (always break) if complex
11182 next unless ($is_complex);
11187 # Fix for b1231: the has_list_with_lec does not cover all cases.
11188 # A broken container containing a list and with line-ending commas
11189 # will stay broken, so can be treated as if it had a list with lec.
11190 $has_list_with_lec ||=
11192 && $ris_broken_container->{$seqno}
11193 && $rlec_count_by_seqno->{$seqno};
11197 "BBX: Looking at seqno=$seqno, token = $token with option=$break_option\n";
11199 # -bbx=1 = stable, try to follow input
11200 if ( $break_option == 1 ) {
11202 my $iline = $rLL->[$KK]->[_LINE_INDEX_];
11203 my $rK_range = $rlines->[$iline]->{_rK_range};
11204 my ( $Kfirst, $Klast ) = @{$rK_range};
11205 next unless ( $KK == $Kfirst );
11208 # -bbx=2 => apply this style only for a 'complex' list
11209 elsif ( $break_option == 2 ) {
11211 # break if this list contains a broken list with line-ending comma
11213 my $Msg = EMPTY_STRING;
11214 if ($has_list_with_lec) {
11216 DEBUG_BBX && do { $Msg = "has list with lec;" };
11219 if ( !$ok_to_break ) {
11221 # Turn off -xci if -bbx=2 and this container has a sublist but
11222 # not a broken sublist. This avoids creating blinkers. The
11223 # problem is that -xci can cause one-line lists to break open,
11224 # and thereby creating formatting instability.
11225 # This fixes cases b1033 b1036 b1037 b1038 b1042 b1043 b1044
11226 # b1045 b1046 b1047 b1051 b1052 b1061.
11227 if ($has_list) { $rno_xci_by_seqno->{$seqno} = 1 }
11229 my $parent = $rparent_of_seqno->{$seqno};
11230 if ( $self->is_list_by_seqno($parent) ) {
11231 DEBUG_BBX && do { $Msg = "parent is list" };
11236 if ( !$ok_to_break ) {
11238 && print STDOUT "Not breaking at seqno=$seqno: $Msg\n";
11243 && print STDOUT "OK to break at seqno=$seqno: $Msg\n";
11245 # Patch: turn off -xci if -bbx=2 and -lp
11246 # This fixes cases b1090 b1095 b1101 b1116 b1118 b1121 b1122
11247 $rno_xci_by_seqno->{$seqno} = 1 if ($rOpts_line_up_parentheses);
11250 # -bbx=3 = always break
11251 elsif ( $break_option == 3 ) {
11256 # Shouldn't happen! Bad flag, but make behavior same as 3
11261 # Set a flag for actual implementation later in
11262 # sub insert_breaks_before_list_opening_containers
11263 $rbreak_before_container_by_seqno->{$seqno} = 1;
11265 && print STDOUT "BBX: ok to break at seqno=$seqno\n";
11267 # -bbxi=0: Nothing more to do if the ci value remains unchanged
11268 my $ci_flag = $container_indentation_options{$token};
11269 next unless ($ci_flag);
11271 # -bbxi=1: This option removes ci and is handled in
11272 # later sub get_final_indentation
11273 if ( $ci_flag == 1 ) {
11274 $rwant_reduced_ci->{$seqno} = 1;
11278 # -bbxi=2: This option changes the level ...
11279 # This option can conflict with -xci in some cases. We can turn off
11280 # -xci for this container to avoid blinking. For now, only do this if
11281 # -vmll is set. ( fixes b1335, b1336 )
11282 if ($rOpts_variable_maximum_line_length) {
11283 $rno_xci_by_seqno->{$seqno} = 1;
11286 #----------------------------------------------------------------
11287 # Part 2: Perform tests before committing to changing ci and level
11288 #----------------------------------------------------------------
11290 # Before changing the ci level of the opening container, we need
11291 # to be sure that the container will be broken in the later stages of
11292 # formatting. We have to do this because we are working early in the
11293 # formatting pipeline. A problem can occur if we change the ci or
11294 # level of the opening token but do not actually break the container
11295 # open as expected. In most cases it wouldn't make any difference if
11296 # we changed ci or not, but there are some edge cases where this
11297 # can cause blinking states, so we need to try to only change ci if
11298 # the container will really be broken.
11300 # Only consider containers already broken
11301 next if ( !$ris_broken_container->{$seqno} );
11303 # Patch to fix issue b1305: the combination of -naws and ci>i appears
11304 # to cause an instability. It should almost never occur in practice.
11306 if (!$rOpts_add_whitespace
11307 && $rOpts_continuation_indentation > $rOpts_indent_columns );
11309 # Always ok to change ci for permanently broken containers
11310 if ( $ris_permanently_broken->{$seqno} ) { }
11312 # Always OK if this list contains a broken sub-container with
11313 # a non-terminal line-ending comma
11314 elsif ($has_list_with_lec) { }
11316 # Otherwise, we are considering a single container...
11319 # A single container must have at least 1 line-ending comma:
11320 next unless ( $rlec_count_by_seqno->{$seqno} );
11324 # Since it has a line-ending comma, it will stay broken if the
11326 if ($rOpts_break_at_old_comma_breakpoints) { $OK = 1 }
11328 # OK if the container contains multiple fat commas
11329 # Better: multiple lines with fat commas
11330 if ( !$OK && !$rOpts_ignore_old_breakpoints ) {
11331 my $rtype_count = $rtype_count_by_seqno->{$seqno};
11332 next unless ($rtype_count);
11333 my $fat_comma_count = $rtype_count->{'=>'};
11335 && print STDOUT "BBX: fat comma count=$fat_comma_count\n";
11336 if ( $fat_comma_count && $fat_comma_count >= 2 ) { $OK = 1 }
11339 # The last check we can make is to see if this container could
11340 # fit on a single line. Use the least possible indentation
11341 # estimate, ci=0, so we are not subtracting $ci *
11342 # $rOpts_continuation_indentation from tabulated
11343 # $maximum_text_length value.
11345 my $maximum_text_length = $maximum_text_length_at_level[$level];
11346 my $K_closing = $K_closing_container->{$seqno};
11347 my $length = $self->cumulative_length_before_K($K_closing) -
11348 $self->cumulative_length_before_K($KK);
11349 my $excess_length = $length - $maximum_text_length;
11352 "BBX: excess=$excess_length: maximum_text_length=$maximum_text_length, length=$length, ci=$ci\n";
11354 # OK if the net container definitely breaks on length
11355 if ( $excess_length > $length_tol ) {
11358 && print STDOUT "BBX: excess_length=$excess_length\n";
11361 # Otherwise skip it
11366 #------------------------------------------------------------
11367 # Part 3: Looks OK: apply -bbx=n and any related -bbxi=n flag
11368 #------------------------------------------------------------
11370 DEBUG_BBX && print STDOUT "BBX: OK to break\n";
11378 # n=0 default indentation (usually one ci)
11379 # n=1 outdent one ci
11380 # n=2 indent one level (minus one ci)
11381 # n=3 indent one extra ci [This may be dropped]
11383 # NOTE: We are adjusting indentation of the opening container. The
11384 # closing container will normally follow the indentation of the opening
11385 # container automatically, so this is not currently done.
11388 # option 1: outdent
11389 if ( $ci_flag == 1 ) {
11393 # option 2: indent one level
11394 elsif ( $ci_flag == 2 ) {
11396 $radjusted_levels->[$KK] += 1;
11401 # Shouldn't happen - leave ci unchanged
11404 $rLL->[$KK]->[_CI_LEVEL_] = $ci if ( $ci >= 0 );
11407 $self->[_rbreak_before_container_by_seqno_] =
11408 $rbreak_before_container_by_seqno;
11409 $self->[_rwant_reduced_ci_] = $rwant_reduced_ci;
11411 } ## end sub break_before_list_opening_containers
11413 use constant DEBUG_XCI => 0;
11417 # This routine implements the -xci (--extended-continuation-indentation)
11418 # flag. We add CI to interior tokens of a container which itself has CI but
11419 # only if a token does not already have CI.
11421 # To do this, we will locate opening tokens which themselves have
11422 # continuation indentation (CI). We track them with their sequence
11423 # numbers. These sequence numbers are called 'controlling sequence
11424 # numbers'. They apply continuation indentation to the tokens that they
11425 # contain. These inner tokens remember their controlling sequence numbers.
11426 # Later, when these inner tokens are output, they have to see if the output
11427 # lines with their controlling tokens were output with CI or not. If not,
11428 # then they must remove their CI too.
11430 # The controlling CI concept works hierarchically. But CI itself is not
11431 # hierarchical; it is either on or off. There are some rare instances where
11432 # it would be best to have hierarchical CI too, but not enough to be worth
11433 # the programming effort.
11435 # The operations to remove unwanted CI are done in sub 'undo_ci'.
11439 my $rLL = $self->[_rLL_];
11440 return unless ( defined($rLL) && @{$rLL} );
11442 my $ris_list_by_seqno = $self->[_ris_list_by_seqno_];
11443 my $ris_seqno_controlling_ci = $self->[_ris_seqno_controlling_ci_];
11444 my $rseqno_controlling_my_ci = $self->[_rseqno_controlling_my_ci_];
11445 my $rlines = $self->[_rlines_];
11446 my $rno_xci_by_seqno = $self->[_rno_xci_by_seqno_];
11447 my $ris_bli_container = $self->[_ris_bli_container_];
11448 my $rblock_type_of_seqno = $self->[_rblock_type_of_seqno_];
11450 my %available_space;
11452 # Loop over all opening container tokens
11453 my $K_opening_container = $self->[_K_opening_container_];
11454 my $K_closing_container = $self->[_K_closing_container_];
11455 my $ris_broken_container = $self->[_ris_broken_container_];
11459 my $KNEXT = $self->[_K_first_seq_item_];
11461 # The following variable can be used to allow a little extra space to
11462 # avoid blinkers. A value $len_tol = 20 fixed the following
11463 # fixes cases: b1025 b1026 b1027 b1028 b1029 b1030 but NOT b1031.
11464 # It turned out that the real problem was mis-parsing a list brace as
11465 # a code block in a 'use' statement when the line length was extremely
11466 # small. A value of 0 works now, but a slightly larger value can
11467 # be used to minimize the chance of a blinker.
11470 while ( defined($KNEXT) ) {
11472 # Fix all tokens up to the next sequence item if we are changing CI
11475 my $is_list = $ris_list_by_seqno->{$seqno_top};
11476 my $space = $available_space{$seqno_top};
11477 my $length = $rLL->[$KLAST]->[_CUMULATIVE_LENGTH_];
11479 foreach my $Kt ( $KLAST + 1 .. $KNEXT - 1 ) {
11481 # But do not include tokens which might exceed the line length
11482 # and are not in a list.
11483 # ... This fixes case b1031
11484 my $length_before = $length;
11485 $length = $rLL->[$Kt]->[_CUMULATIVE_LENGTH_];
11487 !$rLL->[$Kt]->[_CI_LEVEL_]
11489 || $length - $length_before < $space
11490 || $rLL->[$Kt]->[_TYPE_] eq '#' )
11493 $rLL->[$Kt]->[_CI_LEVEL_] = 1;
11494 $rseqno_controlling_my_ci->{$Kt} = $seqno_top;
11498 $ris_seqno_controlling_ci->{$seqno_top} += $count;
11503 $KNEXT = $rLL->[$KNEXT]->[_KNEXT_SEQ_ITEM_];
11505 my $seqno = $rLL->[$KK]->[_TYPE_SEQUENCE_];
11507 # see if we have reached the end of the current controlling container
11508 if ( $seqno_top && $seqno == $seqno_top ) {
11509 $seqno_top = pop @seqno_stack;
11512 # Patch to fix some block types...
11513 # Certain block types arrive from the tokenizer without CI but should
11514 # have it for this option. These include anonymous subs and
11515 # do sort map grep eval
11516 my $block_type = $rblock_type_of_seqno->{$seqno};
11517 if ( $block_type && $is_block_with_ci{$block_type} ) {
11518 $rLL->[$KK]->[_CI_LEVEL_] = 1;
11520 $rseqno_controlling_my_ci->{$KK} = $seqno_top;
11521 $ris_seqno_controlling_ci->{$seqno_top}++;
11525 # If this does not have ci, update ci if necessary and continue looking
11526 if ( !$rLL->[$KK]->[_CI_LEVEL_] ) {
11528 $rLL->[$KK]->[_CI_LEVEL_] = 1;
11529 $rseqno_controlling_my_ci->{$KK} = $seqno_top;
11530 $ris_seqno_controlling_ci->{$seqno_top}++;
11535 # We are looking for opening container tokens with ci
11536 my $K_opening = $K_opening_container->{$seqno};
11537 next unless ( defined($K_opening) && $KK == $K_opening );
11539 # Make sure there is a corresponding closing container
11540 # (could be missing if the script has a brace error)
11541 my $K_closing = $K_closing_container->{$seqno};
11542 next unless defined($K_closing);
11544 # Skip if requested by -bbx to avoid blinkers
11545 next if ( $rno_xci_by_seqno->{$seqno} );
11547 # Skip if this is a -bli container (this fixes case b1065) Note: case
11548 # b1065 is also fixed by the update for b1055, so this update is not
11549 # essential now. But there does not seem to be a good reason to add
11550 # xci and bli together, so the update is retained.
11551 next if ( $ris_bli_container->{$seqno} );
11553 # Require different input lines. This will filter out a large number
11554 # of small hash braces and array brackets. If we accidentally filter
11555 # out an important container, it will get fixed on the next pass.
11557 $rLL->[$K_opening]->[_LINE_INDEX_] ==
11558 $rLL->[$K_closing]->[_LINE_INDEX_]
11559 && ( $rLL->[$K_closing]->[_CUMULATIVE_LENGTH_] -
11560 $rLL->[$K_opening]->[_CUMULATIVE_LENGTH_] >
11561 $rOpts_maximum_line_length )
11565 && print "XCI: Skipping seqno=$seqno, require different lines\n";
11569 # Do not apply -xci if adding extra ci will put the container contents
11570 # beyond the line length limit (fixes cases b899 b935)
11571 my $level = $rLL->[$K_opening]->[_LEVEL_];
11572 my $ci_level = $rLL->[$K_opening]->[_CI_LEVEL_];
11573 my $maximum_text_length =
11574 $maximum_text_length_at_level[$level] -
11575 $ci_level * $rOpts_continuation_indentation;
11577 # Fix for b1197 b1198 b1199 b1200 b1201 b1202
11578 # Do not apply -xci if we are running out of space
11579 # TODO: review this; do we also need to look at stress_level_alpha?
11580 if ( $level >= $stress_level_beta ) {
11583 "XCI: Skipping seqno=$seqno, level=$level exceeds stress level=$stress_level_beta\n";
11587 # remember how much space is available for patch b1031 above
11589 $maximum_text_length - $len_tol - $rOpts_continuation_indentation;
11591 if ( $space < 0 ) {
11592 DEBUG_XCI && print "XCI: Skipping seqno=$seqno, space=$space\n";
11595 DEBUG_XCI && print "XCI: OK seqno=$seqno, space=$space\n";
11597 $available_space{$seqno} = $space;
11599 # This becomes the next controlling container
11600 push @seqno_stack, $seqno_top if ($seqno_top);
11601 $seqno_top = $seqno;
11604 } ## end sub extended_ci
11606 sub braces_left_setup {
11608 # Called once per file to mark all -bl, -sbl, and -asbl containers
11611 my $rOpts_bl = $rOpts->{'opening-brace-on-new-line'};
11612 my $rOpts_sbl = $rOpts->{'opening-sub-brace-on-new-line'};
11613 my $rOpts_asbl = $rOpts->{'opening-anonymous-sub-brace-on-new-line'};
11614 return unless ( $rOpts_bl || $rOpts_sbl || $rOpts_asbl );
11616 my $rLL = $self->[_rLL_];
11617 return unless ( defined($rLL) && @{$rLL} );
11619 # We will turn on this hash for braces controlled by these flags:
11620 my $rbrace_left = $self->[_rbrace_left_];
11622 my $rblock_type_of_seqno = $self->[_rblock_type_of_seqno_];
11623 my $ris_asub_block = $self->[_ris_asub_block_];
11624 my $ris_sub_block = $self->[_ris_sub_block_];
11625 foreach my $seqno ( keys %{$rblock_type_of_seqno} ) {
11627 my $block_type = $rblock_type_of_seqno->{$seqno};
11629 # use -asbl flag for an anonymous sub block
11630 if ( $ris_asub_block->{$seqno} ) {
11632 $rbrace_left->{$seqno} = 1;
11636 # use -sbl flag for a named sub
11637 elsif ( $ris_sub_block->{$seqno} ) {
11639 $rbrace_left->{$seqno} = 1;
11643 # use -bl flag if not a sub block of any type
11646 && $block_type =~ /$bl_pattern/
11647 && $block_type !~ /$bl_exclusion_pattern/ )
11649 $rbrace_left->{$seqno} = 1;
11654 } ## end sub braces_left_setup
11656 sub bli_adjustment {
11658 # Called once per file to implement the --brace-left-and-indent option.
11659 # If -bli is set, adds one continuation indentation for certain braces
11661 return unless ( $rOpts->{'brace-left-and-indent'} );
11662 my $rLL = $self->[_rLL_];
11663 return unless ( defined($rLL) && @{$rLL} );
11665 my $rblock_type_of_seqno = $self->[_rblock_type_of_seqno_];
11666 my $ris_bli_container = $self->[_ris_bli_container_];
11667 my $rbrace_left = $self->[_rbrace_left_];
11668 my $K_opening_container = $self->[_K_opening_container_];
11669 my $K_closing_container = $self->[_K_closing_container_];
11671 foreach my $seqno ( keys %{$rblock_type_of_seqno} ) {
11672 my $block_type = $rblock_type_of_seqno->{$seqno};
11674 && $block_type =~ /$bli_pattern/
11675 && $block_type !~ /$bli_exclusion_pattern/ )
11677 $ris_bli_container->{$seqno} = 1;
11678 $rbrace_left->{$seqno} = 1;
11679 my $Ko = $K_opening_container->{$seqno};
11680 my $Kc = $K_closing_container->{$seqno};
11681 if ( defined($Ko) && defined($Kc) ) {
11682 $rLL->[$Kc]->[_CI_LEVEL_] = ++$rLL->[$Ko]->[_CI_LEVEL_];
11687 } ## end sub bli_adjustment
11689 sub find_multiline_qw {
11691 my ( $self, $rqw_lines ) = @_;
11693 # Multiline qw quotes are not sequenced items like containers { [ (
11694 # but behave in some respects in a similar way. So this routine finds them
11695 # and creates a separate sequence number system for later use.
11697 # This is straightforward because they always begin at the end of one line
11698 # and end at the beginning of a later line. This is true no matter how we
11699 # finally make our line breaks, so we can find them before deciding on new
11703 # if $rqw_lines is defined it is a ref to array of all line index numbers
11704 # for which there is a type 'q' qw quote at either end of the line. This
11705 # was defined by sub resync_lines_and_tokens for efficiency.
11708 my $rlines = $self->[_rlines_];
11710 # if $rqw_lines is not defined (this will occur with -io option) then we
11711 # will have to scan all lines.
11712 if ( !defined($rqw_lines) ) {
11713 $rqw_lines = [ 0 .. @{$rlines} - 1 ];
11716 # if $rqw_lines is defined but empty, just return because there are no
11719 if ( !@{$rqw_lines} ) { return }
11722 my $rstarting_multiline_qw_seqno_by_K = {};
11723 my $rending_multiline_qw_seqno_by_K = {};
11724 my $rKrange_multiline_qw_by_seqno = {};
11725 my $rmultiline_qw_has_extra_level = {};
11727 my $ris_excluded_lp_container = $self->[_ris_excluded_lp_container_];
11729 my $rLL = $self->[_rLL_];
11731 my $num_qw_seqno = 0;
11732 my $K_start_multiline_qw;
11734 # For reference, here is the old loop, before $rqw_lines became available:
11735 ## foreach my $line_of_tokens ( @{$rlines} ) {
11736 foreach my $iline ( @{$rqw_lines} ) {
11737 my $line_of_tokens = $rlines->[$iline];
11739 # Note that these first checks are required in case we have to scan
11740 # all lines, not just lines with type 'q' at the ends.
11741 my $line_type = $line_of_tokens->{_line_type};
11742 next unless ( $line_type eq 'CODE' );
11743 my $rK_range = $line_of_tokens->{_rK_range};
11744 my ( $Kfirst, $Klast ) = @{$rK_range};
11745 next unless ( defined($Kfirst) && defined($Klast) ); # skip blank line
11747 # Continuing a sequence of qw lines ...
11748 if ( defined($K_start_multiline_qw) ) {
11749 my $type = $rLL->[$Kfirst]->[_TYPE_];
11752 if ( $type ne 'q' ) {
11753 DEVEL_MODE && print STDERR <<EOM;
11754 STRANGE: started multiline qw at K=$K_start_multiline_qw but didn't see q qw at K=$Kfirst\n";
11756 $K_start_multiline_qw = undef;
11759 my $Kprev = $self->K_previous_nonblank($Kfirst);
11760 my $Knext = $self->K_next_nonblank($Kfirst);
11761 my $type_m = defined($Kprev) ? $rLL->[$Kprev]->[_TYPE_] : 'b';
11762 my $type_p = defined($Knext) ? $rLL->[$Knext]->[_TYPE_] : 'b';
11763 if ( $type_m eq 'q' && $type_p ne 'q' ) {
11764 $rending_multiline_qw_seqno_by_K->{$Kfirst} = $qw_seqno;
11765 $rKrange_multiline_qw_by_seqno->{$qw_seqno} =
11766 [ $K_start_multiline_qw, $Kfirst ];
11767 $K_start_multiline_qw = undef;
11772 # Starting a new a sequence of qw lines ?
11773 if ( !defined($K_start_multiline_qw)
11774 && $rLL->[$Klast]->[_TYPE_] eq 'q' )
11776 my $Kprev = $self->K_previous_nonblank($Klast);
11777 my $Knext = $self->K_next_nonblank($Klast);
11778 my $type_m = defined($Kprev) ? $rLL->[$Kprev]->[_TYPE_] : 'b';
11779 my $type_p = defined($Knext) ? $rLL->[$Knext]->[_TYPE_] : 'b';
11780 if ( $type_m ne 'q' && $type_p eq 'q' ) {
11782 $qw_seqno = 'q' . $num_qw_seqno;
11783 $K_start_multiline_qw = $Klast;
11784 $rstarting_multiline_qw_seqno_by_K->{$Klast} = $qw_seqno;
11789 # Give multiline qw lists extra indentation instead of CI. This option
11790 # works well but is currently only activated when the -xci flag is set.
11791 # The reason is to avoid unexpected changes in formatting.
11792 if ($rOpts_extended_continuation_indentation) {
11793 while ( my ( $qw_seqno_x, $rKrange ) =
11794 each %{$rKrange_multiline_qw_by_seqno} )
11796 my ( $Kbeg, $Kend ) = @{$rKrange};
11798 # require isolated closing token
11799 my $token_end = $rLL->[$Kend]->[_TOKEN_];
11801 unless ( length($token_end) == 1
11802 && ( $is_closing_token{$token_end} || $token_end eq '>' ) );
11804 # require isolated opening token
11805 my $token_beg = $rLL->[$Kbeg]->[_TOKEN_];
11807 # allow space(s) after the qw
11808 if ( length($token_beg) > 3 && substr( $token_beg, 2, 1 ) =~ m/\s/ )
11810 $token_beg =~ s/\s+//;
11813 next unless ( length($token_beg) == 3 );
11815 foreach my $KK ( $Kbeg + 1 .. $Kend - 1 ) {
11816 $rLL->[$KK]->[_LEVEL_]++;
11817 $rLL->[$KK]->[_CI_LEVEL_] = 0;
11820 # set flag for -wn option, which will remove the level
11821 $rmultiline_qw_has_extra_level->{$qw_seqno_x} = 1;
11825 # For the -lp option we need to mark all parent containers of
11827 if ( $rOpts_line_up_parentheses && !$rOpts_extended_line_up_parentheses ) {
11829 while ( my ( $qw_seqno_x, $rKrange ) =
11830 each %{$rKrange_multiline_qw_by_seqno} )
11832 my ( $Kbeg, $Kend ) = @{$rKrange};
11833 my $parent_seqno = $self->parent_seqno_by_K($Kend);
11834 next unless ($parent_seqno);
11836 # If the parent container exactly surrounds this qw, then -lp
11837 # formatting seems to work so we will not mark it.
11838 my $is_tightly_contained;
11839 my $Kn = $self->K_next_nonblank($Kend);
11840 my $seqno_n = defined($Kn) ? $rLL->[$Kn]->[_TYPE_SEQUENCE_] : undef;
11841 if ( defined($seqno_n) && $seqno_n eq $parent_seqno ) {
11843 my $Kp = $self->K_previous_nonblank($Kbeg);
11845 defined($Kp) ? $rLL->[$Kp]->[_TYPE_SEQUENCE_] : undef;
11846 if ( defined($seqno_p) && $seqno_p eq $parent_seqno ) {
11847 $is_tightly_contained = 1;
11851 $ris_excluded_lp_container->{$parent_seqno} = 1
11852 unless ($is_tightly_contained);
11854 # continue up the tree marking parent containers
11856 $parent_seqno = $self->[_rparent_of_seqno_]->{$parent_seqno};
11858 unless ( defined($parent_seqno)
11859 && $parent_seqno ne SEQ_ROOT );
11860 $ris_excluded_lp_container->{$parent_seqno} = 1;
11865 $self->[_rstarting_multiline_qw_seqno_by_K_] =
11866 $rstarting_multiline_qw_seqno_by_K;
11867 $self->[_rending_multiline_qw_seqno_by_K_] =
11868 $rending_multiline_qw_seqno_by_K;
11869 $self->[_rKrange_multiline_qw_by_seqno_] = $rKrange_multiline_qw_by_seqno;
11870 $self->[_rmultiline_qw_has_extra_level_] = $rmultiline_qw_has_extra_level;
11873 } ## end sub find_multiline_qw
11875 use constant DEBUG_COLLAPSED_LENGTHS => 0;
11877 # Minimum space reserved for contents of a code block. A value of 40 has given
11878 # reasonable results. With a large line length, say -l=120, this will not
11879 # normally be noticeable but it will prevent making a mess in some edge cases.
11880 use constant MIN_BLOCK_LEN => 40;
11882 my %is_handle_type;
11885 my @q = qw( w C U G i k => );
11886 @is_handle_type{@q} = (1) x scalar(@q);
11890 _max_prong_len_ => $i++,
11891 _handle_len_ => $i++,
11896 _interrupted_list_rule_ => $i++,
11900 sub xlp_collapsed_lengths {
11904 #----------------------------------------------------------------
11905 # Define the collapsed lengths of containers for -xlp indentation
11906 #----------------------------------------------------------------
11908 # We need an estimate of the minimum required line length starting at any
11909 # opening container for the -xlp style. This is needed to avoid using too
11910 # much indentation space for lower level containers and thereby running
11911 # out of space for outer container tokens due to the maximum line length
11914 # The basic idea is that at each node in the tree we imagine that we have a
11915 # fork with a handle and collapsible prongs:
11919 # ------------|-------
11920 # handle |------------
11924 # Each prong has a minimum collapsed length. The collapsed length at a node
11925 # is the maximum of these minimum lengths, plus the handle length. Each of
11926 # the prongs may itself be a tree node.
11928 # This is just a rough calculation to get an approximate starting point for
11929 # indentation. Later routines will be more precise. It is important that
11930 # these estimates be independent of the line breaks of the input stream in
11931 # order to avoid instabilities.
11933 my $rLL = $self->[_rLL_];
11934 my $Klimit = $self->[_Klimit_];
11935 my $rlines = $self->[_rlines_];
11936 my $K_opening_container = $self->[_K_opening_container_];
11937 my $K_closing_container = $self->[_K_closing_container_];
11938 my $rblock_type_of_seqno = $self->[_rblock_type_of_seqno_];
11939 my $rcollapsed_length_by_seqno = $self->[_rcollapsed_length_by_seqno_];
11940 my $ris_excluded_lp_container = $self->[_ris_excluded_lp_container_];
11941 my $ris_permanently_broken = $self->[_ris_permanently_broken_];
11942 my $ris_list_by_seqno = $self->[_ris_list_by_seqno_];
11943 my $rhas_broken_list = $self->[_rhas_broken_list_];
11944 my $rtype_count_by_seqno = $self->[_rtype_count_by_seqno_];
11946 my $K_start_multiline_qw;
11947 my $level_start_multiline_qw = 0;
11948 my $max_prong_len = 0;
11949 my $handle_len_x = 0;
11952 my $last_nonblank_type = 'b';
11954 [ $max_prong_len, $handle_len_x, SEQ_ROOT, undef, undef, undef, undef ];
11956 #--------------------------------
11957 # Loop over all lines in the file
11958 #--------------------------------
11960 my $skip_next_line;
11961 foreach my $line_of_tokens ( @{$rlines} ) {
11963 if ($skip_next_line) {
11964 $skip_next_line = 0;
11967 my $line_type = $line_of_tokens->{_line_type};
11968 next if ( $line_type ne 'CODE' );
11969 my $CODE_type = $line_of_tokens->{_code_type};
11971 # Always skip blank lines
11972 next if ( $CODE_type eq 'BL' );
11974 # Note on other line types:
11975 # 'FS' (Format Skipping) lines may contain opening/closing tokens so
11976 # we have to process them to keep the stack correctly sequenced.
11977 # 'VB' (Verbatim) lines could be skipped, but testing shows that
11978 # results look better if we include their lengths.
11980 # Also note that we could exclude -xlp formatting of containers with
11981 # 'FS' and 'VB' lines, but in testing that was not really beneficial.
11983 # So we process tokens in 'FS' and 'VB' lines like all the rest...
11985 my $rK_range = $line_of_tokens->{_rK_range};
11986 my ( $K_first, $K_last ) = @{$rK_range};
11987 next unless ( defined($K_first) && defined($K_last) );
11989 my $has_comment = $rLL->[$K_last]->[_TYPE_] eq '#';
11991 # Always ignore block comments
11992 next if ( $has_comment && $K_first == $K_last );
11994 # Handle an intermediate line of a multiline qw quote. These may
11995 # require including some -ci or -i spaces. See cases c098/x063.
11996 # Updated to check all lines (not just $K_first==$K_last) to fix b1316
11997 my $K_begin_loop = $K_first;
11998 if ( $rLL->[$K_first]->[_TYPE_] eq 'q' ) {
12001 my $level = $rLL->[$KK]->[_LEVEL_];
12002 my $ci_level = $rLL->[$KK]->[_CI_LEVEL_];
12004 # remember the level of the start
12005 if ( !defined($K_start_multiline_qw) ) {
12006 $K_start_multiline_qw = $K_first;
12007 $level_start_multiline_qw = $level;
12009 $self->[_rstarting_multiline_qw_seqno_by_K_]
12010 ->{$K_start_multiline_qw};
12011 if ( !$seqno_qw ) {
12012 my $Kp = $self->K_previous_nonblank($K_first);
12013 if ( defined($Kp) && $rLL->[$Kp]->[_TYPE_] eq 'q' ) {
12015 $K_start_multiline_qw = $Kp;
12016 $level_start_multiline_qw =
12017 $rLL->[$K_start_multiline_qw]->[_LEVEL_];
12021 # Fix for b1319, b1320
12022 $K_start_multiline_qw = undef;
12027 if ( defined($K_start_multiline_qw) ) {
12028 $len = $rLL->[$KK]->[_CUMULATIVE_LENGTH_] -
12029 $rLL->[ $KK - 1 ]->[_CUMULATIVE_LENGTH_];
12031 # We may have to add the spaces of one level or ci level ... it
12032 # depends depends on the -xci flag, the -wn flag, and if the qw
12033 # uses a container token as the quote delimiter.
12035 # First rule: add ci if there is a $ci_level
12037 $len += $rOpts_continuation_indentation;
12040 # Second rule: otherwise, look for an extra indentation level
12041 # from the start and add one indentation level if found.
12042 elsif ( $level > $level_start_multiline_qw ) {
12043 $len += $rOpts_indent_columns;
12046 if ( $len > $max_prong_len ) { $max_prong_len = $len }
12048 $last_nonblank_type = 'q';
12050 $K_begin_loop = $K_first + 1;
12052 # We can skip to the next line if more tokens
12053 next if ( $K_begin_loop > $K_last );
12057 $K_start_multiline_qw = undef;
12059 # Find the terminal token, before any side comment
12060 my $K_terminal = $K_last;
12061 if ($has_comment) {
12064 if ( $rLL->[$K_terminal]->[_TYPE_] eq 'b'
12065 && $K_terminal > $K_first );
12068 # Use length to terminal comma if interrupted list rule applies
12069 if ( @stack && $stack[-1]->[_interrupted_list_rule_] ) {
12070 my $K_c = $stack[-1]->[_K_c_];
12071 if ( defined($K_c) ) {
12073 #--------------------------------------------------------------
12074 # BEGIN patch for issue b1408: If this line ends in an opening
12075 # token, look for the closing token and comma at the end of the
12076 # next line. If so, combine the two lines to get the correct
12077 # sums. This problem seems to require -xlp -vtc=2 and blank
12079 #--------------------------------------------------------------
12080 if ( $rLL->[$K_terminal]->[_TYPE_] eq '{' && !$has_comment ) {
12081 my $seqno_end = $rLL->[$K_terminal]->[_TYPE_SEQUENCE_];
12082 my $Kc_test = $rLL->[$K_terminal]->[_KNEXT_SEQ_ITEM_];
12084 # We are looking for a short broken remnant on the next
12085 # line; something like the third line here (b1408):
12087 # Moose::Util::TypeConstraints::find_type_constraint(
12091 # Help::WorkSubmitter->_filter_chores_and_maybe_warn_user(
12092 # $story_set_all_chores),
12093 if ( defined($Kc_test)
12094 && $seqno_end == $rLL->[$Kc_test]->[_TYPE_SEQUENCE_]
12095 && $rLL->[$Kc_test]->[_LINE_INDEX_] == $iline + 1 )
12097 my $line_of_tokens_next = $rlines->[ $iline + 1 ];
12098 my $rtype_count = $rtype_count_by_seqno->{$seqno_end};
12100 defined($rtype_count) ? $rtype_count->{','} : 0;
12101 my ( $K_first_next, $K_terminal_next ) =
12102 @{ $line_of_tokens_next->{_rK_range} };
12104 # NOTE: Do not try to do this if there is a side comment
12105 # because then the instability does not seem to occur.
12107 defined($K_terminal_next)
12109 # next line ends with a comma
12110 && $rLL->[$K_terminal_next]->[_TYPE_] eq ','
12112 # which follows the closing container token
12114 $K_terminal_next - $Kc_test == 1
12115 || ( $K_terminal_next - $Kc_test == 2
12116 && $rLL->[ $K_terminal_next - 1 ]->[_TYPE_]
12120 # no commas in the container
12121 && ( !defined($rtype_count)
12122 || !$rtype_count->{','} )
12124 # for now, restrict this to a container with just 1
12126 && $K_terminal_next - $K_terminal <= 5
12131 # combine the next line with the current line
12132 $K_terminal = $K_terminal_next;
12133 $skip_next_line = 1;
12134 if (DEBUG_COLLAPSED_LENGTHS) {
12135 print "Combining lines at line $iline\n";
12141 #--------------------------
12142 # END patch for issue b1408
12143 #--------------------------
12146 $rLL->[$K_terminal]->[_TYPE_] eq ','
12148 # Ignore if terminal comma, causes instability (b1297, b1330)
12150 $K_c - $K_terminal > 2
12151 || ( $K_c - $K_terminal == 2
12152 && $rLL->[ $K_terminal + 1 ]->[_TYPE_] ne 'b' )
12157 # changed $len to my $leng to fix b1302 b1306 b1317 b1321
12158 my $leng = $rLL->[$K_terminal]->[_CUMULATIVE_LENGTH_] -
12159 $rLL->[ $K_first - 1 ]->[_CUMULATIVE_LENGTH_];
12161 # Fix for b1331: at a broken => item, include the length of
12162 # the previous half of the item plus one for the missing
12164 if ( $last_nonblank_type eq '=>' ) {
12167 if ( $leng > $max_prong_len ) { $max_prong_len = $leng }
12172 #----------------------------------
12173 # Loop over tokens on this line ...
12174 #----------------------------------
12175 foreach my $KK ( $K_begin_loop .. $K_terminal ) {
12177 my $type = $rLL->[$KK]->[_TYPE_];
12178 next if ( $type eq 'b' );
12180 #------------------------
12181 # Handle sequenced tokens
12182 #------------------------
12183 my $seqno = $rLL->[$KK]->[_TYPE_SEQUENCE_];
12186 my $token = $rLL->[$KK]->[_TOKEN_];
12188 #----------------------------
12189 # Entering a new container...
12190 #----------------------------
12191 if ( $is_opening_token{$token}
12192 && defined( $K_closing_container->{$seqno} ) )
12195 # save current prong length
12196 $stack[-1]->[_max_prong_len_] = $max_prong_len;
12197 $max_prong_len = 0;
12199 # Start new prong one level deeper
12200 my $handle_len = 0;
12201 if ( $rblock_type_of_seqno->{$seqno} ) {
12203 # code blocks do not use -lp indentation, but behave as
12204 # if they had a handle of one indentation length
12205 $handle_len = $rOpts_indent_columns;
12208 elsif ( $is_handle_type{$last_nonblank_type} ) {
12209 $handle_len = $len;
12211 if ( $KK > 0 && $rLL->[ $KK - 1 ]->[_TYPE_] eq 'b' );
12214 # Set a flag if the 'Interrupted List Rule' will be applied
12215 # (see sub copy_old_breakpoints).
12216 # - Added check on has_broken_list to fix issue b1298
12218 my $interrupted_list_rule =
12219 $ris_permanently_broken->{$seqno}
12220 && $ris_list_by_seqno->{$seqno}
12221 && !$rhas_broken_list->{$seqno}
12222 && !$rOpts_ignore_old_breakpoints;
12224 # NOTES: Since we are looking at old line numbers we have
12225 # to be very careful not to introduce an instability.
12227 # This following causes instability (b1288-b1296):
12228 # $interrupted_list_rule ||=
12229 # $rOpts_break_at_old_comma_breakpoints;
12231 # - We could turn off the interrupted list rule if there is
12232 # a broken sublist, to follow 'Compound List Rule 1'.
12233 # - We could use the _rhas_broken_list_ flag for this.
12234 # - But it seems safer not to do this, to avoid
12235 # instability, since the broken sublist could be
12236 # temporary. It seems better to let the formatting
12237 # stabilize by itself after one or two iterations.
12238 # - So, not doing this for now
12240 # Turn off the interrupted list rule if -vmll is set and a
12241 # list has '=>' characters. This avoids instabilities due
12242 # to dependence on old line breaks; issue b1325.
12243 if ( $interrupted_list_rule
12244 && $rOpts_variable_maximum_line_length )
12246 my $rtype_count = $rtype_count_by_seqno->{$seqno};
12247 if ( $rtype_count && $rtype_count->{'=>'} ) {
12248 $interrupted_list_rule = 0;
12252 # Include length to a comma ending this line
12253 # note: any side comments are handled at loop end (b1332)
12254 if ( $interrupted_list_rule
12255 && $rLL->[$K_terminal]->[_TYPE_] eq ',' )
12257 my $Kend = $K_terminal;
12259 # Measure from the next blank if any (fixes b1301)
12261 if ( $rLL->[ $Kbeg + 1 ]->[_TYPE_] eq 'b'
12267 my $leng = $rLL->[$Kend]->[_CUMULATIVE_LENGTH_] -
12268 $rLL->[$Kbeg]->[_CUMULATIVE_LENGTH_];
12269 if ( $leng > $max_prong_len ) { $max_prong_len = $leng }
12272 my $K_c = $K_closing_container->{$seqno};
12276 $max_prong_len, $handle_len,
12279 $interrupted_list_rule
12283 #--------------------
12284 # Exiting a container
12285 #--------------------
12286 elsif ( $is_closing_token{$token} && @stack ) {
12288 # The current prong ends - get its handle
12289 my $item = pop @stack;
12290 my $handle_len = $item->[_handle_len_];
12291 my $seqno_o = $item->[_seqno_o_];
12292 my $iline_o = $item->[_iline_o_];
12293 my $K_o = $item->[_K_o_];
12294 my $K_c_expect = $item->[_K_c_];
12295 my $collapsed_len = $max_prong_len;
12297 if ( $seqno_o ne $seqno ) {
12299 # This can happen if input file has brace errors.
12300 # Otherwise it shouldn't happen. Not fatal but -lp
12301 # formatting could get messed up.
12302 if ( DEVEL_MODE && !get_saw_brace_error() ) {
12304 sequence numbers differ; at CLOSING line $iline, seq=$seqno, Kc=$KK .. at OPENING line $iline_o, seq=$seqno_o, Ko=$K_o, expecting Kc=$K_c_expect
12309 #------------------------------------------
12310 # Rules to avoid scrunching code blocks ...
12311 #------------------------------------------
12313 # c098/x107 x108 x110 x112 x114 x115 x117 x118 x119
12314 my $block_type = $rblock_type_of_seqno->{$seqno};
12318 my $block_length = MIN_BLOCK_LEN;
12319 my $is_one_line_block;
12320 my $level = $rLL->[$K_o]->[_LEVEL_];
12321 if ( defined($K_o) && defined($K_c) ) {
12323 # note: fixed 3 May 2022 (removed 'my')
12325 $rLL->[ $K_c - 1 ]->[_CUMULATIVE_LENGTH_] -
12326 $rLL->[$K_o]->[_CUMULATIVE_LENGTH_];
12327 $is_one_line_block = $iline == $iline_o;
12330 # Code block rule 1: Use the total block length if
12331 # it is less than the minimum.
12332 if ( $block_length < MIN_BLOCK_LEN ) {
12333 $collapsed_len = $block_length;
12336 # Code block rule 2: Use the full length of a
12337 # one-line block to avoid breaking it, unless
12338 # extremely long. We do not need to do a precise
12339 # check here, because if it breaks then it will
12340 # stay broken on later iterations.
12344 $maximum_line_length_at_level[$level]
12346 # But skip this for sort/map/grep/eval blocks
12347 # because they can reform (b1345)
12348 && !$is_sort_map_grep_eval{$block_type}
12351 $collapsed_len = $block_length;
12354 # Code block rule 3: Otherwise the length should be
12355 # at least MIN_BLOCK_LEN to avoid scrunching code
12357 elsif ( $collapsed_len < MIN_BLOCK_LEN ) {
12358 $collapsed_len = MIN_BLOCK_LEN;
12362 # Store the result. Some extra space, '2', allows for
12363 # length of an opening token, inside space, comma, ...
12364 # This constant has been tuned to give good overall
12366 $collapsed_len += 2;
12367 $rcollapsed_length_by_seqno->{$seqno} = $collapsed_len;
12369 # Restart scanning the lower level prong
12371 $max_prong_len = $stack[-1]->[_max_prong_len_];
12372 $collapsed_len += $handle_len;
12373 if ( $collapsed_len > $max_prong_len ) {
12374 $max_prong_len = $collapsed_len;
12379 # it is a ternary - no special processing for these yet
12385 $last_nonblank_type = $type;
12389 #----------------------------
12390 # Handle non-container tokens
12391 #----------------------------
12392 my $token_length = $rLL->[$KK]->[_TOKEN_LENGTH_];
12394 # Count lengths of things like 'xx => yy' as a single item
12395 if ( $type eq '=>' ) {
12396 $len += $token_length + 1;
12397 if ( $len > $max_prong_len ) { $max_prong_len = $len }
12399 elsif ( $last_nonblank_type eq '=>' ) {
12400 $len += $token_length;
12401 if ( $len > $max_prong_len ) { $max_prong_len = $len }
12403 # but only include one => per item
12404 $len = $token_length;
12407 # include everything to end of line after a here target
12408 elsif ( $type eq 'h' ) {
12409 $len = $rLL->[$K_last]->[_CUMULATIVE_LENGTH_] -
12410 $rLL->[ $KK - 1 ]->[_CUMULATIVE_LENGTH_];
12411 if ( $len > $max_prong_len ) { $max_prong_len = $len }
12414 # for everything else just use the token length
12416 $len = $token_length;
12417 if ( $len > $max_prong_len ) { $max_prong_len = $len }
12419 $last_nonblank_type = $type;
12421 } ## end loop over tokens on this line
12423 # Now take care of any side comment;
12424 if ($has_comment) {
12425 if ($rOpts_ignore_side_comment_lengths) {
12430 # For a side comment when -iscl is not set, measure length from
12431 # the start of the previous nonblank token
12434 ? $rLL->[ $K_terminal - 1 ]->[_CUMULATIVE_LENGTH_]
12436 $len = $rLL->[$K_last]->[_CUMULATIVE_LENGTH_] - $len0;
12437 if ( $len > $max_prong_len ) { $max_prong_len = $len }
12441 } ## end loop over lines
12443 if (DEBUG_COLLAPSED_LENGTHS) {
12444 print "\nCollapsed lengths--\n";
12446 my $key ( sort { $a <=> $b } keys %{$rcollapsed_length_by_seqno} )
12448 my $clen = $rcollapsed_length_by_seqno->{$key};
12449 print "$key -> $clen\n";
12454 } ## end sub xlp_collapsed_lengths
12456 sub is_excluded_lp {
12458 # Decide if this container is excluded by user request:
12459 # returns true if this token is excluded (i.e., may not use -lp)
12460 # returns false otherwise
12462 # The control hash can either describe:
12463 # what to exclude: $line_up_parentheses_control_is_lxpl = 1, or
12464 # what to include: $line_up_parentheses_control_is_lxpl = 0
12467 # $KK = index of the container opening token
12469 my ( $self, $KK ) = @_;
12470 my $rLL = $self->[_rLL_];
12471 my $rtoken_vars = $rLL->[$KK];
12472 my $token = $rtoken_vars->[_TOKEN_];
12473 my $rflags = $line_up_parentheses_control_hash{$token};
12475 #-----------------------------------------------
12476 # TEST #1: check match to listed container types
12477 #-----------------------------------------------
12478 if ( !defined($rflags) ) {
12480 # There is no entry for this container, so we are done
12481 return !$line_up_parentheses_control_is_lxpl;
12484 my ( $flag1, $flag2 ) = @{$rflags};
12486 #-----------------------------------------------------------
12487 # TEST #2: check match to flag1, the preceding nonblank word
12488 #-----------------------------------------------------------
12489 my $match_flag1 = !defined($flag1) || $flag1 eq '*';
12490 if ( !$match_flag1 ) {
12492 # Find the previous token
12493 my ( $is_f, $is_k, $is_w );
12494 my $Kp = $self->K_previous_nonblank($KK);
12495 if ( defined($Kp) ) {
12496 my $type_p = $rLL->[$Kp]->[_TYPE_];
12497 my $seqno = $rtoken_vars->[_TYPE_SEQUENCE_];
12500 $is_k = $type_p eq 'k';
12503 $is_f = $self->[_ris_function_call_paren_]->{$seqno};
12505 # either keyword or function call?
12506 $is_w = $is_k || $is_f;
12509 # Check for match based on flag1 and the previous token:
12510 if ( $flag1 eq 'k' ) { $match_flag1 = $is_k }
12511 elsif ( $flag1 eq 'K' ) { $match_flag1 = !$is_k }
12512 elsif ( $flag1 eq 'f' ) { $match_flag1 = $is_f }
12513 elsif ( $flag1 eq 'F' ) { $match_flag1 = !$is_f }
12514 elsif ( $flag1 eq 'w' ) { $match_flag1 = $is_w }
12515 elsif ( $flag1 eq 'W' ) { $match_flag1 = !$is_w }
12516 ## else { no match found }
12519 # See if we can exclude this based on the flag1 test...
12520 if ($line_up_parentheses_control_is_lxpl) {
12521 return 1 if ($match_flag1);
12524 return 1 if ( !$match_flag1 );
12527 #-------------------------------------------------------------
12528 # TEST #3: exclusion based on flag2 and the container contents
12529 #-------------------------------------------------------------
12531 # Note that this is an exclusion test for both -lpxl or -lpil input methods
12533 # 0 or blank: ignore container contents
12534 # 1 exclude non-lists or lists with sublists
12535 # 2 same as 1 but also exclude lists with code blocks
12540 my $seqno = $rtoken_vars->[_TYPE_SEQUENCE_];
12542 my $is_list = $self->[_ris_list_by_seqno_]->{$seqno};
12543 my $has_list = $self->[_rhas_list_]->{$seqno};
12544 my $has_code_block = $self->[_rhas_code_block_]->{$seqno};
12545 my $has_ternary = $self->[_rhas_ternary_]->{$seqno};
12549 || $flag2 eq '2' && ( $has_code_block || $has_ternary ) )
12554 return $match_flag2;
12555 } ## end sub is_excluded_lp
12557 sub set_excluded_lp_containers {
12560 return unless ($rOpts_line_up_parentheses);
12561 my $rLL = $self->[_rLL_];
12562 return unless ( defined($rLL) && @{$rLL} );
12564 my $K_opening_container = $self->[_K_opening_container_];
12565 my $ris_excluded_lp_container = $self->[_ris_excluded_lp_container_];
12566 my $rblock_type_of_seqno = $self->[_rblock_type_of_seqno_];
12568 foreach my $seqno ( keys %{$K_opening_container} ) {
12570 # code blocks are always excluded by the -lp coding so we can skip them
12571 next if ( $rblock_type_of_seqno->{$seqno} );
12573 my $KK = $K_opening_container->{$seqno};
12574 next unless defined($KK);
12576 # see if a user exclusion rule turns off -lp for this container
12577 if ( $self->is_excluded_lp($KK) ) {
12578 $ris_excluded_lp_container->{$seqno} = 1;
12582 } ## end sub set_excluded_lp_containers
12584 ######################################
12585 # CODE SECTION 6: Process line-by-line
12586 ######################################
12588 sub process_all_lines {
12590 #----------------------------------------------------------
12591 # Main loop to format all lines of a file according to type
12592 #----------------------------------------------------------
12595 my $rlines = $self->[_rlines_];
12596 my $sink_object = $self->[_sink_object_];
12597 my $fh_tee = $self->[_fh_tee_];
12598 my $rOpts_keep_old_blank_lines = $rOpts->{'keep-old-blank-lines'};
12599 my $file_writer_object = $self->[_file_writer_object_];
12600 my $logger_object = $self->[_logger_object_];
12601 my $vertical_aligner_object = $self->[_vertical_aligner_object_];
12602 my $save_logfile = $self->[_save_logfile_];
12604 # Flag to prevent blank lines when POD occurs in a format skipping sect.
12605 my $in_format_skipping_section;
12607 # set locations for blanks around long runs of keywords
12608 my $rwant_blank_line_after = $self->keyword_group_scan();
12610 my $line_type = EMPTY_STRING;
12611 my $i_last_POD_END = -10;
12613 foreach my $line_of_tokens ( @{$rlines} ) {
12615 # insert blank lines requested for keyword sequences
12616 if ( defined( $rwant_blank_line_after->{$i} )
12617 && $rwant_blank_line_after->{$i} == 1 )
12619 $self->want_blank_line();
12624 my $last_line_type = $line_type;
12625 $line_type = $line_of_tokens->{_line_type};
12626 my $input_line = $line_of_tokens->{_line_text};
12628 # _line_type codes are:
12629 # SYSTEM - system-specific code before hash-bang line
12630 # CODE - line of perl code (including comments)
12631 # POD_START - line starting pod, such as '=head'
12632 # POD - pod documentation text
12633 # POD_END - last line of pod section, '=cut'
12634 # HERE - text of here-document
12635 # HERE_END - last line of here-doc (target word)
12636 # FORMAT - format section
12637 # FORMAT_END - last line of format section, '.'
12638 # SKIP - code skipping section
12639 # SKIP_END - last line of code skipping section, '#>>V'
12640 # DATA_START - __DATA__ line
12641 # DATA - unidentified text following __DATA__
12642 # END_START - __END__ line
12643 # END - unidentified text following __END__
12644 # ERROR - we are in big trouble, probably not a perl script
12646 # put a blank line after an =cut which comes before __END__ and __DATA__
12647 # (required by podchecker)
12648 if ( $last_line_type eq 'POD_END' && !$self->[_saw_END_or_DATA_] ) {
12649 $i_last_POD_END = $i;
12650 $file_writer_object->reset_consecutive_blank_lines();
12651 if ( !$in_format_skipping_section && $input_line !~ /^\s*$/ ) {
12652 $self->want_blank_line();
12656 # handle line of code..
12657 if ( $line_type eq 'CODE' ) {
12659 my $CODE_type = $line_of_tokens->{_code_type};
12660 $in_format_skipping_section = $CODE_type eq 'FS';
12662 # Handle blank lines
12663 if ( $CODE_type eq 'BL' ) {
12665 # Keep this blank? Start with the flag -kbl=n, where
12666 # n=0 ignore all old blank lines
12667 # n=1 stable: keep old blanks, but limited by -mbl=n
12668 # n=2 keep all old blank lines, regardless of -mbl=n
12669 # If n=0 we delete all old blank lines and let blank line
12670 # rules generate any needed blank lines.
12671 my $kgb_keep = $rOpts_keep_old_blank_lines;
12673 # Then delete lines requested by the keyword-group logic if
12675 if ( $kgb_keep == 1
12676 && defined( $rwant_blank_line_after->{$i} )
12677 && $rwant_blank_line_after->{$i} == 2 )
12682 # But always keep a blank line following an =cut
12683 if ( $i - $i_last_POD_END < 3 && !$kgb_keep ) {
12688 $self->flush($CODE_type);
12689 $file_writer_object->write_blank_code_line(
12690 $rOpts_keep_old_blank_lines == 2 );
12691 $self->[_last_line_leading_type_] = 'b';
12697 # Let logger see all non-blank lines of code. This is a slow
12698 # operation so we avoid it if it is not going to be saved.
12699 if ( $save_logfile && $logger_object ) {
12700 $logger_object->black_box( $line_of_tokens,
12701 $vertical_aligner_object->get_output_line_number );
12705 # Handle Format Skipping (FS) and Verbatim (VB) Lines
12706 if ( $CODE_type eq 'VB' || $CODE_type eq 'FS' ) {
12707 $self->write_unindented_line("$input_line");
12708 $file_writer_object->reset_consecutive_blank_lines();
12712 # Handle all other lines of code
12713 $self->process_line_of_CODE($line_of_tokens);
12716 # handle line of non-code..
12719 # set special flags
12721 if ( substr( $line_type, 0, 3 ) eq 'POD' ) {
12723 # Pod docs should have a preceding blank line. But stay
12724 # out of __END__ and __DATA__ sections, because
12725 # the user may be using this section for any purpose whatsoever
12726 if ( $rOpts->{'delete-pod'} ) { $skip_line = 1; }
12727 if ( $rOpts->{'trim-pod'} ) { $input_line =~ s/\s+$// }
12729 && !$in_format_skipping_section
12730 && $line_type eq 'POD_START'
12731 && !$self->[_saw_END_or_DATA_] )
12733 $self->want_blank_line();
12737 # leave the blank counters in a predictable state
12738 # after __END__ or __DATA__
12739 elsif ( $line_type eq 'END_START' || $line_type eq 'DATA_START' ) {
12740 $file_writer_object->reset_consecutive_blank_lines();
12741 $self->[_saw_END_or_DATA_] = 1;
12744 # Patch to avoid losing blank lines after a code-skipping block;
12746 elsif ( $line_type eq 'SKIP_END' ) {
12747 $file_writer_object->reset_consecutive_blank_lines();
12750 # write unindented non-code line
12751 if ( !$skip_line ) {
12752 $self->write_unindented_line($input_line);
12758 } ## end sub process_all_lines
12760 sub keyword_group_scan {
12763 #-------------------------------------------------------------------------
12764 # Called once per file to process any --keyword-group-blanks-* parameters.
12765 #-------------------------------------------------------------------------
12767 # Manipulate blank lines around keyword groups (kgb* flags)
12768 # Scan all lines looking for runs of consecutive lines beginning with
12769 # selected keywords. Example keywords are 'my', 'our', 'local', ... but
12770 # they may be anything. We will set flags requesting that blanks be
12771 # inserted around and within them according to input parameters. Note
12772 # that we are scanning the lines as they came in in the input stream, so
12773 # they are not necessarily well formatted.
12775 # The output of this sub is a return hash ref whose keys are the indexes of
12776 # lines after which we desire a blank line. For line index i:
12777 # $rhash_of_desires->{$i} = 1 means we want a blank line AFTER line $i
12778 # $rhash_of_desires->{$i} = 2 means we want blank line $i removed
12779 my $rhash_of_desires = {};
12781 # Nothing to do if no blanks can be output. This test added to fix
12783 if ( !$rOpts_maximum_consecutive_blank_lines ) {
12784 return $rhash_of_desires;
12787 my $Opt_blanks_before = $rOpts->{'keyword-group-blanks-before'}; # '-kgbb'
12788 my $Opt_blanks_after = $rOpts->{'keyword-group-blanks-after'}; # '-kgba'
12789 my $Opt_blanks_inside = $rOpts->{'keyword-group-blanks-inside'}; # '-kgbi'
12790 my $Opt_blanks_delete = $rOpts->{'keyword-group-blanks-delete'}; # '-kgbd'
12791 my $Opt_size = $rOpts->{'keyword-group-blanks-size'}; # '-kgbs'
12793 # A range of sizes can be input with decimal notation like 'min.max' with
12794 # any number of dots between the two numbers. Examples:
12795 # string => min max matches
12796 # 1.1 1 1 exactly 1
12797 # 1.3 1 3 1,2, or 3
12798 # 1..3 1 3 1,2, or 3
12803 my ( $Opt_size_min, $Opt_size_max ) = split /\.+/, $Opt_size;
12804 if ( $Opt_size_min && $Opt_size_min !~ /^\d+$/
12805 || $Opt_size_max && $Opt_size_max !~ /^\d+$/ )
12808 Unexpected value for -kgbs: '$Opt_size'; expecting 'min' or 'min.max';
12809 ignoring all -kgb flags
12812 # Turn this option off so that this message does not keep repeating
12813 # during iterations and other files.
12814 $rOpts->{'keyword-group-blanks-size'} = EMPTY_STRING;
12815 return $rhash_of_desires;
12817 $Opt_size_min = 1 unless ($Opt_size_min);
12819 if ( $Opt_size_max && $Opt_size_max < $Opt_size_min ) {
12820 return $rhash_of_desires;
12823 # codes for $Opt_blanks_before and $Opt_blanks_after:
12824 # 0 = never (delete if exist)
12825 # 1 = stable (keep unchanged)
12826 # 2 = always (insert if missing)
12828 return $rhash_of_desires
12829 unless $Opt_size_min > 0
12830 && ( $Opt_blanks_before != 1
12831 || $Opt_blanks_after != 1
12832 || $Opt_blanks_inside
12833 || $Opt_blanks_delete );
12835 my $Opt_pattern = $keyword_group_list_pattern;
12836 my $Opt_comment_pattern = $keyword_group_list_comment_pattern;
12837 my $Opt_repeat_count =
12838 $rOpts->{'keyword-group-blanks-repeat-count'}; # '-kgbr'
12840 my $rlines = $self->[_rlines_];
12841 my $rLL = $self->[_rLL_];
12842 my $K_closing_container = $self->[_K_closing_container_];
12843 my $K_opening_container = $self->[_K_opening_container_];
12844 my $rK_weld_right = $self->[_rK_weld_right_];
12846 # variables for the current group and subgroups:
12847 my ( $ibeg, $iend, $count, $level_beg, $K_closing, @iblanks, @group,
12851 # ($ibeg, $iend) = starting and ending line indexes of this entire group
12852 # $count = total number of keywords seen in this entire group
12853 # $level_beg = indentation level of this group
12854 # @group = [ $i, $token, $count ] =list of all keywords & blanks
12855 # @subgroup = $j, index of group where token changes
12856 # @iblanks = line indexes of blank lines in input stream in this group
12857 # where i=starting line index
12858 # token (the keyword)
12859 # count = number of this token in this subgroup
12860 # j = index in group where token changes
12862 # These vars will contain values for the most recently seen line:
12863 my ( $line_type, $CODE_type, $K_first, $K_last );
12865 my $number_of_groups_seen = 0;
12867 #-------------------
12868 # helper subroutines
12869 #-------------------
12871 my $insert_blank_after = sub {
12873 $rhash_of_desires->{$i} = 1;
12875 if ( defined( $rhash_of_desires->{$ip} )
12876 && $rhash_of_desires->{$ip} == 2 )
12878 $rhash_of_desires->{$ip} = 0;
12883 my $split_into_sub_groups = sub {
12885 # place blanks around long sub-groups of keywords
12887 return unless ($Opt_blanks_inside);
12889 # loop over sub-groups, index k
12890 push @subgroup, scalar @group;
12892 my $kend = @subgroup - 1;
12893 foreach my $k ( $kbeg .. $kend ) {
12895 # index j runs through all keywords found
12896 my $j_b = $subgroup[ $k - 1 ];
12897 my $j_e = $subgroup[$k] - 1;
12899 # index i is the actual line number of a keyword
12900 my ( $i_b, $tok_b, $count_b ) = @{ $group[$j_b] };
12901 my ( $i_e, $tok_e, $count_e ) = @{ $group[$j_e] };
12902 my $num = $count_e - $count_b + 1;
12904 # This subgroup runs from line $ib to line $ie-1, but may contain
12906 if ( $num >= $Opt_size_min ) {
12908 # if there are blank lines, we require that at least $num lines
12909 # be non-blank up to the boundary with the next subgroup.
12910 my $nog_b = my $nog_e = 1;
12911 if ( @iblanks && !$Opt_blanks_delete ) {
12912 my $j_bb = $j_b + $num - 1;
12913 my ( $i_bb, $tok_bb, $count_bb ) = @{ $group[$j_bb] };
12914 $nog_b = $count_bb - $count_b + 1 == $num;
12916 my $j_ee = $j_e - ( $num - 1 );
12917 my ( $i_ee, $tok_ee, $count_ee ) = @{ $group[$j_ee] };
12918 $nog_e = $count_e - $count_ee + 1 == $num;
12920 if ( $nog_b && $k > $kbeg ) {
12921 $insert_blank_after->( $i_b - 1 );
12923 if ( $nog_e && $k < $kend ) {
12924 my ( $i_ep, $tok_ep, $count_ep ) = @{ $group[ $j_e + 1 ] };
12925 $insert_blank_after->( $i_ep - 1 );
12932 my $delete_if_blank = sub {
12935 # delete line $i if it is blank
12936 return unless ( $i >= 0 && $i < @{$rlines} );
12937 return if ( $rlines->[$i]->{_line_type} ne 'CODE' );
12938 my $code_type = $rlines->[$i]->{_code_type};
12939 if ( $code_type eq 'BL' ) { $rhash_of_desires->{$i} = 2; }
12943 my $delete_inner_blank_lines = sub {
12945 # always remove unwanted trailing blank lines from our list
12946 return unless (@iblanks);
12947 while ( my $ibl = pop(@iblanks) ) {
12948 if ( $ibl < $iend ) { push @iblanks, $ibl; last }
12952 # now mark mark interior blank lines for deletion if requested
12953 return unless ($Opt_blanks_delete);
12955 while ( my $ibl = pop(@iblanks) ) { $rhash_of_desires->{$ibl} = 2 }
12960 my $end_group = sub {
12962 # end a group of keywords
12963 my ($bad_ending) = @_;
12964 if ( defined($ibeg) && $ibeg >= 0 ) {
12966 # then handle sufficiently large groups
12967 if ( $count >= $Opt_size_min ) {
12969 $number_of_groups_seen++;
12971 # do any blank deletions regardless of the count
12972 $delete_inner_blank_lines->();
12975 my $code_type = $rlines->[ $ibeg - 1 ]->{_code_type};
12977 # patch for hash bang line which is not currently marked as
12978 # a comment; mark it as a comment
12979 if ( $ibeg == 1 && !$code_type ) {
12980 my $line_text = $rlines->[ $ibeg - 1 ]->{_line_text};
12982 if ( $line_text && $line_text =~ /^#/ );
12985 # Do not insert a blank after a comment
12986 # (this could be subject to a flag in the future)
12987 if ( $code_type !~ /(BC|SBC|SBCX)/ ) {
12988 if ( $Opt_blanks_before == INSERT ) {
12989 $insert_blank_after->( $ibeg - 1 );
12992 elsif ( $Opt_blanks_before == DELETE ) {
12993 $delete_if_blank->( $ibeg - 1 );
12998 # We will only put blanks before code lines. We could loosen
12999 # this rule a little, but we have to be very careful because
13000 # for example we certainly don't want to drop a blank line
13001 # after a line like this:
13003 if ( $line_type eq 'CODE' && defined($K_first) ) {
13005 # - Do not put a blank before a line of different level
13006 # - Do not put a blank line if we ended the search badly
13007 # - Do not put a blank at the end of the file
13008 # - Do not put a blank line before a hanging side comment
13009 my $level = $rLL->[$K_first]->[_LEVEL_];
13010 my $ci_level = $rLL->[$K_first]->[_CI_LEVEL_];
13012 if ( $level == $level_beg
13015 && $iend < @{$rlines}
13016 && $CODE_type ne 'HSC' )
13018 if ( $Opt_blanks_after == INSERT ) {
13019 $insert_blank_after->($iend);
13021 elsif ( $Opt_blanks_after == DELETE ) {
13022 $delete_if_blank->( $iend + 1 );
13027 $split_into_sub_groups->();
13030 # reset for another group
13034 $K_closing = undef;
13042 my $find_container_end = sub {
13044 # If the keyword line is continued onto subsequent lines, find the
13045 # closing token '$K_closing' so that we can easily skip past the
13046 # contents of the container.
13048 # We only set this value if we find a simple list, meaning
13049 # -contents only one level deep
13052 # First check: skip if next line is not one deeper
13053 my $Knext_nonblank = $self->K_next_nonblank($K_last);
13054 return if ( !defined($Knext_nonblank) );
13055 my $level_next = $rLL->[$Knext_nonblank]->[_LEVEL_];
13056 return if ( $level_next != $level_beg + 1 );
13058 # Find the parent container of the first token on the next line
13059 my $parent_seqno = $self->parent_seqno_by_K($Knext_nonblank);
13060 return unless ( defined($parent_seqno) );
13062 # Must not be a weld (can be unstable)
13064 if ( $total_weld_count && $self->is_welded_at_seqno($parent_seqno) );
13066 # Opening container must exist and be on this line
13067 my $Ko = $K_opening_container->{$parent_seqno};
13068 return unless ( defined($Ko) && $Ko > $K_first && $Ko <= $K_last );
13070 # Verify that the closing container exists and is on a later line
13071 my $Kc = $K_closing_container->{$parent_seqno};
13072 return unless ( defined($Kc) && $Kc > $K_last );
13080 my $add_to_group = sub {
13081 my ( $i, $token, $level ) = @_;
13083 # End the previous group if we have reached the maximum
13085 if ( $Opt_size_max && @group >= $Opt_size_max ) {
13089 if ( @group == 0 ) {
13091 $level_beg = $level;
13099 if ( !@group || $token ne $group[-1]->[1] ) {
13100 push @subgroup, scalar(@group);
13102 push @group, [ $i, $token, $count ];
13104 # remember if this line ends in an open container
13105 $find_container_end->();
13110 #----------------------------------
13111 # loop over all lines of the source
13112 #----------------------------------
13115 foreach my $line_of_tokens ( @{$rlines} ) {
13119 if ( $Opt_repeat_count > 0
13120 && $number_of_groups_seen >= $Opt_repeat_count );
13122 $CODE_type = EMPTY_STRING;
13125 $line_type = $line_of_tokens->{_line_type};
13127 # always end a group at non-CODE
13128 if ( $line_type ne 'CODE' ) { $end_group->(); next }
13130 $CODE_type = $line_of_tokens->{_code_type};
13132 # end any group at a format skipping line
13133 if ( $CODE_type && $CODE_type eq 'FS' ) {
13138 # continue in a verbatim (VB) type; it may be quoted text
13139 if ( $CODE_type eq 'VB' ) {
13140 if ( $ibeg >= 0 ) { $iend = $i; }
13144 # and continue in blank (BL) types
13145 if ( $CODE_type eq 'BL' ) {
13146 if ( $ibeg >= 0 ) {
13148 push @{iblanks}, $i;
13150 # propagate current subgroup token
13151 my $tok = $group[-1]->[1];
13152 push @group, [ $i, $tok, $count ];
13157 # examine the first token of this line
13158 my $rK_range = $line_of_tokens->{_rK_range};
13159 ( $K_first, $K_last ) = @{$rK_range};
13160 if ( !defined($K_first) ) {
13162 # Somewhat unexpected blank line..
13163 # $rK_range is normally defined for line type CODE, but this can
13164 # happen for example if the input line was a single semicolon which
13165 # is being deleted. In that case there was code in the input
13166 # file but it is not being retained. So we can silently return.
13167 return $rhash_of_desires;
13170 my $level = $rLL->[$K_first]->[_LEVEL_];
13171 my $type = $rLL->[$K_first]->[_TYPE_];
13172 my $token = $rLL->[$K_first]->[_TOKEN_];
13173 my $ci_level = $rLL->[$K_first]->[_CI_LEVEL_];
13175 # End a group 'badly' at an unexpected level. This will prevent
13176 # blank lines being incorrectly placed after the end of the group.
13177 # We are looking for any deviation from two acceptable patterns:
13178 # PATTERN 1: a simple list; secondary lines are at level+1
13179 # PATTERN 2: a long statement; all secondary lines same level
13180 # This was added as a fix for case b1177, in which a complex structure
13181 # got incorrectly inserted blank lines.
13182 if ( $ibeg >= 0 ) {
13184 # Check for deviation from PATTERN 1, simple list:
13185 if ( defined($K_closing) && $K_first < $K_closing ) {
13186 $end_group->(1) if ( $level != $level_beg + 1 );
13189 # Check for deviation from PATTERN 2, single statement:
13190 elsif ( $level != $level_beg ) { $end_group->(1) }
13193 # Do not look for keywords in lists ( keyword 'my' can occur in lists,
13194 # see case b760); fixed for c048.
13195 if ( $self->is_list_by_K($K_first) ) {
13196 if ( $ibeg >= 0 ) { $iend = $i }
13200 # see if this is a code type we seek (i.e. comment)
13202 && $Opt_comment_pattern
13203 && $CODE_type =~ /$Opt_comment_pattern/ )
13206 my $tok = $CODE_type;
13208 # Continuing a group
13209 if ( $ibeg >= 0 && $level == $level_beg ) {
13210 $add_to_group->( $i, $tok, $level );
13216 # first end old group if any; we might be starting new
13217 # keywords at different level
13218 if ( $ibeg >= 0 ) { $end_group->(); }
13219 $add_to_group->( $i, $tok, $level );
13224 # See if it is a keyword we seek, but never start a group in a
13225 # continuation line; the code may be badly formatted.
13226 if ( $ci_level == 0
13228 && $token =~ /$Opt_pattern/ )
13231 # Continuing a keyword group
13232 if ( $ibeg >= 0 && $level == $level_beg ) {
13233 $add_to_group->( $i, $token, $level );
13236 # Start new keyword group
13239 # first end old group if any; we might be starting new
13240 # keywords at different level
13241 if ( $ibeg >= 0 ) { $end_group->(); }
13242 $add_to_group->( $i, $token, $level );
13247 # This is not one of our keywords, but we are in a keyword group
13248 # so see if we should continue or quit
13249 elsif ( $ibeg >= 0 ) {
13251 # - bail out on a large level change; we may have walked into a
13252 # data structure or anonymous sub code.
13253 if ( $level > $level_beg + 1 || $level < $level_beg ) {
13258 # - keep going on a continuation line of the same level, since
13259 # it is probably a continuation of our previous keyword,
13260 # - and keep going past hanging side comments because we never
13261 # want to interrupt them.
13262 if ( ( ( $level == $level_beg ) && $ci_level > 0 )
13263 || $CODE_type eq 'HSC' )
13269 # - continue if if we are within in a container which started with
13270 # the line of the previous keyword.
13271 if ( defined($K_closing) && $K_first <= $K_closing ) {
13273 # continue if entire line is within container
13274 if ( $K_last <= $K_closing ) { $iend = $i; next }
13276 # continue at ); or }; or ];
13277 my $KK = $K_closing + 1;
13278 if ( $rLL->[$KK]->[_TYPE_] eq ';' ) {
13279 if ( $KK < $K_last ) {
13280 if ( $rLL->[ ++$KK ]->[_TYPE_] eq 'b' ) { ++$KK }
13281 if ( $KK > $K_last || $rLL->[$KK]->[_TYPE_] ne '#' ) {
13294 # - end the group if none of the above
13299 # not in a keyword group; continue
13303 # end of loop over all lines
13305 return $rhash_of_desires;
13307 } ## end sub keyword_group_scan
13309 #######################################
13310 # CODE SECTION 7: Process lines of code
13311 #######################################
13313 { ## begin closure process_line_of_CODE
13315 # The routines in this closure receive lines of code and combine them into
13316 # 'batches' and send them along. A 'batch' is the unit of code which can be
13317 # processed further as a unit. It has the property that it is the largest
13318 # amount of code into which which perltidy is free to place one or more
13319 # line breaks within it without violating any constraints.
13321 # When a new batch is formed it is sent to sub 'grind_batch_of_code'.
13323 # flags needed by the store routine
13324 my $line_of_tokens;
13325 my $no_internal_newlines;
13328 # range of K of tokens for the current line
13329 my ( $K_first, $K_last );
13331 my ( $rLL, $radjusted_levels, $rparent_of_seqno, $rdepth_of_opening_seqno,
13332 $rblock_type_of_seqno, $ri_starting_one_line_block );
13334 # past stored nonblank tokens and flags
13336 $K_last_nonblank_code, $looking_for_else,
13337 $is_static_block_comment, $last_CODE_type,
13338 $last_line_had_side_comment, $next_parent_seqno,
13342 # Called once at the start of a new file
13343 sub initialize_process_line_of_CODE {
13344 $K_last_nonblank_code = undef;
13345 $looking_for_else = 0;
13346 $is_static_block_comment = 0;
13347 $last_line_had_side_comment = 0;
13348 $next_parent_seqno = SEQ_ROOT;
13349 $next_slevel = undef;
13353 # Batch variables: these describe the current batch of code being formed
13354 # and sent down the pipeline. They are initialized in the next
13357 $rbrace_follower, $index_start_one_line_block,
13358 $starting_in_quote, $ending_in_quote,
13361 # Called before the start of each new batch
13362 sub initialize_batch_variables {
13364 $max_index_to_go = UNDEFINED_INDEX;
13365 $summed_lengths_to_go[0] = 0;
13366 $nesting_depth_to_go[0] = 0;
13367 $ri_starting_one_line_block = [];
13369 # The initialization code for the remaining batch arrays is as follows
13370 # and can be activated for testing. But profiling shows that it is
13371 # time-consuming to re-initialize the batch arrays and is not necessary
13372 # because the maximum valid token, $max_index_to_go, is carefully
13373 # controlled. This means however that it is not possible to do any
13374 # type of filter or map operation directly on these arrays. And it is
13375 # not possible to use negative indexes. As a precaution against program
13376 # changes which might do this, sub pad_array_to_go adds some undefs at
13377 # the end of the current batch of data.
13379 # So 'long story short': this is a waste of time
13381 @block_type_to_go = ();
13382 @type_sequence_to_go = ();
13383 @forced_breakpoint_to_go = ();
13384 @token_lengths_to_go = ();
13385 @levels_to_go = ();
13386 @mate_index_to_go = ();
13387 @ci_levels_to_go = ();
13388 @nobreak_to_go = ();
13389 @old_breakpoint_to_go = ();
13390 @tokens_to_go = ();
13393 @leading_spaces_to_go = ();
13394 @reduced_spaces_to_go = ();
13397 @parent_seqno_to_go = ();
13400 $rbrace_follower = undef;
13401 $ending_in_quote = 0;
13403 $index_start_one_line_block = undef;
13405 # initialize forced breakpoint vars associated with each output batch
13406 $forced_breakpoint_count = 0;
13407 $index_max_forced_break = UNDEFINED_INDEX;
13408 $forced_breakpoint_undo_count = 0;
13411 } ## end sub initialize_batch_variables
13413 sub leading_spaces_to_go {
13415 # return the number of indentation spaces for a token in the output
13419 return 0 if ( $ii < 0 );
13420 my $indentation = $leading_spaces_to_go[$ii];
13421 return ref($indentation) ? $indentation->get_spaces() : $indentation;
13422 } ## end sub leading_spaces_to_go
13424 sub create_one_line_block {
13426 # set index starting next one-line block
13427 # call with no args to delete the current one-line block
13428 ($index_start_one_line_block) = @_;
13432 # Routine to place the current token into the output stream.
13433 # Called once per output token.
13435 use constant DEBUG_STORE => 0;
13437 sub store_token_to_go {
13439 my ( $self, $Ktoken_vars, $rtoken_vars ) = @_;
13441 #-------------------------------------------------------
13442 # Token storage utility for sub process_line_of_CODE.
13443 # Add one token to the next batch of '_to_go' variables.
13444 #-------------------------------------------------------
13446 # Input parameters:
13447 # $Ktoken_vars = the index K in the global token array
13448 # $rtoken_vars = $rLL->[$Ktoken_vars] = the corresponding token values
13449 # unless they are temporarily being overridden
13451 # NOTE: called once per token so coding efficiency is critical here
13462 ) = @{$rtoken_vars}[
13473 # Check for emergency flush...
13474 # The K indexes in the batch must always be a continuous sequence of
13475 # the global token array. The batch process programming assumes this.
13476 # If storing this token would cause this relation to fail we must dump
13477 # the current batch before storing the new token. It is extremely rare
13478 # for this to happen. One known example is the following two-line
13479 # snippet when run with parameters
13480 # --noadd-newlines --space-terminal-semicolon:
13481 # if ( $_ =~ /PENCIL/ ) { $pencil_flag= 1 } ; ;
13483 if ( $max_index_to_go >= 0 ) {
13484 if ( $Ktoken_vars != $K_to_go[$max_index_to_go] + 1 ) {
13485 $self->flush_batch_of_CODE();
13488 # Do not output consecutive blank tokens ... this should not
13489 # happen, but it is worth checking. Later code can then make the
13490 # simplifying assumption that blank tokens are not consecutive.
13491 elsif ( $type eq 'b' && $types_to_go[$max_index_to_go] eq 'b' ) {
13495 # if this happens, it is may be that consecutive blanks
13496 # were inserted into the token stream in 'respace_tokens'
13497 my $lno = $rLL->[$Ktoken_vars]->[_LINE_INDEX_] + 1;
13498 Fault("consecutive blanks near line $lno; please fix");
13504 # Do not start a batch with a blank token.
13505 # Fixes cases b149 b888 b984 b985 b986 b987
13507 if ( $type eq 'b' ) { return }
13510 # Clip levels to zero if there are level errors in the file.
13511 # We had to wait until now for reasons explained in sub 'write_line'.
13512 if ( $level < 0 ) { $level = 0 }
13514 # Safety check that length is defined. Should not be needed now.
13515 # Former patch for indent-only, in which the entire set of tokens is
13516 # turned into type 'q'. Lengths may have not been defined because sub
13517 # 'respace_tokens' is bypassed. We do not need lengths in this case,
13518 # but we will use the character count to have a defined value. In the
13519 # future, it would be nicer to have 'respace_tokens' convert the lines
13520 # to quotes and get correct lengths.
13521 if ( !defined($length) ) { $length = length($token) }
13523 #----------------------------
13524 # add this token to the batch
13525 #----------------------------
13526 $K_to_go[ ++$max_index_to_go ] = $Ktoken_vars;
13527 $types_to_go[$max_index_to_go] = $type;
13528 $old_breakpoint_to_go[$max_index_to_go] = 0;
13529 $forced_breakpoint_to_go[$max_index_to_go] = 0;
13530 $mate_index_to_go[$max_index_to_go] = -1;
13531 $tokens_to_go[$max_index_to_go] = $token;
13532 $ci_levels_to_go[$max_index_to_go] = $ci_level;
13533 $levels_to_go[$max_index_to_go] = $level;
13534 $type_sequence_to_go[$max_index_to_go] = $seqno;
13535 $nobreak_to_go[$max_index_to_go] = $no_internal_newlines;
13536 $token_lengths_to_go[$max_index_to_go] = $length;
13538 # We keep a running sum of token lengths from the start of this batch:
13539 # summed_lengths_to_go[$i] = total length to just before token $i
13540 # summed_lengths_to_go[$i+1] = total length to just after token $i
13541 $summed_lengths_to_go[ $max_index_to_go + 1 ] =
13542 $summed_lengths_to_go[$max_index_to_go] + $length;
13544 # Initializations for first token of new batch
13545 if ( !$max_index_to_go ) {
13547 # Reset flag '$starting_in_quote' for a new batch. It must be set
13548 # to the value of '$in_continued_quote', but here for efficiency we
13549 # set it to zero, which is its normal value. Then in coding below
13550 # we will change it if we find we are actually in a continued quote.
13551 $starting_in_quote = 0;
13553 # Update the next parent sequence number for each new batch.
13555 #----------------------------------------
13556 # Begin coding from sub parent_seqno_by_K
13557 #----------------------------------------
13559 # The following is equivalent to this call but much faster:
13560 # $next_parent_seqno = $self->parent_seqno_by_K($Ktoken_vars);
13562 $next_parent_seqno = SEQ_ROOT;
13564 $next_parent_seqno = $rparent_of_seqno->{$seqno};
13567 my $Kt = $rLL->[$Ktoken_vars]->[_KNEXT_SEQ_ITEM_];
13568 if ( defined($Kt) ) {
13569 my $type_sequence_t = $rLL->[$Kt]->[_TYPE_SEQUENCE_];
13570 my $type_t = $rLL->[$Kt]->[_TYPE_];
13572 # if next container token is closing, it is the parent seqno
13573 if ( $is_closing_type{$type_t} ) {
13574 $next_parent_seqno = $type_sequence_t;
13577 # otherwise we want its parent container
13579 $next_parent_seqno =
13580 $rparent_of_seqno->{$type_sequence_t};
13584 $next_parent_seqno = SEQ_ROOT
13585 unless ( defined($next_parent_seqno) );
13587 #--------------------------------------
13588 # End coding from sub parent_seqno_by_K
13589 #--------------------------------------
13591 $next_slevel = $rdepth_of_opening_seqno->[$next_parent_seqno] + 1;
13594 # Initialize some sequence-dependent variables to their normal values
13595 $parent_seqno_to_go[$max_index_to_go] = $next_parent_seqno;
13596 $nesting_depth_to_go[$max_index_to_go] = $next_slevel;
13597 $block_type_to_go[$max_index_to_go] = EMPTY_STRING;
13599 # Then fix them at container tokens:
13602 $block_type_to_go[$max_index_to_go] =
13603 $rblock_type_of_seqno->{$seqno}
13604 if ( $rblock_type_of_seqno->{$seqno} );
13606 if ( $is_opening_token{$token} ) {
13608 my $slevel = $rdepth_of_opening_seqno->[$seqno];
13609 $nesting_depth_to_go[$max_index_to_go] = $slevel;
13610 $next_slevel = $slevel + 1;
13612 $next_parent_seqno = $seqno;
13615 elsif ( $is_closing_token{$token} ) {
13617 $next_slevel = $rdepth_of_opening_seqno->[$seqno];
13618 my $slevel = $next_slevel + 1;
13619 $nesting_depth_to_go[$max_index_to_go] = $slevel;
13621 my $parent_seqno = $rparent_of_seqno->{$seqno};
13622 $parent_seqno = SEQ_ROOT unless defined($parent_seqno);
13623 $parent_seqno_to_go[$max_index_to_go] = $parent_seqno;
13624 $next_parent_seqno = $parent_seqno;
13628 # ternary token: nothing to do
13632 # Define the indentation that this token will have in two cases:
13633 # Without CI = reduced_spaces_to_go
13634 # With CI = leading_spaces_to_go
13635 if ( ( $Ktoken_vars == $K_first )
13636 && $line_of_tokens->{_starting_in_quote} )
13638 # in a continued quote - correct value set above if first token
13639 if ( $max_index_to_go == 0 ) { $starting_in_quote = 1 }
13641 $leading_spaces_to_go[$max_index_to_go] = 0;
13642 $reduced_spaces_to_go[$max_index_to_go] = 0;
13645 $leading_spaces_to_go[$max_index_to_go] =
13646 $reduced_spaces_to_go[$max_index_to_go] =
13647 $rOpts_indent_columns * $radjusted_levels->[$Ktoken_vars];
13649 $leading_spaces_to_go[$max_index_to_go] +=
13650 $rOpts_continuation_indentation * $ci_level
13654 DEBUG_STORE && do {
13655 my ( $a, $b, $c ) = caller();
13657 "STORE: from $a $c: storing token $token type $type lev=$level at $max_index_to_go\n";
13660 } ## end sub store_token_to_go
13662 sub flush_batch_of_CODE {
13664 # Finish and process the current batch.
13665 # This must be the only call to grind_batch_of_CODE()
13668 # If a batch has been started ...
13669 if ( $max_index_to_go >= 0 ) {
13671 # Create an array to hold variables for this batch
13672 my $this_batch = [];
13674 $this_batch->[_starting_in_quote_] = 1 if ($starting_in_quote);
13675 $this_batch->[_ending_in_quote_] = 1 if ($ending_in_quote);
13677 if ( $CODE_type || $last_CODE_type ) {
13678 $this_batch->[_batch_CODE_type_] =
13679 $K_to_go[$max_index_to_go] >= $K_first
13684 $last_line_had_side_comment =
13685 ( $max_index_to_go > 0 && $types_to_go[$max_index_to_go] eq '#' );
13687 # The flag $is_static_block_comment applies to the line which just
13688 # arrived. So it only applies if we are outputting that line.
13689 if ( $is_static_block_comment && !$last_line_had_side_comment ) {
13690 $this_batch->[_is_static_block_comment_] =
13691 $K_to_go[0] == $K_first;
13694 $this_batch->[_ri_starting_one_line_block_] =
13695 $ri_starting_one_line_block;
13697 $self->[_this_batch_] = $this_batch;
13699 #-------------------
13700 # process this batch
13701 #-------------------
13702 $self->grind_batch_of_CODE();
13704 # Done .. this batch is history
13705 $self->[_this_batch_] = undef;
13707 initialize_batch_variables();
13711 } ## end sub flush_batch_of_CODE
13715 # End the current batch, EXCEPT for a few special cases
13718 if ( $max_index_to_go < 0 ) {
13720 # nothing to do .. this is harmless but wastes time.
13722 Fault("sub end_batch called with nothing to do; please fix\n");
13727 # Exceptions when a line does not end with a comment... (fixes c058)
13728 if ( $types_to_go[$max_index_to_go] ne '#' ) {
13730 # Exception 1: Do not end line in a weld
13732 if ( $total_weld_count
13733 && $self->[_rK_weld_right_]->{ $K_to_go[$max_index_to_go] } );
13735 # Exception 2: just set a tentative breakpoint if we might be in a
13737 if ( defined($index_start_one_line_block) ) {
13738 $self->set_forced_breakpoint($max_index_to_go);
13743 $self->flush_batch_of_CODE();
13745 } ## end sub end_batch
13747 sub flush_vertical_aligner {
13749 my $vao = $self->[_vertical_aligner_object_];
13754 # flush is called to output any tokens in the pipeline, so that
13755 # an alternate source of lines can be written in the correct order
13757 my ( $self, $CODE_type_flush ) = @_;
13759 # end the current batch with 1 exception
13761 $index_start_one_line_block = undef;
13763 # Exception: if we are flushing within the code stream only to insert
13764 # blank line(s), then we can keep the batch intact at a weld. This
13765 # improves formatting of -ce. See test 'ce1.ce'
13766 if ( $CODE_type_flush && $CODE_type_flush eq 'BL' ) {
13767 $self->end_batch() if ( $max_index_to_go >= 0 );
13770 # otherwise, we have to shut things down completely.
13771 else { $self->flush_batch_of_CODE() }
13773 $self->flush_vertical_aligner();
13777 sub process_line_of_CODE {
13779 my ( $self, $my_line_of_tokens ) = @_;
13781 #----------------------------------------------------------------
13782 # This routine is called once per INPUT line to format all of the
13783 # tokens on that line.
13784 #----------------------------------------------------------------
13786 # It outputs full-line comments and blank lines immediately.
13788 # For lines of code:
13789 # - Tokens are copied one-by-one from the global token
13790 # array $rLL to a set of '_to_go' arrays which collect batches of
13791 # tokens. This is done with calls to 'store_token_to_go'.
13792 # - A batch is closed and processed upon reaching a well defined
13793 # structural break point (i.e. code block boundary) or forced
13794 # breakpoint (i.e. side comment or special user controls).
13795 # - Subsequent stages of formatting make additional line breaks
13796 # appropriate for lists and logical structures, and as necessary to
13797 # keep line lengths below the requested maximum line length.
13799 #-----------------------------------
13800 # begin initialize closure variables
13801 #-----------------------------------
13802 $line_of_tokens = $my_line_of_tokens;
13803 my $rK_range = $line_of_tokens->{_rK_range};
13804 if ( !defined( $rK_range->[0] ) ) {
13806 # Empty line: This can happen if tokens are deleted, for example
13807 # with the -mangle parameter
13811 ( $K_first, $K_last ) = @{$rK_range};
13812 $last_CODE_type = $CODE_type;
13813 $CODE_type = $line_of_tokens->{_code_type};
13815 $rLL = $self->[_rLL_];
13816 $radjusted_levels = $self->[_radjusted_levels_];
13817 $rparent_of_seqno = $self->[_rparent_of_seqno_];
13818 $rdepth_of_opening_seqno = $self->[_rdepth_of_opening_seqno_];
13819 $rblock_type_of_seqno = $self->[_rblock_type_of_seqno_];
13821 #---------------------------------
13822 # end initialize closure variables
13823 #---------------------------------
13825 # This flag will become nobreak_to_go and should be set to 2 to prevent
13826 # a line break AFTER the current token.
13827 $no_internal_newlines = 0;
13828 if ( !$rOpts_add_newlines || $CODE_type eq 'NIN' ) {
13829 $no_internal_newlines = 2;
13832 my $input_line = $line_of_tokens->{_line_text};
13834 my ( $is_block_comment, $has_side_comment );
13835 if ( $rLL->[$K_last]->[_TYPE_] eq '#' ) {
13836 if ( $K_last == $K_first ) { $is_block_comment = 1 }
13837 else { $has_side_comment = 1 }
13840 my $is_static_block_comment_without_leading_space =
13841 $CODE_type eq 'SBCX';
13842 $is_static_block_comment =
13843 $CODE_type eq 'SBC' || $is_static_block_comment_without_leading_space;
13845 # check for a $VERSION statement
13846 if ( $CODE_type eq 'VER' ) {
13847 $self->[_saw_VERSION_in_this_file_] = 1;
13848 $no_internal_newlines = 2;
13851 # Add interline blank if any
13852 my $last_old_nonblank_type = "b";
13853 my $first_new_nonblank_token = EMPTY_STRING;
13854 my $K_first_true = $K_first;
13855 if ( $max_index_to_go >= 0 ) {
13856 $last_old_nonblank_type = $types_to_go[$max_index_to_go];
13857 $first_new_nonblank_token = $rLL->[$K_first]->[_TOKEN_];
13858 if ( !$is_block_comment
13859 && $types_to_go[$max_index_to_go] ne 'b'
13861 && $rLL->[ $K_first - 1 ]->[_TYPE_] eq 'b' )
13867 my $rtok_first = $rLL->[$K_first];
13869 my $in_quote = $line_of_tokens->{_ending_in_quote};
13870 $ending_in_quote = $in_quote;
13872 #------------------------------------
13873 # Handle a block (full-line) comment.
13874 #------------------------------------
13875 if ($is_block_comment) {
13877 if ( $rOpts->{'delete-block-comments'} ) {
13882 $index_start_one_line_block = undef;
13883 $self->end_batch() if ( $max_index_to_go >= 0 );
13885 # output a blank line before block comments
13887 # unless we follow a blank or comment line
13888 $self->[_last_line_leading_type_] ne '#'
13889 && $self->[_last_line_leading_type_] ne 'b'
13892 && $rOpts->{'blanks-before-comments'}
13894 # if this is NOT an empty comment, unless it follows a side
13895 # comment and could become a hanging side comment.
13897 $rtok_first->[_TOKEN_] ne '#'
13898 || ( $last_line_had_side_comment
13899 && $rLL->[$K_first]->[_LEVEL_] > 0 )
13902 # not after a short line ending in an opening token
13903 # because we already have space above this comment.
13904 # Note that the first comment in this if block, after
13905 # the 'if (', does not get a blank line because of this.
13906 && !$self->[_last_output_short_opening_token_]
13908 # never before static block comments
13909 && !$is_static_block_comment
13912 $self->flush(); # switching to new output stream
13913 my $file_writer_object = $self->[_file_writer_object_];
13914 $file_writer_object->write_blank_code_line();
13915 $self->[_last_line_leading_type_] = 'b';
13919 $rOpts->{'indent-block-comments'}
13920 && ( !$rOpts->{'indent-spaced-block-comments'}
13921 || $input_line =~ /^\s+/ )
13922 && !$is_static_block_comment_without_leading_space
13925 my $Ktoken_vars = $K_first;
13926 my $rtoken_vars = $rLL->[$Ktoken_vars];
13927 $self->store_token_to_go( $Ktoken_vars, $rtoken_vars );
13928 $self->end_batch();
13932 # switching to new output stream
13935 # Note that last arg in call here is 'undef' for comments
13936 my $file_writer_object = $self->[_file_writer_object_];
13937 $file_writer_object->write_code_line(
13938 $rtok_first->[_TOKEN_] . "\n", undef );
13939 $self->[_last_line_leading_type_] = '#';
13944 #--------------------------------------------
13945 # Compare input/output indentation in logfile
13946 #--------------------------------------------
13947 if ( $self->[_save_logfile_] ) {
13949 # Compare input/output indentation except for:
13950 # - hanging side comments
13951 # - continuation lines (have unknown leading blank space)
13952 # - and lines which are quotes (they may have been outdented)
13953 my $guessed_indentation_level =
13954 $line_of_tokens->{_guessed_indentation_level};
13956 unless ( $CODE_type eq 'HSC'
13957 || $rtok_first->[_CI_LEVEL_] > 0
13958 || $guessed_indentation_level == 0
13959 && $rtok_first->[_TYPE_] eq 'Q' )
13961 my $input_line_number = $line_of_tokens->{_line_number};
13962 $self->compare_indentation_levels( $K_first,
13963 $guessed_indentation_level, $input_line_number );
13967 #-----------------------------------------
13968 # Handle a line marked as indentation-only
13969 #-----------------------------------------
13971 if ( $CODE_type eq 'IO' ) {
13973 my $line = $input_line;
13975 # Fix for rt #125506 Unexpected string formating
13976 # in which leading space of a terminal quote was removed
13978 $line =~ s/^\s+// unless ( $line_of_tokens->{_starting_in_quote} );
13980 my $Ktoken_vars = $K_first;
13982 # We work with a copy of the token variables and change the
13983 # first token to be the entire line as a quote variable
13984 my $rtoken_vars = $rLL->[$Ktoken_vars];
13985 $rtoken_vars = copy_token_as_type( $rtoken_vars, 'q', $line );
13987 # Patch: length is not really important here
13988 $rtoken_vars->[_TOKEN_LENGTH_] = length($line);
13990 $self->store_token_to_go( $Ktoken_vars, $rtoken_vars );
13991 $self->end_batch();
13995 #---------------------------
13996 # Handle all other lines ...
13997 #---------------------------
13999 # If we just saw the end of an elsif block, write nag message
14000 # if we do not see another elseif or an else.
14001 if ($looking_for_else) {
14003 ## /^(elsif|else)$/
14004 if ( !$is_elsif_else{ $rLL->[$K_first_true]->[_TOKEN_] } ) {
14005 write_logfile_entry("(No else block)\n");
14007 $looking_for_else = 0;
14010 # This is a good place to kill incomplete one-line blocks
14011 if ( $max_index_to_go >= 0 ) {
14014 # this check needed -mangle (for example rt125012)
14016 ( !$index_start_one_line_block )
14017 && ( $last_old_nonblank_type eq ';' )
14018 && ( $first_new_nonblank_token ne '}' )
14021 # Patch for RT #98902. Honor request to break at old commas.
14022 || ( $rOpts_break_at_old_comma_breakpoints
14023 && $last_old_nonblank_type eq ',' )
14026 $forced_breakpoint_to_go[$max_index_to_go] = 1
14027 if ($rOpts_break_at_old_comma_breakpoints);
14028 $index_start_one_line_block = undef;
14029 $self->end_batch();
14032 # Keep any requested breaks before this line. Note that we have to
14033 # use the original K_first because it may have been reduced above
14034 # to add a blank. The value of the flag is as follows:
14035 # 1 => hard break, flush the batch
14036 # 2 => soft break, set breakpoint and continue building the batch
14037 if ( $self->[_rbreak_before_Kfirst_]->{$K_first_true} ) {
14038 $index_start_one_line_block = undef;
14039 if ( $self->[_rbreak_before_Kfirst_]->{$K_first_true} == 2 ) {
14040 $self->set_forced_breakpoint($max_index_to_go);
14043 $self->end_batch() if ( $max_index_to_go >= 0 );
14048 #--------------------------------------
14049 # loop to process the tokens one-by-one
14050 #--------------------------------------
14051 $self->process_line_inner_loop($has_side_comment);
14053 # if there is anything left in the output buffer ...
14054 if ( $max_index_to_go >= 0 ) {
14056 my $type = $rLL->[$K_last]->[_TYPE_];
14057 my $break_flag = $self->[_rbreak_after_Klast_]->{$K_last};
14059 # we have to flush ..
14062 # if there is a side comment...
14065 # if this line ends in a quote
14066 # NOTE: This is critically important for insuring that quoted
14067 # lines do not get processed by things like -sot and -sct
14070 # if this is a VERSION statement
14071 || $CODE_type eq 'VER'
14073 # to keep a label at the end of a line
14074 || ( $type eq 'J' && $rOpts_break_after_labels != 2 )
14076 # if we have a hard break request
14077 || $break_flag && $break_flag != 2
14079 # if we are instructed to keep all old line breaks
14080 || !$rOpts->{'delete-old-newlines'}
14082 # if this is a line of the form 'use overload'. A break here in
14083 # the input file is a good break because it will allow the
14084 # operators which follow to be formatted well. Without this
14085 # break the formatting with -ci=4 -xci is poor, for example.
14089 # print length $_[2], "\n";
14090 # my ( $x, $y ) = _order(@_);
14091 # Number::Roman->new( int $x + $y );
14094 # my ( $x, $y ) = _order(@_);
14095 # Number::Roman->new( int $x - $y );
14097 || ( $max_index_to_go == 2
14098 && $types_to_go[0] eq 'k'
14099 && $tokens_to_go[0] eq 'use'
14100 && $tokens_to_go[$max_index_to_go] eq 'overload' )
14103 $index_start_one_line_block = undef;
14104 $self->end_batch();
14109 # Check for a soft break request
14110 if ( $break_flag && $break_flag == 2 ) {
14111 $self->set_forced_breakpoint($max_index_to_go);
14114 # mark old line breakpoints in current output stream
14115 if ( !$rOpts_ignore_old_breakpoints
14116 || $self->[_ris_essential_old_breakpoint_]->{$K_last} )
14118 my $jobp = $max_index_to_go;
14119 if ( $types_to_go[$max_index_to_go] eq 'b'
14120 && $max_index_to_go > 0 )
14124 $old_breakpoint_to_go[$jobp] = 1;
14130 } ## end sub process_line_of_CODE
14132 sub process_line_inner_loop {
14134 my ( $self, $has_side_comment ) = @_;
14136 #--------------------------------------------------------------------
14137 # Loop to move all tokens from one input line to a newly forming batch
14138 #--------------------------------------------------------------------
14140 # Do not start a new batch with a blank space
14141 if ( $max_index_to_go < 0 && $rLL->[$K_first]->[_TYPE_] eq 'b' ) {
14145 foreach my $Ktoken_vars ( $K_first .. $K_last ) {
14147 my $rtoken_vars = $rLL->[$Ktoken_vars];
14152 if ( $rtoken_vars->[_TYPE_] eq 'b' ) {
14153 $self->store_token_to_go( $Ktoken_vars, $rtoken_vars );
14157 #------------------
14158 # handle non-blanks
14159 #------------------
14160 my $type = $rtoken_vars->[_TYPE_];
14162 # If we are continuing after seeing a right curly brace, flush
14163 # buffer unless we see what we are looking for, as in
14165 if ($rbrace_follower) {
14166 my $token = $rtoken_vars->[_TOKEN_];
14167 unless ( $rbrace_follower->{$token} ) {
14168 $self->end_batch() if ( $max_index_to_go >= 0 );
14170 $rbrace_follower = undef;
14174 $block_type, $type_sequence,
14175 $is_opening_BLOCK, $is_closing_BLOCK,
14176 $nobreak_BEFORE_BLOCK
14179 if ( $rtoken_vars->[_TYPE_SEQUENCE_] ) {
14181 my $token = $rtoken_vars->[_TOKEN_];
14182 $type_sequence = $rtoken_vars->[_TYPE_SEQUENCE_];
14183 $block_type = $rblock_type_of_seqno->{$type_sequence};
14187 && $block_type ne 't'
14188 && !$self->[_rshort_nested_]->{$type_sequence} )
14191 if ( $type eq '{' ) {
14192 $is_opening_BLOCK = 1;
14193 $nobreak_BEFORE_BLOCK = $no_internal_newlines;
14195 elsif ( $type eq '}' ) {
14196 $is_closing_BLOCK = 1;
14197 $nobreak_BEFORE_BLOCK = $no_internal_newlines;
14202 #---------------------
14203 # handle side comments
14204 #---------------------
14205 if ($has_side_comment) {
14207 # if at last token ...
14208 if ( $Ktoken_vars == $K_last ) {
14209 $self->store_token_to_go( $Ktoken_vars, $rtoken_vars );
14213 # if before last token ... do not allow breaks which would
14214 # promote a side comment to a block comment
14215 elsif ($Ktoken_vars == $K_last - 1
14216 || $Ktoken_vars == $K_last - 2
14217 && $rLL->[ $K_last - 1 ]->[_TYPE_] eq 'b' )
14219 $no_internal_newlines = 2;
14223 # Process non-blank and non-comment tokens ...
14228 if ( $type eq ';' ) {
14230 my $next_nonblank_token_type = 'b';
14231 my $next_nonblank_token = EMPTY_STRING;
14232 if ( $Ktoken_vars < $K_last ) {
14233 my $Knnb = $Ktoken_vars + 1;
14234 $Knnb++ if ( $rLL->[$Knnb]->[_TYPE_] eq 'b' );
14235 $next_nonblank_token = $rLL->[$Knnb]->[_TOKEN_];
14236 $next_nonblank_token_type = $rLL->[$Knnb]->[_TYPE_];
14239 if ( $rOpts_break_at_old_semicolon_breakpoints
14240 && ( $Ktoken_vars == $K_first )
14241 && $max_index_to_go >= 0
14242 && !defined($index_start_one_line_block) )
14244 $self->end_batch();
14247 $self->store_token_to_go( $Ktoken_vars, $rtoken_vars );
14251 $no_internal_newlines
14252 || ( $rOpts_keep_interior_semicolons
14253 && $Ktoken_vars < $K_last )
14254 || ( $next_nonblank_token eq '}' )
14261 elsif ($is_opening_BLOCK) {
14263 # Tentatively output this token. This is required before
14264 # calling starting_one_line_block. We may have to unstore
14265 # it, though, if we have to break before it.
14266 $self->store_token_to_go( $Ktoken_vars, $rtoken_vars );
14268 # Look ahead to see if we might form a one-line block..
14270 $self->starting_one_line_block( $Ktoken_vars,
14271 $K_last_nonblank_code, $K_last );
14272 $self->clear_breakpoint_undo_stack();
14274 # to simplify the logic below, set a flag to indicate if
14275 # this opening brace is far from the keyword which introduces it
14276 my $keyword_on_same_line = 1;
14278 $max_index_to_go >= 0
14279 && defined($K_last_nonblank_code)
14280 && $rLL->[$K_last_nonblank_code]->[_TYPE_] eq ')'
14281 && ( ( $rtoken_vars->[_LEVEL_] < $levels_to_go[0] )
14285 $keyword_on_same_line = 0;
14288 # Break before '{' if requested with -bl or -bli flag
14289 my $want_break = $self->[_rbrace_left_]->{$type_sequence};
14291 # But do not break if this token is welded to the left
14292 if ( $total_weld_count
14293 && defined( $self->[_rK_weld_left_]->{$Ktoken_vars} ) )
14298 # Break BEFORE an opening '{' ...
14304 # and we were unable to start looking for a block,
14305 && !defined($index_start_one_line_block)
14307 # or if it will not be on same line as its keyword, so that
14308 # it will be outdented (eval.t, overload.t), and the user
14309 # has not insisted on keeping it on the right
14310 || ( !$keyword_on_same_line
14311 && !$rOpts_opening_brace_always_on_right )
14315 # but only if allowed
14316 unless ($nobreak_BEFORE_BLOCK) {
14318 # since we already stored this token, we must unstore it
14319 $self->unstore_token_to_go();
14321 # then output the line
14322 $self->end_batch() if ( $max_index_to_go >= 0 );
14324 # and now store this token at the start of a new line
14325 $self->store_token_to_go( $Ktoken_vars, $rtoken_vars );
14329 # now output this line
14331 if ( $max_index_to_go >= 0 && !$no_internal_newlines );
14337 elsif ($is_closing_BLOCK) {
14339 my $next_nonblank_token_type = 'b';
14340 my $next_nonblank_token = EMPTY_STRING;
14342 if ( $Ktoken_vars < $K_last ) {
14343 $Knnb = $Ktoken_vars + 1;
14344 $Knnb++ if ( $rLL->[$Knnb]->[_TYPE_] eq 'b' );
14345 $next_nonblank_token = $rLL->[$Knnb]->[_TOKEN_];
14346 $next_nonblank_token_type = $rLL->[$Knnb]->[_TYPE_];
14349 # If there is a pending one-line block ..
14350 if ( defined($index_start_one_line_block) ) {
14352 # Fix for b1208: if a side comment follows this closing
14353 # brace then we must include its length in the length test
14354 # ... unless the -issl flag is set (fixes b1307-1309).
14355 # Assume a minimum of 1 blank space to the comment.
14356 my $added_length = 0;
14357 if ( $has_side_comment
14358 && !$rOpts_ignore_side_comment_lengths
14359 && $next_nonblank_token_type eq '#' )
14361 $added_length = 1 + $rLL->[$K_last]->[_TOKEN_LENGTH_];
14364 # we have to terminate it if..
14367 # it is too long (final length may be different from
14368 # initial estimate). note: must allow 1 space for this
14370 $self->excess_line_length( $index_start_one_line_block,
14371 $max_index_to_go ) + $added_length >= 0
14374 $index_start_one_line_block = undef;
14378 # put a break before this closing curly brace if appropriate
14380 if ( $max_index_to_go >= 0
14381 && !$nobreak_BEFORE_BLOCK
14382 && !defined($index_start_one_line_block) );
14384 # store the closing curly brace
14385 $self->store_token_to_go( $Ktoken_vars, $rtoken_vars );
14387 # ok, we just stored a closing curly brace. Often, but
14388 # not always, we want to end the line immediately.
14389 # So now we have to check for special cases.
14391 # if this '}' successfully ends a one-line block..
14392 my $one_line_block_type = EMPTY_STRING;
14394 if ( defined($index_start_one_line_block) ) {
14396 # Remember the type of token just before the
14397 # opening brace. It would be more general to use
14398 # a stack, but this will work for one-line blocks.
14399 $one_line_block_type =
14400 $types_to_go[$index_start_one_line_block];
14402 # we have to actually make it by removing tentative
14403 # breaks that were set within it
14404 $self->undo_forced_breakpoint_stack(0);
14406 # For -lp, extend the nobreak to include a trailing
14407 # terminal ','. This is because the -lp indentation was
14408 # not known when making one-line blocks, so we may be able
14409 # to move the line back to fit. Otherwise we may create a
14410 # needlessly stranded comma on the next line.
14411 my $iend_nobreak = $max_index_to_go - 1;
14412 if ( $rOpts_line_up_parentheses
14413 && $next_nonblank_token_type eq ','
14414 && $Knnb eq $K_last )
14416 my $p_seqno = $parent_seqno_to_go[$max_index_to_go];
14418 $self->[_ris_excluded_lp_container_]->{$p_seqno};
14419 $iend_nobreak = $max_index_to_go if ( !$is_excluded );
14422 $self->set_nobreaks( $index_start_one_line_block,
14425 # save starting block indexes so that sub correct_lp can
14426 # check and adjust -lp indentation (c098)
14427 push @{$ri_starting_one_line_block},
14428 $index_start_one_line_block;
14430 # then re-initialize for the next one-line block
14431 $index_start_one_line_block = undef;
14433 # then decide if we want to break after the '}' ..
14434 # We will keep going to allow certain brace followers as in:
14435 # do { $ifclosed = 1; last } unless $losing;
14437 # But make a line break if the curly ends a
14438 # significant block:
14441 $is_block_without_semicolon{$block_type}
14443 # Follow users break point for
14444 # one line block types U & G, such as a 'try' block
14445 || $one_line_block_type =~ /^[UG]$/
14446 && $Ktoken_vars == $K_last
14449 # if needless semicolon follows we handle it later
14450 && $next_nonblank_token ne ';'
14454 unless ($no_internal_newlines);
14458 # set string indicating what we need to look for brace follower
14460 if ( $is_if_unless_elsif_else{$block_type} ) {
14461 $rbrace_follower = undef;
14463 elsif ( $block_type eq 'do' ) {
14464 $rbrace_follower = \%is_do_follower;
14466 $self->tight_paren_follows( $K_to_go[0], $Ktoken_vars )
14469 $rbrace_follower = { ')' => 1 };
14473 # added eval for borris.t
14474 elsif ($is_sort_map_grep_eval{$block_type}
14475 || $one_line_block_type eq 'G' )
14477 $rbrace_follower = undef;
14482 elsif ( $self->[_ris_asub_block_]->{$type_sequence} ) {
14483 if ($one_line_block_type) {
14485 $rbrace_follower = \%is_anon_sub_1_brace_follower;
14487 # Exceptions to help keep -lp intact, see git #74 ...
14488 # Exception 1: followed by '}' on this line
14489 if ( $Ktoken_vars < $K_last
14490 && $next_nonblank_token eq '}' )
14492 $rbrace_follower = undef;
14496 # Exception 2: followed by '}' on next line if -lp set.
14497 # The -lp requirement allows the formatting to follow
14498 # old breaks when -lp is not used, minimizing changes.
14499 # Fixes issue c087.
14500 elsif ($Ktoken_vars == $K_last
14501 && $rOpts_line_up_parentheses )
14503 my $K_closing_container =
14504 $self->[_K_closing_container_];
14505 my $K_opening_container =
14506 $self->[_K_opening_container_];
14507 my $p_seqno = $parent_seqno_to_go[$max_index_to_go];
14508 my $Kc = $K_closing_container->{$p_seqno};
14510 $self->[_ris_excluded_lp_container_]->{$p_seqno};
14513 && $rLL->[$Kc]->[_TOKEN_] eq '}'
14515 && $Kc - $Ktoken_vars <= 2 );
14516 $rbrace_follower = undef if ($keep_going);
14520 $rbrace_follower = \%is_anon_sub_brace_follower;
14524 # None of the above: specify what can follow a closing
14525 # brace of a block which is not an
14526 # if/elsif/else/do/sort/map/grep/eval
14528 # 'Toolbar.pm', 'Menubar.pm', bless.t, '3rules.pl', 'break1.t
14530 $rbrace_follower = \%is_other_brace_follower;
14533 # See if an elsif block is followed by another elsif or else;
14535 if ( $block_type eq 'elsif' ) {
14537 if ( $next_nonblank_token_type eq 'b' ) { # end of line?
14538 $looking_for_else = 1; # ok, check on next line
14541 ## /^(elsif|else)$/
14542 if ( !$is_elsif_else{$next_nonblank_token} ) {
14543 write_logfile_entry("No else block :(\n");
14548 # keep going after certain block types (map,sort,grep,eval)
14549 # added eval for borris.t
14553 $rbrace_follower = undef;
14557 # if no more tokens, postpone decision until re-entering
14558 elsif ( ( $next_nonblank_token_type eq 'b' )
14559 && $rOpts_add_newlines )
14561 unless ($rbrace_follower) {
14563 unless ( $no_internal_newlines
14564 || $max_index_to_go < 0 );
14567 elsif ($rbrace_follower) {
14569 if ( $rbrace_follower->{$next_nonblank_token} ) {
14571 # Fix for b1385: keep break after a comma following a
14572 # 'do' block. This could also be used for other block
14573 # types, but that would cause a significant change in
14574 # existing formatting without much benefit.
14575 if ( $next_nonblank_token eq ','
14576 && $Knnb eq $K_last
14577 && $block_type eq 'do'
14578 && $rOpts_add_newlines
14579 && $self->is_trailing_comma($Knnb) )
14581 $self->[_rbreak_after_Klast_]->{$K_last} = 1;
14586 unless ( $no_internal_newlines
14587 || $max_index_to_go < 0 );
14590 $rbrace_follower = undef;
14595 unless ( $no_internal_newlines
14596 || $max_index_to_go < 0 );
14599 } ## end treatment of closing block token
14601 #------------------------------
14602 # handle here_doc target string
14603 #------------------------------
14604 elsif ( $type eq 'h' ) {
14606 # no newlines after seeing here-target
14607 $no_internal_newlines = 2;
14608 $self->store_token_to_go( $Ktoken_vars, $rtoken_vars );
14611 #-----------------------------
14612 # handle all other token types
14613 #-----------------------------
14616 $self->store_token_to_go( $Ktoken_vars, $rtoken_vars );
14618 # break after a label if requested
14619 if ( $rOpts_break_after_labels
14621 && $rOpts_break_after_labels == 1 )
14624 unless ($no_internal_newlines);
14628 # remember previous nonblank, non-comment OUTPUT token
14629 $K_last_nonblank_code = $Ktoken_vars;
14631 } ## end of loop over all tokens in this line
14633 } ## end sub process_line_inner_loop
14635 } ## end closure process_line_of_CODE
14637 sub is_trailing_comma {
14638 my ( $self, $KK ) = @_;
14641 # $KK - index of a comma in token list
14643 # true if the comma at index $KK is a trailing comma
14646 my $rLL = $self->[_rLL_];
14647 my $type_KK = $rLL->[$KK]->[_TYPE_];
14648 if ( $type_KK ne ',' ) {
14650 && Fault("Bad call: expected type ',' but received '$type_KK'\n");
14653 my $Knnb = $self->K_next_nonblank($KK);
14654 if ( defined($Knnb) ) {
14655 my $type_sequence = $rLL->[$Knnb]->[_TYPE_SEQUENCE_];
14656 my $type_Knnb = $rLL->[$Knnb]->[_TYPE_];
14657 if ( $type_sequence && $is_closing_type{$type_Knnb} ) {
14662 } ## end sub is_trailing_comma
14664 sub tight_paren_follows {
14666 my ( $self, $K_to_go_0, $K_ic ) = @_;
14668 # Input parameters:
14669 # $K_to_go_0 = first token index K of this output batch (=K_to_go[0])
14670 # $K_ic = index of the closing do brace (=K_to_go[$max_index_to_go])
14671 # Return parameter:
14672 # false if we want a break after the closing do brace
14673 # true if we do not want a break after the closing do brace
14675 # We are at the closing brace of a 'do' block. See if this brace is
14676 # followed by a closing paren, and if so, set a flag which indicates
14677 # that we do not want a line break between the '}' and ')'.
14679 # xxxxx ( ...... do { ... } ) {
14680 # ^-------looking at this brace, K_ic
14682 # Subscript notation:
14683 # _i = inner container (braces in this case)
14684 # _o = outer container (parens in this case)
14685 # _io = inner opening = '{'
14686 # _ic = inner closing = '}'
14687 # _oo = outer opening = '('
14688 # _oc = outer closing = ')'
14690 # |--K_oo |--K_oc = outer container
14691 # xxxxx ( ...... do { ...... } ) {
14692 # |--K_io |--K_ic = inner container
14694 # In general, the safe thing to do is return a 'false' value
14695 # if the statement appears to be complex. This will have
14696 # the downstream side-effect of opening up outer containers
14697 # to help make complex code readable. But for simpler
14698 # do blocks it can be preferable to keep the code compact
14699 # by returning a 'true' value.
14701 return unless defined($K_ic);
14702 my $rLL = $self->[_rLL_];
14704 # we should only be called at a closing block
14705 my $seqno_i = $rLL->[$K_ic]->[_TYPE_SEQUENCE_];
14706 return unless ($seqno_i); # shouldn't happen;
14708 # This only applies if the next nonblank is a ')'
14709 my $K_oc = $self->K_next_nonblank($K_ic);
14710 return unless defined($K_oc);
14711 my $token_next = $rLL->[$K_oc]->[_TOKEN_];
14712 return unless ( $token_next eq ')' );
14714 my $seqno_o = $rLL->[$K_oc]->[_TYPE_SEQUENCE_];
14715 my $K_io = $self->[_K_opening_container_]->{$seqno_i};
14716 my $K_oo = $self->[_K_opening_container_]->{$seqno_o};
14717 return unless ( defined($K_io) && defined($K_oo) );
14719 # RULE 1: Do not break before a closing signature paren
14720 # (regardless of complexity). This is a fix for issue git#22.
14721 # Looking for something like:
14722 # sub xxx ( ... do { ... } ) {
14723 # ^----- next block_type
14724 my $K_test = $self->K_next_nonblank($K_oc);
14725 if ( defined($K_test) && $rLL->[$K_test]->[_TYPE_] eq '{' ) {
14726 my $seqno_test = $rLL->[$K_test]->[_TYPE_SEQUENCE_];
14728 if ( $self->[_ris_asub_block_]->{$seqno_test}
14729 || $self->[_ris_sub_block_]->{$seqno_test} )
14736 # RULE 2: Break if the contents within braces appears to be 'complex'. We
14737 # base this decision on the number of tokens between braces.
14739 # xxxxx ( ... do { ... } ) {
14742 # Although very simple, it has the advantages of (1) being insensitive to
14743 # changes in lengths of identifier names, (2) easy to understand, implement
14744 # and test. A test case for this is 't/snippets/long_line.in'.
14746 # Example: $K_ic - $K_oo = 9 [Pass Rule 2]
14747 # if ( do { $2 !~ /&/ } ) { ... }
14749 # Example: $K_ic - $K_oo = 10 [Pass Rule 2]
14750 # for ( split /\s*={70,}\s*/, do { local $/; <DATA> }) { ... }
14752 # Example: $K_ic - $K_oo = 20 [Fail Rule 2]
14753 # test_zero_args( "do-returned list slice", do { ( 10, 11 )[ 2, 3 ]; });
14755 return if ( $K_ic - $K_io > 16 );
14757 # RULE 3: break if the code between the opening '(' and the '{' is 'complex'
14758 # As with the previous rule, we decide based on the token count
14760 # xxxxx ( ... do { ... } ) {
14763 # Example: $K_ic - $K_oo = 9 [Pass Rule 2]
14764 # $K_io - $K_oo = 4 [Pass Rule 3]
14765 # if ( do { $2 !~ /&/ } ) { ... }
14767 # Example: $K_ic - $K_oo = 10 [Pass rule 2]
14768 # $K_io - $K_oo = 9 [Pass rule 3]
14769 # for ( split /\s*={70,}\s*/, do { local $/; <DATA> }) { ... }
14771 return if ( $K_io - $K_oo > 9 );
14773 # RULE 4: Break if we have already broken this batch of output tokens
14774 return if ( $K_oo < $K_to_go_0 );
14776 # RULE 5: Break if input is not on one line
14777 # For example, we will set the flag for the following expression
14778 # written in one line:
14780 # This has: $K_ic - $K_oo = 10 [Pass rule 2]
14781 # $K_io - $K_oo = 8 [Pass rule 3]
14782 # $self->debug( 'Error: ' . do { local $/; <$err> } );
14784 # but we break after the brace if it is on multiple lines on input, since
14785 # the user may prefer it on multiple lines:
14789 # 'Error: ' . do { local $/; <$err> }
14792 if ( !$rOpts_ignore_old_breakpoints ) {
14793 my $iline_oo = $rLL->[$K_oo]->[_LINE_INDEX_];
14794 my $iline_oc = $rLL->[$K_oc]->[_LINE_INDEX_];
14795 return if ( $iline_oo != $iline_oc );
14798 # OK to keep the paren tight
14800 } ## end sub tight_paren_follows
14802 my %is_brace_semicolon_colon;
14805 my @q = qw( { } ; : );
14806 @is_brace_semicolon_colon{@q} = (1) x scalar(@q);
14809 sub starting_one_line_block {
14811 # After seeing an opening curly brace, look for the closing brace and see
14812 # if the entire block will fit on a line. This routine is not always right
14813 # so a check is made later (at the closing brace) to make sure we really
14814 # have a one-line block. We have to do this preliminary check, though,
14815 # because otherwise we would always break at a semicolon within a one-line
14816 # block if the block contains multiple statements.
14819 # $Kj = index of opening brace
14820 # $K_last_nonblank = index of previous nonblank code token
14821 # $K_last = index of last token of input line
14823 # Calls 'create_one_line_block' if one-line block might be formed.
14825 # Also returns a flag '$too_long':
14826 # true = distance from opening keyword to OPENING brace exceeds
14827 # the maximum line length.
14828 # false (simple return) => not too long
14829 # Note that this flag is for distance from the statement start to the
14830 # OPENING brace, not the closing brace.
14832 my ( $self, $Kj, $K_last_nonblank, $K_last ) = @_;
14834 my $rbreak_container = $self->[_rbreak_container_];
14835 my $rshort_nested = $self->[_rshort_nested_];
14836 my $rLL = $self->[_rLL_];
14837 my $K_opening_container = $self->[_K_opening_container_];
14838 my $rblock_type_of_seqno = $self->[_rblock_type_of_seqno_];
14840 # kill any current block - we can only go 1 deep
14841 create_one_line_block();
14845 # This routine should not have been called if there are no tokens in the
14846 # 'to_go' arrays of previously stored tokens. A previous call to
14847 # 'store_token_to_go' should have stored an opening brace. An error here
14848 # indicates that a programming change may have caused a flush operation to
14849 # clean out the previously stored tokens.
14850 if ( !defined($max_index_to_go) || $max_index_to_go < 0 ) {
14851 Fault("program bug: store_token_to_go called incorrectly\n")
14856 # Return if block should be broken
14857 my $type_sequence_j = $rLL->[$Kj]->[_TYPE_SEQUENCE_];
14858 if ( $rbreak_container->{$type_sequence_j} ) {
14862 my $ris_bli_container = $self->[_ris_bli_container_];
14863 my $is_bli = $ris_bli_container->{$type_sequence_j};
14865 my $block_type = $rblock_type_of_seqno->{$type_sequence_j};
14866 $block_type = EMPTY_STRING unless ( defined($block_type) );
14868 my $previous_nonblank_token = EMPTY_STRING;
14869 my $i_last_nonblank = -1;
14870 if ( defined($K_last_nonblank) ) {
14871 $i_last_nonblank = $K_last_nonblank - $K_to_go[0];
14872 if ( $i_last_nonblank >= 0 ) {
14873 $previous_nonblank_token = $rLL->[$K_last_nonblank]->[_TOKEN_];
14877 #---------------------------------------------------------------------
14878 # find the starting keyword for this block (such as 'if', 'else', ...)
14879 #---------------------------------------------------------------------
14881 $max_index_to_go == 0
14882 ##|| $block_type =~ /^[\{\}\;\:]$/
14883 || $is_brace_semicolon_colon{$block_type}
14884 || substr( $block_type, 0, 7 ) eq 'package'
14887 $i_start = $max_index_to_go;
14890 # the previous nonblank token should start these block types
14892 $i_last_nonblank >= 0
14893 && ( $previous_nonblank_token eq $block_type
14894 || $self->[_ris_asub_block_]->{$type_sequence_j}
14895 || $self->[_ris_sub_block_]->{$type_sequence_j}
14896 || substr( $block_type, -2, 2 ) eq '()' )
14899 $i_start = $i_last_nonblank;
14901 # For signatures and extended syntax ...
14902 # If this brace follows a parenthesized list, we should look back to
14903 # find the keyword before the opening paren because otherwise we might
14904 # form a one line block which stays intact, and cause the parenthesized
14905 # expression to break open. That looks bad.
14906 if ( $tokens_to_go[$i_start] eq ')' ) {
14908 # Find the opening paren
14909 my $K_start = $K_to_go[$i_start];
14910 return unless defined($K_start);
14911 my $seqno = $type_sequence_to_go[$i_start];
14912 return unless ($seqno);
14913 my $K_opening = $K_opening_container->{$seqno};
14914 return unless defined($K_opening);
14915 my $i_opening = $i_start + ( $K_opening - $K_start );
14917 # give up if not on this line
14918 return unless ( $i_opening >= 0 );
14919 $i_start = $i_opening;
14921 # go back one token before the opening paren
14922 if ( $i_start > 0 ) { $i_start-- }
14923 if ( $types_to_go[$i_start] eq 'b' && $i_start > 0 ) { $i_start--; }
14924 my $lev = $levels_to_go[$i_start];
14925 if ( $lev > $rLL->[$Kj]->[_LEVEL_] ) { return }
14929 elsif ( $previous_nonblank_token eq ')' ) {
14931 # For something like "if (xxx) {", the keyword "if" will be
14932 # just after the most recent break. This will be 0 unless
14933 # we have just killed a one-line block and are starting another.
14935 # Note: cannot use inext_index_to_go[] here because that array
14936 # is still being constructed.
14937 $i_start = $index_max_forced_break + 1;
14938 if ( $types_to_go[$i_start] eq 'b' ) {
14942 # Patch to avoid breaking short blocks defined with extended_syntax:
14943 # Strip off any trailing () which was added in the parser to mark
14944 # the opening keyword. For example, in the following
14945 # create( TypeFoo $e) {$bubba}
14946 # the blocktype would be marked as create()
14947 my $stripped_block_type = $block_type;
14948 if ( substr( $block_type, -2, 2 ) eq '()' ) {
14949 $stripped_block_type = substr( $block_type, 0, -2 );
14951 unless ( $tokens_to_go[$i_start] eq $stripped_block_type ) {
14956 # patch for SWITCH/CASE to retain one-line case/when blocks
14957 elsif ( $block_type eq 'case' || $block_type eq 'when' ) {
14959 # Note: cannot use inext_index_to_go[] here because that array
14960 # is still being constructed.
14961 $i_start = $index_max_forced_break + 1;
14962 if ( $types_to_go[$i_start] eq 'b' ) {
14965 unless ( $tokens_to_go[$i_start] eq $block_type ) {
14971 #-------------------------------------------
14972 # Couldn't find start - return too_long flag
14973 #-------------------------------------------
14977 my $pos = total_line_length( $i_start, $max_index_to_go ) - 1;
14979 my $maximum_line_length =
14980 $maximum_line_length_at_level[ $levels_to_go[$i_start] ];
14982 # see if distance to the opening container is too great to even start
14983 if ( $pos > $maximum_line_length ) {
14985 #------------------------------
14986 # too long to the opening token
14987 #------------------------------
14991 #-----------------------------------------------------------------------
14992 # OK so far: the statement is not to long just to the OPENING token. Now
14993 # see if everything to the closing token will fit on one line
14994 #-----------------------------------------------------------------------
14996 # This is part of an update to fix cases b562 .. b983
14997 my $K_closing = $self->[_K_closing_container_]->{$type_sequence_j};
14998 return unless ( defined($K_closing) );
14999 my $container_length = $rLL->[$K_closing]->[_CUMULATIVE_LENGTH_] -
15000 $rLL->[$Kj]->[_CUMULATIVE_LENGTH_];
15002 my $excess = $pos + 1 + $container_length - $maximum_line_length;
15004 # Add a small tolerance for welded tokens (case b901)
15005 if ( $total_weld_count && $self->is_welded_at_seqno($type_sequence_j) ) {
15009 if ( $excess > 0 ) {
15011 # line is too long... there is no chance of forming a one line block
15012 # if the excess is more than 1 char
15013 return if ( $excess > 1 );
15015 # ... and give up if it is not a one-line block on input.
15016 # note: for a one-line block on input, it may be possible to keep
15017 # it as a one-line block (by removing a needless semicolon ).
15018 my $K_start = $K_to_go[$i_start];
15020 $rLL->[$K_closing]->[_LINE_INDEX_] - $rLL->[$K_start]->[_LINE_INDEX_];
15021 return if ($ldiff);
15024 #------------------------------------------------------------------
15025 # Loop to check contents and length of the potential one-line block
15026 #------------------------------------------------------------------
15027 foreach my $Ki ( $Kj + 1 .. $K_last ) {
15029 # old whitespace could be arbitrarily large, so don't use it
15030 if ( $rLL->[$Ki]->[_TYPE_] eq 'b' ) { $pos += 1 }
15031 else { $pos += $rLL->[$Ki]->[_TOKEN_LENGTH_] }
15033 # ignore some small blocks
15034 my $type_sequence_i = $rLL->[$Ki]->[_TYPE_SEQUENCE_];
15035 my $nobreak = $rshort_nested->{$type_sequence_i};
15037 # Return false result if we exceed the maximum line length,
15038 if ( $pos > $maximum_line_length ) {
15042 # keep going for non-containers
15043 elsif ( !$type_sequence_i ) {
15047 # return if we encounter another opening brace before finding the
15049 elsif ($rLL->[$Ki]->[_TOKEN_] eq '{'
15050 && $rLL->[$Ki]->[_TYPE_] eq '{'
15051 && $rblock_type_of_seqno->{$type_sequence_i}
15057 # if we find our closing brace..
15058 elsif ($rLL->[$Ki]->[_TOKEN_] eq '}'
15059 && $rLL->[$Ki]->[_TYPE_] eq '}'
15060 && $rblock_type_of_seqno->{$type_sequence_i}
15064 # be sure any trailing comment also fits on the line
15065 my $Ki_nonblank = $Ki;
15066 if ( $Ki_nonblank < $K_last ) {
15068 if ( $rLL->[$Ki_nonblank]->[_TYPE_] eq 'b'
15069 && $Ki_nonblank < $K_last )
15075 # Patch for one-line sort/map/grep/eval blocks with side comments:
15076 # We will ignore the side comment length for sort/map/grep/eval
15077 # because this can lead to statements which change every time
15078 # perltidy is run. Here is an example from Denis Moskowitz which
15079 # oscillates between these two states without this patch:
15082 ## grep { $_->foo ne 'bar' } # asdfa asdf asdf asdf asdf asdf asdf asdf asdf asdf asdf
15086 ## $_->foo ne 'bar'
15087 ## } # asdfa asdf asdf asdf asdf asdf asdf asdf asdf asdf asdf
15091 # When the first line is input it gets broken apart by the main
15092 # line break logic in sub process_line_of_CODE.
15093 # When the second line is input it gets recombined by
15094 # process_line_of_CODE and passed to the output routines. The
15095 # output routines (break_long_lines) do not break it apart
15096 # because the bond strengths are set to the highest possible value
15097 # for grep/map/eval/sort blocks, so the first version gets output.
15098 # It would be possible to fix this by changing bond strengths,
15099 # but they are high to prevent errors in older versions of perl.
15100 # See c100 for eval test.
15102 && $rLL->[$K_last]->[_TYPE_] eq '#'
15103 && $rLL->[$K_last]->[_LEVEL_] == $rLL->[$Ki]->[_LEVEL_]
15104 && !$rOpts_ignore_side_comment_lengths
15105 && !$is_sort_map_grep_eval{$block_type}
15106 && $K_last - $Ki_nonblank <= 2 )
15108 # Only include the side comment for if/else/elsif/unless if it
15109 # immediately follows (because the current '$rbrace_follower'
15110 # logic for these will give an immediate brake after these
15111 # closing braces). So for example a line like this
15112 # if (...) { ... } ; # very long comment......
15113 # will already break like this:
15115 # ; # very long comment......
15116 # so we do not need to include the length of the comment, which
15117 # would break the block. Project 'bioperl' has coding like this.
15118 ## !~ /^(if|else|elsif|unless)$/
15119 if ( !$is_if_unless_elsif_else{$block_type}
15120 || $K_last == $Ki_nonblank )
15122 $Ki_nonblank = $K_last;
15123 $pos += $rLL->[$Ki_nonblank]->[_TOKEN_LENGTH_];
15125 if ( $Ki_nonblank > $Ki + 1 ) {
15127 # source whitespace could be anything, assume
15128 # at least one space before the hash on output
15129 if ( $rLL->[ $Ki + 1 ]->[_TYPE_] eq 'b' ) {
15132 else { $pos += $rLL->[ $Ki + 1 ]->[_TOKEN_LENGTH_] }
15135 if ( $pos >= $maximum_line_length ) {
15141 #--------------------------
15142 # ok, it's a one-line block
15143 #--------------------------
15144 create_one_line_block($i_start);
15148 # just keep going for other characters
15153 #--------------------------------------------------
15154 # End Loop to examine tokens in potential one-block
15155 #--------------------------------------------------
15157 # We haven't hit the closing brace, but there is still space. So the
15158 # question here is, should we keep going to look at more lines in hopes of
15159 # forming a new one-line block, or should we stop right now. The problem
15160 # with continuing is that we will not be able to honor breaks before the
15161 # opening brace if we continue.
15163 # Typically we will want to keep trying to make one-line blocks for things
15164 # like sort/map/grep/eval. But it is not always a good idea to make as
15165 # many one-line blocks as possible, so other types are not done. The user
15166 # can always use -mangle.
15168 # If we want to keep going, we will create a new one-line block.
15169 # The blocks which we can keep going are in a hash, but we never want
15170 # to continue if we are at a '-bli' block.
15171 if ( $want_one_line_block{$block_type} && !$is_bli ) {
15172 my $rtype_count = $self->[_rtype_count_by_seqno_]->{$type_sequence_j};
15173 my $semicolon_count = $rtype_count
15174 && $rtype_count->{';'} ? $rtype_count->{';'} : 0;
15176 # Ignore a terminal semicolon in the count
15177 if ( $semicolon_count <= 2 ) {
15178 my $K_closing_container = $self->[_K_closing_container_];
15179 my $K_closing_j = $K_closing_container->{$type_sequence_j};
15180 my $Kp = $self->K_previous_nonblank($K_closing_j);
15182 && $rLL->[$Kp]->[_TYPE_] eq ';' )
15184 $semicolon_count -= 1;
15187 if ( $semicolon_count <= 0 ) {
15188 create_one_line_block($i_start);
15190 elsif ( $semicolon_count == 1 && $block_type eq 'eval' ) {
15192 # Mark short broken eval blocks for possible later use in
15193 # avoiding adding spaces before a 'package' line. This is not
15194 # essential but helps keep newer and older formatting the same.
15195 $self->[_ris_short_broken_eval_block_]->{$type_sequence_j} = 1;
15199 } ## end sub starting_one_line_block
15201 sub unstore_token_to_go {
15203 # remove most recent token from output stream
15205 if ( $max_index_to_go > 0 ) {
15206 $max_index_to_go--;
15209 $max_index_to_go = UNDEFINED_INDEX;
15212 } ## end sub unstore_token_to_go
15214 sub compare_indentation_levels {
15216 # Check to see if output line tabbing agrees with input line
15217 # this can be very useful for debugging a script which has an extra
15218 # or missing brace.
15220 my ( $self, $K_first, $guessed_indentation_level, $line_number ) = @_;
15221 return unless ( defined($K_first) );
15223 my $rLL = $self->[_rLL_];
15225 my $structural_indentation_level = $rLL->[$K_first]->[_LEVEL_];
15226 my $radjusted_levels = $self->[_radjusted_levels_];
15227 if ( defined($radjusted_levels) && @{$radjusted_levels} == @{$rLL} ) {
15228 $structural_indentation_level = $radjusted_levels->[$K_first];
15231 # record max structural depth for log file
15232 if ( $structural_indentation_level > $self->[_maximum_BLOCK_level_] ) {
15233 $self->[_maximum_BLOCK_level_] = $structural_indentation_level;
15234 $self->[_maximum_BLOCK_level_at_line_] = $line_number;
15237 my $type_sequence = $rLL->[$K_first]->[_TYPE_SEQUENCE_];
15238 my $is_closing_block =
15240 && $self->[_rblock_type_of_seqno_]->{$type_sequence}
15241 && $rLL->[$K_first]->[_TYPE_] eq '}';
15243 if ( $guessed_indentation_level ne $structural_indentation_level ) {
15244 $self->[_last_tabbing_disagreement_] = $line_number;
15246 if ($is_closing_block) {
15248 if ( !$self->[_in_brace_tabbing_disagreement_] ) {
15249 $self->[_in_brace_tabbing_disagreement_] = $line_number;
15251 if ( !$self->[_first_brace_tabbing_disagreement_] ) {
15252 $self->[_first_brace_tabbing_disagreement_] = $line_number;
15256 if ( !$self->[_in_tabbing_disagreement_] ) {
15257 $self->[_tabbing_disagreement_count_]++;
15259 if ( $self->[_tabbing_disagreement_count_] <= MAX_NAG_MESSAGES ) {
15260 write_logfile_entry(
15261 "Start indentation disagreement: input=$guessed_indentation_level; output=$structural_indentation_level\n"
15264 $self->[_in_tabbing_disagreement_] = $line_number;
15265 $self->[_first_tabbing_disagreement_] = $line_number
15266 unless ( $self->[_first_tabbing_disagreement_] );
15271 $self->[_in_brace_tabbing_disagreement_] = 0 if ($is_closing_block);
15273 my $in_tabbing_disagreement = $self->[_in_tabbing_disagreement_];
15274 if ($in_tabbing_disagreement) {
15276 if ( $self->[_tabbing_disagreement_count_] <= MAX_NAG_MESSAGES ) {
15277 write_logfile_entry(
15278 "End indentation disagreement from input line $in_tabbing_disagreement\n"
15281 if ( $self->[_tabbing_disagreement_count_] == MAX_NAG_MESSAGES )
15283 write_logfile_entry(
15284 "No further tabbing disagreements will be noted\n");
15287 $self->[_in_tabbing_disagreement_] = 0;
15292 } ## end sub compare_indentation_levels
15294 ###################################################
15295 # CODE SECTION 8: Utilities for setting breakpoints
15296 ###################################################
15298 { ## begin closure set_forced_breakpoint
15300 my @forced_breakpoint_undo_stack;
15302 # These are global vars for efficiency:
15303 # my $forced_breakpoint_count;
15304 # my $forced_breakpoint_undo_count;
15305 # my $index_max_forced_break;
15307 # Break before or after certain tokens based on user settings
15308 my %break_before_or_after_token;
15312 # Updated to use all operators. This fixes case b1054
15313 # Here is the previous simplified version:
15314 ## my @q = qw( . : ? and or xor && || );
15315 my @q = @all_operators;
15318 @break_before_or_after_token{@q} = (1) x scalar(@q);
15321 sub set_fake_breakpoint {
15323 # Just bump up the breakpoint count as a signal that there are breaks.
15324 # This is useful if we have breaks but may want to postpone deciding
15325 # where to make them.
15326 $forced_breakpoint_count++;
15330 use constant DEBUG_FORCE => 0;
15332 sub set_forced_breakpoint {
15333 my ( $self, $i ) = @_;
15335 # Set a breakpoint AFTER the token at index $i in the _to_go arrays.
15338 # - If the token at index $i is a blank, backup to $i-1 to
15339 # get to the previous nonblank token.
15340 # - For certain tokens, the break may be placed BEFORE the token
15341 # at index $i, depending on user break preference settings.
15342 # - If a break is made after an opening token, then a break will
15343 # also be made before the corresponding closing token.
15345 # Returns '$i_nonblank':
15346 # = index of the token after which the breakpoint was actually placed
15347 # = undef if breakpoint was not set.
15350 if ( !defined($i) || $i < 0 ) {
15352 # Calls with bad index $i are harmless but waste time and should
15353 # be caught and eliminated during code development.
15355 my ( $a, $b, $c ) = caller();
15357 "Bad call to forced breakpoint from $a $b $c ; called with i=$i; please fix\n"
15363 # Break after token $i
15364 $i_nonblank = $self->set_forced_breakpoint_AFTER($i);
15366 # If we break at an opening container..break at the closing
15368 if ( defined($i_nonblank)
15369 && $is_opening_sequence_token{ $tokens_to_go[$i_nonblank] } )
15372 $self->set_closing_breakpoint($i_nonblank);
15375 DEBUG_FORCE && do {
15376 my ( $a, $b, $c ) = caller();
15378 "FORCE $forced_breakpoint_count after call from $a $c with i=$i max=$max_index_to_go";
15379 if ( !defined($i_nonblank) ) {
15380 $i = EMPTY_STRING unless defined($i);
15381 $msg .= " but could not set break after i='$i'\n";
15385 set break after $i_nonblank: tok=$tokens_to_go[$i_nonblank] type=$types_to_go[$i_nonblank] nobr=$nobreak_to_go[$i_nonblank]
15387 if ( defined($set_closing) ) {
15389 " Also set closing breakpoint corresponding to this token\n";
15395 return $i_nonblank;
15396 } ## end sub set_forced_breakpoint
15398 sub set_forced_breakpoint_AFTER {
15399 my ( $self, $i ) = @_;
15401 # This routine is only called by sub set_forced_breakpoint and
15402 # sub set_closing_breakpoint.
15404 # Set a breakpoint AFTER the token at index $i in the _to_go arrays.
15407 # - If the token at index $i is a blank, backup to $i-1 to
15408 # get to the previous nonblank token.
15409 # - For certain tokens, the break may be placed BEFORE the token
15410 # at index $i, depending on user break preference settings.
15413 # - the index of the token after which the break was set, or
15414 # - undef if no break was set
15416 return unless ( defined($i) && $i >= 0 );
15418 # Back up at a blank so we have a token to examine.
15419 # This was added to fix for cases like b932 involving an '=' break.
15420 if ( $i > 0 && $types_to_go[$i] eq 'b' ) { $i-- }
15422 # Never break between welded tokens
15424 if ( $total_weld_count
15425 && $self->[_rK_weld_right_]->{ $K_to_go[$i] } );
15427 my $token = $tokens_to_go[$i];
15428 my $type = $types_to_go[$i];
15430 # For certain tokens, use user settings to decide if we break before or
15432 if ( $break_before_or_after_token{$token}
15433 && ( $type eq $token || $type eq 'k' ) )
15435 if ( $want_break_before{$token} && $i >= 0 ) { $i-- }
15438 # breaks are forced before 'if' and 'unless'
15439 elsif ( $is_if_unless{$token} && $type eq 'k' ) { $i-- }
15441 if ( $i >= 0 && $i <= $max_index_to_go ) {
15442 my $i_nonblank = ( $types_to_go[$i] ne 'b' ) ? $i : $i - 1;
15444 if ( $i_nonblank >= 0
15445 && $nobreak_to_go[$i_nonblank] == 0
15446 && !$forced_breakpoint_to_go[$i_nonblank] )
15448 $forced_breakpoint_to_go[$i_nonblank] = 1;
15450 if ( $i_nonblank > $index_max_forced_break ) {
15451 $index_max_forced_break = $i_nonblank;
15453 $forced_breakpoint_count++;
15454 $forced_breakpoint_undo_stack[ $forced_breakpoint_undo_count++ ]
15458 return $i_nonblank;
15462 } ## end sub set_forced_breakpoint_AFTER
15464 sub clear_breakpoint_undo_stack {
15466 $forced_breakpoint_undo_count = 0;
15470 use constant DEBUG_UNDOBP => 0;
15472 sub undo_forced_breakpoint_stack {
15474 my ( $self, $i_start ) = @_;
15476 # Given $i_start, a non-negative index the 'undo stack' of breakpoints,
15477 # remove all breakpoints from the top of the 'undo stack' down to and
15478 # including index $i_start.
15480 # The 'undo stack' is a stack of all breakpoints made for a batch of
15483 if ( $i_start < 0 ) {
15485 my ( $a, $b, $c ) = caller();
15487 # Bad call, can only be due to a recent programming change.
15489 "Program Bug: undo_forced_breakpoint_stack from $a $c has bad i=$i_start "
15494 while ( $forced_breakpoint_undo_count > $i_start ) {
15496 $forced_breakpoint_undo_stack[ --$forced_breakpoint_undo_count ];
15497 if ( $i >= 0 && $i <= $max_index_to_go ) {
15498 $forced_breakpoint_to_go[$i] = 0;
15499 $forced_breakpoint_count--;
15501 DEBUG_UNDOBP && do {
15502 my ( $a, $b, $c ) = caller();
15504 "UNDOBP: undo forced_breakpoint i=$i $forced_breakpoint_undo_count from $a $c max=$max_index_to_go\n";
15508 # shouldn't happen, but not a critical error
15511 my ( $a, $b, $c ) = caller();
15513 Program Bug: undo_forced_breakpoint from $a $c has i=$i but max=$max_index_to_go
15519 } ## end sub undo_forced_breakpoint_stack
15520 } ## end closure set_forced_breakpoint
15522 { ## begin closure set_closing_breakpoint
15524 my %postponed_breakpoint;
15526 sub initialize_postponed_breakpoint {
15527 %postponed_breakpoint = ();
15531 sub has_postponed_breakpoint {
15533 return $postponed_breakpoint{$seqno};
15536 sub set_closing_breakpoint {
15538 # set a breakpoint at a matching closing token
15539 my ( $self, $i_break ) = @_;
15541 if ( $mate_index_to_go[$i_break] >= 0 ) {
15543 # Don't reduce the '2' in the statement below.
15544 # Test files: attrib.t, BasicLyx.pm.html
15545 if ( $mate_index_to_go[$i_break] > $i_break + 2 ) {
15547 # break before } ] and ), but sub set_forced_breakpoint will decide
15548 # to break before or after a ? and :
15549 my $inc = ( $tokens_to_go[$i_break] eq '?' ) ? 0 : 1;
15550 $self->set_forced_breakpoint_AFTER(
15551 $mate_index_to_go[$i_break] - $inc );
15555 my $type_sequence = $type_sequence_to_go[$i_break];
15556 if ($type_sequence) {
15557 my $closing_token = $matching_token{ $tokens_to_go[$i_break] };
15558 $postponed_breakpoint{$type_sequence} = 1;
15562 } ## end sub set_closing_breakpoint
15563 } ## end closure set_closing_breakpoint
15565 #########################################
15566 # CODE SECTION 9: Process batches of code
15567 #########################################
15569 { ## begin closure grind_batch_of_CODE
15571 # The routines in this closure begin the processing of a 'batch' of code.
15573 # A variable to keep track of consecutive nonblank lines so that we can
15574 # insert occasional blanks
15575 my @nonblank_lines_at_depth;
15577 # A variable to remember maximum size of previous batches; this is needed
15578 # by the logical padding routine
15579 my $peak_batch_size;
15582 # variables to keep track of indentation of unmatched containers.
15583 my %saved_opening_indentation;
15585 sub initialize_grind_batch_of_CODE {
15586 @nonblank_lines_at_depth = ();
15587 $peak_batch_size = 0;
15589 %saved_opening_indentation = ();
15593 # sub grind_batch_of_CODE receives sections of code which are the longest
15594 # possible lines without a break. In other words, it receives what is left
15595 # after applying all breaks forced by blank lines, block comments, side
15596 # comments, pod text, and structural braces. Its job is to break this code
15597 # down into smaller pieces, if necessary, which fit within the maximum
15598 # allowed line length. Then it sends the resulting lines of code on down
15599 # the pipeline to the VerticalAligner package, breaking the code into
15600 # continuation lines as necessary. The batch of tokens are in the "to_go"
15601 # arrays. The name 'grind' is slightly suggestive of a machine continually
15602 # breaking down long lines of code, but mainly it is unique and easy to
15603 # remember and find with an editor search.
15605 # The two routines 'process_line_of_CODE' and 'grind_batch_of_CODE' work
15606 # together in the following way:
15608 # - 'process_line_of_CODE' receives the original INPUT lines one-by-one and
15609 # combines them into the largest sequences of tokens which might form a new
15611 # - 'grind_batch_of_CODE' determines which tokens will form the OUTPUT
15614 # So sub 'process_line_of_CODE' builds up the longest possible continuous
15615 # sequences of tokens, regardless of line length, and then
15616 # grind_batch_of_CODE breaks these sequences back down into the new output
15619 # Sub 'grind_batch_of_CODE' ships its output lines to the vertical aligner.
15621 use constant DEBUG_GRIND => 0;
15623 sub check_grind_input {
15625 # Check for valid input to sub grind_batch_of_CODE. An error here
15626 # would most likely be due to an error in 'sub store_token_to_go'.
15629 # Be sure there are tokens in the batch
15630 if ( $max_index_to_go < 0 ) {
15632 sub grind incorrectly called with max_index_to_go=$max_index_to_go
15635 my $Klimit = $self->[_Klimit_];
15637 # The local batch tokens must be a continuous part of the global token
15640 foreach my $ii ( 0 .. $max_index_to_go ) {
15644 $KK = $K_to_go[$ii];
15645 if ( !defined($KK) || $KK < 0 || $KK > $Klimit ) {
15646 $KK = '(undef)' unless defined($KK);
15648 at batch index at i=$ii, the value of K_to_go[$ii] = '$KK' is out of the valid range (0 - $Klimit)
15652 if ( $ii > 0 && $KK != $Km + 1 ) {
15655 Non-sequential K indexes: i=$im has Km=$Km; but i=$ii has K=$KK; expecting K = Km+1
15660 } ## end sub check_grind_input
15662 # This filter speeds up a critical if-test
15666 my @q = qw# L { ( [ R ] ) } ? : f => #;
15668 @quick_filter{@q} = (1) x scalar(@q);
15671 sub grind_batch_of_CODE {
15675 #-----------------------------------------------------------------
15676 # This sub directs the formatting of one complete batch of tokens.
15677 # The tokens of the batch are in the '_to_go' arrays.
15678 #-----------------------------------------------------------------
15680 my $this_batch = $self->[_this_batch_];
15681 $this_batch->[_peak_batch_size_] = $peak_batch_size;
15682 $this_batch->[_batch_count_] = ++$batch_count;
15684 $self->check_grind_input() if (DEVEL_MODE);
15686 # This routine is only called from sub flush_batch_of_code, so that
15687 # routine is a better spot for debugging.
15688 DEBUG_GRIND && do {
15689 my $token = my $type = EMPTY_STRING;
15690 if ( $max_index_to_go >= 0 ) {
15691 $token = $tokens_to_go[$max_index_to_go];
15692 $type = $types_to_go[$max_index_to_go];
15694 my $output_str = EMPTY_STRING;
15695 if ( $max_index_to_go > 20 ) {
15696 my $mm = $max_index_to_go - 10;
15698 join( EMPTY_STRING, @tokens_to_go[ 0 .. 10 ] ) . " ... "
15699 . join( EMPTY_STRING,
15700 @tokens_to_go[ $mm .. $max_index_to_go ] );
15703 $output_str = join EMPTY_STRING,
15704 @tokens_to_go[ 0 .. $max_index_to_go ];
15706 print STDERR <<EOM;
15707 grind got batch number $batch_count with $max_index_to_go tokens, last type '$type' tok='$token', text:
15712 return if ( $max_index_to_go < 0 );
15714 if ($rOpts_line_up_parentheses) {
15715 $self->set_lp_indentation();
15718 #--------------------------------------------------
15719 # Shortcut for block comments
15720 # Note that this shortcut does not work for -lp yet
15721 #--------------------------------------------------
15722 elsif ( !$max_index_to_go && $types_to_go[0] eq '#' ) {
15724 $this_batch->[_ri_first_] = [$ibeg];
15725 $this_batch->[_ri_last_] = [$ibeg];
15726 $this_batch->[_rix_seqno_controlling_ci_] = [];
15728 $self->convey_batch_to_vertical_aligner();
15730 my $level = $levels_to_go[$ibeg];
15731 $self->[_last_last_line_leading_level_] =
15732 $self->[_last_line_leading_level_];
15733 $self->[_last_line_leading_type_] = $types_to_go[$ibeg];
15734 $self->[_last_line_leading_level_] = $level;
15735 $nonblank_lines_at_depth[$level] = 1;
15743 my $rLL = $self->[_rLL_];
15745 #-------------------------------------------------------
15746 # Loop over the batch to initialize some batch variables
15747 #-------------------------------------------------------
15748 my $comma_count_in_batch = 0;
15749 my $ilast_nonblank = -1;
15751 my @ix_seqno_controlling_ci;
15752 my %comma_arrow_count;
15753 my $comma_arrow_count_contained = 0;
15754 my @unmatched_closing_indexes_in_this_batch;
15755 my @unmatched_opening_indexes_in_this_batch;
15757 my @i_for_semicolon;
15758 foreach my $i ( 0 .. $max_index_to_go ) {
15759 $iprev_to_go[$i] = $ilast_nonblank; # correct value
15760 $inext_to_go[$i] = $i + 1; # just a first guess
15762 next if ( $types_to_go[$i] eq 'b' );
15764 if ( $ilast_nonblank >= 0 ) {
15765 $inext_to_go[$ilast_nonblank] = $i; # correction
15767 $ilast_nonblank = $i;
15769 # This is an optional shortcut to save a bit of time by skipping
15770 # most tokens. Note: the filter may need to be updated if the
15771 # next 'if' tests are ever changed to include more token types.
15772 next if ( !$quick_filter{ $types_to_go[$i] } );
15774 my $type = $types_to_go[$i];
15776 # gather info needed by sub break_long_lines
15777 if ( $type_sequence_to_go[$i] ) {
15778 my $seqno = $type_sequence_to_go[$i];
15779 my $token = $tokens_to_go[$i];
15781 # remember indexes of any tokens controlling xci
15782 # in this batch. This list is needed by sub undo_ci.
15783 if ( $self->[_ris_seqno_controlling_ci_]->{$seqno} ) {
15784 push @ix_seqno_controlling_ci, $i;
15787 if ( $is_opening_sequence_token{$token} ) {
15788 if ( $self->[_rwant_container_open_]->{$seqno} ) {
15789 $self->set_forced_breakpoint($i);
15791 push @unmatched_opening_indexes_in_this_batch, $i;
15792 if ( $type eq '?' ) {
15793 push @colon_list, $type;
15796 elsif ( $is_closing_sequence_token{$token} ) {
15798 if ( $i > 0 && $self->[_rwant_container_open_]->{$seqno} ) {
15799 $self->set_forced_breakpoint( $i - 1 );
15802 my $i_mate = pop @unmatched_opening_indexes_in_this_batch;
15803 if ( defined($i_mate) && $i_mate >= 0 ) {
15804 if ( $type_sequence_to_go[$i_mate] ==
15805 $type_sequence_to_go[$i] )
15807 $mate_index_to_go[$i] = $i_mate;
15808 $mate_index_to_go[$i_mate] = $i;
15809 my $cac = $comma_arrow_count{$seqno};
15810 $comma_arrow_count_contained += $cac if ($cac);
15813 push @unmatched_opening_indexes_in_this_batch,
15815 push @unmatched_closing_indexes_in_this_batch, $i;
15819 push @unmatched_closing_indexes_in_this_batch, $i;
15821 if ( $type eq ':' ) {
15822 push @colon_list, $type;
15824 } ## end elsif ( $is_closing_sequence_token...)
15826 } ## end if ($seqno)
15828 elsif ( $type eq ',' ) { $comma_count_in_batch++; }
15829 elsif ( $type eq '=>' ) {
15830 if (@unmatched_opening_indexes_in_this_batch) {
15831 my $j = $unmatched_opening_indexes_in_this_batch[-1];
15832 my $seqno = $type_sequence_to_go[$j];
15833 $comma_arrow_count{$seqno}++;
15836 elsif ( $type eq 'f' ) {
15837 push @i_for_semicolon, $i;
15840 } ## end for ( my $i = 0 ; $i <=...)
15842 # Break at a single interior C-style for semicolon in this batch (c154)
15843 if ( @i_for_semicolon && @i_for_semicolon == 1 ) {
15844 my $i = $i_for_semicolon[0];
15845 my $inext = $inext_to_go[$i];
15846 if ( $inext <= $max_index_to_go && $types_to_go[$inext] ne '#' ) {
15847 $self->set_forced_breakpoint($i);
15851 my $is_unbalanced_batch = @unmatched_opening_indexes_in_this_batch +
15852 @unmatched_closing_indexes_in_this_batch;
15854 if (@unmatched_opening_indexes_in_this_batch) {
15855 $this_batch->[_runmatched_opening_indexes_] =
15856 \@unmatched_opening_indexes_in_this_batch;
15859 #------------------------
15860 # Set special breakpoints
15861 #------------------------
15862 # If this line ends in a code block brace, set breaks at any
15863 # previous closing code block braces to breakup a chain of code
15864 # blocks on one line. This is very rare but can happen for
15865 # user-defined subs. For example we might be looking at this:
15866 # BOOL { $server_data{uptime} > 0; } NUM { $server_data{load}; } STR {
15867 my $saw_good_break; # flag to force breaks even if short line
15870 # looking for opening or closing block brace
15871 $block_type_to_go[$max_index_to_go]
15873 # never any good breaks if just one token
15874 && $max_index_to_go > 0
15876 # but not one of these which are never duplicated on a line:
15877 # until|while|for|if|elsif|else
15878 && !$is_block_without_semicolon{ $block_type_to_go[$max_index_to_go]
15882 my $lev = $nesting_depth_to_go[$max_index_to_go];
15884 # Walk backwards from the end and
15885 # set break at any closing block braces at the same level.
15886 # But quit if we are not in a chain of blocks.
15887 foreach my $i ( reverse( 0 .. $max_index_to_go - 1 ) ) {
15888 last if ( $levels_to_go[$i] < $lev ); # stop at a lower level
15889 next if ( $levels_to_go[$i] > $lev ); # skip past higher level
15891 if ( $block_type_to_go[$i] ) {
15892 if ( $tokens_to_go[$i] eq '}' ) {
15893 $self->set_forced_breakpoint($i);
15894 $saw_good_break = 1;
15898 # quit if we see anything besides words, function, blanks
15900 elsif ( $types_to_go[$i] !~ /^[\(\)Gwib]$/ ) { last }
15904 #-----------------------------------------------
15905 # insertion of any blank lines before this batch
15906 #-----------------------------------------------
15909 my $imax = $max_index_to_go;
15911 # trim any blank tokens
15912 if ( $types_to_go[$imin] eq 'b' ) { $imin++ }
15913 if ( $types_to_go[$imax] eq 'b' ) { $imax-- }
15915 if ( $imin > $imax ) {
15917 my $K0 = $K_to_go[0];
15918 my $lno = EMPTY_STRING;
15919 if ( defined($K0) ) { $lno = $rLL->[$K0]->[_LINE_INDEX_] + 1 }
15921 Strange: received batch containing only blanks near input line $lno: after trimming imin=$imin, imax=$imax
15927 my $last_line_leading_type = $self->[_last_line_leading_type_];
15928 my $last_line_leading_level = $self->[_last_line_leading_level_];
15929 my $last_last_line_leading_level =
15930 $self->[_last_last_line_leading_level_];
15932 # add blank line(s) before certain key types but not after a comment
15933 if ( $last_line_leading_type ne '#' ) {
15934 my $blank_count = 0;
15935 my $leading_token = $tokens_to_go[$imin];
15936 my $leading_type = $types_to_go[$imin];
15938 # break before certain key blocks except one-liners
15939 if ( $leading_type eq 'k' ) {
15940 if ( $leading_token eq 'BEGIN' || $leading_token eq 'END' ) {
15941 $blank_count = $rOpts->{'blank-lines-before-subs'}
15942 if ( terminal_type_i( $imin, $imax ) ne '}' );
15945 # Break before certain block types if we haven't had a
15946 # break at this level for a while. This is the
15947 # difficult decision..
15948 elsif ($last_line_leading_type ne 'b'
15949 && $is_if_unless_while_until_for_foreach{$leading_token} )
15951 my $lc = $nonblank_lines_at_depth[$last_line_leading_level];
15952 if ( !defined($lc) ) { $lc = 0 }
15954 # patch for RT #128216: no blank line inserted at a level
15956 if ( $levels_to_go[$imin] != $last_line_leading_level ) {
15960 if ( $rOpts->{'blanks-before-blocks'}
15961 && $lc >= $rOpts->{'long-block-line-count'}
15962 && $self->consecutive_nonblank_lines() >=
15963 $rOpts->{'long-block-line-count'}
15964 && terminal_type_i( $imin, $imax ) ne '}' )
15971 # blank lines before subs except declarations and one-liners
15972 elsif ( $leading_type eq 'i' ) {
15977 substr( $leading_token, 0, 3 ) eq 'sub'
15978 || $rOpts_sub_alias_list
15982 && $leading_token =~ /$SUB_PATTERN/
15985 $blank_count = $rOpts->{'blank-lines-before-subs'}
15986 if ( terminal_type_i( $imin, $imax ) !~ /^[\;\}\,]$/ );
15989 # break before all package declarations
15990 elsif ( substr( $leading_token, 0, 8 ) eq 'package ' ) {
15992 # ... except in a very short eval block
15993 my $pseqno = $parent_seqno_to_go[$imin];
15994 $blank_count = $rOpts->{'blank-lines-before-packages'}
15995 if ( !$self->[_ris_short_broken_eval_block_]->{$pseqno} );
15999 # Check for blank lines wanted before a closing brace
16000 elsif ( $leading_token eq '}' ) {
16001 if ( $rOpts->{'blank-lines-before-closing-block'}
16002 && $block_type_to_go[$imin]
16003 && $block_type_to_go[$imin] =~
16004 /$blank_lines_before_closing_block_pattern/ )
16006 my $nblanks = $rOpts->{'blank-lines-before-closing-block'};
16007 if ( $nblanks > $blank_count ) {
16008 $blank_count = $nblanks;
16013 if ($blank_count) {
16015 # future: send blank line down normal path to VerticalAligner?
16016 $self->flush_vertical_aligner();
16017 my $file_writer_object = $self->[_file_writer_object_];
16018 $file_writer_object->require_blank_code_lines($blank_count);
16022 # update blank line variables and count number of consecutive
16023 # non-blank, non-comment lines at this level
16024 $last_last_line_leading_level = $last_line_leading_level;
16025 $last_line_leading_level = $levels_to_go[$imin];
16026 if ( $last_line_leading_level < 0 ) { $last_line_leading_level = 0 }
16027 $last_line_leading_type = $types_to_go[$imin];
16028 if ( $last_line_leading_level == $last_last_line_leading_level
16029 && $last_line_leading_type ne 'b'
16030 && $last_line_leading_type ne '#'
16031 && defined( $nonblank_lines_at_depth[$last_line_leading_level] ) )
16033 $nonblank_lines_at_depth[$last_line_leading_level]++;
16036 $nonblank_lines_at_depth[$last_line_leading_level] = 1;
16039 $self->[_last_line_leading_type_] = $last_line_leading_type;
16040 $self->[_last_line_leading_level_] = $last_line_leading_level;
16041 $self->[_last_last_line_leading_level_] = $last_last_line_leading_level;
16043 #--------------------------
16044 # scan lists and long lines
16045 #--------------------------
16047 # Flag to remember if we called sub 'pad_array_to_go'.
16048 # Some routines (break_lists(), break_long_lines() ) need some
16049 # extra tokens added at the end of the batch. Most batches do not
16050 # use these routines, so we will avoid calling 'pad_array_to_go'
16051 # unless it is needed.
16052 my $called_pad_array_to_go;
16054 # set all forced breakpoints for good list formatting
16056 my $multiple_old_lines_in_batch;
16057 if ( $max_index_to_go > 0 ) {
16059 $self->excess_line_length( $imin, $max_index_to_go ) > 0;
16061 my $Kbeg = $K_to_go[0];
16062 my $Kend = $K_to_go[$max_index_to_go];
16063 $multiple_old_lines_in_batch =
16064 $rLL->[$Kend]->[_LINE_INDEX_] - $rLL->[$Kbeg]->[_LINE_INDEX_];
16067 my $rbond_strength_bias = [];
16070 || $multiple_old_lines_in_batch
16072 # must always call break_lists() with unbalanced batches because
16073 # it is maintaining some stacks
16074 || $is_unbalanced_batch
16076 # call break_lists if we might want to break at commas
16078 $comma_count_in_batch
16079 && ( $rOpts_maximum_fields_per_table > 0
16080 && $rOpts_maximum_fields_per_table <= $comma_count_in_batch
16081 || $rOpts_comma_arrow_breakpoints == 0 )
16084 # call break_lists if user may want to break open some one-line
16086 || ( $comma_arrow_count_contained
16087 && $rOpts_comma_arrow_breakpoints != 3 )
16090 # add a couple of extra terminal blank tokens
16091 $self->pad_array_to_go();
16092 $called_pad_array_to_go = 1;
16094 my $sgb = $self->break_lists( $is_long_line, $rbond_strength_bias );
16095 $saw_good_break ||= $sgb;
16098 # let $ri_first and $ri_last be references to lists of
16099 # first and last tokens of line fragments to output..
16100 my ( $ri_first, $ri_last );
16102 #-----------------------------
16103 # a single token uses one line
16104 #-----------------------------
16105 if ( !$max_index_to_go ) {
16106 $ri_first = [$imin];
16107 $ri_last = [$imax];
16110 # for multiple tokens
16113 #-------------------------
16114 # write a single line if..
16115 #-------------------------
16119 # this line is 'short'
16122 # and we didn't see a good breakpoint
16123 && !$saw_good_break
16125 # and we don't already have an interior breakpoint
16126 && !$forced_breakpoint_count
16129 # or, we aren't allowed to add any newlines
16130 || !$rOpts_add_newlines
16134 $ri_first = [$imin];
16135 $ri_last = [$imax];
16138 #-----------------------------
16139 # otherwise use multiple lines
16140 #-----------------------------
16143 # add a couple of extra terminal blank tokens if we haven't
16145 $self->pad_array_to_go() unless ($called_pad_array_to_go);
16147 ( $ri_first, $ri_last, my $rbond_strength_to_go ) =
16148 $self->break_long_lines( $saw_good_break, \@colon_list,
16149 $rbond_strength_bias );
16151 $self->break_all_chain_tokens( $ri_first, $ri_last );
16153 $self->break_equals( $ri_first, $ri_last )
16154 if @{$ri_first} >= 3;
16156 # now we do a correction step to clean this up a bit
16157 # (The only time we would not do this is for debugging)
16158 $self->recombine_breakpoints( $ri_first, $ri_last,
16159 $rbond_strength_to_go )
16160 if ( $rOpts_recombine && @{$ri_first} > 1 );
16162 $self->insert_final_ternary_breaks( $ri_first, $ri_last )
16166 $self->insert_breaks_before_list_opening_containers( $ri_first,
16168 if ( %break_before_container_types && $max_index_to_go > 0 );
16170 # Check for a phantom semicolon at the end of the batch
16171 if ( !$token_lengths_to_go[$imax] && $types_to_go[$imax] eq ';' ) {
16172 $self->unmask_phantom_token($imax);
16175 if ( $rOpts_one_line_block_semicolons == 0 ) {
16176 $self->delete_one_line_semicolons( $ri_first, $ri_last );
16179 # Remember the largest batch size processed. This is needed by the
16180 # logical padding routine to avoid padding the first nonblank token
16181 if ( $max_index_to_go > $peak_batch_size ) {
16182 $peak_batch_size = $max_index_to_go;
16186 #-------------------
16187 # -lp corrector step
16188 #-------------------
16189 if ($rOpts_line_up_parentheses) {
16191 $self->correct_lp_indentation( $ri_first, $ri_last );
16192 $this_batch->[_do_not_pad_] = $do_not_pad;
16195 #--------------------
16196 # ship this batch out
16197 #--------------------
16198 $this_batch->[_ri_first_] = $ri_first;
16199 $this_batch->[_ri_last_] = $ri_last;
16200 $this_batch->[_rix_seqno_controlling_ci_] = \@ix_seqno_controlling_ci;
16202 $self->convey_batch_to_vertical_aligner();
16204 #-------------------------------------------------------------------
16205 # Write requested number of blank lines after an opening block brace
16206 #-------------------------------------------------------------------
16207 if ($rOpts_blank_lines_after_opening_block) {
16209 if ( $types_to_go[$iterm] eq '#' && $iterm > $imin ) {
16211 if ( $types_to_go[$iterm] eq 'b' && $iterm > $imin ) {
16216 if ( $types_to_go[$iterm] eq '{'
16217 && $block_type_to_go[$iterm]
16218 && $block_type_to_go[$iterm] =~
16219 /$blank_lines_after_opening_block_pattern/ )
16221 my $nblanks = $rOpts_blank_lines_after_opening_block;
16222 $self->flush_vertical_aligner();
16223 my $file_writer_object = $self->[_file_writer_object_];
16224 $file_writer_object->require_blank_code_lines($nblanks);
16229 } ## end sub grind_batch_of_CODE
16231 sub unmask_phantom_token {
16232 my ( $self, $iend ) = @_;
16234 # Turn a phantom token into a real token.
16237 # $iend = the index in the output batch array of this token.
16239 # Phantom tokens are specially marked token types (such as ';') with
16240 # no token text which only become real tokens if they occur at the end
16241 # of an output line. At one time phantom ',' tokens were handled
16242 # here, but now they are processed elsewhere.
16244 my $rLL = $self->[_rLL_];
16245 my $KK = $K_to_go[$iend];
16246 my $line_number = 1 + $rLL->[$KK]->[_LINE_INDEX_];
16248 my $type = $types_to_go[$iend];
16249 return unless ( $type eq ';' );
16251 my $tok_len = length($tok);
16252 if ( $want_left_space{$type} != WS_NO ) {
16253 $tok = SPACE . $tok;
16257 $tokens_to_go[$iend] = $tok;
16258 $token_lengths_to_go[$iend] = $tok_len;
16260 $rLL->[$KK]->[_TOKEN_] = $tok;
16261 $rLL->[$KK]->[_TOKEN_LENGTH_] = $tok_len;
16263 $self->note_added_semicolon($line_number);
16265 # This changes the summed lengths of the rest of this batch
16266 foreach ( $iend .. $max_index_to_go ) {
16267 $summed_lengths_to_go[ $_ + 1 ] += $tok_len;
16272 sub save_opening_indentation {
16274 # This should be called after each batch of tokens is output. It
16275 # saves indentations of lines of all unmatched opening tokens.
16276 # These will be used by sub get_opening_indentation.
16278 my ( $self, $ri_first, $ri_last, $rindentation_list,
16279 $runmatched_opening_indexes )
16282 $runmatched_opening_indexes = []
16283 if ( !defined($runmatched_opening_indexes) );
16285 # QW INDENTATION PATCH 1:
16286 # Also save indentation for multiline qw quotes
16288 my $seqno_qw_opening;
16289 if ( $types_to_go[$max_index_to_go] eq 'q' ) {
16290 my $KK = $K_to_go[$max_index_to_go];
16291 $seqno_qw_opening =
16292 $self->[_rstarting_multiline_qw_seqno_by_K_]->{$KK};
16293 if ($seqno_qw_opening) {
16294 push @i_qw, $max_index_to_go;
16298 # we need to save indentations of any unmatched opening tokens
16299 # in this batch because we may need them in a subsequent batch.
16300 foreach ( @{$runmatched_opening_indexes}, @i_qw ) {
16302 my $seqno = $type_sequence_to_go[$_];
16305 if ( $seqno_qw_opening && $_ == $max_index_to_go ) {
16306 $seqno = $seqno_qw_opening;
16311 $seqno = 'UNKNOWN';
16312 DEVEL_MODE && Fault("unable to find sequence number\n");
16316 $saved_opening_indentation{$seqno} = [
16317 lookup_opening_indentation(
16318 $_, $ri_first, $ri_last, $rindentation_list
16323 } ## end sub save_opening_indentation
16325 sub get_saved_opening_indentation {
16327 my ( $indent, $offset, $is_leading, $exists ) = ( 0, 0, 0, 0 );
16330 if ( $saved_opening_indentation{$seqno} ) {
16331 ( $indent, $offset, $is_leading ) =
16332 @{ $saved_opening_indentation{$seqno} };
16337 # some kind of serious error it doesn't exist
16338 # (example is badfile.t)
16340 return ( $indent, $offset, $is_leading, $exists );
16341 } ## end sub get_saved_opening_indentation
16342 } ## end closure grind_batch_of_CODE
16344 sub lookup_opening_indentation {
16346 # get the indentation of the line in the current output batch
16347 # which output a selected opening token
16350 # $i_opening - index of an opening token in the current output batch
16351 # whose line indentation we need
16352 # $ri_first - reference to list of the first index $i for each output
16353 # line in this batch
16354 # $ri_last - reference to list of the last index $i for each output line
16356 # $rindentation_list - reference to a list containing the indentation
16357 # used for each line. (NOTE: the first slot in
16358 # this list is the last returned line number, and this is
16359 # followed by the list of indentations).
16362 # -the indentation of the line which contained token $i_opening
16363 # -and its offset (number of columns) from the start of the line
16365 my ( $i_opening, $ri_start, $ri_last, $rindentation_list ) = @_;
16367 if ( !@{$ri_last} ) {
16369 # An error here implies a bug introduced by a recent program change.
16370 # Every batch of code has lines, so this should never happen.
16372 Fault("Error in opening_indentation: no lines");
16374 return ( 0, 0, 0 );
16377 my $nline = $rindentation_list->[0]; # line number of previous lookup
16379 # reset line location if necessary
16380 $nline = 0 if ( $i_opening < $ri_start->[$nline] );
16382 # find the correct line
16383 unless ( $i_opening > $ri_last->[-1] ) {
16384 while ( $i_opening > $ri_last->[$nline] ) { $nline++; }
16387 # Error - token index is out of bounds - shouldn't happen
16388 # A program bug has been introduced in one of the calling routines.
16389 # We better stop here.
16391 my $i_last_line = $ri_last->[-1];
16394 Program bug in call to lookup_opening_indentation - index out of range
16395 called with index i_opening=$i_opening > $i_last_line = max index of last line
16396 This batch has max index = $max_index_to_go,
16399 $nline = $#{$ri_last};
16402 $rindentation_list->[0] =
16403 $nline; # save line number to start looking next call
16404 my $ibeg = $ri_start->[$nline];
16405 my $offset = token_sequence_length( $ibeg, $i_opening ) - 1;
16406 my $is_leading = ( $ibeg == $i_opening );
16407 return ( $rindentation_list->[ $nline + 1 ], $offset, $is_leading );
16408 } ## end sub lookup_opening_indentation
16410 sub terminal_type_i {
16412 # returns type of last token on this line (terminal token), as follows:
16413 # returns # for a full-line comment
16414 # returns ' ' for a blank line
16415 # otherwise returns final token type
16417 my ( $ibeg, $iend ) = @_;
16419 # Start at the end and work backwards
16421 my $type_i = $types_to_go[$i];
16423 # Check for side comment
16424 if ( $type_i eq '#' ) {
16426 if ( $i < $ibeg ) {
16427 return wantarray ? ( $type_i, $ibeg ) : $type_i;
16429 $type_i = $types_to_go[$i];
16432 # Skip past a blank
16433 if ( $type_i eq 'b' ) {
16435 if ( $i < $ibeg ) {
16436 return wantarray ? ( $type_i, $ibeg ) : $type_i;
16438 $type_i = $types_to_go[$i];
16441 # Found it..make sure it is a BLOCK termination,
16442 # but hide a terminal } after sort/map/grep/eval/do because it is not
16443 # necessarily the end of the line. (terminal.t)
16444 my $block_type = $block_type_to_go[$i];
16448 || $is_sort_map_grep_eval_do{$block_type} )
16453 return wantarray ? ( $type_i, $i ) : $type_i;
16454 } ## end sub terminal_type_i
16456 sub pad_array_to_go {
16458 # To simplify coding in break_lists and set_bond_strengths, it helps to
16459 # create some extra blank tokens at the end of the arrays. We also add
16460 # some undef's to help guard against using invalid data.
16462 $K_to_go[ $max_index_to_go + 1 ] = undef;
16463 $tokens_to_go[ $max_index_to_go + 1 ] = EMPTY_STRING;
16464 $tokens_to_go[ $max_index_to_go + 2 ] = EMPTY_STRING;
16465 $tokens_to_go[ $max_index_to_go + 3 ] = undef;
16466 $types_to_go[ $max_index_to_go + 1 ] = 'b';
16467 $types_to_go[ $max_index_to_go + 2 ] = 'b';
16468 $types_to_go[ $max_index_to_go + 3 ] = undef;
16469 $nesting_depth_to_go[ $max_index_to_go + 2 ] = undef;
16470 $nesting_depth_to_go[ $max_index_to_go + 1 ] =
16471 $nesting_depth_to_go[$max_index_to_go];
16474 if ( $is_closing_type{ $types_to_go[$max_index_to_go] } ) {
16475 if ( $nesting_depth_to_go[$max_index_to_go] <= 0 ) {
16477 # Nesting depths are set to be >=0 in sub write_line, so it should
16478 # not be possible to get here unless the code has a bracing error
16479 # which leaves a closing brace with zero nesting depth.
16480 unless ( get_saw_brace_error() ) {
16483 Program bug in pad_array_to_go: hit nesting error which should have been caught
16489 $nesting_depth_to_go[ $max_index_to_go + 1 ] -= 1;
16494 elsif ( $is_opening_type{ $types_to_go[$max_index_to_go] } ) {
16495 $nesting_depth_to_go[ $max_index_to_go + 1 ] += 1;
16498 } ## end sub pad_array_to_go
16500 sub break_all_chain_tokens {
16502 # scan the current breakpoints looking for breaks at certain "chain
16503 # operators" (. : && || + etc) which often occur repeatedly in a long
16504 # statement. If we see a break at any one, break at all similar tokens
16505 # within the same container.
16507 my ( $self, $ri_left, $ri_right ) = @_;
16509 my %saw_chain_type;
16510 my %left_chain_type;
16511 my %right_chain_type;
16512 my %interior_chain_type;
16513 my $nmax = @{$ri_right} - 1;
16515 # scan the left and right end tokens of all lines
16517 for my $n ( 0 .. $nmax ) {
16518 my $il = $ri_left->[$n];
16519 my $ir = $ri_right->[$n];
16520 my $typel = $types_to_go[$il];
16521 my $typer = $types_to_go[$ir];
16522 $typel = '+' if ( $typel eq '-' ); # treat + and - the same
16523 $typer = '+' if ( $typer eq '-' );
16524 $typel = '*' if ( $typel eq '/' ); # treat * and / the same
16525 $typer = '*' if ( $typer eq '/' );
16527 my $keyl = $typel eq 'k' ? $tokens_to_go[$il] : $typel;
16528 my $keyr = $typer eq 'k' ? $tokens_to_go[$ir] : $typer;
16529 if ( $is_chain_operator{$keyl} && $want_break_before{$typel} ) {
16530 next if ( $typel eq '?' );
16531 push @{ $left_chain_type{$keyl} }, $il;
16532 $saw_chain_type{$keyl} = 1;
16535 if ( $is_chain_operator{$keyr} && !$want_break_before{$typer} ) {
16536 next if ( $typer eq '?' );
16537 push @{ $right_chain_type{$keyr} }, $ir;
16538 $saw_chain_type{$keyr} = 1;
16542 return unless $count;
16544 # now look for any interior tokens of the same types
16546 my $has_interior_dot_or_plus;
16547 for my $n ( 0 .. $nmax ) {
16548 my $il = $ri_left->[$n];
16549 my $ir = $ri_right->[$n];
16550 foreach my $i ( $il + 1 .. $ir - 1 ) {
16551 my $type = $types_to_go[$i];
16552 my $key = $type eq 'k' ? $tokens_to_go[$i] : $type;
16553 $key = '+' if ( $key eq '-' );
16554 $key = '*' if ( $key eq '/' );
16555 if ( $saw_chain_type{$key} ) {
16556 push @{ $interior_chain_type{$key} }, $i;
16558 $has_interior_dot_or_plus ||= ( $key eq '.' || $key eq '+' );
16562 return unless $count;
16564 my @keys = keys %saw_chain_type;
16566 # quit if just ONE continuation line with leading . For example--
16567 # print LATEXFILE '\framebox{\parbox[c][' . $h . '][t]{' . $w . '}{'
16570 if ( $has_interior_dot_or_plus && $nmax == 1 && @keys == 1 ) {
16574 # now make a list of all new break points
16577 # loop over all chain types
16578 foreach my $key (@keys) {
16580 # loop over all interior chain tokens
16581 foreach my $itest ( @{ $interior_chain_type{$key} } ) {
16583 # loop over all left end tokens of same type
16584 if ( $left_chain_type{$key} ) {
16585 next if $nobreak_to_go[ $itest - 1 ];
16586 foreach my $i ( @{ $left_chain_type{$key} } ) {
16587 next unless $self->in_same_container_i( $i, $itest );
16588 push @insert_list, $itest - 1;
16590 # Break at matching ? if this : is at a different level.
16591 # For example, the ? before $THRf_DEAD in the following
16592 # should get a break if its : gets a break.
16595 # ( $_ & 1 ) ? ( $_ & 4 ) ? $THRf_DEAD : $THRf_ZOMBIE
16596 # : ( $_ & 4 ) ? $THRf_R_DETACHED
16597 # : $THRf_R_JOINABLE;
16599 && $levels_to_go[$i] != $levels_to_go[$itest] )
16601 my $i_question = $mate_index_to_go[$itest];
16602 if ( $i_question > 0 ) {
16603 push @insert_list, $i_question - 1;
16610 # loop over all right end tokens of same type
16611 if ( $right_chain_type{$key} ) {
16612 next if $nobreak_to_go[$itest];
16613 foreach my $i ( @{ $right_chain_type{$key} } ) {
16614 next unless $self->in_same_container_i( $i, $itest );
16615 push @insert_list, $itest;
16617 # break at matching ? if this : is at a different level
16619 && $levels_to_go[$i] != $levels_to_go[$itest] )
16621 my $i_question = $mate_index_to_go[$itest];
16622 if ( $i_question >= 0 ) {
16623 push @insert_list, $i_question;
16632 # insert any new break points
16633 if (@insert_list) {
16634 $self->insert_additional_breaks( \@insert_list, $ri_left, $ri_right );
16637 } ## end sub break_all_chain_tokens
16639 sub insert_additional_breaks {
16641 # this routine will add line breaks at requested locations after
16642 # sub break_long_lines has made preliminary breaks.
16644 my ( $self, $ri_break_list, $ri_first, $ri_last ) = @_;
16647 my $line_number = 0;
16648 foreach my $i_break_left ( sort { $a <=> $b } @{$ri_break_list} ) {
16650 next if ( $nobreak_to_go[$i_break_left] );
16652 $i_f = $ri_first->[$line_number];
16653 $i_l = $ri_last->[$line_number];
16654 while ( $i_break_left >= $i_l ) {
16657 # shouldn't happen unless caller passes bad indexes
16658 if ( $line_number >= @{$ri_last} ) {
16661 Non-fatal program bug: couldn't set break at $i_break_left
16666 $i_f = $ri_first->[$line_number];
16667 $i_l = $ri_last->[$line_number];
16670 # Do not leave a blank at the end of a line; back up if necessary
16671 if ( $types_to_go[$i_break_left] eq 'b' ) { $i_break_left-- }
16673 my $i_break_right = $inext_to_go[$i_break_left];
16674 if ( $i_break_left >= $i_f
16675 && $i_break_left < $i_l
16676 && $i_break_right > $i_f
16677 && $i_break_right <= $i_l )
16679 splice( @{$ri_first}, $line_number, 1, ( $i_f, $i_break_right ) );
16680 splice( @{$ri_last}, $line_number, 1, ( $i_break_left, $i_l ) );
16684 } ## end sub insert_additional_breaks
16686 { ## begin closure in_same_container_i
16687 my $ris_break_token;
16688 my $ris_comma_token;
16692 # all cases break on seeing commas at same level
16695 @{$ris_comma_token}{@q} = (1) x scalar(@q);
16697 # Non-ternary text also breaks on seeing any of qw(? : || or )
16698 # Example: we would not want to break at any of these .'s
16699 # : "<A HREF=\"#item_" . htmlify( 0, $s2 ) . "\">$str</A>"
16700 push @q, qw( or || ? : );
16701 @{$ris_break_token}{@q} = (1) x scalar(@q);
16704 sub in_same_container_i {
16706 # Check to see if tokens at i1 and i2 are in the same container, and
16707 # not separated by certain characters: => , ? : || or
16708 # This is an interface between the _to_go arrays to the rLL array
16709 my ( $self, $i1, $i2 ) = @_;
16712 my $parent_seqno_1 = $parent_seqno_to_go[$i1];
16713 return if ( $parent_seqno_to_go[$i2] ne $parent_seqno_1 );
16715 if ( $i2 < $i1 ) { ( $i1, $i2 ) = ( $i2, $i1 ) }
16716 my $K1 = $K_to_go[$i1];
16717 my $K2 = $K_to_go[$i2];
16718 my $rLL = $self->[_rLL_];
16720 my $depth_1 = $nesting_depth_to_go[$i1];
16721 return if ( $depth_1 < 0 );
16723 # Shouldn't happen since i1 and i2 have same parent:
16724 return unless ( $nesting_depth_to_go[$i2] == $depth_1 );
16726 # Select character set to scan for
16727 my $type_1 = $types_to_go[$i1];
16728 my $rbreak = ( $type_1 ne ':' ) ? $ris_break_token : $ris_comma_token;
16730 # Fast preliminary loop to verify that tokens are in the same container
16733 $KK = $rLL->[$KK]->[_KNEXT_SEQ_ITEM_];
16734 last if !defined($KK);
16735 last if ( $KK >= $K2 );
16736 my $ii = $i1 + $KK - $K1;
16737 my $depth_i = $nesting_depth_to_go[$ii];
16738 return if ( $depth_i < $depth_1 );
16739 next if ( $depth_i > $depth_1 );
16740 if ( $type_1 ne ':' ) {
16741 my $tok_i = $tokens_to_go[$ii];
16742 return if ( $tok_i eq '?' || $tok_i eq ':' );
16746 # Slow loop checking for certain characters
16748 #-----------------------------------------------------
16749 # This is potentially a slow routine and not critical.
16750 # For safety just give up for large differences.
16751 # See test file 'infinite_loop.txt'
16752 #-----------------------------------------------------
16753 return if ( $i2 - $i1 > 200 );
16755 foreach my $ii ( $i1 + 1 .. $i2 - 1 ) {
16757 my $depth_i = $nesting_depth_to_go[$ii];
16758 next if ( $depth_i > $depth_1 );
16759 return if ( $depth_i < $depth_1 );
16760 my $tok_i = $tokens_to_go[$ii];
16761 return if ( $rbreak->{$tok_i} );
16764 } ## end sub in_same_container_i
16765 } ## end closure in_same_container_i
16769 # Look for assignment operators that could use a breakpoint.
16770 # For example, in the following snippet
16772 # $HOME = $ENV{HOME}
16775 # || die "no home directory for user $<";
16777 # we could break at the = to get this, which is a little nicer:
16782 # || die "no home directory for user $<";
16784 # The logic here follows the logic in set_logical_padding, which
16785 # will add the padding in the second line to improve alignment.
16787 my ( $self, $ri_left, $ri_right ) = @_;
16788 my $nmax = @{$ri_right} - 1;
16789 return unless ( $nmax >= 2 );
16791 # scan the left ends of first two lines
16792 my $tokbeg = EMPTY_STRING;
16794 for my $n ( 1 .. 2 ) {
16795 my $il = $ri_left->[$n];
16796 my $typel = $types_to_go[$il];
16797 my $tokenl = $tokens_to_go[$il];
16798 my $keyl = $typel eq 'k' ? $tokenl : $typel;
16800 my $has_leading_op = $is_chain_operator{$keyl};
16801 return unless ($has_leading_op);
16804 unless ( $tokenl eq $tokbeg
16805 && $nesting_depth_to_go[$il] eq $depth_beg );
16808 $depth_beg = $nesting_depth_to_go[$il];
16811 # now look for any interior tokens of the same types
16812 my $il = $ri_left->[0];
16813 my $ir = $ri_right->[0];
16815 # now make a list of all new break points
16817 foreach my $i ( reverse( $il + 1 .. $ir - 1 ) ) {
16818 my $type = $types_to_go[$i];
16819 if ( $is_assignment{$type}
16820 && $nesting_depth_to_go[$i] eq $depth_beg )
16822 if ( $want_break_before{$type} ) {
16823 push @insert_list, $i - 1;
16826 push @insert_list, $i;
16831 # Break after a 'return' followed by a chain of operators
16832 # return ( $^O !~ /win32|dos/i )
16833 # && ( $^O ne 'VMS' )
16834 # && ( $^O ne 'OS2' )
16835 # && ( $^O ne 'MacOS' );
16838 # ( $^O !~ /win32|dos/i )
16839 # && ( $^O ne 'VMS' )
16840 # && ( $^O ne 'OS2' )
16841 # && ( $^O ne 'MacOS' );
16843 if ( $types_to_go[$i] eq 'k'
16844 && $tokens_to_go[$i] eq 'return'
16846 && $nesting_depth_to_go[$i] eq $depth_beg )
16848 push @insert_list, $i;
16851 return unless (@insert_list);
16853 # One final check...
16854 # scan second and third lines and be sure there are no assignments
16855 # we want to avoid breaking at an = to make something like this:
16857 # $html_icons{"$type-$state"}
16858 # or $icon = $html_icons{$type}
16859 # or $icon = $html_icons{$state} )
16860 for my $n ( 1 .. 2 ) {
16861 my $il_n = $ri_left->[$n];
16862 my $ir_n = $ri_right->[$n];
16863 foreach my $i ( $il_n + 1 .. $ir_n ) {
16864 my $type = $types_to_go[$i];
16866 if ( $is_assignment{$type}
16867 && $nesting_depth_to_go[$i] eq $depth_beg );
16871 # ok, insert any new break point
16872 if (@insert_list) {
16873 $self->insert_additional_breaks( \@insert_list, $ri_left, $ri_right );
16876 } ## end sub break_equals
16878 { ## begin closure recombine_breakpoints
16880 # This routine is called once per batch to see if it would be better
16881 # to combine some of the lines into which the batch has been broken.
16892 @is_amp_amp{@q} = (1) x scalar(@q);
16894 @q = qw( + - * / );
16895 @is_math_op{@q} = (1) x scalar(@q);
16898 @is_plus_minus{@q} = (1) x scalar(@q);
16901 @is_mult_div{@q} = (1) x scalar(@q);
16904 sub Debug_dump_breakpoints {
16906 # Debug routine to dump current breakpoints...not normally called
16907 # We are given indexes to the current lines:
16908 # $ri_beg = ref to array of BEGinning indexes of each line
16909 # $ri_end = ref to array of ENDing indexes of each line
16910 my ( $self, $ri_beg, $ri_end, $msg ) = @_;
16911 print STDERR "----Dumping breakpoints from: $msg----\n";
16912 for my $n ( 0 .. @{$ri_end} - 1 ) {
16913 my $ibeg = $ri_beg->[$n];
16914 my $iend = $ri_end->[$n];
16915 my $text = EMPTY_STRING;
16916 foreach my $i ( $ibeg .. $iend ) {
16917 $text .= $tokens_to_go[$i];
16919 print STDERR "$n ($ibeg:$iend) $text\n";
16921 print STDERR "----\n";
16923 } ## end sub Debug_dump_breakpoints
16925 sub delete_one_line_semicolons {
16927 my ( $self, $ri_beg, $ri_end ) = @_;
16928 my $rLL = $self->[_rLL_];
16929 my $K_opening_container = $self->[_K_opening_container_];
16931 # Walk down the lines of this batch and delete any semicolons
16932 # terminating one-line blocks;
16933 my $nmax = @{$ri_end} - 1;
16935 foreach my $n ( 0 .. $nmax ) {
16936 my $i_beg = $ri_beg->[$n];
16937 my $i_e = $ri_end->[$n];
16938 my $K_beg = $K_to_go[$i_beg];
16939 my $K_e = $K_to_go[$i_e];
16941 my $type_end = $rLL->[$K_end]->[_TYPE_];
16942 if ( $type_end eq '#' ) {
16943 $K_end = $self->K_previous_nonblank($K_end);
16944 if ( defined($K_end) ) { $type_end = $rLL->[$K_end]->[_TYPE_]; }
16947 # we are looking for a line ending in closing brace
16949 unless ( $type_end eq '}' && $rLL->[$K_end]->[_TOKEN_] eq '}' );
16951 # ...and preceded by a semicolon on the same line
16952 my $K_semicolon = $self->K_previous_nonblank($K_end);
16953 next unless defined($K_semicolon);
16954 my $i_semicolon = $i_beg + ( $K_semicolon - $K_beg );
16955 next if ( $i_semicolon <= $i_beg );
16956 next unless ( $rLL->[$K_semicolon]->[_TYPE_] eq ';' );
16958 # Safety check - shouldn't happen - not critical
16959 # This is not worth throwing a Fault, except in DEVEL_MODE
16960 if ( $types_to_go[$i_semicolon] ne ';' ) {
16962 && Fault("unexpected type looking for semicolon");
16966 # ... with the corresponding opening brace on the same line
16967 my $type_sequence = $rLL->[$K_end]->[_TYPE_SEQUENCE_];
16968 my $K_opening = $K_opening_container->{$type_sequence};
16969 next unless ( defined($K_opening) );
16970 my $i_opening = $i_beg + ( $K_opening - $K_beg );
16971 next if ( $i_opening < $i_beg );
16973 # ... and only one semicolon between these braces
16974 my $semicolon_count = 0;
16975 foreach my $K ( $K_opening + 1 .. $K_semicolon - 1 ) {
16976 if ( $rLL->[$K]->[_TYPE_] eq ';' ) {
16977 $semicolon_count++;
16981 next if ($semicolon_count);
16983 # ...ok, then make the semicolon invisible
16984 my $len = $token_lengths_to_go[$i_semicolon];
16985 $tokens_to_go[$i_semicolon] = EMPTY_STRING;
16986 $token_lengths_to_go[$i_semicolon] = 0;
16987 $rLL->[$K_semicolon]->[_TOKEN_] = EMPTY_STRING;
16988 $rLL->[$K_semicolon]->[_TOKEN_LENGTH_] = 0;
16989 foreach ( $i_semicolon .. $max_index_to_go ) {
16990 $summed_lengths_to_go[ $_ + 1 ] -= $len;
16994 } ## end sub delete_one_line_semicolons
16996 use constant DEBUG_RECOMBINE => 0;
16998 sub recombine_breakpoints {
17000 # We are given indexes to the current lines:
17001 # $ri_beg = ref to array of BEGinning indexes of each line
17002 # $ri_end = ref to array of ENDing indexes of each line
17003 my ( $self, $ri_beg, $ri_end, $rbond_strength_to_go ) = @_;
17005 # sub break_long_lines is very liberal in setting line breaks
17006 # for long lines, always setting breaks at good breakpoints, even
17007 # when that creates small lines. Sometimes small line fragments
17008 # are produced which would look better if they were combined.
17009 # That's the task of this routine.
17011 # do nothing under extreme stress
17012 return if ( $high_stress_level < 1 );
17014 my $rK_weld_right = $self->[_rK_weld_right_];
17015 my $rK_weld_left = $self->[_rK_weld_left_];
17017 my $nmax_start = @{$ri_end} - 1;
17018 return if ( $nmax_start <= 0 );
17020 #----------------------------------------------------------------
17021 # Break into small sub-sections to decrease the maximum n-squared
17022 # operations and avoid excess run time. See comments below.
17023 #----------------------------------------------------------------
17025 # Also make a list of all good joining tokens between the lines
17029 my $rsections = [];
17032 my $nmax_section = 0;
17033 foreach my $nn ( 1 .. $nmax_start ) {
17034 my $ibeg_1 = $ri_beg->[ $nn - 1 ];
17035 my $iend_1 = $ri_end->[ $nn - 1 ];
17036 my $iend_2 = $ri_end->[$nn];
17037 my $ibeg_2 = $ri_beg->[$nn];
17039 # Define certain good joint tokens
17040 my ( $itok, $itokp, $itokm );
17041 foreach my $itest ( $iend_1, $ibeg_2 ) {
17042 my $type = $types_to_go[$itest];
17043 if ( $is_math_op{$type}
17044 || $is_amp_amp{$type}
17045 || $is_assignment{$type}
17051 $joint[$nn] = [$itok];
17053 # Update the section list
17054 my $excess = $self->excess_line_length( $ibeg_1, $iend_2, 1 );
17058 # The number 5 here is an arbitrary small number intended
17059 # to keep most small matches in one sub-section.
17060 || ( defined($nend_sec)
17061 && ( $nn < 5 || $nmax_start - $nn < 5 ) )
17067 if ( defined($nend_sec) ) {
17068 push @{$rsections}, [ $nbeg_sec, $nend_sec ];
17069 my $num = $nend_sec - $nbeg_sec;
17070 if ( $num > $nmax_section ) { $nmax_section = $num }
17078 if ( defined($nend_sec) ) {
17079 push @{$rsections}, [ $nbeg_sec, $nend_sec ];
17080 my $num = $nend_sec - $nbeg_sec;
17081 if ( $num > $nmax_section ) { $nmax_section = $num }
17084 my $num_sections = @{$rsections};
17086 # This is potentially an O(n-squared) loop, but not critical, so we can
17087 # put a finite limit on the total number of iterations. This is
17088 # suggested by issue c118, which pushed about 5.e5 lines through here
17089 # and caused an excessive run time.
17091 # Three lines of defense have been put in place to prevent excessive
17093 # 1. do nothing if formatting under stress (c118 was under stress)
17094 # 2. break into small sub-sections to decrease the maximum n-squared.
17095 # 3. put a finite limit on the number of iterations.
17097 # Testing shows that most batches only require one or two iterations.
17098 # A very large batch which is broken into sub-sections can require one
17099 # iteration per section. This suggests the limit here, which allows
17100 # up to 10 iterations plus one pass per sub-section.
17103 10 + int( 1000 / ( 1 + $nmax_section ) ) + $num_sections;
17105 if ( DEBUG_RECOMBINE > 1 ) {
17108 "-----\n$num_sections sections found for nmax=$nmax_start\n";
17109 foreach my $sect ( @{$rsections} ) {
17110 my ( $nbeg, $nend ) = @{$sect};
17111 my $num = $nend - $nbeg;
17112 if ( $num > $max ) { $max = $num }
17113 print STDERR "$nbeg $nend\n";
17115 print STDERR "max size=$max of $nmax_start lines\n";
17118 # Loop over all sub-sections. Note that we have to work backwards
17119 # from the end of the batch since the sections use original line
17120 # numbers, and the line numbers change as we go.
17122 while ( my $section = pop @{$rsections} ) {
17123 my ( $nbeg, $nend ) = @{$section};
17125 # number of ending lines to leave untouched in this pass
17126 my $nmax_sec = @{$ri_end} - 1;
17127 my $num_freeze = $nmax_sec - $nend;
17129 my $more_to_do = 1;
17131 # We keep looping over all of the lines of this batch
17132 # until there are no more possible recombinations
17133 my $nmax_last = $nmax_sec + 1;
17136 while ($more_to_do) {
17138 # Safety check for excess total iterations
17140 if ( $it_count > $it_count_max ) {
17146 my $nmax = @{$ri_end} - 1;
17148 # Safety check for infinite loop: the line count must decrease
17149 unless ( $nmax < $nmax_last ) {
17151 # Shouldn't happen because splice below decreases nmax on
17152 # each iteration. An error can only be due to a recent
17153 # programming change. We better stop here.
17156 "Program bug-infinite loop in recombine breakpoints\n"
17162 $nmax_last = $nmax;
17165 # Count lines with leading &&, ||, :, at any level.
17166 # This is used to avoid some recombinations which might
17168 my $rleading_amp_count;
17169 ${$rleading_amp_count} = 0;
17171 my $this_line_is_semicolon_terminated;
17173 # loop over all remaining lines in this batch
17174 my $nstop = $nmax - $num_freeze;
17175 for my $iter ( $nbeg + 1 .. $nstop ) {
17177 # alternating sweep direction gives symmetric results
17178 # for recombining lines which exceed the line length
17179 # such as eval {{{{.... }}}}
17181 if ($reverse) { $n = $nbeg + 1 + $nstop - $iter; }
17182 else { $n = $iter }
17184 #----------------------------------------------------------
17185 # If we join the current pair of lines,
17186 # line $n-1 will become the left part of the joined line
17187 # line $n will become the right part of the joined line
17189 # Here are Indexes of the endpoint tokens of the two lines:
17191 # -----line $n-1--- | -----line $n-----
17192 # $ibeg_1 $iend_1 | $ibeg_2 $iend_2
17195 # We want to decide if we should remove the line break
17196 # between the tokens at $iend_1 and $ibeg_2
17198 # We will apply a number of ad-hoc tests to see if joining
17199 # here will look ok. The code will just move to the next
17200 # pair if the join doesn't look good. If we get through
17201 # the gauntlet of tests, the lines will be recombined.
17202 #----------------------------------------------------------
17204 # beginning and ending tokens of the lines we are working on
17205 my $ibeg_1 = $ri_beg->[ $n - 1 ];
17206 my $iend_1 = $ri_end->[ $n - 1 ];
17207 my $iend_2 = $ri_end->[$n];
17208 my $ibeg_2 = $ri_beg->[$n];
17209 my $ibeg_nmax = $ri_beg->[$nmax];
17211 # combined line cannot be too long
17213 $self->excess_line_length( $ibeg_1, $iend_2, 1 );
17214 next if ( $excess > 0 );
17216 my $type_iend_1 = $types_to_go[$iend_1];
17217 my $type_iend_2 = $types_to_go[$iend_2];
17218 my $type_ibeg_1 = $types_to_go[$ibeg_1];
17219 my $type_ibeg_2 = $types_to_go[$ibeg_2];
17221 # terminal token of line 2 if any side comment is ignored:
17222 my $iend_2t = $iend_2;
17223 my $type_iend_2t = $type_iend_2;
17225 DEBUG_RECOMBINE > 1 && do {
17227 "RECOMBINE: n=$n imid=$iend_1 if=$ibeg_1 type=$type_ibeg_1 =$tokens_to_go[$ibeg_1] next_type=$type_ibeg_2 next_tok=$tokens_to_go[$ibeg_2]\n";
17230 # If line $n is the last line, we set some flags and
17231 # do any special checks for it
17232 if ( $n == $nmax ) {
17234 # a terminal '{' should stay where it is
17235 # unless preceded by a fat comma
17236 next if ( $type_ibeg_2 eq '{' && $type_iend_1 ne '=>' );
17238 if ( $type_iend_2 eq '#'
17239 && $iend_2 - $ibeg_2 >= 2
17240 && $types_to_go[ $iend_2 - 1 ] eq 'b' )
17242 $iend_2t = $iend_2 - 2;
17243 $type_iend_2t = $types_to_go[$iend_2t];
17246 $this_line_is_semicolon_terminated =
17247 $type_iend_2t eq ';';
17250 #----------------------------------------------------------
17251 # Recombine Section 0:
17252 # Examine the special token joining this line pair, if any.
17253 # Put as many tests in this section to avoid duplicate code
17254 # and to make formatting independent of whether breaks are
17255 # to the left or right of an operator.
17256 #----------------------------------------------------------
17258 # Note that parens around ($itok) are essential here:
17259 my ($itok) = @{ $joint[$n] };
17262 recombine_section_0( $itok, $ri_beg, $ri_end, $n,
17263 $rleading_amp_count );
17264 next if ( !$ok_0 );
17267 #----------------------------------------------------------
17268 # Recombine Section 1:
17269 # Join welded nested containers immediately
17270 #----------------------------------------------------------
17274 && ( $type_sequence_to_go[$iend_1]
17275 && defined( $rK_weld_right->{ $K_to_go[$iend_1] } )
17276 || $type_sequence_to_go[$ibeg_2]
17277 && defined( $rK_weld_left->{ $K_to_go[$ibeg_2] } ) )
17286 #----------------------------------------------------------
17287 # Recombine Section 2:
17288 # Examine token at $iend_1 (right end of first line of pair)
17289 #----------------------------------------------------------
17291 my ( $ok_2, $skip_Section_3 ) =
17292 recombine_section_2( $ri_beg, $ri_end, $n,
17293 $this_line_is_semicolon_terminated,
17294 $rleading_amp_count );
17295 next if ( !$ok_2 );
17297 #----------------------------------------------------------
17298 # Recombine Section 3:
17299 # Examine token at $ibeg_2 (left end of second line of pair)
17300 #----------------------------------------------------------
17302 # Join lines identified above as capable of
17303 # causing an outdented line with leading closing paren.
17304 # Note that we are skipping the rest of this section
17305 # and the rest of the loop to do the join.
17306 if ($skip_Section_3) {
17307 $forced_breakpoint_to_go[$iend_1] = 0;
17312 my ( $ok_3, $bs_tweak ) =
17313 recombine_section_3( $ri_beg, $ri_end, $n,
17314 $this_line_is_semicolon_terminated,
17315 $rleading_amp_count );
17316 next if ( !$ok_3 );
17318 #----------------------------------------------------------
17319 # Recombine Section 4:
17320 # Combine the lines if we arrive here and it is possible
17321 #----------------------------------------------------------
17323 # honor hard breakpoints
17324 next if ( $forced_breakpoint_to_go[$iend_1] > 0 );
17326 my $bs = $rbond_strength_to_go->[$iend_1] + $bs_tweak;
17328 # Require a few extra spaces before recombining lines if we
17329 # are at an old breakpoint unless this is a simple list or
17330 # terminal line. The goal is to avoid oscillating between
17331 # two quasi-stable end states. For example this snippet
17336 ## TText => "[" . ( join ',', map { "\"$_\"" } split "\n", $_ ) . "]"
17340 if ( $old_breakpoint_to_go[$iend_1]
17341 && !$this_line_is_semicolon_terminated
17344 && $type_iend_2 ne ',' );
17346 # do not recombine if we would skip in indentation levels
17347 if ( $n < $nmax ) {
17348 my $if_next = $ri_beg->[ $n + 1 ];
17351 $levels_to_go[$ibeg_1] < $levels_to_go[$ibeg_2]
17352 && $levels_to_go[$ibeg_2] < $levels_to_go[$if_next]
17354 # but an isolated 'if (' is undesirable
17357 && $iend_1 - $ibeg_1 <= 2
17358 && $type_ibeg_1 eq 'k'
17359 && $tokens_to_go[$ibeg_1] eq 'if'
17360 && $tokens_to_go[$iend_1] ne '('
17365 ## OLD: honor no-break's
17366 ## next if ( $bs >= NO_BREAK - 1 ); # removed for b1257
17368 # remember the pair with the greatest bond strength
17375 if ( $bs > $bs_best ) {
17382 # recombine the pair with the greatest bond strength
17384 splice @{$ri_beg}, $n_best, 1;
17385 splice @{$ri_end}, $n_best - 1, 1;
17386 splice @joint, $n_best, 1;
17388 # keep going if we are still making progress
17391 } # end iteration loop
17393 } # end loop over sections
17395 if (DEBUG_RECOMBINE) {
17396 my $nmax_last = @{$ri_end} - 1;
17398 "exiting recombine with $nmax_last lines, starting lines=$nmax_start, iterations=$it_count, max_it=$it_count_max numsec=$num_sections\n";
17401 } ## end sub recombine_breakpoints
17403 sub recombine_section_0 {
17404 my ( $itok, $ri_beg, $ri_end, $n, $rleading_amp_count ) = @_;
17406 # Recombine Section 0:
17407 # Examine special candidate joining token $itok
17410 # $itok = index of token at a possible join of lines $n-1 and $n
17413 # true => ok to combine
17414 # false => do not combine lines
17416 # Here are Indexes of the endpoint tokens of the two lines:
17418 # -----line $n-1--- | -----line $n-----
17419 # $ibeg_1 $iend_1 | $ibeg_2 $iend_2
17422 # ------------$itok is one of these tokens
17424 # Put as many tests in this section to avoid duplicate code
17425 # and to make formatting independent of whether breaks are
17426 # to the left or right of an operator.
17428 my $nmax = @{$ri_end} - 1;
17429 my $ibeg_1 = $ri_beg->[ $n - 1 ];
17430 my $iend_1 = $ri_end->[ $n - 1 ];
17431 my $ibeg_2 = $ri_beg->[$n];
17432 my $iend_2 = $ri_end->[$n];
17436 my $type = $types_to_go[$itok];
17438 if ( $type eq ':' ) {
17440 # do not join at a colon unless it disobeys the
17442 if ( $itok eq $iend_1 ) {
17443 return unless $want_break_before{$type};
17446 ${$rleading_amp_count}++;
17447 return if $want_break_before{$type};
17451 # handle math operators + - * /
17452 elsif ( $is_math_op{$type} ) {
17454 # Combine these lines if this line is a single
17455 # number, or if it is a short term with same
17456 # operator as the previous line. For example, in
17457 # the following code we will combine all of the
17458 # short terms $A, $B, $C, $D, $E, $F, together
17459 # instead of leaving them one per line:
17461 # $A * $B * $C * $D * $E * $F *
17462 # ( 2. * $eps * $sigma * $area ) *
17463 # ( 1. / $tcold**3 - 1. / $thot**3 );
17465 # This can be important in math-intensive code.
17469 my $itokp = min( $inext_to_go[$itok], $iend_2 );
17470 my $itokpp = min( $inext_to_go[$itokp], $iend_2 );
17471 my $itokm = max( $iprev_to_go[$itok], $ibeg_1 );
17472 my $itokmm = max( $iprev_to_go[$itokm], $ibeg_1 );
17474 # check for a number on the right
17475 if ( $types_to_go[$itokp] eq 'n' ) {
17477 # ok if nothing else on right
17478 if ( $itokp == $iend_2 ) {
17483 # look one more token to right..
17484 # okay if math operator or some termination
17486 ( ( $itokpp == $iend_2 )
17487 && $is_math_op{ $types_to_go[$itokpp] } )
17488 || $types_to_go[$itokpp] =~ /^[#,;]$/;
17492 # check for a number on the left
17493 if ( !$good_combo && $types_to_go[$itokm] eq 'n' ) {
17495 # okay if nothing else to left
17496 if ( $itokm == $ibeg_1 ) {
17500 # otherwise look one more token to left
17503 # okay if math operator, comma, or assignment
17504 $good_combo = ( $itokmm == $ibeg_1 )
17505 && ( $is_math_op{ $types_to_go[$itokmm] }
17506 || $types_to_go[$itokmm] =~ /^[,]$/
17507 || $is_assignment{ $types_to_go[$itokmm] } );
17511 # look for a single short token either side of the
17513 if ( !$good_combo ) {
17515 # Slight adjustment factor to make results
17516 # independent of break before or after operator
17517 # in long summed lists. (An operator and a
17518 # space make two spaces).
17519 my $two = ( $itok eq $iend_1 ) ? 2 : 0;
17523 # numbers or id's on both sides of this joint
17524 $types_to_go[$itokp] =~ /^[in]$/
17525 && $types_to_go[$itokm] =~ /^[in]$/
17527 # one of the two lines must be short:
17530 # no more than 2 nonblank tokens right
17535 && token_sequence_length( $itokp, $iend_2 ) <
17536 $two + $rOpts_short_concatenation_item_length
17539 # no more than 2 nonblank tokens left of
17544 && token_sequence_length( $ibeg_1, $itokm ) <
17545 2 - $two + $rOpts_short_concatenation_item_length
17550 # keep pure terms; don't mix +- with */
17552 $is_plus_minus{$type}
17553 && ( $is_mult_div{ $types_to_go[$itokmm] }
17554 || $is_mult_div{ $types_to_go[$itokpp] } )
17557 $is_mult_div{$type}
17558 && ( $is_plus_minus{ $types_to_go[$itokmm] }
17559 || $is_plus_minus{ $types_to_go[$itokpp] } )
17565 # it is also good to combine if we can reduce to 2
17567 if ( !$good_combo ) {
17569 # index on other line where same token would be
17571 my $iother = ( $itok == $iend_1 ) ? $iend_2 : $ibeg_1;
17576 && $types_to_go[$iother] ne $type;
17579 return unless ($good_combo);
17583 elsif ( $is_amp_amp{$type} ) {
17587 elsif ( $is_assignment{$type} ) {
17589 } ## end assignment
17592 # ok to combine lines
17594 } ## end sub recombine_section_0
17596 sub recombine_section_2 {
17598 my ( $ri_beg, $ri_end, $n, $this_line_is_semicolon_terminated,
17599 $rleading_amp_count )
17602 # Recombine Section 2:
17603 # Examine token at $iend_1 (right end of first line of pair)
17605 # Here are Indexes of the endpoint tokens of the two lines:
17607 # -----line $n-1--- | -----line $n-----
17608 # $ibeg_1 $iend_1 | $ibeg_2 $iend_2
17611 # -----Section 2 looks at this token
17614 # (nothing) => do not join lines
17615 # 1, skip_Section_3 => ok to join lines
17617 # $skip_Section_3 is a flag for skipping the next section
17618 my $skip_Section_3 = 0;
17620 my $nmax = @{$ri_end} - 1;
17621 my $ibeg_1 = $ri_beg->[ $n - 1 ];
17622 my $iend_1 = $ri_end->[ $n - 1 ];
17623 my $iend_2 = $ri_end->[$n];
17624 my $ibeg_2 = $ri_beg->[$n];
17625 my $ibeg_3 = $n < $nmax ? $ri_beg->[ $n + 1 ] : -1;
17626 my $ibeg_nmax = $ri_beg->[$nmax];
17628 my $type_iend_1 = $types_to_go[$iend_1];
17629 my $type_iend_2 = $types_to_go[$iend_2];
17630 my $type_ibeg_1 = $types_to_go[$ibeg_1];
17631 my $type_ibeg_2 = $types_to_go[$ibeg_2];
17633 # an isolated '}' may join with a ';' terminated segment
17634 if ( $type_iend_1 eq '}' ) {
17636 # Check for cases where combining a semicolon terminated
17637 # statement with a previous isolated closing paren will
17638 # allow the combined line to be outdented. This is
17639 # generally a good move. For example, we can join up
17640 # the last two lines here:
17642 # $dev, $ino, $mode, $nlink, $uid, $gid, $rdev,
17643 # $size, $atime, $mtime, $ctime, $blksize, $blocks
17649 # $dev, $ino, $mode, $nlink, $uid, $gid, $rdev,
17650 # $size, $atime, $mtime, $ctime, $blksize, $blocks
17653 # which makes the parens line up.
17655 # Another example, from Joe Matarazzo, probably looks best
17656 # with the 'or' clause appended to the trailing paren:
17657 # $self->some_method(
17660 # ) or die "Some_method didn't work";
17662 # But we do not want to do this for something like the -lp
17663 # option where the paren is not outdentable because the
17664 # trailing clause will be far to the right.
17666 # The logic here is synchronized with the logic in sub
17667 # sub get_final_indentation, which actually does
17670 $skip_Section_3 ||= $this_line_is_semicolon_terminated
17672 # only one token on last line
17673 && $ibeg_1 == $iend_1
17675 # must be structural paren
17676 && $tokens_to_go[$iend_1] eq ')'
17678 # style must allow outdenting,
17679 && !$closing_token_indentation{')'}
17681 # only leading '&&', '||', and ':' if no others seen
17682 # (but note: our count made below could be wrong
17683 # due to intervening comments). Note that this
17684 # count includes these tokens at all levels. The idea is
17685 # that seeing these at any level can make it hard to read
17686 # formatting if we recombine.
17687 && ( !${$rleading_amp_count}
17688 || $type_ibeg_2 !~ /^(:|\&\&|\|\|)$/ )
17690 # but leading colons probably line up with a
17691 # previous colon or question (count could be wrong).
17692 && $type_ibeg_2 ne ':'
17694 # only one step in depth allowed. this line must not
17695 # begin with a ')' itself.
17696 && ( $nesting_depth_to_go[$iend_1] ==
17697 $nesting_depth_to_go[$iend_2] + 1 );
17699 # YVES patch 2 of 2:
17700 # Allow cuddled eval chains, like this:
17707 # This patch works together with a patch in
17708 # setting adjusted indentation (where the closing eval
17709 # brace is outdented if possible).
17710 # The problem is that an 'eval' block has continuation
17711 # indentation and it looks better to undo it in some
17712 # cases. If we do not use this patch we would get:
17720 # The alternative, for uncuddled style, is to create
17721 # a patch in get_final_indentation which undoes
17722 # the indentation of a leading line like 'or do {'.
17723 # This doesn't work well with -icb through
17725 $block_type_to_go[$iend_1] eq 'eval'
17726 && !ref( $leading_spaces_to_go[$iend_1] )
17727 && !$rOpts_indent_closing_brace
17728 && $tokens_to_go[$iend_2] eq '{'
17730 ( $type_ibeg_2 =~ /^(\&\&|\|\|)$/ )
17731 || ( $type_ibeg_2 eq 'k'
17732 && $is_and_or{ $tokens_to_go[$ibeg_2] } )
17733 || $is_if_unless{ $tokens_to_go[$ibeg_2] }
17737 $skip_Section_3 ||= 1;
17744 # handle '.' and '?' specially below
17745 || ( $type_ibeg_2 =~ /^[\.\?]$/ )
17747 # fix for c054 (unusual -pbp case)
17748 || $type_ibeg_2 eq '=='
17753 elsif ( $type_iend_1 eq '{' ) {
17756 # honor breaks at opening brace
17757 # Added to prevent recombining something like this:
17758 # } || eval { package main;
17759 return if $forced_breakpoint_to_go[$iend_1];
17762 # do not recombine lines with ending &&, ||,
17763 elsif ( $is_amp_amp{$type_iend_1} ) {
17764 return unless $want_break_before{$type_iend_1};
17767 # Identify and recombine a broken ?/: chain
17768 elsif ( $type_iend_1 eq '?' ) {
17770 # Do not recombine different levels
17772 if ( $levels_to_go[$ibeg_1] ne $levels_to_go[$ibeg_2] );
17774 # do not recombine unless next line ends in :
17775 return unless $type_iend_2 eq ':';
17778 # for lines ending in a comma...
17779 elsif ( $type_iend_1 eq ',' ) {
17781 # Do not recombine at comma which is following the
17783 # NOTE: this could be controlled by a special flag,
17784 # but it seems to work okay.
17785 return if ( $old_breakpoint_to_go[$iend_1] );
17787 # An isolated '},' may join with an identifier + ';'
17788 # This is useful for the class of a 'bless' statement
17790 if ( $type_ibeg_1 eq '}'
17791 && $type_ibeg_2 eq 'i' )
17794 unless ( ( $ibeg_1 == ( $iend_1 - 1 ) )
17795 && ( $iend_2 == ( $ibeg_2 + 1 ) )
17796 && $this_line_is_semicolon_terminated );
17798 # override breakpoint
17799 $forced_breakpoint_to_go[$iend_1] = 0;
17805 # do not recombine after a comma unless this will
17806 # leave just 1 more line
17807 return unless ( $n + 1 >= $nmax );
17809 # do not recombine if there is a change in
17810 # indentation depth
17812 if ( $levels_to_go[$iend_1] != $levels_to_go[$iend_2] );
17814 # do not recombine a "complex expression" after a
17815 # comma. "complex" means no parens.
17817 foreach my $ii ( $ibeg_2 .. $iend_2 ) {
17818 if ( $tokens_to_go[$ii] eq '(' ) {
17823 return if $saw_paren;
17828 elsif ( $type_iend_1 eq '(' ) {
17830 # No longer doing this
17833 elsif ( $type_iend_1 eq ')' ) {
17835 # No longer doing this
17838 # keep a terminal for-semicolon
17839 elsif ( $type_iend_1 eq 'f' ) {
17843 # if '=' at end of line ...
17844 elsif ( $is_assignment{$type_iend_1} ) {
17846 # keep break after = if it was in input stream
17847 # this helps prevent 'blinkers'
17850 $old_breakpoint_to_go[$iend_1]
17852 # don't strand an isolated '='
17853 && $iend_1 != $ibeg_1
17856 my $is_short_quote =
17857 ( $type_ibeg_2 eq 'Q'
17858 && $ibeg_2 == $iend_2
17859 && token_sequence_length( $ibeg_2, $ibeg_2 ) <
17860 $rOpts_short_concatenation_item_length );
17862 $type_ibeg_1 eq '?' && ( $ibeg_3 >= 0
17863 && $types_to_go[$ibeg_3] eq ':' )
17866 # always join an isolated '=', a short quote, or if this
17867 # will put ?/: at start of adjacent lines
17868 if ( $ibeg_1 != $iend_1
17869 && !$is_short_quote
17876 # unless we can reduce this to two lines
17879 # or three lines, the last with a leading
17881 || ( $nmax == $n + 2
17882 && $types_to_go[$ibeg_nmax] eq ';' )
17884 # or the next line ends with a here doc
17885 || $type_iend_2 eq 'h'
17887 # or the next line ends in an open paren or
17888 # brace and the break hasn't been forced
17890 || ( !$forced_breakpoint_to_go[$iend_1]
17891 && $type_iend_2 eq '{' )
17894 # do not recombine if the two lines might align
17895 # well this is a very approximate test for this
17898 # RT#127633 - the leading tokens are not
17900 ( $type_ibeg_2 ne $tokens_to_go[$ibeg_2] )
17902 # or they are different
17904 && $type_ibeg_2 ne $types_to_go[$ibeg_3] )
17910 # Recombine if we can make two lines
17913 # -lp users often prefer this:
17914 # my $title = function($env, $env, $sysarea,
17915 # "bubba Borrower Entry");
17916 # so we will recombine if -lp is used we have
17920 && ref( $leading_spaces_to_go[$ibeg_3] )
17921 && $type_iend_2 eq ','
17926 # otherwise, scan the rhs line up to last token for
17927 # complexity. Note that we are not counting the last token
17928 # in case it is an opening paren.
17929 my $ok = simple_rhs( $ri_end, $n, $nmax, $ibeg_2, $iend_2 );
17930 return if ( !$ok );
17935 unless ( $tokens_to_go[$ibeg_2] =~ /^[\{\(\[]$/ ) {
17936 $forced_breakpoint_to_go[$iend_1] = 0;
17941 elsif ( $type_iend_1 eq 'k' ) {
17943 # make major control keywords stand out
17948 #/^(last|next|redo|return)$/
17949 $is_last_next_redo_return{ $tokens_to_go[$iend_1] }
17951 # but only if followed by multiple lines
17955 if ( $is_and_or{ $tokens_to_go[$iend_1] } ) {
17957 unless $want_break_before{ $tokens_to_go[$iend_1] };
17960 return ( 1, $skip_Section_3 );
17961 } ## end sub recombine_section_2
17965 my ( $ri_end, $n, $nmax, $ibeg_2, $iend_2 ) = @_;
17967 # Scan line ibeg_2 to $iend_2 up to last token for complexity.
17968 # We are not counting the last token in case it is an opening paren.
17970 # true if rhs is simple, ok to recombine
17974 my $depth = $nesting_depth_to_go[$ibeg_2];
17975 foreach my $i ( $ibeg_2 + 1 .. $iend_2 - 1 ) {
17976 if ( $nesting_depth_to_go[$i] != $depth ) {
17978 last if ( $tv > 1 );
17980 $depth = $nesting_depth_to_go[$i];
17983 # ok to recombine if no level changes before
17987 # otherwise, do not recombine if more than
17988 # two level changes.
17989 return if ( $tv > 1 );
17991 # check total complexity of the two
17992 # adjacent lines that will occur if we do
17996 ? $ri_end->[ $n + 1 ]
17998 foreach my $i ( $iend_2 .. $istop ) {
17999 if ( $nesting_depth_to_go[$i] != $depth ) {
18001 last if ( $tv > 2 );
18003 $depth = $nesting_depth_to_go[$i];
18006 # do not recombine if total is more than 2
18008 return if ( $tv > 2 );
18013 sub recombine_section_3 {
18015 my ( $ri_beg, $ri_end, $n, $this_line_is_semicolon_terminated,
18016 $rleading_amp_count )
18019 # Recombine Section 3:
18020 # Examine token at $ibeg_2 (right end of first line of pair)
18022 # Here are Indexes of the endpoint tokens of the two lines:
18024 # -----line $n-1--- | -----line $n-----
18025 # $ibeg_1 $iend_1 | $ibeg_2 $iend_2
18028 # -----Section 3 looks at this token
18031 # (nothing) => do not join lines
18032 # 1, bs_tweak => ok to join lines
18034 # $bstweak is a small tolerance to add to bond strengths
18037 my $nmax = @{$ri_end} - 1;
18038 my $ibeg_1 = $ri_beg->[ $n - 1 ];
18039 my $iend_1 = $ri_end->[ $n - 1 ];
18040 my $iend_2 = $ri_end->[$n];
18041 my $ibeg_2 = $ri_beg->[$n];
18043 my $ibeg_0 = $n > 1 ? $ri_beg->[ $n - 2 ] : -1;
18044 my $ibeg_3 = $n < $nmax ? $ri_beg->[ $n + 1 ] : -1;
18045 my $ibeg_4 = $n + 2 <= $nmax ? $ri_beg->[ $n + 2 ] : -1;
18046 my $ibeg_nmax = $ri_beg->[$nmax];
18048 my $type_iend_1 = $types_to_go[$iend_1];
18049 my $type_iend_2 = $types_to_go[$iend_2];
18050 my $type_ibeg_1 = $types_to_go[$ibeg_1];
18051 my $type_ibeg_2 = $types_to_go[$ibeg_2];
18053 # handle lines with leading &&, ||
18054 if ( $is_amp_amp{$type_ibeg_2} ) {
18056 ${$rleading_amp_count}++;
18058 # ok to recombine if it follows a ? or :
18059 # and is followed by an open paren..
18061 ( $is_ternary{$type_ibeg_1} && $tokens_to_go[$iend_2] eq '(' )
18063 # or is followed by a ? or : at same depth
18065 # We are looking for something like this. We can
18066 # recombine the && line with the line above to make the
18067 # structure more clear:
18069 # exists $G->{Attr}->{V}
18070 # && exists $G->{Attr}->{V}->{$u}
18071 # ? %{ $G->{Attr}->{V}->{$u} }
18074 # We should probably leave something like this alone:
18076 # exists $G->{Attr}->{E}
18077 # && exists $G->{Attr}->{E}->{$u}
18078 # && exists $G->{Attr}->{E}->{$u}->{$v}
18079 # ? %{ $G->{Attr}->{E}->{$u}->{$v} }
18081 # so that we either have all of the &&'s (or ||'s)
18082 # on one line, as in the first example, or break at
18083 # each one as in the second example. However, it
18084 # sometimes makes things worse to check for this because
18085 # it prevents multiple recombinations. So this is not done.
18087 && $is_ternary{ $types_to_go[$ibeg_3] }
18088 && $nesting_depth_to_go[$ibeg_3] ==
18089 $nesting_depth_to_go[$ibeg_2] );
18091 # Combine a trailing && term with an || term: fix for
18092 # c060 This is rare but can happen.
18095 && $type_ibeg_2 eq '&&'
18096 && $type_ibeg_1 eq '||'
18097 && $nesting_depth_to_go[$ibeg_2] ==
18098 $nesting_depth_to_go[$ibeg_1] );
18100 return if !$ok && $want_break_before{$type_ibeg_2};
18101 $forced_breakpoint_to_go[$iend_1] = 0;
18103 # tweak the bond strength to give this joint priority
18108 # Identify and recombine a broken ?/: chain
18109 elsif ( $type_ibeg_2 eq '?' ) {
18111 # Do not recombine different levels
18112 my $lev = $levels_to_go[$ibeg_2];
18113 return if ( $lev ne $levels_to_go[$ibeg_1] );
18115 # Do not recombine a '?' if either next line or
18116 # previous line does not start with a ':'. The reasons
18117 # are that (1) no alignment of the ? will be possible
18118 # and (2) the expression is somewhat complex, so the
18119 # '?' is harder to see in the interior of the line.
18120 my $follows_colon = $ibeg_1 >= 0 && $type_ibeg_1 eq ':';
18121 my $precedes_colon = $ibeg_3 >= 0 && $types_to_go[$ibeg_3] eq ':';
18122 return unless ( $follows_colon || $precedes_colon );
18124 # we will always combining a ? line following a : line
18125 if ( !$follows_colon ) {
18127 # ...otherwise recombine only if it looks like a
18128 # chain. we will just look at a few nearby lines
18129 # to see if this looks like a chain.
18130 my $local_count = 0;
18131 foreach my $ii ( $ibeg_0, $ibeg_1, $ibeg_3, $ibeg_4 ) {
18134 && $types_to_go[$ii] eq ':'
18135 && $levels_to_go[$ii] == $lev;
18137 return unless ( $local_count > 1 );
18139 $forced_breakpoint_to_go[$iend_1] = 0;
18142 # do not recombine lines with leading '.'
18143 elsif ( $type_ibeg_2 eq '.' ) {
18144 my $i_next_nonblank = min( $inext_to_go[$ibeg_2], $iend_2 );
18148 # ... unless there is just one and we can reduce
18149 # this to two lines if we do. For example, this
18153 # '($dummy, $pat) = &get_next_tex_cmd;' . '$args .= $pat;'
18155 # looks better than this:
18156 # $bodyA .= '($dummy, $pat) = &get_next_tex_cmd;'
18157 # . '$args .= $pat;'
18159 ( $n == 2 && $n == $nmax && $type_ibeg_1 ne $type_ibeg_2 )
18161 # ... or this would strand a short quote , like this
18162 # . "some long quote"
18165 || ( $types_to_go[$i_next_nonblank] eq 'Q'
18166 && $i_next_nonblank >= $iend_2 - 1
18167 && $token_lengths_to_go[$i_next_nonblank] <
18168 $rOpts_short_concatenation_item_length )
18172 # handle leading keyword..
18173 elsif ( $type_ibeg_2 eq 'k' ) {
18175 # handle leading "or"
18176 if ( $tokens_to_go[$ibeg_2] eq 'or' ) {
18179 $this_line_is_semicolon_terminated
18181 $type_ibeg_1 eq '}'
18184 # following 'if' or 'unless' or 'or'
18185 $type_ibeg_1 eq 'k'
18186 && $is_if_unless{ $tokens_to_go[$ibeg_1] }
18188 # important: only combine a very simple
18189 # or statement because the step below
18190 # may have combined a trailing 'and'
18191 # with this or, and we do not want to
18192 # then combine everything together
18193 && ( $iend_2 - $ibeg_2 <= 7 )
18199 $forced_breakpoint_to_go[$iend_1] = 0
18200 unless ( $old_breakpoint_to_go[$iend_1] );
18203 # handle leading 'and' and 'xor'
18204 elsif ($tokens_to_go[$ibeg_2] eq 'and'
18205 || $tokens_to_go[$ibeg_2] eq 'xor' )
18208 # Decide if we will combine a single terminal 'and'
18209 # after an 'if' or 'unless'.
18211 # This looks best with the 'and' on the same
18212 # line as the 'if':
18215 # if $seconds and $nu < 2;
18217 # But this looks better as shown:
18220 # if !$this->{Parents}{$_}
18221 # or $this->{Parents}{$_} eq $_;
18225 $this_line_is_semicolon_terminated
18228 # following 'if' or 'unless' or 'or'
18229 $type_ibeg_1 eq 'k'
18230 && ( $is_if_unless{ $tokens_to_go[$ibeg_1] }
18231 || $tokens_to_go[$ibeg_1] eq 'or' )
18236 # handle leading "if" and "unless"
18237 elsif ( $is_if_unless{ $tokens_to_go[$ibeg_2] } ) {
18239 # Combine something like:
18241 # if ( $lang !~ /${l}$/i );
18243 # next if ( $lang !~ /${l}$/i );
18246 $this_line_is_semicolon_terminated
18248 # previous line begins with 'and' or 'or'
18249 && $type_ibeg_1 eq 'k'
18250 && $is_and_or{ $tokens_to_go[$ibeg_1] }
18255 # handle all other leading keywords
18258 # keywords look best at start of lines,
18259 # but combine things like "1 while"
18260 unless ( $is_assignment{$type_iend_1} ) {
18262 if ( ( $type_iend_1 ne 'k' )
18263 && ( $tokens_to_go[$ibeg_2] ne 'while' ) );
18268 # similar treatment of && and || as above for 'and' and
18269 # 'or': NOTE: This block of code is currently bypassed
18270 # because of a previous block but is retained for possible
18272 elsif ( $is_amp_amp{$type_ibeg_2} ) {
18274 # maybe looking at something like:
18275 # unless $TEXTONLY || $item =~ m%</?(hr>|p>|a|img)%i;
18279 $this_line_is_semicolon_terminated
18281 # previous line begins with an 'if' or 'unless'
18283 && $type_ibeg_1 eq 'k'
18284 && $is_if_unless{ $tokens_to_go[$ibeg_1] }
18289 # handle line with leading = or similar
18290 elsif ( $is_assignment{$type_ibeg_2} ) {
18291 return unless ( $n == 1 || $n == $nmax );
18292 return if ( $old_breakpoint_to_go[$iend_1] );
18296 # unless we can reduce this to two lines
18299 # or three lines, the last with a leading semicolon
18300 || ( $nmax == 3 && $types_to_go[$ibeg_nmax] eq ';' )
18302 # or the next line ends with a here doc
18303 || $type_iend_2 eq 'h'
18305 # or this is a short line ending in ;
18307 && $this_line_is_semicolon_terminated )
18309 $forced_breakpoint_to_go[$iend_1] = 0;
18311 return ( 1, $bs_tweak );
18312 } ## end sub recombine_section_3
18314 } ## end closure recombine_breakpoints
18316 sub insert_final_ternary_breaks {
18318 my ( $self, $ri_left, $ri_right ) = @_;
18320 # Called once per batch to look for and do any final line breaks for
18321 # long ternary chains
18323 my $nmax = @{$ri_right} - 1;
18325 # scan the left and right end tokens of all lines
18327 my $i_first_colon = -1;
18328 for my $n ( 0 .. $nmax ) {
18329 my $il = $ri_left->[$n];
18330 my $ir = $ri_right->[$n];
18331 my $typel = $types_to_go[$il];
18332 my $typer = $types_to_go[$ir];
18333 return if ( $typel eq '?' );
18334 return if ( $typer eq '?' );
18335 if ( $typel eq ':' ) { $i_first_colon = $il; last; }
18336 elsif ( $typer eq ':' ) { $i_first_colon = $ir; last; }
18339 # For long ternary chains,
18340 # if the first : we see has its ? is in the interior
18341 # of a preceding line, then see if there are any good
18342 # breakpoints before the ?.
18343 if ( $i_first_colon > 0 ) {
18344 my $i_question = $mate_index_to_go[$i_first_colon];
18345 if ( $i_question > 0 ) {
18347 foreach my $ii ( reverse( 0 .. $i_question - 1 ) ) {
18348 my $token = $tokens_to_go[$ii];
18349 my $type = $types_to_go[$ii];
18351 # For now, a good break is either a comma or,
18352 # in a long chain, a 'return'.
18353 # Patch for RT #126633: added the $nmax>1 check to avoid
18354 # breaking after a return for a simple ternary. For longer
18355 # chains the break after return allows vertical alignment, so
18356 # it is still done. So perltidy -wba='?' will not break
18357 # immediately after the return in the following statement:
18359 # return 0 ? 'aaaaaaaaaaaaaaaaaaaaa' :
18360 # 'bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb';
18365 || $type eq 'k' && ( $nmax > 1 && $token eq 'return' )
18367 && $self->in_same_container_i( $ii, $i_question )
18370 push @insert_list, $ii;
18375 # insert any new break points
18376 if (@insert_list) {
18377 $self->insert_additional_breaks( \@insert_list, $ri_left,
18383 } ## end sub insert_final_ternary_breaks
18385 sub insert_breaks_before_list_opening_containers {
18387 my ( $self, $ri_left, $ri_right ) = @_;
18389 # This routine is called once per batch to implement the parameters
18390 # --break-before-hash-brace, etc.
18392 # Nothing to do if none of these parameters has been set
18393 return unless %break_before_container_types;
18395 my $nmax = @{$ri_right} - 1;
18396 return unless ( $nmax >= 0 );
18398 my $rLL = $self->[_rLL_];
18400 my $rbreak_before_container_by_seqno =
18401 $self->[_rbreak_before_container_by_seqno_];
18402 my $rK_weld_left = $self->[_rK_weld_left_];
18404 # scan the ends of all lines
18406 for my $n ( 0 .. $nmax ) {
18407 my $il = $ri_left->[$n];
18408 my $ir = $ri_right->[$n];
18409 next unless ( $ir > $il );
18410 my $Kl = $K_to_go[$il];
18411 my $Kr = $K_to_go[$ir];
18413 my $type_end = $rLL->[$Kr]->[_TYPE_];
18415 # Backup before any side comment
18416 if ( $type_end eq '#' ) {
18417 $Kend = $self->K_previous_nonblank($Kr);
18418 next unless defined($Kend);
18419 $type_end = $rLL->[$Kend]->[_TYPE_];
18422 # Backup to the start of any weld; fix for b1173.
18423 if ($total_weld_count) {
18424 my $Kend_test = $rK_weld_left->{$Kend};
18425 if ( defined($Kend_test) && $Kend_test > $Kl ) {
18426 $Kend = $Kend_test;
18427 $Kend_test = $rK_weld_left->{$Kend};
18430 # Do not break if we did not back up to the start of a weld
18431 # (shouldn't happen)
18432 next if ( defined($Kend_test) );
18435 my $token = $rLL->[$Kend]->[_TOKEN_];
18436 next unless ( $is_opening_token{$token} );
18437 next unless ( $Kl < $Kend - 1 );
18439 my $seqno = $rLL->[$Kend]->[_TYPE_SEQUENCE_];
18440 next unless ( defined($seqno) );
18442 # Use the flag which was previously set
18443 next unless ( $rbreak_before_container_by_seqno->{$seqno} );
18445 # Install a break before this opening token.
18446 my $Kbreak = $self->K_previous_nonblank($Kend);
18447 my $ibreak = $Kbreak - $Kl + $il;
18448 next if ( $ibreak < $il );
18449 next if ( $nobreak_to_go[$ibreak] );
18450 push @insert_list, $ibreak;
18453 # insert any new break points
18454 if (@insert_list) {
18455 $self->insert_additional_breaks( \@insert_list, $ri_left, $ri_right );
18458 } ## end sub insert_breaks_before_list_opening_containers
18460 sub note_added_semicolon {
18461 my ( $self, $line_number ) = @_;
18462 $self->[_last_added_semicolon_at_] = $line_number;
18463 if ( $self->[_added_semicolon_count_] == 0 ) {
18464 $self->[_first_added_semicolon_at_] = $line_number;
18466 $self->[_added_semicolon_count_]++;
18467 write_logfile_entry("Added ';' here\n");
18469 } ## end sub note_added_semicolon
18471 sub note_deleted_semicolon {
18472 my ( $self, $line_number ) = @_;
18473 $self->[_last_deleted_semicolon_at_] = $line_number;
18474 if ( $self->[_deleted_semicolon_count_] == 0 ) {
18475 $self->[_first_deleted_semicolon_at_] = $line_number;
18477 $self->[_deleted_semicolon_count_]++;
18478 write_logfile_entry("Deleted unnecessary ';' at line $line_number\n");
18480 } ## end sub note_deleted_semicolon
18482 sub note_embedded_tab {
18483 my ( $self, $line_number ) = @_;
18484 $self->[_embedded_tab_count_]++;
18485 $self->[_last_embedded_tab_at_] = $line_number;
18486 if ( !$self->[_first_embedded_tab_at_] ) {
18487 $self->[_first_embedded_tab_at_] = $line_number;
18490 if ( $self->[_embedded_tab_count_] <= MAX_NAG_MESSAGES ) {
18491 write_logfile_entry("Embedded tabs in quote or pattern\n");
18494 } ## end sub note_embedded_tab
18496 use constant DEBUG_CORRECT_LP => 0;
18498 sub correct_lp_indentation {
18500 # When the -lp option is used, we need to make a last pass through
18501 # each line to correct the indentation positions in case they differ
18502 # from the predictions. This is necessary because perltidy uses a
18503 # predictor/corrector method for aligning with opening parens. The
18504 # predictor is usually good, but sometimes stumbles. The corrector
18505 # tries to patch things up once the actual opening paren locations
18507 my ( $self, $ri_first, $ri_last ) = @_;
18508 my $K_opening_container = $self->[_K_opening_container_];
18509 my $K_closing_container = $self->[_K_closing_container_];
18510 my $do_not_pad = 0;
18512 # Note on flag '$do_not_pad':
18513 # We want to avoid a situation like this, where the aligner inserts
18514 # whitespace before the '=' to align it with a previous '=', because
18515 # otherwise the parens might become mis-aligned in a situation like
18516 # this, where the '=' has become aligned with the previous line,
18517 # pushing the opening '(' forward beyond where we want it.
18519 # $mkFloor::currentRoom = '';
18520 # $mkFloor::c_entry = $c->Entry(
18522 # -relief => 'sunken',
18526 # We leave it to the aligner to decide how to do this.
18528 # first remove continuation indentation if appropriate
18529 my $rLL = $self->[_rLL_];
18530 my $max_line = @{$ri_first} - 1;
18532 #---------------------------------------------------------------------------
18533 # PASS 1: reduce indentation if necessary at any long one-line blocks (c098)
18534 #---------------------------------------------------------------------------
18536 # The point is that sub 'starting_one_line_block' made one-line blocks based
18537 # on default indentation, not -lp indentation. So some of the one-line
18538 # blocks may be too long when given -lp indentation. We will fix that now
18539 # if possible, using the list of these closing block indexes.
18540 my $ri_starting_one_line_block =
18541 $self->[_this_batch_]->[_ri_starting_one_line_block_];
18542 if ( @{$ri_starting_one_line_block} ) {
18543 my @ilist = @{$ri_starting_one_line_block};
18544 my $inext = shift(@ilist);
18546 # loop over lines, checking length of each with a one-line block
18547 my ( $ibeg, $iend );
18548 foreach my $line ( 0 .. $max_line ) {
18549 $iend = $ri_last->[$line];
18550 next if ( $inext > $iend );
18551 $ibeg = $ri_first->[$line];
18553 # This is just for lines with indentation objects (c098)
18555 ref( $leading_spaces_to_go[$ibeg] )
18556 ? $self->excess_line_length( $ibeg, $iend )
18559 if ( $excess > 0 ) {
18560 my $available_spaces = $self->get_available_spaces_to_go($ibeg);
18562 if ( $available_spaces > 0 ) {
18563 my $delete_want = min( $available_spaces, $excess );
18564 my $deleted_spaces =
18565 $self->reduce_lp_indentation( $ibeg, $delete_want );
18566 $available_spaces =
18567 $self->get_available_spaces_to_go($ibeg);
18571 # skip forward to next one-line block to check
18573 $inext = shift @ilist;
18574 next if ( $inext <= $iend );
18575 last if ( $inext > $iend );
18577 last if ( $inext <= $iend );
18581 #-------------------------------------------------------------------
18582 # PASS 2: look for and fix other problems in each line of this batch
18583 #-------------------------------------------------------------------
18585 # look at each output line ...
18586 my ( $ibeg, $iend );
18587 foreach my $line ( 0 .. $max_line ) {
18588 $ibeg = $ri_first->[$line];
18589 $iend = $ri_last->[$line];
18591 # looking at each token in this output line ...
18592 foreach my $i ( $ibeg .. $iend ) {
18594 # How many space characters to place before this token
18595 # for special alignment. Actual padding is done in the
18598 # looking for next unvisited indentation item ...
18599 my $indentation = $leading_spaces_to_go[$i];
18601 # This is just for indentation objects (c098)
18602 next unless ( ref($indentation) );
18604 # Visit each indentation object just once
18605 next if ( $indentation->get_marked() );
18608 $indentation->set_marked(1);
18610 # Skip indentation objects which do not align with container tokens
18611 my $align_seqno = $indentation->get_align_seqno();
18612 next unless ($align_seqno);
18614 # Skip a container which is entirely on this line
18615 my $Ko = $K_opening_container->{$align_seqno};
18616 my $Kc = $K_closing_container->{$align_seqno};
18617 if ( defined($Ko) && defined($Kc) ) {
18618 next if ( $Ko >= $K_to_go[$ibeg] && $Kc <= $K_to_go[$iend] );
18621 if ( $line == 1 && $i == $ibeg ) {
18625 #--------------------------------------------
18626 # Now see what the error is and try to fix it
18627 #--------------------------------------------
18628 my $closing_index = $indentation->get_closed();
18629 my $predicted_pos = $indentation->get_spaces();
18631 # Find actual position:
18634 if ( $i == $ibeg ) {
18636 # Case 1: token is first character of of batch - table lookup
18637 if ( $line == 0 ) {
18639 $actual_pos = $predicted_pos;
18641 my ( $indent, $offset, $is_leading, $exists ) =
18642 get_saved_opening_indentation($align_seqno);
18643 if ( defined($indent) ) {
18645 # NOTE: we could use '1' here if no space after
18646 # opening and '2' if want space; it is hardwired at 1
18647 # like -gnu-style. But it is probably best to leave
18648 # this alone because changing it would change
18649 # formatting of much existing code without any
18650 # significant benefit.
18651 $actual_pos = get_spaces($indent) + $offset + 1;
18655 # Case 2: token starts a new line - use length of previous line
18658 my $ibegm = $ri_first->[ $line - 1 ];
18659 my $iendm = $ri_last->[ $line - 1 ];
18660 $actual_pos = total_line_length( $ibegm, $iendm );
18664 if ( $types_to_go[ $iendm + 1 ] eq 'b' );
18669 # Case 3: $i>$ibeg: token is mid-line - use length to previous token
18672 $actual_pos = total_line_length( $ibeg, $i - 1 );
18674 # for mid-line token, we must check to see if all
18675 # additional lines have continuation indentation,
18676 # and remove it if so. Otherwise, we do not get
18678 if ( $closing_index > $iend ) {
18679 my $ibeg_next = $ri_first->[ $line + 1 ];
18680 if ( $ci_levels_to_go[$ibeg_next] > 0 ) {
18681 $self->undo_lp_ci( $line, $i, $closing_index,
18682 $ri_first, $ri_last );
18687 # By how many spaces (plus or minus) would we need to increase the
18688 # indentation to get alignment with the opening token?
18689 my $move_right = $actual_pos - $predicted_pos;
18691 if (DEBUG_CORRECT_LP) {
18692 my $tok = substr( $tokens_to_go[$i], 0, 8 );
18693 my $avail = $self->get_available_spaces_to_go($ibeg);
18695 "CORRECT_LP for seq=$align_seqno, predicted pos=$predicted_pos actual=$actual_pos => move right=$move_right available=$avail i=$i max=$max_index_to_go tok=$tok\n";
18698 # nothing more to do if no error to correct (gnu2.t)
18699 if ( $move_right == 0 ) {
18700 $indentation->set_recoverable_spaces($move_right);
18704 # Get any collapsed length defined for -xlp
18705 my $collapsed_length =
18706 $self->[_rcollapsed_length_by_seqno_]->{$align_seqno};
18707 $collapsed_length = 0 unless ( defined($collapsed_length) );
18709 if (DEBUG_CORRECT_LP) {
18711 "CORRECT_LP for seq=$align_seqno, collapsed length is $collapsed_length\n";
18714 # if we have not seen closure for this indentation in this batch,
18715 # and do not have a collapsed length estimate, we can only pass on
18716 # a request to the vertical aligner
18717 if ( $closing_index < 0 && !$collapsed_length ) {
18718 $indentation->set_recoverable_spaces($move_right);
18722 # If necessary, look ahead to see if there is really any leading
18723 # whitespace dependent on this whitespace, and also find the
18724 # longest line using this whitespace. Since it is always safe to
18725 # move left if there are no dependents, we only need to do this if
18726 # we may have dependent nodes or need to move right.
18728 my $have_child = $indentation->get_have_child();
18729 my %saw_indentation;
18730 my $line_count = 1;
18731 $saw_indentation{$indentation} = $indentation;
18733 # How far can we move right before we hit the limit?
18734 # let $right_margen = the number of spaces that we can increase
18735 # the current indentation before hitting the maximum line length.
18736 my $right_margin = 0;
18738 if ( $have_child || $move_right > 0 ) {
18741 # include estimated collapsed length for incomplete containers
18742 my $max_length = 0;
18743 if ( $Kc > $K_to_go[$max_index_to_go] ) {
18744 $max_length = $collapsed_length + $predicted_pos;
18747 if ( $i == $ibeg ) {
18748 my $length = total_line_length( $ibeg, $iend );
18749 if ( $length > $max_length ) { $max_length = $length }
18752 # look ahead at the rest of the lines of this batch..
18753 foreach my $line_t ( $line + 1 .. $max_line ) {
18754 my $ibeg_t = $ri_first->[$line_t];
18755 my $iend_t = $ri_last->[$line_t];
18756 last if ( $closing_index <= $ibeg_t );
18758 # remember all different indentation objects
18759 my $indentation_t = $leading_spaces_to_go[$ibeg_t];
18760 $saw_indentation{$indentation_t} = $indentation_t;
18763 # remember longest line in the group
18764 my $length_t = total_line_length( $ibeg_t, $iend_t );
18765 if ( $length_t > $max_length ) {
18766 $max_length = $length_t;
18771 $maximum_line_length_at_level[ $levels_to_go[$ibeg] ] -
18773 if ( $right_margin < 0 ) { $right_margin = 0 }
18776 my $first_line_comma_count =
18777 grep { $_ eq ',' } @types_to_go[ $ibeg .. $iend ];
18778 my $comma_count = $indentation->get_comma_count();
18779 my $arrow_count = $indentation->get_arrow_count();
18781 # This is a simple approximate test for vertical alignment:
18782 # if we broke just after an opening paren, brace, bracket,
18783 # and there are 2 or more commas in the first line,
18784 # and there are no '=>'s,
18785 # then we are probably vertically aligned. We could set
18786 # an exact flag in sub break_lists, but this is good
18788 my $indentation_count = keys %saw_indentation;
18789 my $is_vertically_aligned =
18791 && $first_line_comma_count > 1
18792 && $indentation_count == 1
18793 && ( $arrow_count == 0 || $arrow_count == $line_count ) );
18795 # Make the move if possible ..
18798 # we can always move left
18803 # incomplete container
18804 || ( $rOpts_extended_line_up_parentheses
18805 && $Kc > $K_to_go[$max_index_to_go] )
18806 || $closing_index < 0
18808 # but we should only move right if we are sure it will
18809 # not spoil vertical alignment
18810 || ( $comma_count == 0 )
18811 || ( $comma_count > 0 && !$is_vertically_aligned )
18815 ( $move_right <= $right_margin )
18819 if (DEBUG_CORRECT_LP) {
18821 "CORRECT_LP for seq=$align_seqno, moving $move spaces\n";
18824 foreach ( keys %saw_indentation ) {
18825 $saw_indentation{$_}
18826 ->permanently_decrease_available_spaces( -$move );
18830 # Otherwise, record what we want and the vertical aligner
18831 # will try to recover it.
18833 $indentation->set_recoverable_spaces($move_right);
18835 } ## end loop over tokens in a line
18836 } ## end loop over lines
18837 return $do_not_pad;
18838 } ## end sub correct_lp_indentation
18842 # If there is a single, long parameter within parens, like this:
18844 # $self->command( "/msg "
18845 # . $infoline->chan
18846 # . " You said $1, but did you know that it's square was "
18847 # . $1 * $1 . " ?" );
18849 # we can remove the continuation indentation of the 2nd and higher lines
18850 # to achieve this effect, which is more pleasing:
18852 # $self->command("/msg "
18853 # . $infoline->chan
18854 # . " You said $1, but did you know that it's square was "
18855 # . $1 * $1 . " ?");
18857 my ( $self, $line_open, $i_start, $closing_index, $ri_first, $ri_last ) =
18859 my $max_line = @{$ri_first} - 1;
18861 # must be multiple lines
18862 return unless $max_line > $line_open;
18864 my $lev_start = $levels_to_go[$i_start];
18865 my $ci_start_plus = 1 + $ci_levels_to_go[$i_start];
18867 # see if all additional lines in this container have continuation
18869 my $line_1 = 1 + $line_open;
18870 my $n = $line_open;
18872 while ( ++$n <= $max_line ) {
18873 my $ibeg = $ri_first->[$n];
18874 my $iend = $ri_last->[$n];
18875 if ( $ibeg eq $closing_index ) { $n--; last }
18876 return if ( $lev_start != $levels_to_go[$ibeg] );
18877 return if ( $ci_start_plus != $ci_levels_to_go[$ibeg] );
18878 last if ( $closing_index <= $iend );
18881 # we can reduce the indentation of all continuation lines
18882 my $continuation_line_count = $n - $line_open;
18883 @ci_levels_to_go[ @{$ri_first}[ $line_1 .. $n ] ] =
18884 (0) x ($continuation_line_count);
18885 @leading_spaces_to_go[ @{$ri_first}[ $line_1 .. $n ] ] =
18886 @reduced_spaces_to_go[ @{$ri_first}[ $line_1 .. $n ] ];
18888 } ## end sub undo_lp_ci
18890 ###############################################
18891 # CODE SECTION 10: Code to break long statments
18892 ###############################################
18894 use constant DEBUG_BREAK_LINES => 0;
18896 sub break_long_lines {
18898 #-----------------------------------------------------------
18899 # Break a batch of tokens into lines which do not exceed the
18900 # maximum line length.
18901 #-----------------------------------------------------------
18903 my ( $self, $saw_good_break, $rcolon_list, $rbond_strength_bias ) = @_;
18905 # Input parameters:
18906 # $saw_good_break - a flag set by break_lists
18907 # $rcolon_list - ref to a list of all the ? and : tokens in the batch,
18909 # $rbond_strength_bias - small bond strength bias values set by break_lists
18911 # Output: returns references to the arrays:
18914 # which contain the indexes $i of the first and last tokens on each
18917 # In addition, the array:
18918 # $forced_breakpoint_to_go[$i]
18919 # may be updated to be =1 for any index $i after which there must be
18920 # a break. This signals later routines not to undo the breakpoint.
18923 # This routine is called if a statement is longer than the maximum line
18924 # length, or if a preliminary scanning located desirable break points.
18925 # Sub break_lists has already looked at these tokens and set breakpoints
18926 # (in array $forced_breakpoint_to_go[$i]) where it wants breaks (for
18927 # example after commas, after opening parens, and before closing parens).
18928 # This routine will honor these breakpoints and also add additional
18929 # breakpoints as necessary to keep the line length below the maximum
18930 # requested. It bases its decision on where the 'bond strength' is
18933 my @i_first = (); # the first index to output
18934 my @i_last = (); # the last index to output
18935 my @i_colon_breaks = (); # needed to decide if we have to break at ?'s
18936 if ( $types_to_go[0] eq ':' ) { push @i_colon_breaks, 0 }
18938 # Get the 'bond strengths' between tokens
18939 my $rbond_strength_to_go = $self->set_bond_strengths();
18941 # Add any comma bias set by break_lists
18942 if ( @{$rbond_strength_bias} ) {
18943 foreach my $item ( @{$rbond_strength_bias} ) {
18944 my ( $ii, $bias ) = @{$item};
18945 if ( $ii >= 0 && $ii <= $max_index_to_go ) {
18946 $rbond_strength_to_go->[$ii] += $bias;
18948 elsif (DEVEL_MODE) {
18949 my $KK = $K_to_go[0];
18950 my $lno = $self->[_rLL_]->[$KK]->[_LINE_INDEX_];
18952 "Bad bond strength bias near line $lno: i=$ii must be between 0 and $max_index_to_go\n"
18959 my $imax = $max_index_to_go;
18960 if ( $types_to_go[$imin] eq 'b' ) { $imin++ }
18961 if ( $types_to_go[$imax] eq 'b' ) { $imax-- }
18963 my $i_begin = $imin;
18964 my $last_break_strength = NO_BREAK;
18965 my $i_last_break = -1;
18966 my $line_count = 0;
18968 # see if any ?/:'s are in order
18969 my $colons_in_order = 1;
18970 my $last_tok = EMPTY_STRING;
18971 foreach ( @{$rcolon_list} ) {
18972 if ( $_ eq $last_tok ) { $colons_in_order = 0; last }
18976 # This is a sufficient but not necessary condition for colon chain
18977 my $is_colon_chain = ( $colons_in_order && @{$rcolon_list} > 2 );
18979 #------------------------------------------
18980 # BEGINNING of main loop to set breakpoints
18981 # Keep iterating until we reach the end
18982 #------------------------------------------
18983 while ( $i_begin <= $imax ) {
18985 #------------------------------------------------------------------
18986 # Find the best next breakpoint based on token-token bond strengths
18987 #------------------------------------------------------------------
18988 my ( $i_lowest, $lowest_strength, $leading_alignment_type, $Msg ) =
18989 $self->break_lines_inner_loop(
18994 $last_break_strength,
18996 $rbond_strength_to_go,
19001 # Now make any adjustments required by ternary breakpoint rules
19002 if ( @{$rcolon_list} ) {
19004 my $i_next_nonblank = $inext_to_go[$i_lowest];
19006 #-------------------------------------------------------
19007 # ?/: rule 1 : if a break here will separate a '?' on this
19008 # line from its closing ':', then break at the '?' instead.
19009 # But do not break a sequential chain of ?/: statements
19010 #-------------------------------------------------------
19011 if ( !$is_colon_chain ) {
19012 foreach my $i ( $i_begin + 1 .. $i_lowest - 1 ) {
19013 next unless ( $tokens_to_go[$i] eq '?' );
19015 # do not break if statement is broken by side comment
19017 if ( $tokens_to_go[$max_index_to_go] eq '#'
19018 && terminal_type_i( 0, $max_index_to_go ) !~
19021 # no break needed if matching : is also on the line
19023 if ( $mate_index_to_go[$i] >= 0
19024 && $mate_index_to_go[$i] <= $i_next_nonblank );
19027 if ( $want_break_before{'?'} ) { $i_lowest-- }
19028 $i_next_nonblank = $inext_to_go[$i_lowest];
19033 my $next_nonblank_type = $types_to_go[$i_next_nonblank];
19035 #-------------------------------------------------------------
19036 # ?/: rule 2 : if we break at a '?', then break at its ':'
19038 # Note: this rule is also in sub break_lists to handle a break
19039 # at the start and end of a line (in case breaks are dictated
19040 # by side comments).
19041 #-------------------------------------------------------------
19042 if ( $next_nonblank_type eq '?' ) {
19043 $self->set_closing_breakpoint($i_next_nonblank);
19045 elsif ( $types_to_go[$i_lowest] eq '?' ) {
19046 $self->set_closing_breakpoint($i_lowest);
19049 #--------------------------------------------------------
19050 # ?/: rule 3 : if we break at a ':' then we save
19051 # its location for further work below. We may need to go
19052 # back and break at its '?'.
19053 #--------------------------------------------------------
19054 if ( $next_nonblank_type eq ':' ) {
19055 push @i_colon_breaks, $i_next_nonblank;
19057 elsif ( $types_to_go[$i_lowest] eq ':' ) {
19058 push @i_colon_breaks, $i_lowest;
19061 # here we should set breaks for all '?'/':' pairs which are
19062 # separated by this line
19065 # guard against infinite loop (should never happen)
19066 if ( $i_lowest <= $i_last_break ) {
19068 && Fault("i_lowest=$i_lowest <= i_last_break=$i_last_break\n");
19074 "BREAK: best is i = $i_lowest strength = $lowest_strength;\nReason>> $Msg\n";
19078 # save this line segment, after trimming blanks at the ends
19080 ( $types_to_go[$i_begin] eq 'b' ) ? $i_begin + 1 : $i_begin );
19082 ( $types_to_go[$i_lowest] eq 'b' ) ? $i_lowest - 1 : $i_lowest );
19084 # set a forced breakpoint at a container opening, if necessary, to
19085 # signal a break at a closing container. Excepting '(' for now.
19088 $tokens_to_go[$i_lowest] eq '{'
19089 || $tokens_to_go[$i_lowest] eq '['
19091 && !$forced_breakpoint_to_go[$i_lowest]
19094 $self->set_closing_breakpoint($i_lowest);
19097 # get ready to find the next breakpoint
19098 $last_break_strength = $lowest_strength;
19099 $i_last_break = $i_lowest;
19100 $i_begin = $i_lowest + 1;
19102 # skip past a blank
19103 if ( ( $i_begin <= $imax ) && ( $types_to_go[$i_begin] eq 'b' ) ) {
19108 #-------------------------------------------------
19109 # END of main loop to set continuation breakpoints
19110 #-------------------------------------------------
19112 #-----------------------------------------------------------
19113 # ?/: rule 4 -- if we broke at a ':', then break at
19114 # corresponding '?' unless this is a chain of ?: expressions
19115 #-----------------------------------------------------------
19116 if (@i_colon_breaks) {
19117 my $is_chain = ( $colons_in_order && @i_colon_breaks > 1 );
19118 if ( !$is_chain ) {
19119 $self->do_colon_breaks( \@i_colon_breaks, \@i_first, \@i_last );
19123 return ( \@i_first, \@i_last, $rbond_strength_to_go );
19124 } ## end sub break_long_lines
19126 # small bond strength numbers to help break ties
19127 use constant TINY_BIAS => 0.0001;
19128 use constant MAX_BIAS => 0.001;
19130 sub break_lines_inner_loop {
19132 #-----------------------------------------------------------------
19133 # Find the best next breakpoint in index range ($i_begin .. $imax)
19134 # which, if possible, does not exceed the maximum line length.
19135 #-----------------------------------------------------------------
19143 $last_break_strength,
19145 $rbond_strength_to_go,
19151 # $i_begin = first index of range
19152 # $i_last_break = index of previous break
19153 # $imax = last index of range
19154 # $last_break_strength = bond strength of last break
19155 # $line_count = number of output lines so far
19156 # $rbond_strength_to_go = ref to array of bond strengths
19157 # $saw_good_break = true if old line had a good breakpoint
19160 # $i_lowest = index of best breakpoint
19161 # $lowest_strength = 'bond strength' at best breakpoint
19162 # $leading_alignment_type = special token type after break
19163 # $Msg = string of debug info
19165 my $Msg = EMPTY_STRING;
19166 my $strength = NO_BREAK;
19167 my $i_test = $i_begin - 1;
19169 my $starting_sum = $summed_lengths_to_go[$i_begin];
19170 my $lowest_strength = NO_BREAK;
19171 my $leading_alignment_type = EMPTY_STRING;
19172 my $leading_spaces = leading_spaces_to_go($i_begin);
19173 my $maximum_line_length =
19174 $maximum_line_length_at_level[ $levels_to_go[$i_begin] ];
19177 $Msg .= "updating leading spaces to be $leading_spaces at i=$i_begin\n";
19180 # Do not separate an isolated bare word from an opening paren.
19181 # Alternate Fix #2 for issue b1299. This waits as long as possible
19182 # to make the decision.
19183 if ( $types_to_go[$i_begin] eq 'i'
19184 && substr( $tokens_to_go[$i_begin], 0, 1 ) =~ /\w/ )
19186 my $i_next_nonblank = $inext_to_go[$i_begin];
19187 if ( $tokens_to_go[$i_next_nonblank] eq '(' ) {
19188 $rbond_strength_to_go->[$i_begin] = NO_BREAK;
19192 #-------------------------------------------------
19193 # Begin loop over the indexes in the _to_go arrays
19194 #-------------------------------------------------
19195 while ( ++$i_test <= $imax ) {
19196 my $type = $types_to_go[$i_test];
19197 my $token = $tokens_to_go[$i_test];
19198 my $next_type = $types_to_go[ $i_test + 1 ];
19199 my $next_token = $tokens_to_go[ $i_test + 1 ];
19200 my $i_next_nonblank = $inext_to_go[$i_test];
19201 my $next_nonblank_type = $types_to_go[$i_next_nonblank];
19202 my $next_nonblank_token = $tokens_to_go[$i_next_nonblank];
19203 my $next_nonblank_block_type = $block_type_to_go[$i_next_nonblank];
19205 #---------------------------------------------------------------
19206 # Section A: Get token-token strength and handle any adjustments
19207 #---------------------------------------------------------------
19209 # adjustments to the previous bond strength may have been made, and
19210 # we must keep the bond strength of a token and its following blank
19212 my $last_strength = $strength;
19213 $strength = $rbond_strength_to_go->[$i_test];
19214 if ( $type eq 'b' ) { $strength = $last_strength }
19216 # reduce strength a bit to break ties at an old comma breakpoint ...
19219 $old_breakpoint_to_go[$i_test]
19221 # Patch: limited to just commas to avoid blinking states
19224 # which is a 'good' breakpoint, meaning ...
19225 # we don't want to break before it
19226 && !$want_break_before{$type}
19228 # and either we want to break before the next token
19229 # or the next token is not short (i.e. not a '*', '/' etc.)
19230 && $i_next_nonblank <= $imax
19231 && ( $want_break_before{$next_nonblank_type}
19232 || $token_lengths_to_go[$i_next_nonblank] > 2
19233 || $next_nonblank_type eq ','
19234 || $is_opening_type{$next_nonblank_type} )
19237 $strength -= TINY_BIAS;
19238 DEBUG_BREAK_LINES && do { $Msg .= " :-bias at i=$i_test" };
19241 # otherwise increase strength a bit if this token would be at the
19242 # maximum line length. This is necessary to avoid blinking
19243 # in the above example when the -iob flag is added.
19247 $summed_lengths_to_go[ $i_test + 1 ] -
19249 if ( $len >= $maximum_line_length ) {
19250 $strength += TINY_BIAS;
19251 DEBUG_BREAK_LINES && do { $Msg .= " :+bias at i=$i_test" };
19255 #-------------------------------------
19256 # Section B: Handle forced breakpoints
19257 #-------------------------------------
19260 # Force an immediate break at certain operators
19261 # with lower level than the start of the line,
19262 # unless we've already seen a better break.
19264 # Note on an issue with a preceding '?' :
19266 # There may be a break at a previous ? if the line is long. Because
19267 # of this we do not want to force a break if there is a previous ? on
19268 # this line. For now the best way to do this is to not break if we
19269 # have seen a lower strength point, which is probably a ?.
19271 # Example of unwanted breaks we are avoiding at a '.' following a ?
19272 # from pod2html using perltidy -gnu:
19274 # ? "\n<A NAME=\""
19276 # . "\">\n$text</A>\n"
19277 # : "\n$type$pod2.html\#" . $value . "\">$text<\/A>\n";
19279 ( $strength <= $lowest_strength )
19280 && ( $nesting_depth_to_go[$i_begin] >
19281 $nesting_depth_to_go[$i_next_nonblank] )
19283 $next_nonblank_type =~ /^(\.|\&\&|\|\|)$/
19285 $next_nonblank_type eq 'k'
19287 ## /^(and|or)$/ # note: includes 'xor' now
19288 && $is_and_or{$next_nonblank_token}
19293 $self->set_forced_breakpoint($i_next_nonblank);
19295 && do { $Msg .= " :Forced break at i=$i_next_nonblank" };
19300 # Try to put a break where requested by break_lists
19301 $forced_breakpoint_to_go[$i_test]
19303 # break between ) { in a continued line so that the '{' can
19305 # See similar logic in break_lists which catches instances
19306 # where a line is just something like ') {'. We have to
19307 # be careful because the corresponding block keyword might
19308 # not be on the first line, such as 'for' here:
19312 # for $x ( 1, 2 ) { local $_ = "b"; s/(.*)/+$1/ }
19318 && ( $token eq ')' )
19319 && ( $next_nonblank_type eq '{' )
19320 && ($next_nonblank_block_type)
19321 && ( $next_nonblank_block_type ne $tokens_to_go[$i_begin] )
19323 # RT #104427: Dont break before opening sub brace because
19324 # sub block breaks handled at higher level, unless
19325 # it looks like the preceding list is long and broken
19329 $next_nonblank_block_type =~ /$SUB_PATTERN/
19330 || $next_nonblank_block_type =~ /$ASUB_PATTERN/
19332 && ( $nesting_depth_to_go[$i_begin] ==
19333 $nesting_depth_to_go[$i_next_nonblank] )
19336 && !$rOpts_opening_brace_always_on_right
19339 # There is an implied forced break at a terminal opening brace
19340 || ( ( $type eq '{' ) && ( $i_test == $imax ) )
19344 # Forced breakpoints must sometimes be overridden, for example
19345 # because of a side comment causing a NO_BREAK. It is easier
19346 # to catch this here than when they are set.
19347 if ( $strength < NO_BREAK - 1 ) {
19348 $strength = $lowest_strength - TINY_BIAS;
19351 && do { $Msg .= " :set must_break at i=$i_next_nonblank" };
19355 # quit if a break here would put a good terminal token on
19356 # the next line and we already have a possible break
19359 && ( $next_nonblank_type eq ';' || $next_nonblank_type eq ',' )
19363 $summed_lengths_to_go[ $i_next_nonblank + 1 ] -
19365 ) > $maximum_line_length
19369 if ( $i_lowest >= 0 ) {
19370 DEBUG_BREAK_LINES && do {
19371 $Msg .= " :quit at good terminal='$next_nonblank_type'";
19377 # Avoid a break which would strand a single punctuation
19378 # token. For example, we do not want to strand a leading
19379 # '.' which is followed by a long quoted string.
19380 # But note that we do want to do this with -extrude (l=1)
19381 # so please test any changes to this code on -extrude.
19384 && ( $i_test == $i_begin )
19385 && ( $i_test < $imax )
19386 && ( $token eq $type )
19390 $summed_lengths_to_go[ $i_test + 1 ] -
19392 ) < $maximum_line_length
19396 $i_test = min( $imax, $inext_to_go[$i_test] );
19397 DEBUG_BREAK_LINES && do {
19398 $Msg .= " :redo at i=$i_test";
19403 #------------------------------------------------------------
19404 # Section C: Look for the lowest bond strength between tokens
19405 #------------------------------------------------------------
19406 if ( ( $strength <= $lowest_strength ) && ( $strength < NO_BREAK ) ) {
19408 # break at previous best break if it would have produced
19409 # a leading alignment of certain common tokens, and it
19410 # is different from the latest candidate break
19411 if ($leading_alignment_type) {
19412 DEBUG_BREAK_LINES && do {
19414 " :last at leading_alignment='$leading_alignment_type'";
19419 # Force at least one breakpoint if old code had good
19420 # break It is only called if a breakpoint is required or
19421 # desired. This will probably need some adjustments
19422 # over time. A goal is to try to be sure that, if a new
19423 # side comment is introduced into formatted text, then
19424 # the same breakpoints will occur. scbreak.t
19426 $i_test == $imax # we are at the end
19427 && !$forced_breakpoint_count
19428 && $saw_good_break # old line had good break
19429 && $type =~ /^[#;\{]$/ # and this line ends in
19430 # ';' or side comment
19431 && $i_last_break < 0 # and we haven't made a break
19432 && $i_lowest >= 0 # and we saw a possible break
19433 && $i_lowest < $imax - 1 # (but not just before this ;)
19434 && $strength - $lowest_strength < 0.5 * WEAK # and it's good
19438 DEBUG_BREAK_LINES && do {
19439 $Msg .= " :last at good old break\n";
19444 # Do not skip past an important break point in a short final
19445 # segment. For example, without this check we would miss the
19446 # break at the final / in the following code:
19449 # ( $tau * $mass_pellet * $q_0 *
19450 # ( 1. - exp( -$t_stop / $tau ) ) -
19451 # 4. * $pi * $factor * $k_ice *
19452 # ( $t_melt - $t_ice ) *
19455 # ( $rho_ice * $Qs * $pi * $r_pellet**2 );
19459 && $i_lowest >= 0 # and we saw a possible break
19460 && $i_lowest < $i_test
19461 && $i_test > $imax - 2
19462 && $nesting_depth_to_go[$i_begin] >
19463 $nesting_depth_to_go[$i_lowest]
19464 && $lowest_strength < $last_break_strength - .5 * WEAK
19467 # Make this break for math operators for now
19468 my $ir = $inext_to_go[$i_lowest];
19469 my $il = $iprev_to_go[$ir];
19470 if ( $types_to_go[$il] =~ /^[\/\*\+\-\%]$/
19471 || $types_to_go[$ir] =~ /^[\/\*\+\-\%]$/ )
19473 DEBUG_BREAK_LINES && do {
19474 $Msg .= " :last-noskip_short";
19480 # Update the minimum bond strength location
19481 $lowest_strength = $strength;
19482 $i_lowest = $i_test;
19484 DEBUG_BREAK_LINES && do {
19485 $Msg .= " :last-must_break";
19490 # set flags to remember if a break here will produce a
19491 # leading alignment of certain common tokens
19492 if ( $line_count > 0
19494 && ( $lowest_strength - $last_break_strength <= MAX_BIAS ) )
19496 my $i_last_end = $iprev_to_go[$i_begin];
19497 my $tok_beg = $tokens_to_go[$i_begin];
19498 my $type_beg = $types_to_go[$i_begin];
19501 # check for leading alignment of certain tokens
19503 $tok_beg eq $next_nonblank_token
19504 && $is_chain_operator{$tok_beg}
19505 && ( $type_beg eq 'k'
19506 || $type_beg eq $tok_beg )
19507 && $nesting_depth_to_go[$i_begin] >=
19508 $nesting_depth_to_go[$i_next_nonblank]
19511 || ( $tokens_to_go[$i_last_end] eq $token
19512 && $is_chain_operator{$token}
19513 && ( $type eq 'k' || $type eq $token )
19514 && $nesting_depth_to_go[$i_last_end] >=
19515 $nesting_depth_to_go[$i_test] )
19518 $leading_alignment_type = $next_nonblank_type;
19523 #-----------------------------------------------------------
19524 # Section D: See if the maximum line length will be exceeded
19525 #-----------------------------------------------------------
19526 my $too_long = ( $i_test >= $imax );
19527 if ( !$too_long ) {
19530 $summed_lengths_to_go[ $i_test + 2 ] -
19532 $too_long = $next_length > $maximum_line_length;
19534 # To prevent blinkers we will avoid leaving a token exactly at
19535 # the line length limit unless it is the last token or one of
19536 # several "good" types.
19538 # The following code was a blinker with -pbp before this
19540 ## $last_nonblank_token eq '('
19541 ## && $is_indirect_object_taker{ $paren_type
19542 ## [$paren_depth] }
19543 # The issue causing the problem is that if the
19544 # term [$paren_depth] gets broken across a line then
19545 # the whitespace routine doesn't see both opening and closing
19546 # brackets and will format like '[ $paren_depth ]'. This
19547 # leads to an oscillation in length depending if we break
19548 # before the closing bracket or not.
19550 && $i_test + 1 < $imax
19551 && $next_nonblank_type ne ','
19552 && !$is_closing_type{$next_nonblank_type} )
19554 $too_long = $next_length >= $maximum_line_length;
19555 DEBUG_BREAK_LINES && do {
19556 $Msg .= " :too_long=$too_long" if ($too_long);
19561 DEBUG_BREAK_LINES && do {
19564 $next_nonblank_token ? $next_nonblank_token : EMPTY_STRING;
19565 my $i_testp2 = $i_test + 2;
19566 if ( $i_testp2 > $max_index_to_go + 1 ) {
19567 $i_testp2 = $max_index_to_go + 1;
19569 if ( length($ltok) > 6 ) { $ltok = substr( $ltok, 0, 8 ) }
19570 if ( length($rtok) > 6 ) { $rtok = substr( $rtok, 0, 8 ) }
19572 "BREAK: i=$i_test imax=$imax $types_to_go[$i_test] $next_nonblank_type sp=($leading_spaces) lnext= $summed_lengths_to_go[$i_testp2] 2long=$too_long str=$strength $ltok $rtok\n";
19575 # allow one extra terminal token after exceeding line length
19576 # if it would strand this token.
19577 if ( $rOpts_fuzzy_line_length
19579 && $i_lowest == $i_test
19580 && $token_lengths_to_go[$i_test] > 1
19581 && ( $next_nonblank_type eq ';' || $next_nonblank_type eq ',' ) )
19584 DEBUG_BREAK_LINES && do {
19585 $Msg .= " :do_not_strand next='$next_nonblank_type'";
19589 # Stop if line will be too long and we have a solution
19592 # ... no more space and we have a break
19593 $too_long && $i_lowest >= 0
19595 # ... or no more tokens
19596 || $i_test == $imax
19599 DEBUG_BREAK_LINES && do {
19601 " :Done-too_long=$too_long or i_lowest=$i_lowest or $i_test==imax";
19607 #-----------------------------------------------
19608 # End loop over the indexes in the _to_go arrays
19609 #-----------------------------------------------
19611 # Be sure we return an index in the range ($ibegin .. $imax).
19612 # We will break at imax if no other break was found.
19613 if ( $i_lowest < 0 ) { $i_lowest = $imax }
19615 return ( $i_lowest, $lowest_strength, $leading_alignment_type, $Msg );
19616 } ## end sub break_lines_inner_loop
19618 sub do_colon_breaks {
19619 my ( $self, $ri_colon_breaks, $ri_first, $ri_last ) = @_;
19621 # using a simple method for deciding if we are in a ?/: chain --
19622 # this is a chain if it has multiple ?/: pairs all in order;
19624 # Note that if line starts in a ':' we count that above as a break
19626 my @insert_list = ();
19627 foreach ( @{$ri_colon_breaks} ) {
19628 my $i_question = $mate_index_to_go[$_];
19629 if ( $i_question >= 0 ) {
19630 if ( $want_break_before{'?'} ) {
19631 $i_question = $iprev_to_go[$i_question];
19634 if ( $i_question >= 0 ) {
19635 push @insert_list, $i_question;
19638 $self->insert_additional_breaks( \@insert_list, $ri_first, $ri_last );
19643 ###########################################
19644 # CODE SECTION 11: Code to break long lists
19645 ###########################################
19647 { ## begin closure break_lists
19649 # These routines and variables are involved in finding good
19650 # places to break long lists.
19652 use constant DEBUG_BREAK_LISTS => 0;
19663 $i_last_nonblank_token,
19664 $last_nonblank_block_type,
19665 $last_nonblank_token,
19666 $last_nonblank_type,
19667 $last_old_breakpoint_count,
19669 $next_nonblank_block_type,
19670 $next_nonblank_token,
19671 $next_nonblank_type,
19672 $old_breakpoint_count,
19673 $starting_breakpoint_count,
19684 @breakpoint_undo_stack,
19687 @identifier_count_stack,
19688 @index_before_arrow,
19693 @last_nonblank_type,
19694 @old_breakpoint_count_stack,
19695 @opening_structure_index_stack,
19696 @rfor_semicolon_list,
19697 @has_old_logical_breakpoints,
19701 @type_sequence_stack,
19705 # these arrays must retain values between calls
19706 my ( @has_broken_sublist, @dont_align, @want_comma_break );
19711 sub initialize_break_lists {
19713 @has_broken_sublist = ();
19714 @want_comma_break = ();
19716 #---------------------------------------------------
19717 # Set tolerances to prevent formatting instabilities
19718 #---------------------------------------------------
19720 # Define tolerances to use when checking if closed
19721 # containers will fit on one line. This is necessary to avoid
19722 # formatting instability. The basic tolerance is based on the
19725 # - Always allow for at least one extra space after a closing token so
19726 # that we do not strand a comma or semicolon. (oneline.t).
19728 # - Use an increased line length tolerance when -ci > -i to avoid
19729 # blinking states (case b923 and others).
19731 1 + max( 0, $rOpts_continuation_indentation - $rOpts_indent_columns );
19733 # In addition, it may be necessary to use a few extra tolerance spaces
19734 # when -lp is used and/or when -xci is used. The history of this
19735 # so far is as follows:
19737 # FIX1: At least 3 characters were been found to be required for -lp
19738 # to fixes cases b1059 b1063 b1117.
19740 # FIX2: Further testing showed that we need a total of 3 extra spaces
19741 # when -lp is set for non-lists, and at least 2 spaces when -lp and
19743 # Fixes cases b1063 b1103 b1134 b1135 b1136 b1138 b1140 b1143 b1144
19744 # b1145 b1146 b1147 b1148 b1151 b1152 b1153 b1154 b1156 b1157 b1164
19747 # FIX3: To fix cases b1169 b1170 b1171, an update was made in sub
19748 # 'find_token_starting_list' to go back before an initial blank space.
19749 # This fixed these three cases, and allowed the tolerances to be
19750 # reduced to continue to fix all other known cases of instability.
19751 # This gives the current tolerance formulation.
19755 if ($rOpts_line_up_parentheses) {
19757 # boost tol for combination -lp -xci
19758 if ($rOpts_extended_continuation_indentation) {
19762 # boost tol for combination -lp and any -vtc > 0, but only for
19763 # non-list containers
19765 foreach ( keys %closing_vertical_tightness ) {
19767 unless ( $closing_vertical_tightness{$_} );
19768 $lp_tol_boost = 1; # Fixes B1193;
19774 # Define a level where list formatting becomes highly stressed and
19775 # needs to be simplified. Introduced for case b1262.
19776 # $list_stress_level = min($stress_level_alpha, $stress_level_beta + 2);
19777 # This is now '$high_stress_level'.
19780 } ## end sub initialize_break_lists
19782 # routine to define essential variables when we go 'up' to
19784 sub check_for_new_minimum_depth {
19785 my ( $self, $depth_t, $seqno ) = @_;
19786 if ( $depth_t < $minimum_depth ) {
19788 $minimum_depth = $depth_t;
19790 # these arrays need not retain values between calls
19791 $type_sequence_stack[$depth_t] = $seqno;
19792 $override_cab3[$depth_t] =
19793 $rOpts_comma_arrow_breakpoints == 3
19795 && $self->[_roverride_cab3_]->{$seqno};
19797 $override_cab3[$depth_t] = undef;
19798 $breakpoint_stack[$depth_t] = $starting_breakpoint_count;
19799 $container_type[$depth_t] = EMPTY_STRING;
19800 $identifier_count_stack[$depth_t] = 0;
19801 $index_before_arrow[$depth_t] = -1;
19802 $interrupted_list[$depth_t] = 1;
19803 $item_count_stack[$depth_t] = 0;
19804 $last_nonblank_type[$depth_t] = EMPTY_STRING;
19805 $opening_structure_index_stack[$depth_t] = -1;
19807 $breakpoint_undo_stack[$depth_t] = undef;
19808 $comma_index[$depth_t] = undef;
19809 $last_comma_index[$depth_t] = undef;
19810 $last_dot_index[$depth_t] = undef;
19811 $old_breakpoint_count_stack[$depth_t] = undef;
19812 $has_old_logical_breakpoints[$depth_t] = 0;
19813 $rand_or_list[$depth_t] = [];
19814 $rfor_semicolon_list[$depth_t] = [];
19815 $i_equals[$depth_t] = -1;
19817 # these arrays must retain values between calls
19818 if ( !defined( $has_broken_sublist[$depth_t] ) ) {
19819 $dont_align[$depth_t] = 0;
19820 $has_broken_sublist[$depth_t] = 0;
19821 $want_comma_break[$depth_t] = 0;
19825 } ## end sub check_for_new_minimum_depth
19827 # routine to decide which commas to break at within a container;
19829 # $bp_count = number of comma breakpoints set
19830 # $do_not_break_apart = a flag indicating if container need not
19832 sub set_comma_breakpoints {
19834 my ( $self, $dd, $rbond_strength_bias ) = @_;
19836 my $do_not_break_apart = 0;
19839 if ( $item_count_stack[$dd] ) {
19841 # Do not break a list unless there are some non-line-ending commas.
19842 # This avoids getting different results with only non-essential
19843 # commas, and fixes b1192.
19844 my $seqno = $type_sequence_stack[$dd];
19846 my $real_comma_count =
19847 $seqno ? $self->[_rtype_count_by_seqno_]->{$seqno}->{','} : 1;
19849 # handle commas not in containers...
19850 if ( $dont_align[$dd] ) {
19851 $self->do_uncontained_comma_breaks( $dd, $rbond_strength_bias );
19854 # handle commas within containers...
19855 elsif ($real_comma_count) {
19856 my $fbc = $forced_breakpoint_count;
19858 # always open comma lists not preceded by keywords,
19859 # barewords, identifiers (that is, anything that doesn't
19860 # look like a function call)
19861 my $must_break_open = $last_nonblank_type[$dd] !~ /^[kwiU]$/;
19863 $self->set_comma_breakpoints_final(
19866 i_opening_paren => $opening_structure_index_stack[$dd],
19867 i_closing_paren => $i,
19868 item_count => $item_count_stack[$dd],
19869 identifier_count => $identifier_count_stack[$dd],
19870 rcomma_index => $comma_index[$dd],
19871 next_nonblank_type => $next_nonblank_type,
19872 list_type => $container_type[$dd],
19873 interrupted => $interrupted_list[$dd],
19874 rdo_not_break_apart => \$do_not_break_apart,
19875 must_break_open => $must_break_open,
19876 has_broken_sublist => $has_broken_sublist[$dd],
19879 $bp_count = $forced_breakpoint_count - $fbc;
19880 $do_not_break_apart = 0 if $must_break_open;
19883 return ( $bp_count, $do_not_break_apart );
19884 } ## end sub set_comma_breakpoints
19886 # These types are excluded at breakpoints to prevent blinking
19887 # Switched from excluded to included as part of fix for b1214
19888 my %is_uncontained_comma_break_included_type;
19892 my @q = qw< k R } ) ] Y Z U w i q Q .
19893 = **= += *= &= <<= &&= -= /= |= >>= ||= //= .= %= ^= x=>;
19894 @is_uncontained_comma_break_included_type{@q} = (1) x scalar(@q);
19897 sub do_uncontained_comma_breaks {
19899 # Handle commas not in containers...
19900 # This is a catch-all routine for commas that we
19901 # don't know what to do with because the don't fall
19902 # within containers. We will bias the bond strength
19903 # to break at commas which ended lines in the input
19904 # file. This usually works better than just trying
19905 # to put as many items on a line as possible. A
19906 # downside is that if the input file is garbage it
19907 # won't work very well. However, the user can always
19908 # prevent following the old breakpoints with the
19910 my ( $self, $dd, $rbond_strength_bias ) = @_;
19912 # Check added for issue c131; an error here would be due to an
19913 # error initializing @comma_index when entering depth $dd.
19915 foreach my $ii ( @{ $comma_index[$dd] } ) {
19916 if ( $ii < 0 || $ii > $max_index_to_go ) {
19917 my $KK = $K_to_go[0];
19918 my $lno = $self->[_rLL_]->[$KK]->[_LINE_INDEX_];
19920 Bad comma index near line $lno: i=$ii must be between 0 and $max_index_to_go
19927 my $old_comma_break_count = 0;
19928 foreach my $ii ( @{ $comma_index[$dd] } ) {
19930 if ( $old_breakpoint_to_go[$ii] ) {
19931 $old_comma_break_count++;
19933 # Store the bias info for use by sub set_bond_strength
19934 push @{$rbond_strength_bias}, [ $ii, $bias ];
19936 # reduce bias magnitude to force breaks in order
19941 # Also put a break before the first comma if
19942 # (1) there was a break there in the input, and
19943 # (2) there was exactly one old break before the first comma break
19944 # (3) OLD: there are multiple old comma breaks
19945 # (3) NEW: there are one or more old comma breaks (see return example)
19946 # (4) the first comma is at the starting level ...
19947 # ... fixes cases b064 b065 b068 b210 b747
19948 # (5) the batch does not start with a ci>0 [ignore a ci change by -xci]
19949 # ... fixes b1220. If ci>0 we are in the middle of a snippet,
19950 # maybe because -boc has been forcing out previous lines.
19952 # For example, we will follow the user and break after
19953 # 'print' in this snippet:
19955 # "conformability (Not the same dimension)\n",
19956 # "\t", $have, " is ", text_unit($hu), "\n",
19957 # "\t", $want, " is ", text_unit($wu), "\n",
19960 # Another example, just one comma, where we will break after
19963 # $x * cos($a) - $y * sin($a),
19964 # $x * sin($a) + $y * cos($a);
19966 # Breaking a print statement:
19968 # ( $? & 127 ) ? " (SIG#" . ( $? & 127 ) . ")" : "",
19969 # ( $? & 128 ) ? " -- core dumped" : "", "\n";
19971 # But we will not force a break after the opening paren here
19972 # (causes a blinker):
19973 # $heap->{stream}->set_output_filter(
19974 # poe::filter::reference->new('myotherfreezer') ),
19977 my $i_first_comma = $comma_index[$dd]->[0];
19978 my $level_comma = $levels_to_go[$i_first_comma];
19979 my $ci_start = $ci_levels_to_go[0];
19981 # Here we want to use the value of ci before any -xci adjustment
19982 if ( $ci_start && $rOpts_extended_continuation_indentation ) {
19983 my $K0 = $K_to_go[0];
19984 if ( $self->[_rseqno_controlling_my_ci_]->{$K0} ) { $ci_start = 0 }
19987 && $old_breakpoint_to_go[$i_first_comma]
19988 && $level_comma == $levels_to_go[0] )
19992 foreach my $ii ( reverse( 0 .. $i_first_comma - 1 ) ) {
19993 if ( $old_breakpoint_to_go[$ii] ) {
19995 last if ( $obp_count > 1 );
19997 if ( $levels_to_go[$ii] == $level_comma );
20001 # Changed rule from multiple old commas to just one here:
20002 if ( $ibreak >= 0 && $obp_count == 1 && $old_comma_break_count > 0 )
20004 my $ibreak_m = $ibreak;
20005 $ibreak_m-- if ( $types_to_go[$ibreak_m] eq 'b' );
20006 if ( $ibreak_m >= 0 ) {
20008 # In order to avoid blinkers we have to be fairly
20012 # Rule 1: Do not to break before an opening token
20013 # Rule 2: avoid breaking at ternary operators
20014 # (see b931, which is similar to the above print example)
20015 # Rule 3: Do not break at chain operators to fix case b1119
20016 # - The previous test was '$typem !~ /^[\(\{\[L\?\:]$/'
20018 # NEW Rule, replaced above rules after case b1214:
20019 # only break at one of the included types
20021 # Be sure to test any changes to these rules against runs
20022 # with -l=0 such as the 'bbvt' test (perltidyrc_colin)
20024 my $type_m = $types_to_go[$ibreak_m];
20026 # Switched from excluded to included for b1214. If necessary
20027 # the token could also be checked if type_m eq 'k'
20028 if ( $is_uncontained_comma_break_included_type{$type_m} ) {
20029 $self->set_forced_breakpoint($ibreak);
20035 } ## end sub do_uncontained_comma_breaks
20037 my %is_logical_container;
20041 my @q = qw# if elsif unless while and or err not && | || ? : ! #;
20042 @is_logical_container{@q} = (1) x scalar(@q);
20044 # This filter will allow most tokens to skip past a section of code
20045 %quick_filter = %is_assignment;
20046 @q = qw# => . ; < > ~ #;
20048 push @q, 'f'; # added for ';' for issue c154
20049 @quick_filter{@q} = (1) x scalar(@q);
20052 sub set_for_semicolon_breakpoints {
20053 my ( $self, $dd ) = @_;
20054 foreach ( @{ $rfor_semicolon_list[$dd] } ) {
20055 $self->set_forced_breakpoint($_);
20060 sub set_logical_breakpoints {
20061 my ( $self, $dd ) = @_;
20063 $item_count_stack[$dd] == 0
20064 && $is_logical_container{ $container_type[$dd] }
20066 || $has_old_logical_breakpoints[$dd]
20070 # Look for breaks in this order:
20073 foreach my $i ( 0 .. 3 ) {
20074 if ( $rand_or_list[$dd][$i] ) {
20075 foreach ( @{ $rand_or_list[$dd][$i] } ) {
20076 $self->set_forced_breakpoint($_);
20079 # break at any 'if' and 'unless' too
20080 foreach ( @{ $rand_or_list[$dd][4] } ) {
20081 $self->set_forced_breakpoint($_);
20083 $rand_or_list[$dd] = [];
20089 } ## end sub set_logical_breakpoints
20091 sub is_unbreakable_container {
20093 # never break a container of one of these types
20094 # because bad things can happen (map1.t)
20096 return $is_sort_map_grep{ $container_type[$dd] };
20101 my ( $self, $is_long_line, $rbond_strength_bias ) = @_;
20103 #--------------------------------------------------------------------
20104 # This routine is called once per batch, if the batch is a list, to
20105 # set line breaks so that hierarchical structure can be displayed and
20106 # so that list items can be vertically aligned. The output of this
20107 # routine is stored in the array @forced_breakpoint_to_go, which is
20108 # used by sub 'break_long_lines' to set final breakpoints. This is
20109 # probably the most complex routine in perltidy, so I have
20110 # broken it into pieces and over-commented it.
20111 #--------------------------------------------------------------------
20113 my $rLL = $self->[_rLL_];
20114 my $ris_list_by_seqno = $self->[_ris_list_by_seqno_];
20115 my $ris_broken_container = $self->[_ris_broken_container_];
20116 my $rbreak_before_container_by_seqno =
20117 $self->[_rbreak_before_container_by_seqno_];
20119 $starting_depth = $nesting_depth_to_go[0];
20121 $block_type = SPACE;
20122 $current_depth = $starting_depth;
20124 $i_last_colon = -1;
20126 $i_line_start = -1;
20127 $last_nonblank_token = ';';
20128 $last_nonblank_type = ';';
20129 $last_nonblank_block_type = SPACE;
20130 $last_old_breakpoint_count = 0;
20131 $minimum_depth = $current_depth + 1; # forces update in check below
20132 $old_breakpoint_count = 0;
20133 $starting_breakpoint_count = $forced_breakpoint_count;
20136 $type_sequence = EMPTY_STRING;
20138 my $total_depth_variation = 0;
20139 my $i_old_assignment_break;
20140 my $depth_last = $starting_depth;
20141 my $comma_follows_last_closing_token;
20143 $self->check_for_new_minimum_depth( $current_depth,
20144 $parent_seqno_to_go[0] )
20145 if ( $current_depth < $minimum_depth );
20147 my $want_previous_breakpoint = -1;
20149 my $saw_good_breakpoint;
20151 #----------------------------------------
20152 # Main loop over all tokens in this batch
20153 #----------------------------------------
20154 while ( ++$i <= $max_index_to_go ) {
20155 if ( $type ne 'b' ) {
20156 $i_last_nonblank_token = $i - 1;
20157 $last_nonblank_type = $type;
20158 $last_nonblank_token = $token;
20159 $last_nonblank_block_type = $block_type;
20161 $type = $types_to_go[$i];
20162 $block_type = $block_type_to_go[$i];
20163 $token = $tokens_to_go[$i];
20164 $type_sequence = $type_sequence_to_go[$i];
20166 my $i_next_nonblank = $inext_to_go[$i];
20167 $next_nonblank_type = $types_to_go[$i_next_nonblank];
20168 $next_nonblank_token = $tokens_to_go[$i_next_nonblank];
20169 $next_nonblank_block_type = $block_type_to_go[$i_next_nonblank];
20171 #-------------------------------------------
20172 # Loop Section A: Look for special breakpoints...
20173 #-------------------------------------------
20175 # set break if flag was set
20176 if ( $want_previous_breakpoint >= 0 ) {
20177 $self->set_forced_breakpoint($want_previous_breakpoint);
20178 $want_previous_breakpoint = -1;
20181 $last_old_breakpoint_count = $old_breakpoint_count;
20183 # Check for a good old breakpoint ..
20185 $old_breakpoint_to_go[$i]
20187 # Note: ignore old breaks at types 'L' and 'R' to fix case
20188 # b1097. These breaks only occur under high stress.
20190 && $next_nonblank_type ne 'R'
20192 # ... and ignore other high stress level breaks, fixes b1395
20193 && $levels_to_go[$i] < $high_stress_level
20196 ( $want_previous_breakpoint, $i_old_assignment_break ) =
20197 $self->check_old_breakpoints( $i_next_nonblank,
20198 $want_previous_breakpoint, $i_old_assignment_break );
20201 next if ( $type eq 'b' );
20203 $depth = $nesting_depth_to_go[ $i + 1 ];
20205 $total_depth_variation += abs( $depth - $depth_last );
20206 $depth_last = $depth;
20208 # safety check - be sure we always break after a comment
20209 # Shouldn't happen .. an error here probably means that the
20210 # nobreak flag did not get turned off correctly during
20212 if ( $type eq '#' ) {
20213 if ( $i != $max_index_to_go ) {
20216 Non-fatal program bug: backup logic required to break after a comment
20219 $nobreak_to_go[$i] = 0;
20220 $self->set_forced_breakpoint($i);
20221 } ## end if ( $i != $max_index_to_go)
20222 } ## end if ( $type eq '#' )
20224 # Force breakpoints at certain tokens in long lines.
20225 # Note that such breakpoints will be undone later if these tokens
20226 # are fully contained within parens on a line.
20229 # break before a keyword within a line
20233 # if one of these keywords:
20234 && $is_if_unless_while_until_for_foreach{$token}
20236 # but do not break at something like '1 while'
20237 && ( $last_nonblank_type ne 'n' || $i > 2 )
20239 # and let keywords follow a closing 'do' brace
20240 && $last_nonblank_block_type ne 'do'
20245 # or container is broken (by side-comment, etc)
20246 || ( $next_nonblank_token eq '('
20247 && $mate_index_to_go[$i_next_nonblank] < $i )
20251 $self->set_forced_breakpoint( $i - 1 );
20254 # remember locations of '||' and '&&' for possible breaks if we
20255 # decide this is a long logical expression.
20256 if ( $type eq '||' ) {
20257 push @{ $rand_or_list[$depth][2] }, $i;
20258 ++$has_old_logical_breakpoints[$depth]
20259 if ( ( $i == $i_line_start || $i == $i_line_end )
20260 && $rOpts_break_at_old_logical_breakpoints );
20262 elsif ( $type eq '&&' ) {
20263 push @{ $rand_or_list[$depth][3] }, $i;
20264 ++$has_old_logical_breakpoints[$depth]
20265 if ( ( $i == $i_line_start || $i == $i_line_end )
20266 && $rOpts_break_at_old_logical_breakpoints );
20268 elsif ( $type eq 'f' ) {
20269 push @{ $rfor_semicolon_list[$depth] }, $i;
20271 elsif ( $type eq 'k' ) {
20272 if ( $token eq 'and' ) {
20273 push @{ $rand_or_list[$depth][1] }, $i;
20274 ++$has_old_logical_breakpoints[$depth]
20275 if ( ( $i == $i_line_start || $i == $i_line_end )
20276 && $rOpts_break_at_old_logical_breakpoints );
20279 # break immediately at 'or's which are probably not in a logical
20280 # block -- but we will break in logical breaks below so that
20281 # they do not add to the forced_breakpoint_count
20282 elsif ( $token eq 'or' ) {
20283 push @{ $rand_or_list[$depth][0] }, $i;
20284 ++$has_old_logical_breakpoints[$depth]
20285 if ( ( $i == $i_line_start || $i == $i_line_end )
20286 && $rOpts_break_at_old_logical_breakpoints );
20287 if ( $is_logical_container{ $container_type[$depth] } ) {
20290 if ($is_long_line) { $self->set_forced_breakpoint($i) }
20291 elsif ( ( $i == $i_line_start || $i == $i_line_end )
20292 && $rOpts_break_at_old_logical_breakpoints )
20294 $saw_good_breakpoint = 1;
20298 elsif ( $token eq 'if' || $token eq 'unless' ) {
20299 push @{ $rand_or_list[$depth][4] }, $i;
20300 if ( ( $i == $i_line_start || $i == $i_line_end )
20301 && $rOpts_break_at_old_logical_breakpoints )
20303 $self->set_forced_breakpoint($i);
20307 elsif ( $is_assignment{$type} ) {
20308 $i_equals[$depth] = $i;
20311 #-----------------------------------------
20312 # Loop Section B: Handle a sequenced token
20313 #-----------------------------------------
20314 if ($type_sequence) {
20315 $self->break_lists_type_sequence;
20318 #------------------------------------------
20319 # Loop Section C: Handle Increasing Depth..
20320 #------------------------------------------
20322 # hardened against bad input syntax: depth jump must be 1 and type
20323 # must be opening..fixes c102
20324 if ( $depth == $current_depth + 1 && $is_opening_type{$type} ) {
20325 $self->break_lists_increasing_depth();
20328 #------------------------------------------
20329 # Loop Section D: Handle Decreasing Depth..
20330 #------------------------------------------
20332 # hardened against bad input syntax: depth jump must be 1 and type
20333 # must be closing .. fixes c102
20334 elsif ( $depth == $current_depth - 1 && $is_closing_type{$type} ) {
20336 $self->break_lists_decreasing_depth();
20338 $comma_follows_last_closing_token =
20339 $next_nonblank_type eq ',' || $next_nonblank_type eq '=>';
20343 #----------------------------------
20344 # Loop Section E: Handle this token
20345 #----------------------------------
20347 $current_depth = $depth;
20349 # most token types can skip the rest of this loop
20350 next unless ( $quick_filter{$type} );
20352 # handle comma-arrow
20353 if ( $type eq '=>' ) {
20354 next if ( $last_nonblank_type eq '=>' );
20355 next if $rOpts_break_at_old_comma_breakpoints;
20357 if ( $rOpts_comma_arrow_breakpoints == 3
20358 && !$override_cab3[$depth] );
20359 $want_comma_break[$depth] = 1;
20360 $index_before_arrow[$depth] = $i_last_nonblank_token;
20364 elsif ( $type eq '.' ) {
20365 $last_dot_index[$depth] = $i;
20368 # Turn off comma alignment if we are sure that this is not a list
20369 # environment. To be safe, we will do this if we see certain
20370 # non-list tokens, such as ';', '=', and also the environment is
20372 ## $type =~ /^[\;\<\>\~f]$/ || $is_assignment{$type}
20373 elsif ( $is_non_list_type{$type}
20374 && !$self->is_in_list_by_i($i) )
20376 $dont_align[$depth] = 1;
20377 $want_comma_break[$depth] = 0;
20378 $index_before_arrow[$depth] = -1;
20380 # no special comma breaks in C-style 'for' terms (c154)
20381 if ( $type eq 'f' ) { $last_comma_index[$depth] = undef }
20384 # now just handle any commas
20385 next if ( $type ne ',' );
20386 $self->study_comma($comma_follows_last_closing_token);
20388 } ## end while ( ++$i <= $max_index_to_go)
20390 #-------------------------------------------
20391 # END of loop over all tokens in this batch
20392 # Now set breaks for any unfinished lists ..
20393 #-------------------------------------------
20395 foreach my $dd ( reverse( $minimum_depth .. $current_depth ) ) {
20397 $interrupted_list[$dd] = 1;
20398 $has_broken_sublist[$dd] = 1 if ( $dd < $current_depth );
20399 $self->set_comma_breakpoints( $dd, $rbond_strength_bias )
20400 if ( $item_count_stack[$dd] );
20401 $self->set_logical_breakpoints($dd)
20402 if ( $has_old_logical_breakpoints[$dd] );
20403 $self->set_for_semicolon_breakpoints($dd);
20405 # break open container...
20406 my $i_opening = $opening_structure_index_stack[$dd];
20407 if ( defined($i_opening) && $i_opening >= 0 ) {
20408 $self->set_forced_breakpoint($i_opening)
20410 is_unbreakable_container($dd)
20412 # Avoid a break which would place an isolated ' or "
20415 && $i_opening >= $max_index_to_go - 2
20416 && ( $token eq "'" || $token eq '"' ) )
20419 } ## end for ( my $dd = $current_depth...)
20421 #----------------------------------------
20422 # Return the flag '$saw_good_breakpoint'.
20423 #----------------------------------------
20424 # This indicates if the input file had some good breakpoints. This
20425 # flag will be used to force a break in a line shorter than the
20426 # allowed line length.
20427 if ( $has_old_logical_breakpoints[$current_depth] ) {
20428 $saw_good_breakpoint = 1;
20431 # A complex line with one break at an = has a good breakpoint.
20432 # This is not complex ($total_depth_variation=0):
20436 # This is complex ($total_depth_variation=6):
20438 # (is_boundp("a", 'self-insert') && is_boundp("b", 'self-insert'));
20440 # The check ($i_old_.. < $max_index_to_go) was added to fix b1333
20441 elsif ($i_old_assignment_break
20442 && $total_depth_variation > 4
20443 && $old_breakpoint_count == 1
20444 && $i_old_assignment_break < $max_index_to_go )
20446 $saw_good_breakpoint = 1;
20449 return $saw_good_breakpoint;
20450 } ## end sub break_lists
20454 # study and store info for a list comma
20456 my ( $self, $comma_follows_last_closing_token ) = @_;
20458 $last_dot_index[$depth] = undef;
20459 $last_comma_index[$depth] = $i;
20461 # break here if this comma follows a '=>'
20462 # but not if there is a side comment after the comma
20463 if ( $want_comma_break[$depth] ) {
20465 if ( $next_nonblank_type =~ /^[\)\}\]R]$/ ) {
20466 if ($rOpts_comma_arrow_breakpoints) {
20467 $want_comma_break[$depth] = 0;
20472 $self->set_forced_breakpoint($i)
20473 unless ( $next_nonblank_type eq '#' );
20475 # break before the previous token if it looks safe
20476 # Example of something that we will not try to break before:
20477 # DBI::SQL_SMALLINT() => $ado_consts->{adSmallInt},
20478 # Also we don't want to break at a binary operator (like +):
20482 # $y - $R, -fill => 'black',
20484 my $ibreak = $index_before_arrow[$depth] - 1;
20486 && $tokens_to_go[ $ibreak + 1 ] !~ /^[\)\}\]]$/ )
20488 if ( $tokens_to_go[$ibreak] eq '-' ) { $ibreak-- }
20489 if ( $types_to_go[$ibreak] eq 'b' ) { $ibreak-- }
20490 if ( $types_to_go[$ibreak] =~ /^[,wiZCUG\(\{\[]$/ ) {
20492 # don't break before a comma, as in the following:
20493 # ( LONGER_THAN,=> 1,
20494 # EIGHTY_CHARACTERS,=> 2,
20495 # CAUSES_FORMATTING,=> 3,
20498 # This example is for -tso but should be general rule
20499 if ( $tokens_to_go[ $ibreak + 1 ] ne '->'
20500 && $tokens_to_go[ $ibreak + 1 ] ne ',' )
20502 $self->set_forced_breakpoint($ibreak);
20507 $want_comma_break[$depth] = 0;
20508 $index_before_arrow[$depth] = -1;
20510 # handle list which mixes '=>'s and ','s:
20511 # treat any list items so far as an interrupted list
20512 $interrupted_list[$depth] = 1;
20516 # Break after all commas above starting depth...
20517 # But only if the last closing token was followed by a comma,
20518 # to avoid breaking a list operator (issue c119)
20519 if ( $depth < $starting_depth
20520 && $comma_follows_last_closing_token
20521 && !$dont_align[$depth] )
20523 $self->set_forced_breakpoint($i)
20524 unless ( $next_nonblank_type eq '#' );
20528 # add this comma to the list..
20529 my $item_count = $item_count_stack[$depth];
20530 if ( $item_count == 0 ) {
20532 # but do not form a list with no opening structure
20535 # open INFILE_COPY, ">$input_file_copy"
20536 # or die ("very long message");
20537 if ( ( $opening_structure_index_stack[$depth] < 0 )
20538 && $self->is_in_block_by_i($i) )
20540 $dont_align[$depth] = 1;
20544 $comma_index[$depth][$item_count] = $i;
20545 ++$item_count_stack[$depth];
20546 if ( $last_nonblank_type =~ /^[iR\]]$/ ) {
20547 $identifier_count_stack[$depth]++;
20550 } ## end sub study_comma
20552 sub check_old_breakpoints {
20554 # Check for a good old breakpoint
20556 my ( $self, $i_next_nonblank, $want_previous_breakpoint,
20557 $i_old_assignment_break )
20561 $i_line_start = $i_next_nonblank;
20563 $old_breakpoint_count++;
20565 # Break before certain keywords if user broke there and
20566 # this is a 'safe' break point. The idea is to retain
20567 # any preferred breaks for sequential list operations,
20568 # like a schwartzian transform.
20569 if ($rOpts_break_at_old_keyword_breakpoints) {
20571 $next_nonblank_type eq 'k'
20572 && $is_keyword_returning_list{$next_nonblank_token}
20573 && ( $type =~ /^[=\)\]\}Riw]$/
20574 || $type eq 'k' && $is_keyword_returning_list{$token} )
20578 # we actually have to set this break next time through
20579 # the loop because if we are at a closing token (such
20580 # as '}') which forms a one-line block, this break might
20583 # But do not do this at an '=' if:
20584 # - the user wants breaks before an equals (b434 b903)
20585 # - or -naws is set (can be unstable, see b1354)
20586 my $skip = $type eq '='
20587 && ( $want_break_before{$type}
20588 || !$rOpts_add_whitespace );
20590 $want_previous_breakpoint = $i
20596 # Break before attributes if user broke there
20597 if ($rOpts_break_at_old_attribute_breakpoints) {
20598 if ( $next_nonblank_type eq 'A' ) {
20599 $want_previous_breakpoint = $i;
20603 # remember an = break as possible good break point
20604 if ( $is_assignment{$type} ) {
20605 $i_old_assignment_break = $i;
20607 elsif ( $is_assignment{$next_nonblank_type} ) {
20608 $i_old_assignment_break = $i_next_nonblank;
20610 return ( $want_previous_breakpoint, $i_old_assignment_break );
20611 } ## end sub check_old_breakpoints
20613 sub break_lists_type_sequence {
20617 # handle any postponed closing breakpoints
20618 if ( $is_closing_sequence_token{$token} ) {
20619 if ( $type eq ':' ) {
20620 $i_last_colon = $i;
20622 # retain break at a ':' line break
20623 if ( ( $i == $i_line_start || $i == $i_line_end )
20624 && $rOpts_break_at_old_ternary_breakpoints
20625 && $levels_to_go[$i] < $high_stress_level )
20628 $self->set_forced_breakpoint($i);
20630 # Break at a previous '=', but only if it is before
20631 # the mating '?'. Mate_index test fixes b1287.
20632 my $ieq = $i_equals[$depth];
20633 if ( $ieq > 0 && $ieq < $mate_index_to_go[$i] ) {
20634 $self->set_forced_breakpoint( $i_equals[$depth] );
20635 $i_equals[$depth] = -1;
20639 if ( has_postponed_breakpoint($type_sequence) ) {
20640 my $inc = ( $type eq ':' ) ? 0 : 1;
20641 if ( $i >= $inc ) {
20642 $self->set_forced_breakpoint( $i - $inc );
20647 # set breaks at ?/: if they will get separated (and are
20648 # not a ?/: chain), or if the '?' is at the end of the
20650 elsif ( $token eq '?' ) {
20651 my $i_colon = $mate_index_to_go[$i];
20653 $i_colon <= 0 # the ':' is not in this batch
20654 || $i == 0 # this '?' is the first token of the line
20655 || $i == $max_index_to_go # or this '?' is the last token
20659 # don't break if # this has a side comment, and
20660 # don't break at a '?' if preceded by ':' on
20661 # this line of previous ?/: pair on this line.
20662 # This is an attempt to preserve a chain of ?/:
20663 # expressions (elsif2.t).
20667 || $parent_seqno_to_go[$i_last_colon] !=
20668 $parent_seqno_to_go[$i]
20670 && $tokens_to_go[$max_index_to_go] ne '#'
20673 $self->set_forced_breakpoint($i);
20675 $self->set_closing_breakpoint($i);
20679 elsif ( $is_opening_token{$token} ) {
20681 # do requested -lp breaks at the OPENING token for BROKEN
20682 # blocks. NOTE: this can be done for both -lp and -xlp,
20683 # but only -xlp can really take advantage of this. So this
20684 # is currently restricted to -xlp to avoid excess changes to
20685 # existing -lp formatting.
20686 if ( $rOpts_extended_line_up_parentheses
20687 && $mate_index_to_go[$i] < 0 )
20690 $self->[_rlp_object_by_seqno_]->{$type_sequence};
20692 my $K_begin_line = $lp_object->get_K_begin_line();
20693 my $i_begin_line = $K_begin_line - $K_to_go[0];
20694 $self->set_forced_lp_break( $i_begin_line, $i );
20699 } ## end sub break_lists_type_sequence
20701 sub break_lists_increasing_depth {
20705 #--------------------------------------------
20706 # prepare for a new list when depth increases
20707 # token $i is a '(','{', or '['
20708 #--------------------------------------------
20710 #----------------------------------------------------------
20711 # BEGIN initialize depth arrays
20712 # ... use the same order as sub check_for_new_minimum_depth
20713 #----------------------------------------------------------
20714 $type_sequence_stack[$depth] = $type_sequence;
20715 $override_cab3[$depth] =
20716 $rOpts_comma_arrow_breakpoints == 3
20718 && $self->[_roverride_cab3_]->{$type_sequence};
20720 $breakpoint_stack[$depth] = $forced_breakpoint_count;
20721 $container_type[$depth] =
20724 $is_container_label_type{$last_nonblank_type}
20725 ? $last_nonblank_token
20727 $identifier_count_stack[$depth] = 0;
20728 $index_before_arrow[$depth] = -1;
20729 $interrupted_list[$depth] = 0;
20730 $item_count_stack[$depth] = 0;
20731 $last_nonblank_type[$depth] = $last_nonblank_type;
20732 $opening_structure_index_stack[$depth] = $i;
20734 $breakpoint_undo_stack[$depth] = $forced_breakpoint_undo_count;
20735 $comma_index[$depth] = undef;
20736 $last_comma_index[$depth] = undef;
20737 $last_dot_index[$depth] = undef;
20738 $old_breakpoint_count_stack[$depth] = $old_breakpoint_count;
20739 $has_old_logical_breakpoints[$depth] = 0;
20740 $rand_or_list[$depth] = [];
20741 $rfor_semicolon_list[$depth] = [];
20742 $i_equals[$depth] = -1;
20744 # if line ends here then signal closing token to break
20745 if ( $next_nonblank_type eq 'b' || $next_nonblank_type eq '#' ) {
20746 $self->set_closing_breakpoint($i);
20749 # Not all lists of values should be vertically aligned..
20750 $dont_align[$depth] =
20752 # code BLOCKS are handled at a higher level
20753 ( $block_type ne EMPTY_STRING )
20755 # certain paren lists
20756 || ( $type eq '(' ) && (
20758 # it does not usually look good to align a list of
20759 # identifiers in a parameter list, as in:
20760 # my($var1, $var2, ...)
20761 # (This test should probably be refined, for now I'm just
20762 # testing for any keyword)
20763 ( $last_nonblank_type eq 'k' )
20765 # a trailing '(' usually indicates a non-list
20766 || ( $next_nonblank_type eq '(' )
20768 $has_broken_sublist[$depth] = 0;
20769 $want_comma_break[$depth] = 0;
20771 #----------------------------
20772 # END initialize depth arrays
20773 #----------------------------
20775 # patch to outdent opening brace of long if/for/..
20776 # statements (like this one). See similar coding in
20777 # set_continuation breaks. We have also catch it here for
20778 # short line fragments which otherwise will not go through
20779 # break_long_lines.
20783 # if we have the ')' but not its '(' in this batch..
20784 && ( $last_nonblank_token eq ')' )
20785 && $mate_index_to_go[$i_last_nonblank_token] < 0
20787 # and user wants brace to left
20788 && !$rOpts_opening_brace_always_on_right
20790 && ( $type eq '{' ) # should be true
20791 && ( $token eq '{' ) # should be true
20794 $self->set_forced_breakpoint( $i - 1 );
20798 } ## end sub break_lists_increasing_depth
20800 sub break_lists_decreasing_depth {
20802 my ( $self, $rbond_strength_bias ) = @_;
20804 # We have arrived at a closing container token in sub break_lists:
20805 # the token at index $i is one of these: ')','}', ']'
20806 # A number of important breakpoints for this container can now be set
20807 # based on the information that we have collected. This includes:
20808 # - breaks at commas to format tables
20809 # - breaks at certain logical operators and other good breakpoints
20810 # - breaks at opening and closing containers if needed by selected
20811 # formatting styles
20812 # These breaks are made by calling sub 'set_forced_breakpoint'
20814 $self->check_for_new_minimum_depth( $depth, $parent_seqno_to_go[$i] )
20815 if ( $depth < $minimum_depth );
20817 # force all outer logical containers to break after we see on
20819 $has_old_logical_breakpoints[$depth] ||=
20820 $has_old_logical_breakpoints[$current_depth];
20822 # Patch to break between ') {' if the paren list is broken.
20823 # There is similar logic in break_long_lines for
20824 # non-broken lists.
20826 && $next_nonblank_block_type
20827 && $interrupted_list[$current_depth]
20828 && $next_nonblank_type eq '{'
20829 && !$rOpts_opening_brace_always_on_right )
20831 $self->set_forced_breakpoint($i);
20834 #print "LISTY sees: i=$i type=$type tok=$token block=$block_type depth=$depth next=$next_nonblank_type next_block=$next_nonblank_block_type inter=$interrupted_list[$current_depth]\n";
20836 #-----------------------------------------------------------------
20837 # Set breaks at commas to display a table of values if appropriate
20838 #-----------------------------------------------------------------
20839 my ( $bp_count, $do_not_break_apart ) = ( 0, 0 );
20840 ( $bp_count, $do_not_break_apart ) =
20841 $self->set_comma_breakpoints( $current_depth, $rbond_strength_bias )
20842 if ( $item_count_stack[$current_depth] );
20844 #-----------------------------------------------------------
20845 # Now set flags needed to decide if we should break open the
20846 # container ... This is a long rambling section which has
20847 # grown over time to handle all situations.
20848 #-----------------------------------------------------------
20849 my $i_opening = $opening_structure_index_stack[$current_depth];
20850 my $saw_opening_structure = ( $i_opening >= 0 );
20852 if ( $rOpts_line_up_parentheses && $saw_opening_structure ) {
20853 $lp_object = $self->[_rlp_object_by_seqno_]
20854 ->{ $type_sequence_to_go[$i_opening] };
20857 # this term is long if we had to break at interior commas..
20858 my $is_long_term = $bp_count > 0;
20860 # If this is a short container with one or more comma arrows,
20861 # then we will mark it as a long term to open it if requested.
20862 # $rOpts_comma_arrow_breakpoints =
20863 # 0 - open only if comma precedes closing brace
20864 # 1 - stable: except for one line blocks
20865 # 2 - try to form 1 line blocks
20867 # 4 - always open up if vt=0
20868 # 5 - stable: even for one line blocks if vt=0
20870 # PATCH: Modify the -cab flag if we are not processing a list:
20871 # We only want the -cab flag to apply to list containers, so
20872 # for non-lists we use the default and stable -cab=5 value.
20873 # Fixes case b939a.
20874 my $cab_flag = $rOpts_comma_arrow_breakpoints;
20875 if ( $type_sequence && !$self->[_ris_list_by_seqno_]->{$type_sequence} )
20880 # Ignore old breakpoints when under stress.
20881 # Fixes b1203 b1204 as well as b1197-b1200.
20882 # But not if -lp: fixes b1264, b1265. NOTE: rechecked with
20883 # b1264 to see if this check is still required at all, and
20884 # these still require a check, but at higher level beta+3
20885 # instead of beta: b1193 b780
20886 if ( $saw_opening_structure
20888 && $levels_to_go[$i_opening] >= $high_stress_level )
20892 # Do not break hash braces under stress (fixes b1238)
20893 $do_not_break_apart ||= $types_to_go[$i_opening] eq 'L';
20895 # This option fixes b1235, b1237, b1240 with old and new
20896 # -lp, but formatting is nicer with next option.
20897 ## $is_long_term ||=
20898 ## $levels_to_go[$i_opening] > $stress_level_beta + 1;
20900 # This option fixes b1240 but not b1235, b1237 with new -lp,
20901 # but this gives better formatting than the previous option.
20902 # TODO: see if stress_level_alha should also be considered
20903 $do_not_break_apart ||=
20904 $levels_to_go[$i_opening] > $stress_level_beta;
20907 if ( !$is_long_term
20908 && $saw_opening_structure
20909 && $is_opening_token{ $tokens_to_go[$i_opening] }
20910 && $index_before_arrow[ $depth + 1 ] > 0
20911 && !$opening_vertical_tightness{ $tokens_to_go[$i_opening] } )
20915 || $cab_flag == 0 && $last_nonblank_token eq ','
20916 || $cab_flag == 5 && $old_breakpoint_to_go[$i_opening];
20919 # mark term as long if the length between opening and closing
20920 # parens exceeds allowed line length
20921 if ( !$is_long_term && $saw_opening_structure ) {
20923 my $i_opening_minus = $self->find_token_starting_list($i_opening);
20925 my $excess = $self->excess_line_length( $i_opening_minus, $i );
20927 # Use standard spaces for indentation of lists in -lp mode
20928 # if it gives a longer line length. This helps to avoid an
20929 # instability due to forming and breaking one-line blocks.
20930 # This fixes case b1314.
20931 my $indentation = $leading_spaces_to_go[$i_opening_minus];
20932 if ( ref($indentation)
20933 && $self->[_ris_broken_container_]->{$type_sequence} )
20935 my $lp_spaces = $indentation->get_spaces();
20936 my $std_spaces = $indentation->get_standard_spaces();
20937 my $diff = $std_spaces - $lp_spaces;
20938 if ( $diff > 0 ) { $excess += $diff }
20941 my $tol = $length_tol;
20943 # boost tol for an -lp container
20947 && ( $rOpts_extended_continuation_indentation
20948 || !$self->[_ris_list_by_seqno_]->{$type_sequence} )
20951 $tol += $lp_tol_boost;
20954 # Patch to avoid blinking with -bbxi=2 and -cab=2
20955 # in which variations in -ci cause unstable formatting
20956 # in edge cases. We just always add one ci level so that
20957 # the formatting is independent of the -BBX results.
20958 # Fixes cases b1137 b1149 b1150 b1155 b1158 b1159 b1160
20959 # b1161 b1166 b1167 b1168
20960 if ( !$ci_levels_to_go[$i_opening]
20961 && $self->[_rbreak_before_container_by_seqno_]->{$type_sequence}
20964 $tol += $rOpts_continuation_indentation;
20967 $is_long_term = $excess + $tol > 0;
20971 # We've set breaks after all comma-arrows. Now we have to
20972 # undo them if this can be a one-line block
20973 # (the only breakpoints set will be due to comma-arrows)
20977 # user doesn't require breaking after all comma-arrows
20978 ( $cab_flag != 0 ) && ( $cab_flag != 4 )
20980 # and if the opening structure is in this batch
20981 && $saw_opening_structure
20983 # and either on the same old line
20985 $old_breakpoint_count_stack[$current_depth] ==
20986 $last_old_breakpoint_count
20988 # or user wants to form long blocks with arrows
20991 # if -cab=3 is overridden then use -cab=2 behavior
20992 || $cab_flag == 3 && $override_cab3[$current_depth]
20995 # and we made breakpoints between the opening and closing
20996 && ( $breakpoint_undo_stack[$current_depth] <
20997 $forced_breakpoint_undo_count )
20999 # and this block is short enough to fit on one line
21000 # Note: use < because need 1 more space for possible comma
21005 $self->undo_forced_breakpoint_stack(
21006 $breakpoint_undo_stack[$current_depth] );
21009 # now see if we have any comma breakpoints left
21010 my $has_comma_breakpoints =
21011 ( $breakpoint_stack[$current_depth] != $forced_breakpoint_count );
21013 # update broken-sublist flag of the outer container
21014 $has_broken_sublist[$depth] =
21015 $has_broken_sublist[$depth]
21016 || $has_broken_sublist[$current_depth]
21018 || $has_comma_breakpoints;
21020 # Having come to the closing ')', '}', or ']', now we have to decide
21021 # if we should 'open up' the structure by placing breaks at the
21022 # opening and closing containers. This is a tricky decision. Here
21023 # are some of the basic considerations:
21025 # -If this is a BLOCK container, then any breakpoints will have
21026 # already been set (and according to user preferences), so we need do
21029 # -If we have a comma-separated list for which we can align the list
21030 # items, then we need to do so because otherwise the vertical aligner
21031 # cannot currently do the alignment.
21033 # -If this container does itself contain a container which has been
21034 # broken open, then it should be broken open to properly show the
21037 # -If there is nothing to align, and no other reason to break apart,
21038 # then do not do it.
21040 # We will not break open the parens of a long but 'simple' logical
21041 # expression. For example:
21043 # This is an example of a simple logical expression and its formatting:
21045 # if ( $bigwasteofspace1 && $bigwasteofspace2
21046 # || $bigwasteofspace3 && $bigwasteofspace4 )
21048 # Most people would prefer this than the 'spacey' version:
21051 # $bigwasteofspace1 && $bigwasteofspace2
21052 # || $bigwasteofspace3 && $bigwasteofspace4
21055 # To illustrate the rules for breaking logical expressions, consider:
21059 # and ( exists $ids_excl_uc{$id_uc}
21060 # or grep $id_uc =~ /$_/, @ids_excl_uc ))
21062 # This is on the verge of being difficult to read. The current
21063 # default is to open it up like this:
21068 # and ( exists $ids_excl_uc{$id_uc}
21069 # or grep $id_uc =~ /$_/, @ids_excl_uc )
21072 # This is a compromise which tries to avoid being too dense and to
21073 # spacey. A more spaced version would be:
21079 # exists $ids_excl_uc{$id_uc}
21080 # or grep $id_uc =~ /$_/, @ids_excl_uc
21084 # Some people might prefer the spacey version -- an option could be
21085 # added. The innermost expression contains a long block '( exists
21088 # Here is how the logic goes: We will force a break at the 'or' that
21089 # the innermost expression contains, but we will not break apart its
21090 # opening and closing containers because (1) it contains no
21091 # multi-line sub-containers itself, and (2) there is no alignment to
21092 # be gained by breaking it open like this
21095 # exists $ids_excl_uc{$id_uc}
21096 # or grep $id_uc =~ /$_/, @ids_excl_uc
21099 # (although this looks perfectly ok and might be good for long
21100 # expressions). The outer 'if' container, though, contains a broken
21101 # sub-container, so it will be broken open to avoid too much density.
21102 # Also, since it contains no 'or's, there will be a forced break at
21105 # Handle the experimental flag --break-open-compact-parens
21106 # NOTE: This flag is not currently used and may eventually be removed.
21107 # If this flag is set, we will implement it by
21108 # pretending we did not see the opening structure, since in that case
21109 # parens always get opened up.
21110 if ( $saw_opening_structure
21111 && $rOpts_break_open_compact_parens )
21114 # This parameter is a one-character flag, as follows:
21115 # '0' matches no parens -> break open NOT OK
21116 # '1' matches all parens -> break open OK
21117 # Other values are same as used by the weld-exclusion-list
21118 my $flag = $rOpts_break_open_compact_parens;
21122 $saw_opening_structure = 0;
21126 # NOTE: $seqno will be equal to closure var $type_sequence here
21127 my $seqno = $type_sequence_to_go[$i_opening];
21128 $saw_opening_structure =
21129 !$self->match_paren_control_flag( $seqno, $flag );
21133 # Set some more flags telling something about this container..
21134 my $is_simple_logical_expression;
21135 if ( $item_count_stack[$current_depth] == 0
21136 && $saw_opening_structure
21137 && $tokens_to_go[$i_opening] eq '('
21138 && $is_logical_container{ $container_type[$current_depth] } )
21141 # This seems to be a simple logical expression with
21142 # no existing breakpoints. Set a flag to prevent
21144 if ( !$has_comma_breakpoints ) {
21145 $is_simple_logical_expression = 1;
21148 #---------------------------------------------------
21149 # This seems to be a simple logical expression with
21150 # breakpoints (broken sublists, for example). Break
21151 # at all 'or's and '||'s.
21152 #---------------------------------------------------
21154 $self->set_logical_breakpoints($current_depth);
21158 # break long terms at any C-style for semicolons (c154)
21160 && @{ $rfor_semicolon_list[$current_depth] } )
21162 $self->set_for_semicolon_breakpoints($current_depth);
21164 # and open up a long 'for' or 'foreach' container to allow
21165 # leading term alignment unless -lp is used.
21166 $has_comma_breakpoints = 1 unless ($lp_object);
21169 #----------------------------------------------------------------
21170 # FINALLY: Break open container according to the flags which have
21172 #----------------------------------------------------------------
21175 # breaks for code BLOCKS are handled at a higher level
21178 # we do not need to break at the top level of an 'if'
21180 && !$is_simple_logical_expression
21182 ## modification to keep ': (' containers vertically tight;
21183 ## but probably better to let user set -vt=1 to avoid
21184 ## inconsistency with other paren types
21185 ## && ($container_type[$current_depth] ne ':')
21187 # otherwise, we require one of these reasons for breaking:
21190 # - this term has forced line breaks
21191 $has_comma_breakpoints
21193 # - the opening container is separated from this batch
21194 # for some reason (comment, blank line, code block)
21195 # - this is a non-paren container spanning multiple lines
21196 || !$saw_opening_structure
21198 # - this is a long block contained in another breakable
21200 || $is_long_term && !$self->is_in_block_by_i($i_opening)
21205 # do special -lp breaks at the CLOSING token for INTACT
21206 # blocks (because we might not do them if the block does
21209 my $K_begin_line = $lp_object->get_K_begin_line();
21210 my $i_begin_line = $K_begin_line - $K_to_go[0];
21211 $self->set_forced_lp_break( $i_begin_line, $i_opening );
21214 # break after opening structure.
21215 # note: break before closing structure will be automatic
21216 if ( $minimum_depth <= $current_depth ) {
21218 if ( $i_opening >= 0 ) {
21219 $self->set_forced_breakpoint($i_opening)
21220 unless ( $do_not_break_apart
21221 || is_unbreakable_container($current_depth) );
21224 # break at ',' of lower depth level before opening token
21225 if ( $last_comma_index[$depth] ) {
21226 $self->set_forced_breakpoint( $last_comma_index[$depth] );
21229 # break at '.' of lower depth level before opening token
21230 if ( $last_dot_index[$depth] ) {
21231 $self->set_forced_breakpoint( $last_dot_index[$depth] );
21234 # break before opening structure if preceded by another
21235 # closing structure and a comma. This is normally
21236 # done by the previous closing brace, but not
21237 # if it was a one-line block.
21238 if ( $i_opening > 2 ) {
21240 ( $types_to_go[ $i_opening - 1 ] eq 'b' )
21244 my $type_prev = $types_to_go[$i_prev];
21245 my $token_prev = $tokens_to_go[$i_prev];
21248 && ( $types_to_go[ $i_prev - 1 ] eq ')'
21249 || $types_to_go[ $i_prev - 1 ] eq '}' )
21252 $self->set_forced_breakpoint($i_prev);
21255 # also break before something like ':(' or '?('
21257 elsif ($type_prev =~ /^([k\:\?]|&&|\|\|)$/
21258 && $want_break_before{$token_prev} )
21260 $self->set_forced_breakpoint($i_prev);
21265 # break after comma following closing structure
21266 if ( $types_to_go[ $i + 1 ] eq ',' ) {
21267 $self->set_forced_breakpoint( $i + 1 );
21270 # break before an '=' following closing structure
21272 $is_assignment{$next_nonblank_type}
21273 && ( $breakpoint_stack[$current_depth] !=
21274 $forced_breakpoint_count )
21277 $self->set_forced_breakpoint($i);
21280 # break at any comma before the opening structure Added
21281 # for -lp, but seems to be good in general. It isn't
21282 # obvious how far back to look; the '5' below seems to
21283 # work well and will catch the comma in something like
21284 # push @list, myfunc( $param, $param, ..
21286 my $icomma = $last_comma_index[$depth];
21287 if ( defined($icomma) && ( $i_opening - $icomma ) < 5 ) {
21288 unless ( $forced_breakpoint_to_go[$icomma] ) {
21289 $self->set_forced_breakpoint($icomma);
21294 #-----------------------------------------------------------
21295 # Break open a logical container open if it was already open
21296 #-----------------------------------------------------------
21297 elsif ($is_simple_logical_expression
21298 && $has_old_logical_breakpoints[$current_depth] )
21300 $self->set_logical_breakpoints($current_depth);
21303 # Handle long container which does not get opened up
21304 elsif ($is_long_term) {
21306 # must set fake breakpoint to alert outer containers that
21308 set_fake_breakpoint();
21312 } ## end sub break_lists_decreasing_depth
21313 } ## end closure break_lists
21320 # Added 'w' to fix b1172
21321 my @q = qw(k w i Z ->);
21322 @is_kwiZ{@q} = (1) x scalar(@q);
21324 # added = for b1211
21325 @q = qw<( [ { L R } ] ) = b>;
21327 @is_key_type{@q} = (1) x scalar(@q);
21330 use constant DEBUG_FIND_START => 0;
21332 sub find_token_starting_list {
21334 # When testing to see if a block will fit on one line, some
21335 # previous token(s) may also need to be on the line; particularly
21336 # if this is a sub call. So we will look back at least one
21338 my ( $self, $i_opening_paren ) = @_;
21340 # This will be the return index
21341 my $i_opening_minus = $i_opening_paren;
21343 if ( $i_opening_minus <= 0 ) {
21344 return $i_opening_minus;
21347 my $im1 = $i_opening_paren - 1;
21348 my ( $iprev_nb, $type_prev_nb ) = ( $im1, $types_to_go[$im1] );
21349 if ( $type_prev_nb eq 'b' && $iprev_nb > 0 ) {
21351 $type_prev_nb = $types_to_go[$iprev_nb];
21354 if ( $type_prev_nb eq ',' ) {
21356 # a previous comma is a good break point
21357 # $i_opening_minus = $i_opening_paren;
21361 $tokens_to_go[$i_opening_paren] eq '('
21363 # non-parens added here to fix case b1186
21364 || $is_kwiZ{$type_prev_nb}
21367 $i_opening_minus = $im1;
21369 # Walk back to improve length estimate...
21370 # FIX for cases b1169 b1170 b1171: start walking back
21371 # at the previous nonblank. This makes the result insensitive
21372 # to the flag --space-function-paren, and similar.
21373 # previous loop: for ( my $j = $im1 ; $j >= 0 ; $j-- ) {
21374 foreach my $j ( reverse( 0 .. $iprev_nb ) ) {
21375 if ( $is_key_type{ $types_to_go[$j] } ) {
21378 if ( $types_to_go[$j] eq '=' ) { $i_opening_minus = $j }
21381 $i_opening_minus = $j;
21383 if ( $types_to_go[$i_opening_minus] eq 'b' ) { $i_opening_minus++ }
21386 DEBUG_FIND_START && print <<EOM;
21387 FIND_START: i=$i_opening_paren tok=$tokens_to_go[$i_opening_paren] => im=$i_opening_minus tok=$tokens_to_go[$i_opening_minus]
21390 return $i_opening_minus;
21391 } ## end sub find_token_starting_list
21393 { ## begin closure set_comma_breakpoints_final
21395 my %is_keyword_with_special_leading_term;
21399 # These keywords have prototypes which allow a special leading item
21400 # followed by a list
21402 qw(formline grep kill map printf sprintf push chmod join pack unshift);
21403 @is_keyword_with_special_leading_term{@q} = (1) x scalar(@q);
21406 use constant DEBUG_SPARSE => 0;
21408 sub comma_broken_sublist_rule {
21417 $i_true_last_comma,
21425 # Break at every comma except for a comma between two
21426 # simple, small terms. This prevents long vertical
21427 # columns of, say, just 0's.
21428 my $small_length = 10; # 2 + actual maximum length wanted
21430 # We'll insert a break in long runs of small terms to
21431 # allow alignment in uniform tables.
21432 my $skipped_count = 0;
21433 my $columns = table_columns_available($i_first_comma);
21434 my $fields = int( $columns / $small_length );
21435 if ( $rOpts_maximum_fields_per_table
21436 && $fields > $rOpts_maximum_fields_per_table )
21438 $fields = $rOpts_maximum_fields_per_table;
21440 my $max_skipped_count = $fields - 1;
21442 my $is_simple_last_term = 0;
21443 my $is_simple_next_term = 0;
21444 foreach my $j ( 0 .. $item_count ) {
21445 $is_simple_last_term = $is_simple_next_term;
21446 $is_simple_next_term = 0;
21447 if ( $j < $item_count
21448 && $ri_term_end->[$j] == $ri_term_begin->[$j]
21449 && $ritem_lengths->[$j] <= $small_length )
21451 $is_simple_next_term = 1;
21454 if ( $is_simple_last_term
21455 && $is_simple_next_term
21456 && $skipped_count < $max_skipped_count )
21461 $skipped_count = 0;
21462 my $i_tc = $ri_term_comma->[ $j - 1 ];
21463 last unless defined $i_tc;
21464 $self->set_forced_breakpoint($i_tc);
21468 # always break at the last comma if this list is
21469 # interrupted; we wouldn't want to leave a terminal '{', for
21471 if ($interrupted) {
21472 $self->set_forced_breakpoint($i_true_last_comma);
21477 sub set_emergency_comma_breakpoints {
21483 $number_of_fields_best,
21490 # The number of fields worked out to be negative, so we
21491 # have to make an emergency fix.
21493 my $rcomma_index = $rinput_hash->{rcomma_index};
21494 my $next_nonblank_type = $rinput_hash->{next_nonblank_type};
21495 my $rdo_not_break_apart = $rinput_hash->{rdo_not_break_apart};
21496 my $must_break_open = $rinput_hash->{must_break_open};
21498 # are we an item contained in an outer list?
21499 my $in_hierarchical_list = $next_nonblank_type =~ /^[\}\,]$/;
21501 # In many cases, it may be best to not force a break if there is just
21502 # one comma, because the standard continuation break logic will do a
21503 # better job without it.
21505 # In the common case that all but one of the terms can fit
21506 # on a single line, it may look better not to break open the
21507 # containing parens. Consider, for example
21511 # sort { $color_value{$::a} <=> $color_value{$::b}; }
21514 # which will look like this with the container broken:
21518 # sort { $color_value{$::a} <=> $color_value{$::b}; } keys %colors
21521 # Here is an example of this rule for a long last term:
21523 # log_message( 0, 256, 128,
21524 # "Number of routes in adj-RIB-in to be considered: $peercount" );
21526 # And here is an example with a long first term:
21529 # "%2d wallclock secs (%$f usr %$f sys + %$f cusr %$f csys = %$f CPU)",
21530 # $r, $pu, $ps, $cu, $cs, $tt
21532 # if $style eq 'all';
21534 my $i_last_comma = $rcomma_index->[ $comma_count - 1 ];
21536 my $long_last_term = $self->excess_line_length( 0, $i_last_comma ) <= 0;
21537 my $long_first_term =
21538 $self->excess_line_length( $i_first_comma + 1, $max_index_to_go ) <=
21541 # break at every comma ...
21544 # if requested by user or is best looking
21545 $number_of_fields_best == 1
21547 # or if this is a sublist of a larger list
21548 || $in_hierarchical_list
21550 # or if multiple commas and we don't have a long first or last
21552 || ( $comma_count > 1
21553 && !( $long_last_term || $long_first_term ) )
21556 foreach ( 0 .. $comma_count - 1 ) {
21557 $self->set_forced_breakpoint( $rcomma_index->[$_] );
21560 elsif ($long_last_term) {
21562 $self->set_forced_breakpoint($i_last_comma);
21563 ${$rdo_not_break_apart} = 1 unless $must_break_open;
21565 elsif ($long_first_term) {
21567 $self->set_forced_breakpoint($i_first_comma);
21571 # let breaks be defined by default bond strength logic
21576 sub set_comma_breakpoints_final {
21578 # Given a list of comma-separated items, set breakpoints at some of
21579 # the commas, if necessary, to make it easy to read.
21581 my ( $self, $rinput_hash ) = @_;
21583 my $depth = $rinput_hash->{depth};
21584 my $i_opening_paren = $rinput_hash->{i_opening_paren};
21585 my $i_closing_paren = $rinput_hash->{i_closing_paren};
21586 my $item_count = $rinput_hash->{item_count};
21587 my $identifier_count = $rinput_hash->{identifier_count};
21588 my $rcomma_index = $rinput_hash->{rcomma_index};
21589 my $next_nonblank_type = $rinput_hash->{next_nonblank_type};
21590 my $list_type = $rinput_hash->{list_type};
21591 my $interrupted = $rinput_hash->{interrupted};
21592 my $rdo_not_break_apart = $rinput_hash->{rdo_not_break_apart};
21593 my $must_break_open = $rinput_hash->{must_break_open};
21594 my $has_broken_sublist = $rinput_hash->{has_broken_sublist};
21596 # nothing to do if no commas seen
21597 return if ( $item_count < 1 );
21599 my $i_first_comma = $rcomma_index->[0];
21600 my $i_true_last_comma = $rcomma_index->[ $item_count - 1 ];
21601 my $i_last_comma = $i_true_last_comma;
21602 if ( $i_last_comma >= $max_index_to_go ) {
21603 $i_last_comma = $rcomma_index->[ --$item_count - 1 ];
21604 return if ( $item_count < 1 );
21606 my $is_lp_formatting = ref( $leading_spaces_to_go[$i_first_comma] );
21608 #-----------------------------------------------------------
21609 # Section A: Find lengths of all items in the list needed to
21610 # calculate page layout
21611 #-----------------------------------------------------------
21612 my $comma_count = $item_count;
21614 my $ritem_lengths = [];
21615 my $ri_term_begin = [];
21616 my $ri_term_end = [];
21617 my $ri_term_comma = [];
21619 my $rmax_length = [ 0, 0 ];
21622 my $first_term_length;
21623 my $i = $i_opening_paren;
21626 foreach my $j ( 0 .. $comma_count - 1 ) {
21627 $is_odd = 1 - $is_odd;
21628 $i_prev_plus = $i + 1;
21629 $i = $rcomma_index->[$j];
21632 ( $i == 0 || $types_to_go[ $i - 1 ] eq 'b' ) ? $i - 2 : $i - 1;
21634 ( $types_to_go[$i_prev_plus] eq 'b' )
21637 push @{$ri_term_begin}, $i_term_begin;
21638 push @{$ri_term_end}, $i_term_end;
21639 push @{$ri_term_comma}, $i;
21641 # note: currently adding 2 to all lengths (for comma and space)
21643 2 + token_sequence_length( $i_term_begin, $i_term_end );
21644 push @{$ritem_lengths}, $length;
21647 $first_term_length = $length;
21651 if ( $length > $rmax_length->[$is_odd] ) {
21652 $rmax_length->[$is_odd] = $length;
21657 # now we have to make a distinction between the comma count and item
21658 # count, because the item count will be one greater than the comma
21659 # count if the last item is not terminated with a comma
21661 ( $types_to_go[ $i_last_comma + 1 ] eq 'b' )
21662 ? $i_last_comma + 1
21665 ( $types_to_go[ $i_closing_paren - 1 ] eq 'b' )
21666 ? $i_closing_paren - 2
21667 : $i_closing_paren - 1;
21668 my $i_effective_last_comma = $i_last_comma;
21670 my $last_item_length = token_sequence_length( $i_b + 1, $i_e );
21672 if ( $last_item_length > 0 ) {
21674 # add 2 to length because other lengths include a comma and a blank
21675 $last_item_length += 2;
21676 push @{$ritem_lengths}, $last_item_length;
21677 push @{$ri_term_begin}, $i_b + 1;
21678 push @{$ri_term_end}, $i_e;
21679 push @{$ri_term_comma}, undef;
21681 my $i_odd = $item_count % 2;
21683 if ( $last_item_length > $rmax_length->[$i_odd] ) {
21684 $rmax_length->[$i_odd] = $last_item_length;
21688 $i_effective_last_comma = $i_e + 1;
21690 if ( $types_to_go[ $i_b + 1 ] =~ /^[iR\]]$/ ) {
21691 $identifier_count++;
21695 # End of length calculations
21697 #-----------------------------------------
21698 # Section B: Handle some special cases ...
21699 #-----------------------------------------
21701 #-------------------------------------------------------------
21702 # Special Case B1: Compound List Rule 1:
21703 # Break at (almost) every comma for a list containing a broken
21704 # sublist. This has higher priority than the Interrupted List
21706 #-------------------------------------------------------------
21707 if ($has_broken_sublist) {
21709 $self->comma_broken_sublist_rule(
21714 $i_true_last_comma,
21724 #my ( $a, $b, $c ) = caller();
21725 #print "LISTX: in set_list $a $c interrupt=$interrupted count=$item_count
21726 #i_first = $i_first_comma i_last=$i_last_comma max=$max_index_to_go\n";
21727 #print "depth=$depth has_broken=$has_broken_sublist[$depth] is_multi=$is_multiline opening_paren=($i_opening_paren) \n";
21729 #--------------------------------------------------------------
21730 # Special Case B2: Interrupted List Rule:
21731 # A list is forced to use old breakpoints if it was interrupted
21732 # by side comments or blank lines, or requested by user.
21733 #--------------------------------------------------------------
21734 if ( $rOpts_break_at_old_comma_breakpoints
21736 || $i_opening_paren < 0 )
21738 $self->copy_old_breakpoints( $i_first_comma, $i_true_last_comma );
21742 my $opening_token = $tokens_to_go[$i_opening_paren];
21743 my $opening_is_in_block = $self->is_in_block_by_i($i_opening_paren);
21745 #-----------------------------------------------------------------
21746 # Special Case B3: If it fits on one line, return and let the line
21747 # break logic decide if and where to break.
21748 #-----------------------------------------------------------------
21750 # The -bbxi=2 parameters can add an extra hidden level of indentation
21751 # so they need a tolerance to avoid instability. Fixes b1259, 1260.
21753 if ( $break_before_container_types{$opening_token}
21754 && $container_indentation_options{$opening_token}
21755 && $container_indentation_options{$opening_token} == 2 )
21757 $tol = $rOpts_indent_columns;
21759 # use greater of -ci and -i (fix for case b1334)
21760 if ( $tol < $rOpts_continuation_indentation ) {
21761 $tol = $rOpts_continuation_indentation;
21765 my $i_opening_minus = $self->find_token_starting_list($i_opening_paren);
21767 $self->excess_line_length( $i_opening_minus, $i_closing_paren );
21768 return if ( $excess + $tol <= 0 );
21770 #---------------------------------------
21771 # Section C: Handle a multiline list ...
21772 #---------------------------------------
21774 #---------------------------------------------------------------
21775 # Section C1: Determine '$number_of_fields' = the best number of
21776 # fields to use if this is to be formatted as a table.
21777 #---------------------------------------------------------------
21779 # Now we know that this block spans multiple lines; we have to set
21780 # at least one breakpoint -- real or fake -- as a signal to break
21781 # open any outer containers.
21782 set_fake_breakpoint();
21784 # be sure we do not extend beyond the current list length
21785 if ( $i_effective_last_comma >= $max_index_to_go ) {
21786 $i_effective_last_comma = $max_index_to_go - 1;
21789 # Set a flag indicating if we need to break open to keep -lp
21790 # items aligned. This is necessary if any of the list terms
21791 # exceeds the available space after the '('.
21792 my $need_lp_break_open = $must_break_open;
21793 if ( $is_lp_formatting && !$must_break_open ) {
21794 my $columns_if_unbroken =
21795 $maximum_line_length_at_level[ $levels_to_go[$i_opening_minus] ]
21796 - total_line_length( $i_opening_minus, $i_opening_paren );
21797 $need_lp_break_open =
21798 ( $rmax_length->[0] > $columns_if_unbroken )
21799 || ( $rmax_length->[1] > $columns_if_unbroken )
21800 || ( $first_term_length > $columns_if_unbroken );
21803 # Specify if the list must have an even number of fields or not.
21804 # It is generally safest to assume an even number, because the
21805 # list items might be a hash list. But if we can be sure that
21806 # it is not a hash, then we can allow an odd number for more
21808 # 1 = odd field count ok, 2 = want even count
21809 my $odd_or_even = 2;
21810 if ( $identifier_count >= $item_count - 1
21811 || $is_assignment{$next_nonblank_type}
21812 || ( $list_type && $list_type ne '=>' && $list_type !~ /^[\:\?]$/ )
21818 # do we have a long first term which should be
21819 # left on a line by itself?
21820 my $use_separate_first_term = (
21821 $odd_or_even == 1 # only if we can use 1 field/line
21822 && $item_count > 3 # need several items
21823 && $first_term_length >
21824 2 * $rmax_length->[0] - 2 # need long first term
21825 && $first_term_length >
21826 2 * $rmax_length->[1] - 2 # need long first term
21829 # or do we know from the type of list that the first term should
21831 if ( !$use_separate_first_term ) {
21832 if ( $is_keyword_with_special_leading_term{$list_type} ) {
21833 $use_separate_first_term = 1;
21835 # should the container be broken open?
21836 if ( $item_count < 3 ) {
21837 if ( $i_first_comma - $i_opening_paren < 4 ) {
21838 ${$rdo_not_break_apart} = 1;
21841 elsif ($first_term_length < 20
21842 && $i_first_comma - $i_opening_paren < 4 )
21844 my $columns = table_columns_available($i_first_comma);
21845 if ( $first_term_length < $columns ) {
21846 ${$rdo_not_break_apart} = 1;
21853 if ($use_separate_first_term) {
21855 # ..set a break and update starting values
21856 $use_separate_first_term = 1;
21857 $self->set_forced_breakpoint($i_first_comma);
21858 $i_opening_paren = $i_first_comma;
21859 $i_first_comma = $rcomma_index->[1];
21861 return if $comma_count == 1;
21862 shift @{$ritem_lengths};
21863 shift @{$ri_term_begin};
21864 shift @{$ri_term_end};
21865 shift @{$ri_term_comma};
21868 # if not, update the metrics to include the first term
21870 if ( $first_term_length > $rmax_length->[0] ) {
21871 $rmax_length->[0] = $first_term_length;
21875 # Field width parameters
21876 my $pair_width = ( $rmax_length->[0] + $rmax_length->[1] );
21878 ( $rmax_length->[0] > $rmax_length->[1] )
21879 ? $rmax_length->[0]
21880 : $rmax_length->[1];
21882 # Number of free columns across the page width for laying out tables
21883 my $columns = table_columns_available($i_first_comma);
21885 # Patch for b1210 and b1216-b1218 when -vmll is set. If we are unable
21886 # to break after an opening paren, then the maximum line length for the
21887 # first line could be less than the later lines. So we need to reduce
21888 # the line length. Normally, we will get a break after an opening
21889 # paren, but in some cases we might not.
21890 if ( $rOpts_variable_maximum_line_length
21891 && $tokens_to_go[$i_opening_paren] eq '('
21892 && @{$ri_term_begin} )
21894 my $ib = $ri_term_begin->[0];
21895 my $type = $types_to_go[$ib];
21897 # So far, the only known instance of this problem is when
21898 # a bareword follows an opening paren with -vmll
21899 if ( $type eq 'w' ) {
21901 # If a line starts with paren+space+terms, then its max length
21902 # could be up to ci+2-i spaces less than if the term went out
21903 # on a line after the paren. So..
21904 my $tol_w = max( 0,
21905 2 + $rOpts_continuation_indentation -
21906 $rOpts_indent_columns );
21907 $columns = max( 0, $columns - $tol_w );
21909 ## Here is the original b1210 fix, but it failed on b1216-b1218
21910 ##my $columns2 = table_columns_available($i_opening_paren);
21911 ##$columns = min( $columns, $columns2 );
21915 # Estimated maximum number of fields which fit this space.
21916 # This will be our first guess:
21917 my $number_of_fields_max =
21918 maximum_number_of_fields( $columns, $odd_or_even, $max_width,
21920 my $number_of_fields = $number_of_fields_max;
21922 # Find the best-looking number of fields.
21923 # This will be our second guess, if possible.
21924 my ( $number_of_fields_best, $ri_ragged_break_list,
21925 $new_identifier_count )
21926 = $self->study_list_complexity( $ri_term_begin, $ri_term_end,
21927 $ritem_lengths, $max_width );
21929 if ( $number_of_fields_best != 0
21930 && $number_of_fields_best < $number_of_fields_max )
21932 $number_of_fields = $number_of_fields_best;
21935 # If we are crowded and the -lp option is being used, try
21936 # to undo some indentation
21940 $number_of_fields == 0
21941 || ( $number_of_fields == 1
21942 && $number_of_fields != $number_of_fields_best )
21946 ( $number_of_fields, $number_of_fields_best, $columns ) =
21947 $self->lp_table_fix(
21953 $number_of_fields_best,
21961 # try for one column if two won't work
21962 if ( $number_of_fields <= 0 ) {
21963 $number_of_fields = int( $columns / $max_width );
21966 # The user can place an upper bound on the number of fields,
21967 # which can be useful for doing maintenance on tables
21968 if ( $rOpts_maximum_fields_per_table
21969 && $number_of_fields > $rOpts_maximum_fields_per_table )
21971 $number_of_fields = $rOpts_maximum_fields_per_table;
21974 # How many columns (characters) and lines would this container take
21975 # if no additional whitespace were added?
21976 my $packed_columns = token_sequence_length( $i_opening_paren + 1,
21977 $i_effective_last_comma + 1 );
21978 if ( $columns <= 0 ) { $columns = 1 } # avoid divide by zero
21979 my $packed_lines = 1 + int( $packed_columns / $columns );
21981 # are we an item contained in an outer list?
21982 my $in_hierarchical_list = $next_nonblank_type =~ /^[\}\,]$/;
21984 #-----------------------------------------------------------------
21985 # Section C2: Stop here if we did not compute a positive number of
21986 # fields. In this case we just have to bail out.
21987 #-----------------------------------------------------------------
21988 if ( $number_of_fields <= 0 ) {
21990 $self->set_emergency_comma_breakpoints(
21992 $number_of_fields_best,
22001 #------------------------------------------------------------------
22002 # Section C3: We have a tentative field count that seems to work.
22003 # Now we must look more closely to determine if a table layout will
22004 # actually look okay.
22005 #------------------------------------------------------------------
22007 # How many lines will this require?
22008 my $formatted_lines = $item_count / ($number_of_fields);
22009 if ( $formatted_lines != int $formatted_lines ) {
22010 $formatted_lines = 1 + int $formatted_lines;
22013 # So far we've been trying to fill out to the right margin. But
22014 # compact tables are easier to read, so let's see if we can use fewer
22015 # fields without increasing the number of lines.
22016 $number_of_fields =
22017 compactify_table( $item_count, $number_of_fields, $formatted_lines,
22020 # How many spaces across the page will we fill?
22021 my $columns_per_line =
22022 ( int $number_of_fields / 2 ) * $pair_width +
22023 ( $number_of_fields % 2 ) * $max_width;
22025 my $formatted_columns;
22027 if ( $number_of_fields > 1 ) {
22028 $formatted_columns =
22029 ( $pair_width * ( int( $item_count / 2 ) ) +
22030 ( $item_count % 2 ) * $max_width );
22033 $formatted_columns = $max_width * $item_count;
22035 if ( $formatted_columns < $packed_columns ) {
22036 $formatted_columns = $packed_columns;
22039 my $unused_columns = $formatted_columns - $packed_columns;
22041 # set some empirical parameters to help decide if we should try to
22042 # align; high sparsity does not look good, especially with few lines
22043 my $sparsity = ($unused_columns) / ($formatted_columns);
22044 my $max_allowed_sparsity =
22045 ( $item_count < 3 ) ? 0.1
22046 : ( $packed_lines == 1 ) ? 0.15
22047 : ( $packed_lines == 2 ) ? 0.4
22050 my $two_line_word_wrap_ok;
22051 if ( $opening_token eq '(' ) {
22053 # default is to allow wrapping of short paren lists
22054 $two_line_word_wrap_ok = 1;
22056 # but turn off word wrap where requested
22057 if ($rOpts_break_open_compact_parens) {
22059 # This parameter is a one-character flag, as follows:
22060 # '0' matches no parens -> break open NOT OK -> word wrap OK
22061 # '1' matches all parens -> break open OK -> word wrap NOT OK
22062 # Other values are the same as used by the weld-exclusion-list
22063 my $flag = $rOpts_break_open_compact_parens;
22067 $two_line_word_wrap_ok = 0;
22069 elsif ( $flag eq '0' ) {
22070 $two_line_word_wrap_ok = 1;
22073 my $seqno = $type_sequence_to_go[$i_opening_paren];
22074 $two_line_word_wrap_ok =
22075 !$self->match_paren_control_flag( $seqno, $flag );
22080 #-------------------------------------------------------------------
22081 # Section C4: Check for shortcut methods, which avoid treating
22082 # a list as a table for relatively small parenthesized lists. These
22083 # are usually easier to read if not formatted as tables.
22084 #-------------------------------------------------------------------
22086 $packed_lines <= 2 # probably can fit in 2 lines
22087 && $item_count < 9 # doesn't have too many items
22088 && $opening_is_in_block # not a sub-container
22089 && $two_line_word_wrap_ok # ok to wrap this paren list
22093 # Section C4A: Shortcut method 1: for -lp and just one comma:
22094 # This is a no-brainer, just break at the comma.
22096 $is_lp_formatting # -lp
22097 && $item_count == 2 # two items, one comma
22098 && !$must_break_open
22101 my $i_break = $rcomma_index->[0];
22102 $self->set_forced_breakpoint($i_break);
22103 ${$rdo_not_break_apart} = 1;
22108 # Section C4B: Shortcut method 2 is for most small ragged lists
22109 # which might look best if not displayed as a table.
22111 ( $number_of_fields == 2 && $item_count == 3 )
22113 $new_identifier_count > 0 # isn't all quotes
22114 && $sparsity > 0.15
22115 ) # would be fairly spaced gaps if aligned
22119 my $break_count = $self->set_ragged_breakpoints( $ri_term_comma,
22120 $ri_ragged_break_list );
22121 ++$break_count if ($use_separate_first_term);
22123 # NOTE: we should really use the true break count here,
22124 # which can be greater if there are large terms and
22125 # little space, but usually this will work well enough.
22126 unless ($must_break_open) {
22128 if ( $break_count <= 1 ) {
22129 ${$rdo_not_break_apart} = 1;
22131 elsif ( $is_lp_formatting && !$need_lp_break_open ) {
22132 ${$rdo_not_break_apart} = 1;
22138 } ## end shortcut methods
22141 DEBUG_SPARSE && do {
22143 "SPARSE:cols=$columns commas=$comma_count items:$item_count ids=$identifier_count pairwidth=$pair_width fields=$number_of_fields lines packed: $packed_lines packed_cols=$packed_columns fmtd:$formatted_lines cols /line:$columns_per_line unused:$unused_columns fmtd:$formatted_columns sparsity=$sparsity allow=$max_allowed_sparsity\n";
22147 #------------------------------------------------------------------
22148 # Section C5: Compound List Rule 2:
22149 # If this list is too long for one line, and it is an item of a
22150 # larger list, then we must format it, regardless of sparsity
22151 # (ian.t). One reason that we have to do this is to trigger
22152 # Compound List Rule 1, above, which causes breaks at all commas of
22153 # all outer lists. In this way, the structure will be properly
22155 #------------------------------------------------------------------
22157 # Decide if this list is too long for one line unless broken
22158 my $total_columns = table_columns_available($i_opening_paren);
22159 my $too_long = $packed_columns > $total_columns;
22161 # For a paren list, include the length of the token just before the
22162 # '(' because this is likely a sub call, and we would have to
22163 # include the sub name on the same line as the list. This is still
22164 # imprecise, but not too bad. (steve.t)
22165 if ( !$too_long && $i_opening_paren > 0 && $opening_token eq '(' ) {
22167 $too_long = $self->excess_line_length( $i_opening_minus,
22168 $i_effective_last_comma + 1 ) > 0;
22171 # TODO: For an item after a '=>', try to include the length of the
22172 # thing before the '=>'. This is crude and should be improved by
22173 # actually looking back token by token.
22174 if ( !$too_long && $i_opening_paren > 0 && $list_type eq '=>' ) {
22175 my $i_opening_minus_test = $i_opening_paren - 4;
22176 if ( $i_opening_minus >= 0 ) {
22177 $too_long = $self->excess_line_length( $i_opening_minus_test,
22178 $i_effective_last_comma + 1 ) > 0;
22182 # Always break lists contained in '[' and '{' if too long for 1 line,
22183 # and always break lists which are too long and part of a more complex
22185 my $must_break_open_container = $must_break_open
22187 && ( $in_hierarchical_list || !$two_line_word_wrap_ok ) );
22189 #print "LISTX: next=$next_nonblank_type avail cols=$columns packed=$packed_columns must format = $must_break_open_container too-long=$too_long opening=$opening_token list_type=$list_type formatted_lines=$formatted_lines packed=$packed_lines max_sparsity= $max_allowed_sparsity sparsity=$sparsity \n";
22191 #--------------------------------------------------------------------
22192 # Section C6: A table will work here. But do not attempt to align
22193 # columns if this is a tiny table or it would be too spaced. It
22194 # seems that the more packed lines we have, the sparser the list that
22195 # can be allowed and still look ok.
22196 #--------------------------------------------------------------------
22198 if ( ( $formatted_lines < 3 && $packed_lines < $formatted_lines )
22199 || ( $formatted_lines < 2 )
22200 || ( $unused_columns > $max_allowed_sparsity * $formatted_columns )
22203 #----------------------------------------------------------------
22204 # Section C6A: too sparse: would not look good aligned in a table
22205 #----------------------------------------------------------------
22207 # use old breakpoints if this is a 'big' list
22208 if ( $packed_lines > 2 && $item_count > 10 ) {
22209 write_logfile_entry("List sparse: using old breakpoints\n");
22210 $self->copy_old_breakpoints( $i_first_comma, $i_last_comma );
22213 # let the continuation logic handle it if 2 lines
22216 my $break_count = $self->set_ragged_breakpoints( $ri_term_comma,
22217 $ri_ragged_break_list );
22218 ++$break_count if ($use_separate_first_term);
22220 unless ($must_break_open_container) {
22221 if ( $break_count <= 1 ) {
22222 ${$rdo_not_break_apart} = 1;
22224 elsif ( $is_lp_formatting && !$need_lp_break_open ) {
22225 ${$rdo_not_break_apart} = 1;
22232 #--------------------------------------------
22233 # Section C6B: Go ahead and format as a table
22234 #--------------------------------------------
22235 $self->write_formatted_table( $number_of_fields, $comma_count,
22236 $rcomma_index, $use_separate_first_term );
22239 } ## end sub set_comma_breakpoints_final
22243 # try to undo some -lp indentation to improve table formatting
22253 $number_of_fields_best,
22260 my $available_spaces =
22261 $self->get_available_spaces_to_go($i_first_comma);
22262 if ( $available_spaces > 0 ) {
22264 my $spaces_wanted = $max_width - $columns; # for 1 field
22266 if ( $number_of_fields_best == 0 ) {
22267 $number_of_fields_best =
22268 get_maximum_fields_wanted($ritem_lengths);
22271 if ( $number_of_fields_best != 1 ) {
22272 my $spaces_wanted_2 = 1 + $pair_width - $columns; # for 2 fields
22273 if ( $available_spaces > $spaces_wanted_2 ) {
22274 $spaces_wanted = $spaces_wanted_2;
22278 if ( $spaces_wanted > 0 ) {
22279 my $deleted_spaces =
22280 $self->reduce_lp_indentation( $i_first_comma,
22284 if ( $deleted_spaces > 0 ) {
22285 $columns = table_columns_available($i_first_comma);
22286 $number_of_fields =
22287 maximum_number_of_fields( $columns, $odd_or_even,
22288 $max_width, $pair_width );
22290 if ( $number_of_fields_best == 1
22291 && $number_of_fields >= 1 )
22293 $number_of_fields = $number_of_fields_best;
22298 return ( $number_of_fields, $number_of_fields_best, $columns );
22299 } ## end sub lp_table_fix
22301 sub write_formatted_table {
22303 # Write a table of comma separated items with fixed number of fields
22304 my ( $self, $number_of_fields, $comma_count, $rcomma_index,
22305 $use_separate_first_term )
22308 write_logfile_entry(
22309 "List: auto formatting with $number_of_fields fields/row\n");
22311 my $j_first_break =
22312 $use_separate_first_term ? $number_of_fields : $number_of_fields - 1;
22314 my $j = $j_first_break;
22315 while ( $j < $comma_count ) {
22316 my $i_comma = $rcomma_index->[$j];
22317 $self->set_forced_breakpoint($i_comma);
22318 $j += $number_of_fields;
22322 } ## end closure set_comma_breakpoints_final
22324 sub study_list_complexity {
22326 # Look for complex tables which should be formatted with one term per line.
22327 # Returns the following:
22329 # \@i_ragged_break_list = list of good breakpoints to avoid lines
22330 # which are hard to read
22331 # $number_of_fields_best = suggested number of fields based on
22332 # complexity; = 0 if any number may be used.
22334 my ( $self, $ri_term_begin, $ri_term_end, $ritem_lengths, $max_width ) = @_;
22335 my $item_count = @{$ri_term_begin};
22336 my $complex_item_count = 0;
22337 my $number_of_fields_best = $rOpts_maximum_fields_per_table;
22338 my $i_max = @{$ritem_lengths} - 1;
22339 ##my @item_complexity;
22341 my $i_last_last_break = -3;
22342 my $i_last_break = -2;
22343 my @i_ragged_break_list;
22345 my $definitely_complex = 30;
22346 my $definitely_simple = 12;
22347 my $quote_count = 0;
22349 for my $i ( 0 .. $i_max ) {
22350 my $ib = $ri_term_begin->[$i];
22351 my $ie = $ri_term_end->[$i];
22353 # define complexity: start with the actual term length
22354 my $weighted_length = ( $ritem_lengths->[$i] - 2 );
22356 ##TBD: join types here and check for variations
22357 ##my $str=join "", @tokens_to_go[$ib..$ie];
22360 if ( $types_to_go[$ib] =~ /^[qQ]$/ ) {
22364 elsif ( $types_to_go[$ib] =~ /^[w\-]$/ ) {
22368 if ( $ib eq $ie ) {
22369 if ( $is_quote && $tokens_to_go[$ib] =~ /\s/ ) {
22370 $complex_item_count++;
22371 $weighted_length *= 2;
22377 if ( grep { $_ eq 'b' } @types_to_go[ $ib .. $ie ] ) {
22378 $complex_item_count++;
22379 $weighted_length *= 2;
22381 if ( grep { $_ eq '..' } @types_to_go[ $ib .. $ie ] ) {
22382 $weighted_length += 4;
22386 # add weight for extra tokens.
22387 $weighted_length += 2 * ( $ie - $ib );
22389 ## my $BUB = join '', @tokens_to_go[$ib..$ie];
22390 ## print "# COMPLEXITY:$weighted_length $BUB\n";
22392 ##push @item_complexity, $weighted_length;
22394 # now mark a ragged break after this item it if it is 'long and
22396 if ( $weighted_length >= $definitely_complex ) {
22398 # if we broke after the previous term
22399 # then break before it too
22400 if ( $i_last_break == $i - 1
22402 && $i_last_last_break != $i - 2 )
22405 ## TODO: don't strand a small term
22406 pop @i_ragged_break_list;
22407 push @i_ragged_break_list, $i - 2;
22408 push @i_ragged_break_list, $i - 1;
22411 push @i_ragged_break_list, $i;
22412 $i_last_last_break = $i_last_break;
22413 $i_last_break = $i;
22416 # don't break before a small last term -- it will
22417 # not look good on a line by itself.
22418 elsif ($i == $i_max
22419 && $i_last_break == $i - 1
22420 && $weighted_length <= $definitely_simple )
22422 pop @i_ragged_break_list;
22426 my $identifier_count = $i_max + 1 - $quote_count;
22428 # Need more tuning here..
22429 if ( $max_width > 12
22430 && $complex_item_count > $item_count / 2
22431 && $number_of_fields_best != 2 )
22433 $number_of_fields_best = 1;
22436 return ( $number_of_fields_best, \@i_ragged_break_list, $identifier_count );
22437 } ## end sub study_list_complexity
22439 sub get_maximum_fields_wanted {
22441 # Not all tables look good with more than one field of items.
22442 # This routine looks at a table and decides if it should be
22443 # formatted with just one field or not.
22444 # This coding is still under development.
22445 my ($ritem_lengths) = @_;
22447 my $number_of_fields_best = 0;
22449 # For just a few items, we tentatively assume just 1 field.
22450 my $item_count = @{$ritem_lengths};
22451 if ( $item_count <= 5 ) {
22452 $number_of_fields_best = 1;
22455 # For larger tables, look at it both ways and see what looks best
22459 my @max_length = ( 0, 0 );
22460 my @last_length_2 = ( undef, undef );
22461 my @first_length_2 = ( undef, undef );
22462 my $last_length = undef;
22463 my $total_variation_1 = 0;
22464 my $total_variation_2 = 0;
22465 my @total_variation_2 = ( 0, 0 );
22467 foreach my $j ( 0 .. $item_count - 1 ) {
22469 $is_odd = 1 - $is_odd;
22470 my $length = $ritem_lengths->[$j];
22471 if ( $length > $max_length[$is_odd] ) {
22472 $max_length[$is_odd] = $length;
22475 if ( defined($last_length) ) {
22476 my $dl = abs( $length - $last_length );
22477 $total_variation_1 += $dl;
22479 $last_length = $length;
22481 my $ll = $last_length_2[$is_odd];
22482 if ( defined($ll) ) {
22483 my $dl = abs( $length - $ll );
22484 $total_variation_2[$is_odd] += $dl;
22487 $first_length_2[$is_odd] = $length;
22489 $last_length_2[$is_odd] = $length;
22491 $total_variation_2 = $total_variation_2[0] + $total_variation_2[1];
22493 my $factor = ( $item_count > 10 ) ? 1 : ( $item_count > 5 ) ? 0.75 : 0;
22494 unless ( $total_variation_2 < $factor * $total_variation_1 ) {
22495 $number_of_fields_best = 1;
22498 return ($number_of_fields_best);
22499 } ## end sub get_maximum_fields_wanted
22501 sub table_columns_available {
22502 my $i_first_comma = shift;
22504 $maximum_line_length_at_level[ $levels_to_go[$i_first_comma] ] -
22505 leading_spaces_to_go($i_first_comma);
22507 # Patch: the vertical formatter does not line up lines whose lengths
22508 # exactly equal the available line length because of allowances
22509 # that must be made for side comments. Therefore, the number of
22510 # available columns is reduced by 1 character.
22513 } ## end sub table_columns_available
22515 sub maximum_number_of_fields {
22517 # how many fields will fit in the available space?
22518 my ( $columns, $odd_or_even, $max_width, $pair_width ) = @_;
22519 my $max_pairs = int( $columns / $pair_width );
22520 my $number_of_fields = $max_pairs * 2;
22521 if ( $odd_or_even == 1
22522 && $max_pairs * $pair_width + $max_width <= $columns )
22524 $number_of_fields++;
22526 return $number_of_fields;
22527 } ## end sub maximum_number_of_fields
22529 sub compactify_table {
22531 # given a table with a certain number of fields and a certain number
22532 # of lines, see if reducing the number of fields will make it look
22534 my ( $item_count, $number_of_fields, $formatted_lines, $odd_or_even ) = @_;
22535 if ( $number_of_fields >= $odd_or_even * 2 && $formatted_lines > 0 ) {
22537 my $min_fields = $number_of_fields;
22539 while ($min_fields >= $odd_or_even
22540 && $min_fields * $formatted_lines >= $item_count )
22542 $number_of_fields = $min_fields;
22543 $min_fields -= $odd_or_even;
22546 return $number_of_fields;
22547 } ## end sub compactify_table
22549 sub set_ragged_breakpoints {
22551 # Set breakpoints in a list that cannot be formatted nicely as a
22553 my ( $self, $ri_term_comma, $ri_ragged_break_list ) = @_;
22555 my $break_count = 0;
22556 foreach ( @{$ri_ragged_break_list} ) {
22557 my $j = $ri_term_comma->[$_];
22559 $self->set_forced_breakpoint($j);
22563 return $break_count;
22564 } ## end sub set_ragged_breakpoints
22566 sub copy_old_breakpoints {
22567 my ( $self, $i_first_comma, $i_last_comma ) = @_;
22568 for my $i ( $i_first_comma .. $i_last_comma ) {
22569 if ( $old_breakpoint_to_go[$i] ) {
22571 # If the comma style is under certain controls, and if this is a
22572 # comma breakpoint with the comma is at the beginning of the next
22573 # line, then we must pass that index instead. This will allow sub
22574 # set_forced_breakpoints to check and follow the user settings. This
22575 # produces a uniform style and can prevent instability (b1422).
22577 # The flag '$controlled_comma_style' will be set if the user
22578 # entered any of -wbb=',' -wba=',' -kbb=',' -kba=','. It is not
22579 # needed or set for the -boc flag.
22581 if ( $types_to_go[$ibreak] ne ',' && $controlled_comma_style ) {
22582 my $index = $inext_to_go[$ibreak];
22583 if ( $index > $ibreak && $types_to_go[$index] eq ',' ) {
22587 $self->set_forced_breakpoint($ibreak);
22594 my ( $self, $i, $j ) = @_;
22595 if ( $i >= 0 && $i <= $j && $j <= $max_index_to_go ) {
22598 my ( $a, $b, $c ) = caller();
22600 "NOBREAK: forced_breakpoint $forced_breakpoint_count from $a $c with i=$i max=$max_index_to_go type=$types_to_go[$i]\n";
22603 @nobreak_to_go[ $i .. $j ] = (1) x ( $j - $i + 1 );
22606 # shouldn't happen; non-critical error
22609 my ( $a, $b, $c ) = caller();
22611 NOBREAK ERROR: from $a $c with i=$i j=$j max=$max_index_to_go
22616 } ## end sub set_nobreaks
22618 ###############################################
22619 # CODE SECTION 12: Code for setting indentation
22620 ###############################################
22622 sub token_sequence_length {
22624 # return length of tokens ($ibeg .. $iend) including $ibeg & $iend
22625 my ( $ibeg, $iend ) = @_;
22627 # fix possible negative starting index
22628 if ( $ibeg < 0 ) { $ibeg = 0 }
22630 # returns 0 if index range is empty (some subs assume this)
22631 if ( $ibeg > $iend ) {
22635 return $summed_lengths_to_go[ $iend + 1 ] - $summed_lengths_to_go[$ibeg];
22636 } ## end sub token_sequence_length
22638 sub total_line_length {
22640 # return length of a line of tokens ($ibeg .. $iend)
22641 my ( $ibeg, $iend ) = @_;
22643 # Start with the leading spaces on this line ...
22644 my $length = $leading_spaces_to_go[$ibeg];
22645 if ( ref($length) ) { $length = $length->get_spaces() }
22647 # ... then add the net token length
22649 $summed_lengths_to_go[ $iend + 1 ] - $summed_lengths_to_go[$ibeg];
22652 } ## end sub total_line_length
22654 sub excess_line_length {
22656 # return number of characters by which a line of tokens ($ibeg..$iend)
22657 # exceeds the allowable line length.
22658 # NOTE: profiling shows that efficiency of this routine is essential.
22660 my ( $self, $ibeg, $iend, $ignore_right_weld ) = @_;
22662 # Start with the leading spaces on this line ...
22663 my $excess = $leading_spaces_to_go[$ibeg];
22664 if ( ref($excess) ) { $excess = $excess->get_spaces() }
22666 # ... then add the net token length, minus the maximum length
22668 $summed_lengths_to_go[ $iend + 1 ] -
22669 $summed_lengths_to_go[$ibeg] -
22670 $maximum_line_length_at_level[ $levels_to_go[$ibeg] ];
22672 # ... and include right weld lengths unless requested not to
22673 if ( $total_weld_count
22674 && $type_sequence_to_go[$iend]
22675 && !$ignore_right_weld )
22677 my $wr = $self->[_rweld_len_right_at_K_]->{ $K_to_go[$iend] };
22678 $excess += $wr if defined($wr);
22682 } ## end sub excess_line_length
22686 # return the number of leading spaces associated with an indentation
22687 # variable $indentation is either a constant number of spaces or an object
22688 # with a get_spaces method.
22689 my $indentation = shift;
22690 return ref($indentation) ? $indentation->get_spaces() : $indentation;
22693 sub get_recoverable_spaces {
22695 # return the number of spaces (+ means shift right, - means shift left)
22696 # that we would like to shift a group of lines with the same indentation
22697 # to get them to line up with their opening parens
22698 my $indentation = shift;
22699 return ref($indentation) ? $indentation->get_recoverable_spaces() : 0;
22702 sub get_available_spaces_to_go {
22704 my ( $self, $ii ) = @_;
22705 my $item = $leading_spaces_to_go[$ii];
22707 # return the number of available leading spaces associated with an
22708 # indentation variable. $indentation is either a constant number of
22709 # spaces or an object with a get_available_spaces method.
22710 return ref($item) ? $item->get_available_spaces() : 0;
22711 } ## end sub get_available_spaces_to_go
22713 { ## begin closure set_lp_indentation
22715 use constant DEBUG_LP => 0;
22717 # Stack of -lp index objects which survives between batches.
22721 # The predicted position of the next opening container which may start
22722 # an -lp indentation level. This survives between batches.
22723 my $lp_position_predictor;
22727 # Index names for the -lp stack variables.
22728 # Do not combine with other BEGIN blocks (c101).
22732 _lp_ci_level_ => $i++,
22733 _lp_level_ => $i++,
22734 _lp_object_ => $i++,
22735 _lp_container_seqno_ => $i++,
22736 _lp_space_count_ => $i++,
22740 sub initialize_lp_vars {
22742 # initialize gnu variables for a new file;
22743 # must be called once at the start of a new file.
22745 $lp_position_predictor = 0;
22748 # we can turn off -lp if all levels will be at or above the cutoff
22749 if ( $high_stress_level <= 1 ) {
22750 $rOpts_line_up_parentheses = 0;
22751 $rOpts_extended_line_up_parentheses = 0;
22756 # initialize the leading whitespace stack to negative levels
22757 # so that we can never run off the end of the stack
22758 $rLP->[$max_lp_stack]->[_lp_ci_level_] = -1;
22759 $rLP->[$max_lp_stack]->[_lp_level_] = -1;
22760 $rLP->[$max_lp_stack]->[_lp_object_] = undef;
22761 $rLP->[$max_lp_stack]->[_lp_container_seqno_] = SEQ_ROOT;
22762 $rLP->[$max_lp_stack]->[_lp_space_count_] = 0;
22765 } ## end sub initialize_lp_vars
22767 # hashes for efficient testing
22773 my @q = qw< } ) ] >;
22774 @hash_test1{@q} = (1) x scalar(@q);
22777 @hash_test2{@q} = (1) x scalar(@q);
22778 @q = qw( . || && );
22779 @hash_test3{@q} = (1) x scalar(@q);
22782 # shared variables, re-initialized for each batch
22783 my $rlp_object_list;
22784 my $max_lp_object_list;
22785 my %lp_comma_count;
22786 my %lp_arrow_count;
22789 my $current_ci_level;
22793 my $K_last_nonblank;
22794 my $last_nonblank_token;
22795 my $last_nonblank_type;
22796 my $last_last_nonblank_type;
22798 sub set_lp_indentation {
22802 #------------------------------------------------------------------
22803 # Define the leading whitespace for all tokens in the current batch
22804 # when the -lp formatting is selected.
22805 #------------------------------------------------------------------
22807 return unless ($rOpts_line_up_parentheses);
22808 return unless ( defined($max_index_to_go) && $max_index_to_go >= 0 );
22810 # List of -lp indentation objects created in this batch
22811 $rlp_object_list = [];
22812 $max_lp_object_list = -1;
22814 %lp_comma_count = ();
22815 %lp_arrow_count = ();
22816 $space_count = undef;
22817 $current_level = undef;
22818 $current_ci_level = undef;
22819 $ii_begin_line = 0;
22821 $stack_changed = 1;
22822 $K_last_nonblank = undef;
22823 $last_nonblank_token = EMPTY_STRING;
22824 $last_nonblank_type = EMPTY_STRING;
22825 $last_last_nonblank_type = EMPTY_STRING;
22827 my %last_lp_equals = ();
22829 my $rLL = $self->[_rLL_];
22830 my $Klimit = $self->[_Klimit_];
22831 my $starting_in_quote = $self->[_this_batch_]->[_starting_in_quote_];
22832 my $radjusted_levels = $self->[_radjusted_levels_];
22834 my $nws = @{$radjusted_levels};
22837 # The 'starting_in_quote' flag means that the first token is the first
22838 # token of a line and it is also the continuation of some kind of
22839 # multi-line quote or pattern. It must have no added leading
22840 # whitespace, so we can skip it.
22841 if ($starting_in_quote) {
22845 my $Kpnb = $K_to_go[0] - 1;
22846 if ( $Kpnb > 0 && $rLL->[$Kpnb]->[_TYPE_] eq 'b' ) {
22849 if ( $Kpnb >= 0 && $rLL->[$Kpnb]->[_TYPE_] ne 'b' ) {
22850 $K_last_nonblank = $Kpnb;
22853 if ( defined($K_last_nonblank) ) {
22854 $last_nonblank_token = $rLL->[$K_last_nonblank]->[_TOKEN_];
22855 $last_nonblank_type = $rLL->[$K_last_nonblank]->[_TYPE_];
22858 #-----------------------------------
22859 # Loop over all tokens in this batch
22860 #-----------------------------------
22861 foreach my $ii ( $imin .. $max_index_to_go ) {
22863 my $type = $types_to_go[$ii];
22864 my $token = $tokens_to_go[$ii];
22865 my $level = $levels_to_go[$ii];
22866 my $ci_level = $ci_levels_to_go[$ii];
22867 my $total_depth = $nesting_depth_to_go[$ii];
22869 #--------------------------------------------------
22870 # Adjust levels if necessary to recycle whitespace:
22871 #--------------------------------------------------
22872 if ( defined($radjusted_levels) && @{$radjusted_levels} == $Klimit )
22874 my $KK = $K_to_go[$ii];
22875 $level = $radjusted_levels->[$KK];
22876 if ( $level < 0 ) {
22878 # should not happen
22879 DEVEL_MODE && Fault("unexpected level=$level\n");
22884 # get the top state from the stack if it has changed
22885 if ($stack_changed) {
22886 my $rLP_top = $rLP->[$max_lp_stack];
22887 my $lp_object = $rLP_top->[_lp_object_];
22889 ( $space_count, $current_level, $current_ci_level ) =
22890 @{ $lp_object->get_spaces_level_ci() };
22893 $current_ci_level = $rLP_top->[_lp_ci_level_];
22894 $current_level = $rLP_top->[_lp_level_];
22895 $space_count = $rLP_top->[_lp_space_count_];
22897 $stack_changed = 0;
22900 #------------------------------------------------------------
22901 # Break at a previous '=' if necessary to control line length
22902 #------------------------------------------------------------
22903 if ( $type eq '{' || $type eq '(' ) {
22904 $lp_comma_count{ $total_depth + 1 } = 0;
22905 $lp_arrow_count{ $total_depth + 1 } = 0;
22907 # If we come to an opening token after an '=' token of some
22908 # type, see if it would be helpful to 'break' after the '=' to
22910 my $ii_last_equals = $last_lp_equals{$total_depth};
22911 if ($ii_last_equals) {
22912 $self->lp_equals_break_check( $ii, $ii_last_equals );
22916 #------------------------
22917 # Handle decreasing depth
22918 #------------------------
22919 # Note that one token may have both decreasing and then increasing
22920 # depth. For example, (level, ci) can go from (1,1) to (2,0). So,
22921 # in this example we would first go back to (1,0) then up to (2,0)
22922 # in a single call.
22923 if ( $level < $current_level || $ci_level < $current_ci_level ) {
22924 $self->lp_decreasing_depth($ii);
22927 #------------------------
22928 # handle increasing depth
22929 #------------------------
22930 if ( $level > $current_level || $ci_level > $current_ci_level ) {
22931 $self->lp_increasing_depth($ii);
22934 #------------------
22935 # Handle all tokens
22936 #------------------
22937 if ( $type ne 'b' ) {
22939 # Count commas and look for non-list characters. Once we see a
22940 # non-list character, we give up and don't look for any more
22942 if ( $type eq '=>' ) {
22943 $lp_arrow_count{$total_depth}++;
22945 # remember '=>' like '=' for estimating breaks (but see
22946 # above note for b1035)
22947 $last_lp_equals{$total_depth} = $ii;
22950 elsif ( $type eq ',' ) {
22951 $lp_comma_count{$total_depth}++;
22954 elsif ( $is_assignment{$type} ) {
22955 $last_lp_equals{$total_depth} = $ii;
22958 # this token might start a new line if ..
22960 $ii > $ii_begin_line
22964 # this is the first nonblank token of the line
22965 $ii == 1 && $types_to_go[0] eq 'b'
22967 # or previous character was one of these:
22969 || $hash_test2{$last_nonblank_type}
22971 # or previous character was opening and this is not
22973 || ( $last_nonblank_type eq '{' && $type ne '}' )
22974 || ( $last_nonblank_type eq '(' and $type ne ')' )
22976 # or this token is one of these:
22977 # /^([\.]|\|\||\&\&)$/
22978 || $hash_test3{$type}
22980 # or this is a closing structure
22981 || ( $last_nonblank_type eq '}'
22982 && $last_nonblank_token eq $last_nonblank_type )
22984 # or previous token was keyword 'return'
22986 $last_nonblank_type eq 'k'
22987 && ( $last_nonblank_token eq 'return'
22991 # or starting a new line at certain keywords is fine
22993 && $is_if_unless_and_or_last_next_redo_return{
22996 # or this is after an assignment after a closing
22999 $is_assignment{$last_nonblank_type}
23002 $hash_test1{$last_last_nonblank_type}
23004 # and it is significantly to the right
23005 || $lp_position_predictor > (
23006 $maximum_line_length_at_level[$level] -
23007 $rOpts_maximum_line_length / 2
23014 check_for_long_gnu_style_lines($ii);
23015 $ii_begin_line = $ii;
23017 # back up 1 token if we want to break before that type
23018 # otherwise, we may strand tokens like '?' or ':' on a line
23019 if ( $ii_begin_line > 0 ) {
23021 $last_nonblank_type eq 'k'
23022 ? $want_break_before{$last_nonblank_token}
23023 : $want_break_before{$last_nonblank_type};
23024 $ii_begin_line-- if ($wbb);
23028 $K_last_nonblank = $K_to_go[$ii];
23029 $last_last_nonblank_type = $last_nonblank_type;
23030 $last_nonblank_type = $type;
23031 $last_nonblank_token = $token;
23033 } ## end if ( $type ne 'b' )
23035 # remember the predicted position of this token on the output line
23036 if ( $ii > $ii_begin_line ) {
23038 ## NOTE: this is a critical loop - the following call has been
23039 ## expanded for about 2x speedup:
23040 ## $lp_position_predictor =
23041 ## total_line_length( $ii_begin_line, $ii );
23043 my $indentation = $leading_spaces_to_go[$ii_begin_line];
23044 if ( ref($indentation) ) {
23045 $indentation = $indentation->get_spaces();
23047 $lp_position_predictor =
23049 $summed_lengths_to_go[ $ii + 1 ] -
23050 $summed_lengths_to_go[$ii_begin_line];
23053 $lp_position_predictor =
23054 $space_count + $token_lengths_to_go[$ii];
23057 # Store the indentation object for this token.
23058 # This allows us to manipulate the leading whitespace
23059 # (in case we have to reduce indentation to fit a line) without
23060 # having to change any token values.
23062 #---------------------------------------------------------------
23063 # replace leading whitespace with indentation objects where used
23064 #---------------------------------------------------------------
23065 if ( $rLP->[$max_lp_stack]->[_lp_object_] ) {
23066 my $lp_object = $rLP->[$max_lp_stack]->[_lp_object_];
23067 $leading_spaces_to_go[$ii] = $lp_object;
23068 if ( $max_lp_stack > 0
23070 && $rLP->[ $max_lp_stack - 1 ]->[_lp_object_] )
23072 $reduced_spaces_to_go[$ii] =
23073 $rLP->[ $max_lp_stack - 1 ]->[_lp_object_];
23076 $reduced_spaces_to_go[$ii] = $lp_object;
23079 } ## end loop over all tokens in this batch
23081 undo_incomplete_lp_indentation()
23082 if ( !$rOpts_extended_line_up_parentheses );
23085 } ## end sub set_lp_indentation
23087 sub lp_equals_break_check {
23089 my ( $self, $ii, $ii_last_equals ) = @_;
23091 # If we come to an opening token after an '=' token of some
23092 # type, see if it would be helpful to 'break' after the '=' to
23096 # $ii = index of an opening token in the output batch
23097 # $ii_begin_line = index of token starting next output line
23099 # $lp_position_predictor - updated position predictor
23100 # $ii_begin_line = updated starting token index
23102 # Skip an empty set of parens, such as after channel():
23103 # my $exchange = $self->_channel()->exchange(
23104 # This fixes issues b1318 b1322 b1323 b1328
23105 my $is_empty_container;
23106 if ( $ii_last_equals && $ii < $max_index_to_go ) {
23107 my $seqno = $type_sequence_to_go[$ii];
23108 my $inext_nb = $ii + 1;
23110 if ( $types_to_go[$inext_nb] eq 'b' );
23111 my $seqno_nb = $type_sequence_to_go[$inext_nb];
23112 $is_empty_container = $seqno && $seqno_nb && $seqno_nb == $seqno;
23115 if ( $ii_last_equals
23116 && $ii_last_equals > $ii_begin_line
23117 && !$is_empty_container )
23120 my $seqno = $type_sequence_to_go[$ii];
23122 # find the position if we break at the '='
23123 my $i_test = $ii_last_equals;
23125 # Fix for issue b1229, check if want break before this token
23126 # Fix for issue b1356, if i_test is a blank, the leading spaces may
23127 # be incorrect (if it was an interline blank).
23128 # Fix for issue b1357 .. b1370, i_test must be prev nonblank
23129 # ( the ci value for blanks can vary )
23130 # See also case b223
23131 # Fix for issue b1371-b1374 : all of these and the above are fixed
23132 # by simply backing up one index and setting the leading spaces of
23133 # a blank equal to that of the equals.
23134 if ( $want_break_before{ $types_to_go[$i_test] } ) {
23136 $leading_spaces_to_go[$i_test] =
23137 $leading_spaces_to_go[$ii_last_equals]
23138 if ( $types_to_go[$i_test] eq 'b' );
23140 elsif ( $types_to_go[ $i_test + 1 ] eq 'b' ) { $i_test++ }
23142 my $test_position = total_line_length( $i_test, $ii );
23143 my $mll = $maximum_line_length_at_level[ $levels_to_go[$i_test] ];
23145 #------------------------------------------------------
23146 # Break if structure will reach the maximum line length
23147 #------------------------------------------------------
23149 # Historically, -lp just used one-half line length here
23150 my $len_increase = $rOpts_maximum_line_length / 2;
23152 # For -xlp, we can also use the pre-computed lengths
23153 my $min_len = $self->[_rcollapsed_length_by_seqno_]->{$seqno};
23154 if ( $min_len && $min_len > $len_increase ) {
23155 $len_increase = $min_len;
23160 # if we might exceed the maximum line length
23161 $lp_position_predictor + $len_increase > $mll
23163 # if a -bbx flag WANTS a break before this opening token
23165 && $self->[_rbreak_before_container_by_seqno_]->{$seqno} )
23167 # or we are beyond the 1/4 point and there was an old
23168 # break at an assignment (not '=>') [fix for b1035]
23170 $lp_position_predictor >
23171 $mll - $rOpts_maximum_line_length * 3 / 4
23172 && $types_to_go[$ii_last_equals] ne '=>'
23174 $old_breakpoint_to_go[$ii_last_equals]
23175 || ( $ii_last_equals > 0
23176 && $old_breakpoint_to_go[ $ii_last_equals - 1 ] )
23177 || ( $ii_last_equals > 1
23178 && $types_to_go[ $ii_last_equals - 1 ] eq 'b'
23179 && $old_breakpoint_to_go[ $ii_last_equals - 2 ] )
23185 # then make the switch -- note that we do not set a
23186 # real breakpoint here because we may not really need
23187 # one; sub break_lists will do that if necessary.
23189 my $Kc = $self->[_K_closing_container_]->{$seqno};
23192 # For -lp, only if the closing token is in this
23193 # batch (c117). Otherwise it cannot be done by sub
23195 defined($Kc) && $Kc <= $K_to_go[$max_index_to_go]
23197 # For -xlp, we only need one nonblank token after
23198 # the opening token.
23199 || $rOpts_extended_line_up_parentheses
23202 $ii_begin_line = $i_test + 1;
23203 $lp_position_predictor = $test_position;
23205 #--------------------------------------------------
23206 # Fix for an opening container terminating a batch:
23207 #--------------------------------------------------
23208 # To get alignment of a -lp container with its
23209 # contents, we have to put a break after $i_test.
23210 # For $ii<$max_index_to_go, this will be done by
23211 # sub break_lists based on the indentation object.
23212 # But for $ii=$max_index_to_go, the indentation
23213 # object for this seqno will not be created until
23214 # the next batch, so we have to set a break at
23215 # $i_test right now in order to get one.
23216 if ( $ii == $max_index_to_go
23217 && !$block_type_to_go[$ii]
23218 && $types_to_go[$ii] eq '{'
23220 && !$self->[_ris_excluded_lp_container_]->{$seqno} )
23222 $self->set_forced_lp_break( $ii_begin_line, $ii );
23228 } ## end sub lp_equals_break_check
23230 sub lp_decreasing_depth {
23231 my ( $self, $ii ) = @_;
23233 my $rLL = $self->[_rLL_];
23235 my $level = $levels_to_go[$ii];
23236 my $ci_level = $ci_levels_to_go[$ii];
23238 # loop to find the first entry at or completely below this level
23241 # Be sure we have not hit the stack bottom - should never
23242 # happen because only negative levels can get here, and
23243 # $level was forced to be positive above.
23244 if ( !$max_lp_stack ) {
23246 # non-fatal, just keep going except in DEVEL_MODE
23249 program bug with -lp: stack_error. level=$level; ci_level=$ci_level; rerun with -nlp
23255 # save index of token which closes this level
23256 if ( $rLP->[$max_lp_stack]->[_lp_object_] ) {
23257 my $lp_object = $rLP->[$max_lp_stack]->[_lp_object_];
23259 $lp_object->set_closed($ii);
23261 my $comma_count = 0;
23262 my $arrow_count = 0;
23263 my $type = $types_to_go[$ii];
23264 if ( $type eq '}' || $type eq ')' ) {
23265 my $total_depth = $nesting_depth_to_go[$ii];
23266 $comma_count = $lp_comma_count{$total_depth};
23267 $arrow_count = $lp_arrow_count{$total_depth};
23268 $comma_count = 0 unless $comma_count;
23269 $arrow_count = 0 unless $arrow_count;
23272 $lp_object->set_comma_count($comma_count);
23273 $lp_object->set_arrow_count($arrow_count);
23275 # Undo any extra indentation if we saw no commas
23276 my $available_spaces = $lp_object->get_available_spaces();
23277 my $K_start = $lp_object->get_K_begin_line();
23279 if ( $available_spaces > 0
23280 && $K_start >= $K_to_go[0]
23281 && ( $comma_count <= 0 || $arrow_count > 0 ) )
23284 my $i = $lp_object->get_lp_item_index();
23286 # Safety check for a valid stack index. It
23287 # should be ok because we just checked that the
23288 # index K of the token associated with this
23289 # indentation is in this batch.
23290 if ( $i < 0 || $i > $max_lp_object_list ) {
23291 my $KK = $K_to_go[$ii];
23292 my $lno = $rLL->[$KK]->[_LINE_INDEX_];
23293 DEVEL_MODE && Fault(<<EOM);
23294 Program bug with -lp near line $lno. Stack index i=$i should be >=0 and <= max=$max_lp_object_list
23299 if ( $arrow_count == 0 ) {
23300 $rlp_object_list->[$i]
23301 ->permanently_decrease_available_spaces(
23302 $available_spaces);
23305 $rlp_object_list->[$i]
23306 ->tentatively_decrease_available_spaces(
23307 $available_spaces);
23309 foreach my $j ( $i + 1 .. $max_lp_object_list ) {
23310 $rlp_object_list->[$j]
23311 ->decrease_SPACES($available_spaces);
23316 # go down one level
23319 my $rLP_top = $rLP->[$max_lp_stack];
23320 my $ci_lev = $rLP_top->[_lp_ci_level_];
23321 my $lev = $rLP_top->[_lp_level_];
23322 my $spaces = $rLP_top->[_lp_space_count_];
23323 if ( $rLP_top->[_lp_object_] ) {
23324 my $lp_obj = $rLP_top->[_lp_object_];
23325 ( $spaces, $lev, $ci_lev ) =
23326 @{ $lp_obj->get_spaces_level_ci() };
23329 # stop when we reach a level at or below the current
23331 if ( $lev <= $level && $ci_lev <= $ci_level ) {
23332 $space_count = $spaces;
23333 $current_level = $lev;
23334 $current_ci_level = $ci_lev;
23339 } ## end sub lp_decreasing_depth
23341 sub lp_increasing_depth {
23342 my ( $self, $ii ) = @_;
23344 my $rLL = $self->[_rLL_];
23346 my $type = $types_to_go[$ii];
23347 my $level = $levels_to_go[$ii];
23348 my $ci_level = $ci_levels_to_go[$ii];
23350 $stack_changed = 1;
23352 # Compute the standard incremental whitespace. This will be
23353 # the minimum incremental whitespace that will be used. This
23354 # choice results in a smooth transition between the gnu-style
23355 # and the standard style.
23356 my $standard_increment =
23357 ( $level - $current_level ) * $rOpts_indent_columns +
23358 ( $ci_level - $current_ci_level ) * $rOpts_continuation_indentation;
23360 # Now we have to define how much extra incremental space
23361 # ("$available_space") we want. This extra space will be
23362 # reduced as necessary when long lines are encountered or when
23363 # it becomes clear that we do not have a good list.
23364 my $available_spaces = 0;
23365 my $align_seqno = 0;
23368 my $last_nonblank_seqno;
23369 my $last_nonblank_block_type;
23370 if ( defined($K_last_nonblank) ) {
23371 $last_nonblank_seqno = $rLL->[$K_last_nonblank]->[_TYPE_SEQUENCE_];
23372 $last_nonblank_block_type =
23373 $last_nonblank_seqno
23374 ? $self->[_rblock_type_of_seqno_]->{$last_nonblank_seqno}
23378 $in_lp_mode = $rLP->[$max_lp_stack]->[_lp_object_];
23380 #-----------------------------------------------
23381 # Initialize indentation spaces on empty stack..
23382 #-----------------------------------------------
23383 if ( $max_lp_stack == 0 ) {
23384 $space_count = $level * $rOpts_indent_columns;
23387 #----------------------------------------
23388 # Add the standard space increment if ...
23389 #----------------------------------------
23392 # if this is a BLOCK, add the standard increment
23393 $last_nonblank_block_type
23395 # or if this is not a sequenced item
23396 || !$last_nonblank_seqno
23398 # or this container is excluded by user rules
23399 # or contains here-docs or multiline qw text
23400 || defined($last_nonblank_seqno)
23401 && $self->[_ris_excluded_lp_container_]->{$last_nonblank_seqno}
23403 # or if last nonblank token was not structural indentation
23404 || $last_nonblank_type ne '{'
23406 # and do not start -lp under stress .. fixes b1244, b1255
23407 || !$in_lp_mode && $level >= $high_stress_level
23412 # If we have entered lp mode, use the top lp object to get
23413 # the current indentation spaces because it may have
23414 # changed. Fixes b1285, b1286.
23416 $space_count = $in_lp_mode->get_spaces();
23418 $space_count += $standard_increment;
23421 #---------------------------------------------------------------
23422 # -lp mode: try to use space to the first non-blank level change
23423 #---------------------------------------------------------------
23426 # see how much space we have available
23427 my $test_space_count = $lp_position_predictor;
23430 $self->[_rcollapsed_length_by_seqno_]->{$last_nonblank_seqno};
23431 my $next_opening_too_far;
23433 if ( defined($min_len) ) {
23435 $test_space_count +
23437 $maximum_line_length_at_level[$level];
23438 if ( $excess > 0 ) {
23439 $test_space_count -= $excess;
23441 # will the next opening token be a long way out?
23442 $next_opening_too_far =
23443 $lp_position_predictor + $excess >
23444 $maximum_line_length_at_level[$level];
23448 my $rLP_top = $rLP->[$max_lp_stack];
23449 my $min_gnu_indentation = $rLP_top->[_lp_space_count_];
23450 if ( $rLP_top->[_lp_object_] ) {
23451 $min_gnu_indentation = $rLP_top->[_lp_object_]->get_spaces();
23453 $available_spaces = $test_space_count - $min_gnu_indentation;
23455 # Do not startup -lp indentation mode if no space ...
23456 # ... or if it puts the opening far to the right
23458 && ( $available_spaces <= 0 || $next_opening_too_far ) )
23460 $space_count += $standard_increment;
23461 $available_spaces = 0;
23466 $space_count = $test_space_count;
23469 if ( $available_spaces >= $standard_increment ) {
23470 $min_gnu_indentation += $standard_increment;
23472 elsif ( $available_spaces > 1 ) {
23473 $min_gnu_indentation += $available_spaces + 1;
23475 # The "+1" space can cause mis-alignment if there is no
23476 # blank space between the opening paren and the next
23477 # nonblank token (i.e., -pt=2) and the container does not
23478 # get broken open. So we will mark this token for later
23479 # space removal by sub 'xlp_tweak' if this container
23480 # remains intact (issue git #106).
23484 # Skip if the maximum line length is exceeded here
23487 # This is only for level changes, not ci level changes.
23488 # But note: this test is here out of caution but I have
23489 # not found a case where it is actually necessary.
23490 && $is_opening_token{$last_nonblank_token}
23492 # Be sure we are at consecutive nonblanks. This test
23493 # should be true, but it guards against future coding
23494 # changes to level values assigned to blank spaces.
23496 && $types_to_go[ $ii - 1 ] ne 'b'
23500 $K_extra_space = $K_to_go[$ii];
23503 elsif ( $is_opening_token{$last_nonblank_token} ) {
23504 if ( ( $tightness{$last_nonblank_token} < 2 ) ) {
23505 $min_gnu_indentation += 2;
23508 $min_gnu_indentation += 1;
23512 $min_gnu_indentation += $standard_increment;
23514 $available_spaces = $space_count - $min_gnu_indentation;
23516 if ( $available_spaces < 0 ) {
23517 $space_count = $min_gnu_indentation;
23518 $available_spaces = 0;
23520 $align_seqno = $last_nonblank_seqno;
23524 #-------------------------------------------
23525 # update the state, but not on a blank token
23526 #-------------------------------------------
23527 if ( $type ne 'b' ) {
23529 if ( $rLP->[$max_lp_stack]->[_lp_object_] ) {
23530 $rLP->[$max_lp_stack]->[_lp_object_]->set_have_child(1);
23534 #----------------------------------------
23535 # Create indentation object if in lp-mode
23536 #----------------------------------------
23541 # A negative level implies not to store the item in the
23543 my $lp_item_index = 0;
23544 if ( $level >= 0 ) {
23545 $lp_item_index = ++$max_lp_object_list;
23548 my $K_begin_line = 0;
23549 if ( $ii_begin_line >= 0
23550 && $ii_begin_line <= $max_index_to_go )
23552 $K_begin_line = $K_to_go[$ii_begin_line];
23555 # Minor Fix: when creating indentation at a side
23556 # comment we don't know what the space to the actual
23557 # next code token will be. We will allow a space for
23558 # sub correct_lp to move it in if necessary.
23560 && $max_index_to_go > 0
23563 $available_spaces += 1;
23566 my $standard_spaces = $leading_spaces_to_go[$ii];
23567 $lp_object = Perl::Tidy::IndentationItem->new(
23568 spaces => $space_count,
23570 ci_level => $ci_level,
23571 available_spaces => $available_spaces,
23572 lp_item_index => $lp_item_index,
23573 align_seqno => $align_seqno,
23574 stack_depth => $max_lp_stack,
23575 K_begin_line => $K_begin_line,
23576 standard_spaces => $standard_spaces,
23577 K_extra_space => $K_extra_space,
23581 my $tok_beg = $rLL->[$K_begin_line]->[_TOKEN_];
23582 my $token = $tokens_to_go[$ii];
23583 print STDERR <<EOM;
23584 DEBUG_LP: Created object at tok=$token type=$type for seqno $align_seqno level=$level ci=$ci_level spaces=$space_count avail=$available_spaces kbeg=$K_begin_line tokbeg=$tok_beg lp=$lp_position_predictor
23588 if ( $level >= 0 ) {
23589 $rlp_object_list->[$max_lp_object_list] = $lp_object;
23592 if ( $is_opening_token{$last_nonblank_token}
23593 && $last_nonblank_seqno )
23595 $self->[_rlp_object_by_seqno_]->{$last_nonblank_seqno} =
23600 #------------------------------------
23601 # Store this indentation on the stack
23602 #------------------------------------
23603 $rLP->[$max_lp_stack]->[_lp_ci_level_] = $ci_level;
23604 $rLP->[$max_lp_stack]->[_lp_level_] = $level;
23605 $rLP->[$max_lp_stack]->[_lp_object_] = $lp_object;
23606 $rLP->[$max_lp_stack]->[_lp_container_seqno_] =
23607 $last_nonblank_seqno;
23608 $rLP->[$max_lp_stack]->[_lp_space_count_] = $space_count;
23610 # If the opening paren is beyond the half-line length, then
23611 # we will use the minimum (standard) indentation. This will
23612 # help avoid problems associated with running out of space
23613 # near the end of a line. As a result, in deeply nested
23614 # lists, there will be some indentations which are limited
23615 # to this minimum standard indentation. But the most deeply
23616 # nested container will still probably be able to shift its
23617 # parameters to the right for proper alignment, so in most
23618 # cases this will not be noticeable.
23619 if ( $available_spaces > 0 && $lp_object ) {
23621 $maximum_line_length_at_level[$level] -
23622 $rOpts_maximum_line_length / 2;
23623 $lp_object->tentatively_decrease_available_spaces(
23625 if ( $space_count > $halfway );
23629 } ## end sub lp_increasing_depth
23631 sub check_for_long_gnu_style_lines {
23633 # look at the current estimated maximum line length, and
23634 # remove some whitespace if it exceeds the desired maximum
23635 my ($mx_index_to_go) = @_;
23637 # nothing can be done if no stack items defined for this line
23638 return if ( $max_lp_object_list < 0 );
23640 # see if we have exceeded the maximum desired line length
23641 # keep 2 extra free because they are needed in some cases
23642 # (result of trial-and-error testing)
23643 my $spaces_needed =
23644 $lp_position_predictor -
23645 $maximum_line_length_at_level[ $levels_to_go[$mx_index_to_go] ] + 2;
23647 return if ( $spaces_needed <= 0 );
23649 # We are over the limit, so try to remove a requested number of
23650 # spaces from leading whitespace. We are only allowed to remove
23651 # from whitespace items created on this batch, since others have
23652 # already been used and cannot be undone.
23653 my @candidates = ();
23655 # loop over all whitespace items created for the current batch
23656 foreach my $i ( 0 .. $max_lp_object_list ) {
23657 my $item = $rlp_object_list->[$i];
23659 # item must still be open to be a candidate (otherwise it
23660 # cannot influence the current token)
23661 next if ( $item->get_closed() >= 0 );
23663 my $available_spaces = $item->get_available_spaces();
23665 if ( $available_spaces > 0 ) {
23666 push( @candidates, [ $i, $available_spaces ] );
23670 return unless (@candidates);
23672 # sort by available whitespace so that we can remove whitespace
23673 # from the maximum available first.
23675 sort { $b->[1] <=> $a->[1] || $a->[0] <=> $b->[0] } @candidates;
23677 # keep removing whitespace until we are done or have no more
23678 foreach my $candidate (@candidates) {
23679 my ( $i, $available_spaces ) = @{$candidate};
23680 my $deleted_spaces =
23681 ( $available_spaces > $spaces_needed )
23683 : $available_spaces;
23685 # remove the incremental space from this item
23686 $rlp_object_list->[$i]->decrease_available_spaces($deleted_spaces);
23690 # update the leading whitespace of this item and all items
23691 # that came after it
23693 while ( ++$i <= $max_lp_object_list ) {
23695 my $old_spaces = $rlp_object_list->[$i]->get_spaces();
23696 if ( $old_spaces >= $deleted_spaces ) {
23697 $rlp_object_list->[$i]->decrease_SPACES($deleted_spaces);
23700 # shouldn't happen except for code bug:
23702 # non-fatal, keep going except in DEVEL_MODE
23704 my $level = $rlp_object_list->[$i_debug]->get_level();
23706 $rlp_object_list->[$i_debug]->get_ci_level();
23707 my $old_level = $rlp_object_list->[$i]->get_level();
23709 $rlp_object_list->[$i]->get_ci_level();
23711 program bug with -lp: want to delete $deleted_spaces from item $i, but old=$old_spaces deleted: lev=$level ci=$ci_level deleted: level=$old_level ci=$ci_level
23716 $lp_position_predictor -= $deleted_spaces;
23717 $spaces_needed -= $deleted_spaces;
23718 last unless ( $spaces_needed > 0 );
23721 } ## end sub check_for_long_gnu_style_lines
23723 sub undo_incomplete_lp_indentation {
23725 #------------------------------------------------------------------
23726 # Undo indentation for all incomplete -lp indentation levels of the
23727 # current batch unless -xlp is set.
23728 #------------------------------------------------------------------
23730 # This routine is called once after each output stream batch is
23731 # finished to undo indentation for all incomplete -lp indentation
23732 # levels. If this routine is called then comments and blank lines will
23733 # disrupt this indentation style. In older versions of perltidy this
23734 # was always done because it could cause problems otherwise, but recent
23735 # improvements allow fairly good results to be obtained by skipping
23736 # this step with the -xlp flag.
23738 # nothing to do if no stack items defined for this line
23739 return if ( $max_lp_object_list < 0 );
23741 # loop over all whitespace items created for the current batch
23742 foreach my $i ( 0 .. $max_lp_object_list ) {
23743 my $item = $rlp_object_list->[$i];
23745 # only look for open items
23746 next if ( $item->get_closed() >= 0 );
23748 # Tentatively remove all of the available space
23749 # (The vertical aligner will try to get it back later)
23750 my $available_spaces = $item->get_available_spaces();
23751 if ( $available_spaces > 0 ) {
23753 # delete incremental space for this item
23754 $rlp_object_list->[$i]
23755 ->tentatively_decrease_available_spaces($available_spaces);
23757 # Reduce the total indentation space of any nodes that follow
23758 # Note that any such nodes must necessarily be dependents
23760 foreach ( $i + 1 .. $max_lp_object_list ) {
23761 $rlp_object_list->[$_]->decrease_SPACES($available_spaces);
23766 } ## end sub undo_incomplete_lp_indentation
23767 } ## end closure set_lp_indentation
23769 #----------------------------------------------------------------------
23770 # sub to set a requested break before an opening container in -lp mode.
23771 #----------------------------------------------------------------------
23772 sub set_forced_lp_break {
23774 my ( $self, $i_begin_line, $i_opening ) = @_;
23777 # $i_begin_line = index of break in the _to_go arrays
23778 # $i_opening = index of the opening container
23780 # Set any requested break at a token before this opening container
23781 # token. This is often an '=' or '=>' but can also be things like
23782 # '.', ',', 'return'. It was defined by sub set_lp_indentation.
23785 # For intact containers, call this at the closing token.
23786 # For broken containers, call this at the opening token.
23787 # This will avoid needless breaks when it turns out that the
23788 # container does not actually get broken. This isn't known until
23789 # the closing container for intact blocks.
23792 if ( $i_begin_line < 0
23793 || $i_begin_line > $max_index_to_go );
23795 # Handle request to put a break break immediately before this token.
23796 # We may not want to do that since we are also breaking after it.
23797 if ( $i_begin_line == $i_opening ) {
23799 # The following rules should be reviewed. We may want to always
23800 # allow the break. If we do not do the break, the indentation
23803 # RULE: don't break before it unless it is welded to a qw.
23804 # This works well, but we may want to relax this to allow
23805 # breaks in additional cases.
23807 if ( !$self->[_rK_weld_right_]->{ $K_to_go[$i_opening] } );
23808 return unless ( $types_to_go[$max_index_to_go] eq 'q' );
23811 # Only break for breakpoints at the same
23812 # indentation level as the opening paren
23813 my $test1 = $nesting_depth_to_go[$i_opening];
23814 my $test2 = $nesting_depth_to_go[$i_begin_line];
23815 return if ( $test2 != $test1 );
23817 # Back up at a blank (fixes case b932)
23818 my $ibr = $i_begin_line - 1;
23820 && $types_to_go[$ibr] eq 'b' )
23825 my $i_nonblank = $self->set_forced_breakpoint($ibr);
23827 # Crude patch to prevent sub recombine_breakpoints from undoing
23828 # this break, especially after an '='. It will leave old
23829 # breakpoints alone. See c098/x045 for some examples.
23830 if ( defined($i_nonblank) ) {
23831 $old_breakpoint_to_go[$i_nonblank] = 1;
23835 } ## end sub set_forced_lp_break
23837 sub reduce_lp_indentation {
23839 # reduce the leading whitespace at token $i if possible by $spaces_needed
23840 # (a large value of $spaces_needed will remove all excess space)
23841 # NOTE: to be called from break_lists only for a sequence of tokens
23842 # contained between opening and closing parens/braces/brackets
23844 my ( $self, $i, $spaces_wanted ) = @_;
23845 my $deleted_spaces = 0;
23847 my $item = $leading_spaces_to_go[$i];
23848 my $available_spaces = $item->get_available_spaces();
23851 $available_spaces > 0
23852 && ( ( $spaces_wanted <= $available_spaces )
23853 || !$item->get_have_child() )
23857 # we'll remove these spaces, but mark them as recoverable
23859 $item->tentatively_decrease_available_spaces($spaces_wanted);
23862 return $deleted_spaces;
23863 } ## end sub reduce_lp_indentation
23865 ###########################################################
23866 # CODE SECTION 13: Preparing batches for vertical alignment
23867 ###########################################################
23869 sub check_convey_batch_input {
23871 # Check for valid input to sub convey_batch_to_vertical_aligner. An
23872 # error here would most likely be due to an error in the calling
23873 # routine 'sub grind_batch_of_CODE'.
23874 my ( $self, $ri_first, $ri_last ) = @_;
23876 if ( !defined($ri_first) || !defined($ri_last) ) {
23878 Undefined line ranges ri_first and/r ri_last
23882 my $nmax = @{$ri_first} - 1;
23883 my $nmax_check = @{$ri_last} - 1;
23884 if ( $nmax < 0 || $nmax_check < 0 || $nmax != $nmax_check ) {
23886 Line range index error: nmax=$nmax but nmax_check=$nmax_check
23887 These should be equal and >=0
23890 my ( $ibeg, $iend );
23891 foreach my $n ( 0 .. $nmax ) {
23892 my $ibeg_m = $ibeg;
23893 my $iend_m = $iend;
23894 $ibeg = $ri_first->[$n];
23895 $iend = $ri_last->[$n];
23896 if ( $ibeg < 0 || $iend < $ibeg || $iend > $max_index_to_go ) {
23898 Bad line range at line index $n of $nmax: ibeg=$ibeg, iend=$iend
23899 These should have iend >= ibeg and be in the range (0..$max_index_to_go)
23902 next if ( $n == 0 );
23903 if ( $ibeg <= $iend_m ) {
23905 Line ranges overlap: iend=$iend_m at line $n-1 but ibeg=$ibeg for line $n
23910 } ## end sub check_convey_batch_input
23912 sub convey_batch_to_vertical_aligner {
23916 # This routine receives a batch of code for which the final line breaks
23917 # have been defined. Here we prepare the lines for passing to the vertical
23918 # aligner. We do the following tasks:
23919 # - mark certain vertical alignment tokens, such as '=', in each line
23920 # - make final indentation adjustments
23921 # - do logical padding: insert extra blank spaces to help display certain
23922 # logical constructions
23923 # - send the line to the vertical aligner
23925 my $rLL = $self->[_rLL_];
23926 my $Klimit = $self->[_Klimit_];
23927 my $ris_list_by_seqno = $self->[_ris_list_by_seqno_];
23928 my $this_batch = $self->[_this_batch_];
23930 my $do_not_pad = $this_batch->[_do_not_pad_];
23931 my $starting_in_quote = $this_batch->[_starting_in_quote_];
23932 my $ending_in_quote = $this_batch->[_ending_in_quote_];
23933 my $is_static_block_comment = $this_batch->[_is_static_block_comment_];
23934 my $batch_CODE_type = $this_batch->[_batch_CODE_type_];
23935 my $ri_first = $this_batch->[_ri_first_];
23936 my $ri_last = $this_batch->[_ri_last_];
23938 $self->check_convey_batch_input( $ri_first, $ri_last ) if (DEVEL_MODE);
23940 my $n_last_line = @{$ri_first} - 1;
23942 my $ibeg_next = $ri_first->[0];
23943 my $iend_next = $ri_last->[0];
23945 my $type_beg_next = $types_to_go[$ibeg_next];
23946 my $type_end_next = $types_to_go[$iend_next];
23947 my $token_beg_next = $tokens_to_go[$ibeg_next];
23949 my $rindentation_list = [0]; # ref to indentations for each line
23950 my ( $cscw_block_comment, $closing_side_comment, $is_block_comment );
23952 if ( !$max_index_to_go && $type_beg_next eq '#' ) {
23953 $is_block_comment = 1;
23956 if ($rOpts_closing_side_comments) {
23957 ( $closing_side_comment, $cscw_block_comment ) =
23958 $self->add_closing_side_comment( $ri_first, $ri_last );
23961 if ( $n_last_line > 0 || $rOpts_extended_continuation_indentation ) {
23962 $self->undo_ci( $ri_first, $ri_last,
23963 $this_batch->[_rix_seqno_controlling_ci_] );
23966 # for multi-line batches ...
23967 if ( $n_last_line > 0 ) {
23969 # flush before a long if statement to avoid unwanted alignment
23970 $self->flush_vertical_aligner()
23971 if ( $type_beg_next eq 'k'
23972 && $is_if_unless{$token_beg_next} );
23974 $self->set_logical_padding( $ri_first, $ri_last, $starting_in_quote )
23975 if ($rOpts_logical_padding);
23977 $self->xlp_tweak( $ri_first, $ri_last )
23978 if ($rOpts_extended_line_up_parentheses);
23981 if (DEVEL_MODE) { $self->check_batch_summed_lengths() }
23983 # ----------------------------------------------------------
23984 # define the vertical alignments for all lines of this batch
23985 # ----------------------------------------------------------
23986 my $rline_alignments =
23987 $self->make_vertical_alignments( $ri_first, $ri_last );
23989 # ----------------------------------------------
23990 # loop to send each line to the vertical aligner
23991 # ----------------------------------------------
23992 my ( $type_beg, $type_end, $token_beg, $ljump );
23994 for my $n ( 0 .. $n_last_line ) {
23996 # ----------------------------------------------------------------
23997 # This hash will hold the args for vertical alignment of this line
23998 # We will populate it as we go.
23999 # ----------------------------------------------------------------
24000 my $rvao_args = {};
24002 my $type_beg_last = $type_beg;
24003 my $type_end_last = $type_end;
24005 my $ibeg = $ibeg_next;
24006 my $iend = $iend_next;
24007 my $Kbeg = $K_to_go[$ibeg];
24008 my $Kend = $K_to_go[$iend];
24010 $type_beg = $type_beg_next;
24011 $type_end = $type_end_next;
24012 $token_beg = $token_beg_next;
24014 # ---------------------------------------------------
24015 # Define the check value 'Kend' to send for this line
24016 # ---------------------------------------------------
24017 # The 'Kend' value is an integer for checking that lines come out of
24018 # the far end of the pipeline in the right order. It increases
24019 # linearly along the token stream. But we only send ending K values of
24020 # non-comments down the pipeline. This is equivalent to checking that
24021 # the last CODE_type is blank or equal to 'VER'. See also sub
24022 # resync_lines_and_tokens for related coding. Note that
24023 # '$batch_CODE_type' is the code type of the line to which the ending
24026 $batch_CODE_type && $batch_CODE_type ne 'VER' ? undef : $Kend;
24028 # Get some vars on line [n+1], if any,
24029 # and define $ljump = level jump needed by 'sub get_final_indentation'
24030 if ( $n < $n_last_line ) {
24031 $ibeg_next = $ri_first->[ $n + 1 ];
24032 $iend_next = $ri_last->[ $n + 1 ];
24034 $type_beg_next = $types_to_go[$ibeg_next];
24035 $type_end_next = $types_to_go[$iend_next];
24036 $token_beg_next = $tokens_to_go[$ibeg_next];
24038 my $Kbeg_next = $K_to_go[$ibeg_next];
24039 $ljump = $rLL->[$Kbeg_next]->[_LEVEL_] - $rLL->[$Kend]->[_LEVEL_];
24041 elsif ( !$is_block_comment && $Kend < $Klimit ) {
24043 # Patch for git #51, a bare closing qw paren was not outdented
24044 # if the flag '-nodelete-old-newlines is set
24045 # Note that we are just looking ahead for the next nonblank
24046 # character. We could scan past an arbitrary number of block
24047 # comments or hanging side comments by calling K_next_code, but it
24048 # could add significant run time with very little to be gained.
24049 my $Kbeg_next = $Kend + 1;
24050 if ( $Kbeg_next < $Klimit
24051 && $rLL->[$Kbeg_next]->[_TYPE_] eq 'b' )
24056 $rLL->[$Kbeg_next]->[_LEVEL_] - $rLL->[$Kend]->[_LEVEL_];
24062 # ---------------------------------------------
24063 # get the vertical alignment info for this line
24064 # ---------------------------------------------
24066 # The lines are broken into fields which can be spaced by the vertical
24067 # to achieve vertical alignment. These fields are the actual text
24068 # which will be output, so from here on no more changes can be made to
24070 my $rline_alignment = $rline_alignments->[$n];
24071 my ( $rtokens, $rfields, $rpatterns, $rfield_lengths ) =
24072 @{$rline_alignment};
24074 # Programming check: (shouldn't happen)
24075 # The number of tokens which separate the fields must always be
24076 # one less than the number of fields. If this is not true then
24077 # an error has been introduced in sub make_alignment_patterns.
24079 if ( @{$rfields} && ( @{$rtokens} != ( @{$rfields} - 1 ) ) ) {
24080 my $nt = @{$rtokens};
24081 my $nf = @{$rfields};
24083 Program bug in Perl::Tidy::Formatter, probably in sub 'make_alignment_patterns':
24084 The number of tokens = $nt should be one less than number of fields: $nf
24090 # --------------------------------------
24091 # get the final indentation of this line
24092 # --------------------------------------
24099 $is_outdented_line,
24101 ) = $self->get_final_indentation(
24109 $rindentation_list,
24111 $starting_in_quote,
24112 $is_static_block_comment,
24116 # --------------------------------
24117 # define flag 'outdent_long_lines'
24118 # --------------------------------
24120 # we will allow outdenting of long lines..
24121 # which are long quotes, if allowed
24122 ( $type_beg eq 'Q' && $rOpts_outdent_long_quotes )
24124 # which are long block comments, if allowed
24127 && $rOpts_outdent_long_comments
24129 # but not if this is a static block comment
24130 && !$is_static_block_comment
24134 $rvao_args->{outdent_long_lines} = 1;
24136 # convert -lp indentation objects to spaces to allow outdenting
24137 if ( ref($indentation) ) {
24138 $indentation = $indentation->get_spaces();
24142 # --------------------------------------------------
24143 # define flags 'break_alignment_before' and '_after'
24144 # --------------------------------------------------
24146 # These flags tell the vertical aligner to stop alignment before or
24148 if ($is_outdented_line) {
24149 $rvao_args->{break_alignment_before} = 1;
24150 $rvao_args->{break_alignment_after} = 1;
24152 elsif ($do_not_pad) {
24153 $rvao_args->{break_alignment_before} = 1;
24156 # flush at an 'if' which follows a line with (1) terminal semicolon
24157 # or (2) terminal block_type which is not an 'if'. This prevents
24158 # unwanted alignment between the lines.
24159 elsif ( $type_beg eq 'k' && $token_beg eq 'if' ) {
24164 my $Km = $Kbeg - 1;
24165 $type_m = $rLL->[$Km]->[_TYPE_];
24166 if ( $type_m eq 'b' && $Km > 0 ) {
24168 $type_m = $rLL->[$Km]->[_TYPE_];
24170 if ( $type_m eq '#' && $Km > 0 ) {
24172 $type_m = $rLL->[$Km]->[_TYPE_];
24173 if ( $type_m eq 'b' && $Km > 0 ) {
24175 $type_m = $rLL->[$Km]->[_TYPE_];
24179 my $seqno_m = $rLL->[$Km]->[_TYPE_SEQUENCE_];
24181 $block_type_m = $self->[_rblock_type_of_seqno_]->{$seqno_m};
24185 # break after anything that is not if-like
24188 || ( $type_m eq '}'
24190 && $block_type_m ne 'if'
24191 && $block_type_m ne 'unless'
24192 && $block_type_m ne 'elsif'
24193 && $block_type_m ne 'else' )
24196 $rvao_args->{break_alignment_before} = 1;
24200 # ----------------------------------
24201 # define 'rvertical_tightness_flags'
24202 # ----------------------------------
24203 # These flags tell the vertical aligner if/when to combine consecutive
24204 # lines, based on the user input parameters.
24205 $rvao_args->{rvertical_tightness_flags} =
24206 $self->set_vertical_tightness_flags( $n, $n_last_line, $ibeg, $iend,
24207 $ri_first, $ri_last, $ending_in_quote, $closing_side_comment )
24208 unless ( $is_block_comment
24209 || $self->[_no_vertical_tightness_flags_] );
24211 # ----------------------------------
24212 # define 'is_terminal_ternary' flag
24213 # ----------------------------------
24215 # This flag is set at the final ':' of a ternary chain to request
24216 # vertical alignment of the final term. Here is a slightly complex
24219 # $self->{_text} = (
24221 # : $type eq 'item' ? "the $section entry"
24222 # : "the section on $section"
24226 # ? ( $section ? ' in ' : '' ) . "the $page$page_ext manpage"
24227 # : ' elsewhere in this document'
24230 if ( $type_beg eq ':' || $n > 0 && $type_end_last eq ':' ) {
24232 my $is_terminal_ternary = 0;
24233 my $last_leading_type = $n > 0 ? $type_beg_last : ':';
24234 my $terminal_type = $types_to_go[$i_terminal];
24235 if ( $terminal_type ne ';'
24236 && $n_last_line > $n
24237 && $level_end == $lev )
24239 my $Kbeg_next = $K_to_go[$ibeg_next];
24240 $level_end = $rLL->[$Kbeg_next]->[_LEVEL_];
24241 $terminal_type = $rLL->[$Kbeg_next]->[_TYPE_];
24244 $last_leading_type eq ':'
24245 && ( ( $terminal_type eq ';' && $level_end <= $lev )
24246 || ( $terminal_type ne ':' && $level_end < $lev ) )
24250 # the terminal term must not contain any ternary terms, as in
24252 # $Is_MSWin32 ? ".\\echo$$"
24253 # : $Is_MacOS ? ":echo$$"
24254 # : ( $Is_NetWare ? "echo$$" : "./echo$$" )
24256 $is_terminal_ternary = 1;
24258 my $KP = $rLL->[$Kbeg]->[_KNEXT_SEQ_ITEM_];
24259 while ( defined($KP) && $KP <= $Kend ) {
24260 my $type_KP = $rLL->[$KP]->[_TYPE_];
24261 if ( $type_KP eq '?' || $type_KP eq ':' ) {
24262 $is_terminal_ternary = 0;
24265 $KP = $rLL->[$KP]->[_KNEXT_SEQ_ITEM_];
24268 $rvao_args->{is_terminal_ternary} = $is_terminal_ternary;
24271 # -------------------------------------------------
24272 # add any new closing side comment to the last line
24273 # -------------------------------------------------
24274 if ( $closing_side_comment && $n == $n_last_line && @{$rfields} ) {
24276 $rfields->[-1] .= " $closing_side_comment";
24278 # NOTE: Patch for csc. We can just use 1 for the length of the csc
24279 # because its length should not be a limiting factor from here on.
24280 $rfield_lengths->[-1] += 2;
24284 [ $rtokens, $rfields, $rpatterns, $rfield_lengths ];
24287 # ------------------------
24288 # define flag 'list_seqno'
24289 # ------------------------
24291 # This flag indicates if this line is contained in a multi-line list
24292 if ( !$is_block_comment ) {
24293 my $parent_seqno = $parent_seqno_to_go[$ibeg];
24294 $rvao_args->{list_seqno} = $ris_list_by_seqno->{$parent_seqno};
24297 # The alignment tokens have been marked with nesting_depths, so we need
24298 # to pass nesting depths to the vertical aligner. They remain invariant
24299 # under all formatting operations. Previously, level values were sent
24300 # to the aligner. But they can be altered in welding and other
24301 # operations, and this can lead to alignment errors.
24302 my $nesting_depth_beg = $nesting_depth_to_go[$ibeg];
24303 my $nesting_depth_end = $nesting_depth_to_go[$iend];
24305 # A quirk in the definition of nesting depths is that the closing token
24306 # has the same depth as internal tokens. The vertical aligner is
24307 # programmed to expect them to have the lower depth, so we fix this.
24308 if ( $is_closing_type{ $types_to_go[$ibeg] } ) { $nesting_depth_beg-- }
24309 if ( $is_closing_type{ $types_to_go[$iend] } ) { $nesting_depth_end-- }
24311 # Adjust nesting depths to keep -lp indentation for qw lists. This is
24312 # required because qw lists contained in brackets do not get nesting
24313 # depths, but the vertical aligner is watching nesting depth changes to
24314 # decide if a -lp block is intact. Without this patch, qw lists
24315 # enclosed in angle brackets will not get the correct -lp indentation.
24317 # Looking for line with isolated qw ...
24318 if ( $rOpts_line_up_parentheses
24319 && $type_beg eq 'q'
24320 && $ibeg == $iend )
24323 # ... which is part of a multiline qw
24324 my $Km = $self->K_previous_nonblank($Kbeg);
24325 my $Kp = $self->K_next_nonblank($Kbeg);
24326 if ( defined($Km) && $rLL->[$Km]->[_TYPE_] eq 'q'
24327 || defined($Kp) && $rLL->[$Kp]->[_TYPE_] eq 'q' )
24329 $nesting_depth_beg++;
24330 $nesting_depth_end++;
24334 # ---------------------------------
24335 # define flag 'forget_side_comment'
24336 # ---------------------------------
24338 # This flag tells the vertical aligner to reset the side comment
24339 # location if we are entering a new block from level 0. This is
24340 # intended to keep side comments from drifting too far to the right.
24341 if ( $block_type_to_go[$i_terminal]
24342 && $nesting_depth_end > $nesting_depth_beg )
24344 my $level_adj = $lev;
24345 my $radjusted_levels = $self->[_radjusted_levels_];
24346 if ( defined($radjusted_levels) && @{$radjusted_levels} == @{$rLL} )
24348 $level_adj = $radjusted_levels->[$Kbeg];
24349 if ( $level_adj < 0 ) { $level_adj = 0 }
24351 if ( $level_adj == 0 ) {
24352 $rvao_args->{forget_side_comment} = 1;
24356 # -----------------------------------
24357 # Store the remaining non-flag values
24358 # -----------------------------------
24359 $rvao_args->{Kend} = $Kend_code;
24360 $rvao_args->{ci_level} = $ci_levels_to_go[$ibeg];
24361 $rvao_args->{indentation} = $indentation;
24362 $rvao_args->{level_end} = $nesting_depth_end;
24363 $rvao_args->{level} = $nesting_depth_beg;
24364 $rvao_args->{rline_alignment} = $rline_alignment;
24365 $rvao_args->{maximum_line_length} =
24366 $maximum_line_length_at_level[ $levels_to_go[$ibeg] ];
24368 # --------------------------------------
24369 # send this line to the vertical aligner
24370 # --------------------------------------
24371 my $vao = $self->[_vertical_aligner_object_];
24372 $vao->valign_input($rvao_args);
24376 } ## end of loop to output each line
24378 # Set flag indicating if the last line ends in an opening
24379 # token and is very short, so that a blank line is not
24380 # needed if the subsequent line is a comment.
24381 # Examples of what we are looking for:
24387 $self->[_last_output_short_opening_token_]
24389 # line ends in opening token
24391 = $is_opening_type{$type_end}
24395 # line has either single opening token
24396 $iend_next == $ibeg_next
24398 # or is a single token followed by opening token.
24399 # Note that sub identifiers have blanks like 'sub doit'
24400 # $token_beg !~ /\s+/
24401 || ( $iend_next - $ibeg_next <= 2 && index( $token_beg, SPACE ) < 0 )
24404 # and limit total to 10 character widths
24405 && token_sequence_length( $ibeg_next, $iend_next ) <= 10;
24407 # remember indentation of lines containing opening containers for
24408 # later use by sub get_final_indentation
24409 $self->save_opening_indentation( $ri_first, $ri_last,
24410 $rindentation_list, $this_batch->[_runmatched_opening_indexes_] )
24411 if ( $this_batch->[_runmatched_opening_indexes_]
24412 || $types_to_go[$max_index_to_go] eq 'q' );
24414 # output any new -cscw block comment
24415 if ($cscw_block_comment) {
24416 $self->flush_vertical_aligner();
24417 my $file_writer_object = $self->[_file_writer_object_];
24418 $file_writer_object->write_code_line( $cscw_block_comment . "\n" );
24421 } ## end sub convey_batch_to_vertical_aligner
24423 sub check_batch_summed_lengths {
24425 my ( $self, $msg ) = @_;
24426 $msg = EMPTY_STRING unless defined($msg);
24427 my $rLL = $self->[_rLL_];
24429 # Verify that the summed lengths are correct. We want to be sure that
24430 # errors have not been introduced by programming changes. Summed lengths
24431 # are defined in sub store_token. Operations like padding and unmasking
24432 # semicolons can change token lengths, but those operations are expected to
24433 # update the summed lengths when they make changes. So the summed lengths
24434 # should always be correct.
24435 foreach my $i ( 0 .. $max_index_to_go ) {
24437 $summed_lengths_to_go[ $i + 1 ] - $summed_lengths_to_go[$i];
24438 my $len_tok_i = $token_lengths_to_go[$i];
24439 my $KK = $K_to_go[$i];
24441 if ( defined($KK) ) { $len_tok_K = $rLL->[$KK]->[_TOKEN_LENGTH_] }
24442 if ( $len_by_sum != $len_tok_i
24443 || defined($len_tok_K) && $len_by_sum != $len_tok_K )
24445 my $lno = defined($KK) ? $rLL->[$KK]->[_LINE_INDEX_] + 1 : "undef";
24446 $KK = 'undef' unless defined($KK);
24447 my $tok = $tokens_to_go[$i];
24448 my $type = $types_to_go[$i];
24450 Summed lengths are appear to be incorrect. $msg
24451 lengths disagree: token length by sum=$len_by_sum but token_length_to_go[$i] = $len_tok_i and rLL->[$KK]->[_TOKEN_LENGTH_]=$len_tok_K
24452 near line $lno starting with '$tokens_to_go[0]..' at token i=$i K=$KK token_type='$type' token='$tok'
24457 } ## end sub check_batch_summed_lengths
24459 { ## begin closure set_vertical_alignment_markers
24460 my %is_vertical_alignment_type;
24461 my %is_not_vertical_alignment_token;
24462 my %is_vertical_alignment_keyword;
24463 my %is_terminal_alignment_type;
24464 my %is_low_level_alignment_token;
24470 # Replaced =~ and // in the list. // had been removed in RT 119588
24472 = **= += *= &= <<= &&= -= /= |= >>= ||= //= .= %= ^= x=
24473 { ? : => && || ~~ !~~ =~ !~ // <=> ->
24475 @is_vertical_alignment_type{@q} = (1) x scalar(@q);
24477 # These 'tokens' are not aligned. We need this to remove [
24478 # from the above list because it has type ='{'
24480 @is_not_vertical_alignment_token{@q} = (1) x scalar(@q);
24482 # these are the only types aligned at a line end
24484 @is_terminal_alignment_type{@q} = (1) x scalar(@q);
24486 # these tokens only align at line level
24488 @is_low_level_alignment_token{@q} = (1) x scalar(@q);
24490 # eq and ne were removed from this list to improve alignment chances
24491 @q = qw(if unless and or err for foreach while until);
24492 @is_vertical_alignment_keyword{@q} = (1) x scalar(@q);
24495 sub set_vertical_alignment_markers {
24497 my ( $self, $ri_first, $ri_last ) = @_;
24499 #----------------------------------------------------------------------
24500 # This routine looks at output lines for certain tokens which can serve
24501 # as vertical alignment markers (such as an '=').
24502 #----------------------------------------------------------------------
24504 # Input parameters:
24505 # $ri_first = ref to list of starting line indexes in _to_go arrays
24506 # $ri_last = ref to list of ending line indexes in _to_go arrays
24508 # Method: We look at each token $i in this output batch and set
24509 # $ralignment_type_to_go->[$i] equal to those tokens at which we would
24510 # accept vertical alignment.
24512 my $ralignment_type_to_go;
24513 my $ralignment_counts = [];
24514 my $ralignment_hash_by_line = [];
24516 # NOTE: closing side comments can insert up to 2 additional tokens
24517 # beyond the original $max_index_to_go, so we need to check ri_last for
24519 my $max_line = @{$ri_first} - 1;
24520 my $max_i = $ri_last->[$max_line];
24521 if ( $max_i < $max_index_to_go ) { $max_i = $max_index_to_go }
24523 # -----------------------------------------------------------------
24525 # - no alignments if there is only 1 token.
24526 # - and nothing to do if we aren't allowed to change whitespace.
24527 # -----------------------------------------------------------------
24528 if ( $max_i <= 0 || !$rOpts_add_whitespace ) {
24529 return ( $ralignment_type_to_go, $ralignment_counts,
24530 $ralignment_hash_by_line );
24533 my $rspecial_side_comment_type = $self->[_rspecial_side_comment_type_];
24534 my $ris_function_call_paren = $self->[_ris_function_call_paren_];
24535 my $rLL = $self->[_rLL_];
24537 # -------------------------------
24538 # First handle any side comment.
24539 # -------------------------------
24540 my $i_terminal = $max_i;
24541 if ( $types_to_go[$max_i] eq '#' ) {
24543 # We know $max_i > 0 if we get here.
24545 if ( $i_terminal > 0 && $types_to_go[$i_terminal] eq 'b' ) {
24549 my $token = $tokens_to_go[$max_i];
24550 my $KK = $K_to_go[$max_i];
24552 # Do not align various special side comments
24553 my $do_not_align = (
24555 # it is any specially marked side comment
24556 ( defined($KK) && $rspecial_side_comment_type->{$KK} )
24558 # or it is a static side comment
24559 || ( $rOpts->{'static-side-comments'}
24560 && $token =~ /$static_side_comment_pattern/ )
24562 # or a closing side comment
24563 || ( $types_to_go[$i_terminal] eq '}'
24564 && $tokens_to_go[$i_terminal] eq '}'
24565 && $token =~ /$closing_side_comment_prefix_pattern/ )
24568 # - For the specific combination -vc -nvsc, we put all side comments
24569 # at fixed locations. Note that we will lose hanging side comment
24570 # alignments. Otherwise, hsc's can move to strange locations.
24571 # - For -nvc -nvsc we make all side comments vertical alignments
24572 # because the vertical aligner will check for -nvsc and be able
24573 # to reduce the final padding to the side comments for long lines.
24574 # and keep hanging side comments aligned.
24575 if ( !$do_not_align
24576 && !$rOpts_valign_side_comments
24577 && $rOpts_valign_code )
24581 my $ipad = $max_i - 1;
24582 if ( $types_to_go[$ipad] eq 'b' ) {
24584 $rOpts->{'minimum-space-to-comment'} -
24585 $token_lengths_to_go[$ipad];
24586 $self->pad_token( $ipad, $pad_spaces );
24590 if ( !$do_not_align ) {
24591 $ralignment_type_to_go->[$max_i] = '#';
24592 $ralignment_hash_by_line->[$max_line]->{$max_i} = '#';
24593 $ralignment_counts->[$max_line]++;
24597 # ----------------------------------------------
24598 # Nothing more to do on this line if -nvc is set
24599 # ----------------------------------------------
24600 if ( !$rOpts_valign_code ) {
24601 return ( $ralignment_type_to_go, $ralignment_counts,
24602 $ralignment_hash_by_line );
24605 # -------------------------------------
24606 # Loop over each line of this batch ...
24607 # -------------------------------------
24608 my $last_vertical_alignment_BEFORE_index;
24609 my $vert_last_nonblank_type;
24610 my $vert_last_nonblank_token;
24612 foreach my $line ( 0 .. $max_line ) {
24614 my $ibeg = $ri_first->[$line];
24615 my $iend = $ri_last->[$line];
24617 next if ( $iend <= $ibeg );
24619 # back up before any side comment
24620 if ( $iend > $i_terminal ) { $iend = $i_terminal }
24622 my $level_beg = $levels_to_go[$ibeg];
24623 my $token_beg = $tokens_to_go[$ibeg];
24624 my $type_beg = $types_to_go[$ibeg];
24625 my $type_beg_special_char =
24626 ( $type_beg eq '.' || $type_beg eq ':' || $type_beg eq '?' );
24628 $last_vertical_alignment_BEFORE_index = -1;
24629 $vert_last_nonblank_type = $type_beg;
24630 $vert_last_nonblank_token = $token_beg;
24632 # ----------------------------------------------------------------
24633 # Initialization code merged from 'sub delete_needless_alignments'
24634 # ----------------------------------------------------------------
24635 my $i_good_paren = -1;
24636 my $i_elsif_close = $ibeg - 1;
24637 my $i_elsif_open = $iend + 1;
24639 if ( $type_beg eq 'k' ) {
24641 # Initialization for paren patch: mark a location of a paren we
24642 # should keep, such as one following something like a leading
24644 $i_good_paren = $ibeg + 1;
24645 if ( $types_to_go[$i_good_paren] eq 'b' ) {
24649 # Initialization for 'elsif' patch: remember the paren range of
24650 # an elsif, and do not make alignments within them because this
24651 # can cause loss of padding and overall brace alignment in the
24652 # vertical aligner.
24653 if ( $token_beg eq 'elsif'
24654 && $i_good_paren < $iend
24655 && $tokens_to_go[$i_good_paren] eq '(' )
24657 $i_elsif_open = $i_good_paren;
24658 $i_elsif_close = $mate_index_to_go[$i_good_paren];
24660 } ## end if ( $type_beg eq 'k' )
24662 # --------------------------------------------
24663 # Loop over each token in this output line ...
24664 # --------------------------------------------
24665 foreach my $i ( $ibeg + 1 .. $iend ) {
24667 next if ( $types_to_go[$i] eq 'b' );
24669 my $type = $types_to_go[$i];
24670 my $token = $tokens_to_go[$i];
24671 my $alignment_type = EMPTY_STRING;
24673 # ----------------------------------------------
24674 # Check for 'paren patch' : Remove excess parens
24675 # ----------------------------------------------
24677 # Excess alignment of parens can prevent other good alignments.
24678 # For example, note the parens in the first two rows of the
24679 # following snippet. They would normally get marked for
24680 # alignment and aligned as follows:
24682 # my $w = $columns * $cell_w + ( $columns + 1 ) * $border;
24683 # my $h = $rows * $cell_h + ( $rows + 1 ) * $border;
24684 # my $img = new Gimp::Image( $w, $h, RGB );
24686 # This causes unnecessary paren alignment and prevents the
24687 # third equals from aligning. If we remove the unwanted
24688 # alignments we get:
24690 # my $w = $columns * $cell_w + ( $columns + 1 ) * $border;
24691 # my $h = $rows * $cell_h + ( $rows + 1 ) * $border;
24692 # my $img = new Gimp::Image( $w, $h, RGB );
24694 # A rule for doing this which works well is to remove alignment
24695 # of parens whose containers do not contain other aligning
24696 # tokens, with the exception that we always keep alignment of
24697 # the first opening paren on a line (for things like 'if' and
24698 # 'elsif' statements).
24699 if ( $token eq ')' && @imatch_list ) {
24701 # undo the corresponding opening paren if:
24702 # - it is at the top of the stack
24703 # - and not the first overall opening paren
24704 # - does not follow a leading keyword on this line
24705 my $imate = $mate_index_to_go[$i];
24706 if ( $imatch_list[-1] eq $imate
24707 && ( $ibeg > 1 || @imatch_list > 1 )
24708 && $imate > $i_good_paren )
24710 if ( $ralignment_type_to_go->[$imate] ) {
24711 $ralignment_type_to_go->[$imate] = EMPTY_STRING;
24712 $ralignment_counts->[$line]--;
24713 delete $ralignment_hash_by_line->[$line]->{$imate};
24719 # do not align tokens at lower level than start of line
24720 # except for side comments
24721 if ( $levels_to_go[$i] < $level_beg ) {
24725 #--------------------------------------------------------
24726 # First see if we want to align BEFORE this token
24727 #--------------------------------------------------------
24729 # The first possible token that we can align before
24730 # is index 2 because: 1) it doesn't normally make sense to
24731 # align before the first token and 2) the second
24732 # token must be a blank if we are to align before
24734 if ( $i < $ibeg + 2 ) { }
24736 # must follow a blank token
24737 elsif ( $types_to_go[ $i - 1 ] ne 'b' ) { }
24739 # otherwise, do not align two in a row to create a
24741 elsif ( $last_vertical_alignment_BEFORE_index == $i - 2 ) { }
24743 # align before one of these keywords
24744 # (within a line, since $i>1)
24745 elsif ( $type eq 'k' ) {
24747 # /^(if|unless|and|or|eq|ne)$/
24748 if ( $is_vertical_alignment_keyword{$token} ) {
24749 $alignment_type = $token;
24753 # align qw in a 'use' statement (issue git #93)
24754 elsif ( $type eq 'q' ) {
24755 if ( $types_to_go[0] eq 'k' && $tokens_to_go[0] eq 'use' ) {
24756 $alignment_type = $type;
24760 # align before one of these types..
24761 elsif ( $is_vertical_alignment_type{$type}
24762 && !$is_not_vertical_alignment_token{$token} )
24764 $alignment_type = $token;
24766 # Do not align a terminal token. Although it might
24767 # occasionally look ok to do this, this has been found to be
24768 # a good general rule. The main problems are:
24769 # (1) that the terminal token (such as an = or :) might get
24770 # moved far to the right where it is hard to see because
24771 # nothing follows it, and
24772 # (2) doing so may prevent other good alignments.
24773 # Current exceptions are && and || and =>
24774 if ( $i == $iend ) {
24775 $alignment_type = EMPTY_STRING
24776 unless ( $is_terminal_alignment_type{$type} );
24779 # Do not align leading ': (' or '. ('. This would prevent
24780 # alignment in something like the following:
24782 # ( $input_line_number < 10 ) ? " "
24783 # : ( $input_line_number < 100 ) ? " "
24787 # ( $case_matters ? $accessor : " lc($accessor) " )
24788 # . ( $yesno ? " eq " : " ne " )
24790 # Also, do not align a ( following a leading ? so we can
24791 # align something like this:
24792 # $converter{$_}->{ushortok} =
24793 # $PDL::IO::Pic::biggrays
24794 # ? ( m/GIF/ ? 0 : 1 )
24795 # : ( m/GIF|RAST|IFF/ ? 0 : 1 );
24796 if ( $type_beg_special_char
24798 && $types_to_go[ $i - 1 ] eq 'b' )
24800 $alignment_type = EMPTY_STRING;
24803 # Certain tokens only align at the same level as the
24804 # initial line level
24805 if ( $is_low_level_alignment_token{$token}
24806 && $levels_to_go[$i] != $level_beg )
24808 $alignment_type = EMPTY_STRING;
24811 if ( $token eq '(' ) {
24813 # For a paren after keyword, only align if-like parens,
24816 # elsif ( $b ) { &b }
24817 # ^-------------------aligned parens
24818 if ( $vert_last_nonblank_type eq 'k'
24819 && !$is_if_unless_elsif{$vert_last_nonblank_token} )
24821 $alignment_type = EMPTY_STRING;
24824 # Do not align a spaced-function-paren if requested.
24825 # Issue git #53, #73.
24826 if ( !$rOpts_function_paren_vertical_alignment ) {
24827 my $seqno = $type_sequence_to_go[$i];
24828 $alignment_type = EMPTY_STRING
24829 if ( $ris_function_call_paren->{$seqno} );
24832 # make () align with qw in a 'use' statement (git #93)
24833 if ( $tokens_to_go[0] eq 'use'
24834 && $types_to_go[0] eq 'k'
24835 && $mate_index_to_go[$i] == $i + 1 )
24837 $alignment_type = 'q';
24839 ## Note on discussion git #101. We could make this
24840 ## a separate type '()' to separate it from qw's:
24841 ## $alignment_type =
24842 ## $rOpts_valign_empty_parens_with_qw ? 'q' : '()';
24846 # be sure the alignment tokens are unique
24847 # This didn't work well: reason not determined
24848 # if ($token ne $type) {$alignment_type .= $type}
24851 # NOTE: This is deactivated because it causes the previous
24852 # if/elsif alignment to fail
24853 #elsif ( $type eq '}' && $token eq '}' && $block_type_to_go[$i])
24854 #{ $alignment_type = $type; }
24856 if ($alignment_type) {
24857 $last_vertical_alignment_BEFORE_index = $i;
24860 #--------------------------------------------------------
24861 # Next see if we want to align AFTER the previous nonblank
24862 #--------------------------------------------------------
24864 # We want to line up ',' and interior ';' tokens, with the added
24865 # space AFTER these tokens. (Note: interior ';' is included
24866 # because it may occur in short blocks).
24869 # previous token IS one of these:
24871 $vert_last_nonblank_type eq ','
24872 || $vert_last_nonblank_type eq ';'
24875 # and it follows a blank
24876 && $types_to_go[ $i - 1 ] eq 'b'
24878 # and it's NOT one of these
24879 && !$is_closing_token{$type}
24881 # then go ahead and align
24885 $alignment_type = $vert_last_nonblank_type;
24888 #-----------------------
24889 # Set the alignment type
24890 #-----------------------
24891 if ($alignment_type) {
24893 # but do not align the opening brace of an anonymous sub
24895 && $block_type_to_go[$i] =~ /$ASUB_PATTERN/ )
24900 # and do not make alignments within 'elsif' parens
24901 elsif ( $i > $i_elsif_open && $i < $i_elsif_close ) {
24905 # and ignore any tokens which have leading padded spaces
24906 # example: perl527/lop.t
24907 elsif ( substr( $alignment_type, 0, 1 ) eq SPACE ) {
24912 $ralignment_type_to_go->[$i] = $alignment_type;
24913 $ralignment_hash_by_line->[$line]->{$i} =
24915 $ralignment_counts->[$line]++;
24916 push @imatch_list, $i;
24920 $vert_last_nonblank_type = $type;
24921 $vert_last_nonblank_token = $token;
24925 return ( $ralignment_type_to_go, $ralignment_counts,
24926 $ralignment_hash_by_line );
24927 } ## end sub set_vertical_alignment_markers
24928 } ## end closure set_vertical_alignment_markers
24930 sub make_vertical_alignments {
24931 my ( $self, $ri_first, $ri_last ) = @_;
24933 #----------------------------
24934 # Shortcut for a single token
24935 #----------------------------
24936 if ( $max_index_to_go == 0 ) {
24937 if ( @{$ri_first} == 1 && $ri_last->[0] == 0 ) {
24939 my $rfields = [ $tokens_to_go[0] ];
24940 my $rpatterns = [ $types_to_go[0] ];
24941 my $rfield_lengths =
24942 [ $summed_lengths_to_go[1] - $summed_lengths_to_go[0] ];
24943 return [ [ $rtokens, $rfields, $rpatterns, $rfield_lengths ] ];
24946 # Strange line packing, not fatal but should not happen
24947 elsif (DEVEL_MODE) {
24948 my $max_line = @{$ri_first} - 1;
24949 my $ibeg = $ri_first->[0];
24950 my $iend = $ri_last->[0];
24951 my $tok_b = $tokens_to_go[$ibeg];
24952 my $tok_e = $tokens_to_go[$iend];
24953 my $type_b = $types_to_go[$ibeg];
24954 my $type_e = $types_to_go[$iend];
24956 "Strange..max_index=0 but nlines=$max_line ibeg=$ibeg tok=$tok_b type=$type_b iend=$iend tok=$tok_e type=$type_e; please check\n"
24961 #---------------------------------------------------------
24962 # Step 1: Define the alignment tokens for the entire batch
24963 #---------------------------------------------------------
24964 my ( $ralignment_type_to_go, $ralignment_counts, $ralignment_hash_by_line );
24966 # We only need to make this call if vertical alignment of code is
24967 # requested or if a line might have a side comment.
24968 if ( $rOpts_valign_code
24969 || $types_to_go[$max_index_to_go] eq '#' )
24971 ( $ralignment_type_to_go, $ralignment_counts, $ralignment_hash_by_line )
24972 = $self->set_vertical_alignment_markers( $ri_first, $ri_last );
24975 #----------------------------------------------
24976 # Step 2: Break each line into alignment fields
24977 #----------------------------------------------
24978 my $rline_alignments = [];
24979 my $max_line = @{$ri_first} - 1;
24980 foreach my $line ( 0 .. $max_line ) {
24982 my $ibeg = $ri_first->[$line];
24983 my $iend = $ri_last->[$line];
24985 my $rtok_fld_pat_len = $self->make_alignment_patterns(
24986 $ibeg, $iend, $ralignment_type_to_go,
24987 $ralignment_counts->[$line],
24988 $ralignment_hash_by_line->[$line]
24990 push @{$rline_alignments}, $rtok_fld_pat_len;
24992 return $rline_alignments;
24993 } ## end sub make_vertical_alignments
24997 # get opening and closing sequence numbers of a token for the vertical
24998 # aligner. Assign qw quotes a value to allow qw opening and closing tokens
24999 # to be treated somewhat like opening and closing tokens for stacking
25000 # tokens by the vertical aligner.
25001 my ( $self, $ii, $ending_in_quote ) = @_;
25003 my $rLL = $self->[_rLL_];
25005 my $KK = $K_to_go[$ii];
25006 my $seqno = $rLL->[$KK]->[_TYPE_SEQUENCE_];
25008 if ( $rLL->[$KK]->[_TYPE_] eq 'q' ) {
25010 my $token = $rLL->[$KK]->[_TOKEN_];
25012 $seqno = $SEQ_QW if ( $token =~ /^qw\s*[\(\{\[]/ );
25015 if ( !$ending_in_quote ) {
25016 $seqno = $SEQ_QW if ( $token =~ /[\)\}\]]$/ );
25021 } ## end sub get_seqno
25024 my %undo_extended_ci;
25026 sub initialize_undo_ci {
25027 %undo_extended_ci = ();
25033 # Undo continuation indentation in certain sequences
25034 my ( $self, $ri_first, $ri_last, $rix_seqno_controlling_ci ) = @_;
25035 my ( $line_1, $line_2, $lev_last );
25036 my $max_line = @{$ri_first} - 1;
25038 my $rseqno_controlling_my_ci = $self->[_rseqno_controlling_my_ci_];
25040 # Prepare a list of controlling indexes for each line if required.
25041 # This is used for efficient processing below. Note: this is
25042 # critical for speed. In the initial implementation I just looped
25043 # through the @$rix_seqno_controlling_ci list below. Using NYT_prof, I
25044 # found that this routine was causing a huge run time in large lists.
25045 # On a very large list test case, this new coding dropped the run time
25046 # of this routine from 30 seconds to 169 milliseconds.
25047 my @i_controlling_ci;
25048 if ( @{$rix_seqno_controlling_ci} ) {
25049 my @tmp = reverse @{$rix_seqno_controlling_ci};
25050 my $ix_next = pop @tmp;
25051 foreach my $line ( 0 .. $max_line ) {
25052 my $iend = $ri_last->[$line];
25053 while ( defined($ix_next) && $ix_next <= $iend ) {
25054 push @{ $i_controlling_ci[$line] }, $ix_next;
25055 $ix_next = pop @tmp;
25060 # Loop over all lines of the batch ...
25062 # Workaround originally created for problem c007, in which the
25063 # combination -lp -xci could produce a "Program bug" message in unusual
25065 my $skip_SECTION_1;
25066 if ( $rOpts_line_up_parentheses
25067 && $rOpts_extended_continuation_indentation )
25070 # Only set this flag if -lp is actually used here
25071 foreach my $line ( 0 .. $max_line ) {
25072 my $ibeg = $ri_first->[$line];
25073 if ( ref( $leading_spaces_to_go[$ibeg] ) ) {
25074 $skip_SECTION_1 = 1;
25080 foreach my $line ( 0 .. $max_line ) {
25082 my $ibeg = $ri_first->[$line];
25083 my $iend = $ri_last->[$line];
25084 my $lev = $levels_to_go[$ibeg];
25086 #-----------------------------------
25087 # SECTION 1: Undo needless common CI
25088 #-----------------------------------
25090 # We are looking at leading tokens and looking for a sequence all
25091 # at the same level and all at a higher level than enclosing lines.
25093 # For example, we can undo continuation indentation in sort/map/grep
25096 # my $dat1 = pack( "n*",
25097 # map { $_, $lookup->{$_} }
25098 # sort { $a <=> $b }
25099 # grep { $lookup->{$_} ne $default } keys %$lookup );
25103 # my $dat1 = pack( "n*",
25104 # map { $_, $lookup->{$_} }
25105 # sort { $a <=> $b }
25106 # grep { $lookup->{$_} ne $default } keys %$lookup );
25108 if ( $line > 0 && !$skip_SECTION_1 ) {
25110 # if we have started a chain..
25113 # see if it continues..
25114 if ( $lev == $lev_last ) {
25115 if ( $types_to_go[$ibeg] eq 'k'
25116 && $is_sort_map_grep{ $tokens_to_go[$ibeg] } )
25119 # chain continues...
25120 # check for chain ending at end of a statement
25121 my $is_semicolon_terminated = (
25124 $types_to_go[$iend] eq ';'
25126 # with possible side comment
25127 || ( $types_to_go[$iend] eq '#'
25128 && $iend - $ibeg >= 2
25129 && $types_to_go[ $iend - 2 ] eq ';'
25130 && $types_to_go[ $iend - 1 ] eq 'b' )
25135 if ($is_semicolon_terminated);
25143 elsif ( $lev < $lev_last ) {
25145 # chain ends with previous line
25146 $line_2 = $line - 1;
25148 elsif ( $lev > $lev_last ) {
25154 # undo the continuation indentation if a chain ends
25155 if ( defined($line_2) && defined($line_1) ) {
25156 my $continuation_line_count = $line_2 - $line_1 + 1;
25157 @ci_levels_to_go[ @{$ri_first}[ $line_1 .. $line_2 ] ]
25158 = (0) x ($continuation_line_count)
25159 if ( $continuation_line_count >= 0 );
25160 @leading_spaces_to_go[ @{$ri_first}
25161 [ $line_1 .. $line_2 ] ] =
25162 @reduced_spaces_to_go[ @{$ri_first}
25163 [ $line_1 .. $line_2 ] ];
25168 # not in a chain yet..
25171 # look for start of a new sort/map/grep chain
25172 if ( $lev > $lev_last ) {
25173 if ( $types_to_go[$ibeg] eq 'k'
25174 && $is_sort_map_grep{ $tokens_to_go[$ibeg] } )
25182 #-------------------------------------
25183 # SECTION 2: Undo ci at cuddled blocks
25184 #-------------------------------------
25186 # Note that sub get_final_indentation will be called later to
25187 # actually do this, but for now we will tentatively mark cuddled
25188 # lines with ci=0 so that the the -xci loop which follows will be
25189 # correct at cuddles.
25191 $types_to_go[$ibeg] eq '}'
25192 && ( $nesting_depth_to_go[$iend] + 1 ==
25193 $nesting_depth_to_go[$ibeg] )
25196 my $terminal_type = $types_to_go[$iend];
25197 if ( $terminal_type eq '#' && $iend > $ibeg ) {
25198 $terminal_type = $types_to_go[ $iend - 1 ];
25199 if ( $terminal_type eq '#' && $iend - 1 > $ibeg ) {
25200 $terminal_type = $types_to_go[ $iend - 2 ];
25204 # Patch for rt144979, part 2. Coordinated with part 1.
25205 # Skip cuddled braces.
25206 my $seqno_beg = $type_sequence_to_go[$ibeg];
25207 my $is_cuddled_closing_brace = $seqno_beg
25208 && $self->[_ris_cuddled_closing_brace_]->{$seqno_beg};
25210 if ( $terminal_type eq '{' && !$is_cuddled_closing_brace ) {
25211 my $Kbeg = $K_to_go[$ibeg];
25212 $ci_levels_to_go[$ibeg] = 0;
25216 #--------------------------------------------------------
25217 # SECTION 3: Undo ci set by sub extended_ci if not needed
25218 #--------------------------------------------------------
25220 # Undo the ci of the leading token if its controlling token
25221 # went out on a previous line without ci
25222 if ( $ci_levels_to_go[$ibeg] ) {
25223 my $Kbeg = $K_to_go[$ibeg];
25224 my $seqno = $rseqno_controlling_my_ci->{$Kbeg};
25225 if ( $seqno && $undo_extended_ci{$seqno} ) {
25227 # but do not undo ci set by the -lp flag
25228 if ( !ref( $reduced_spaces_to_go[$ibeg] ) ) {
25229 $ci_levels_to_go[$ibeg] = 0;
25230 $leading_spaces_to_go[$ibeg] =
25231 $reduced_spaces_to_go[$ibeg];
25236 # Flag any controlling opening tokens in lines without ci. This
25237 # will be used later in the above if statement to undo the ci which
25238 # they added. The array i_controlling_ci[$line] was prepared at
25239 # the top of this routine.
25240 if ( !$ci_levels_to_go[$ibeg]
25241 && defined( $i_controlling_ci[$line] ) )
25243 foreach my $i ( @{ $i_controlling_ci[$line] } ) {
25244 my $seqno = $type_sequence_to_go[$i];
25245 $undo_extended_ci{$seqno} = 1;
25253 } ## end sub undo_ci
25256 { ## begin closure set_logical_padding
25261 my @q = qw( + - * / );
25262 @is_math_op{@q} = (1) x scalar(@q);
25265 sub set_logical_padding {
25267 # Look at a batch of lines and see if extra padding can improve the
25268 # alignment when there are certain leading operators. Here is an
25269 # example, in which some extra space is introduced before
25270 # '( $year' to make it line up with the subsequent lines:
25272 # if ( ( $Year < 1601 )
25273 # || ( $Year > 2899 )
25274 # || ( $EndYear < 1601 )
25275 # || ( $EndYear > 2899 ) )
25277 # &Error_OutOfRange;
25280 my ( $self, $ri_first, $ri_last, $starting_in_quote ) = @_;
25281 my $max_line = @{$ri_first} - 1;
25283 my ( $ibeg, $ibeg_next, $ibegm, $iend, $iendm, $ipad, $pad_spaces,
25284 $tok_next, $type_next, $has_leading_op_next, $has_leading_op );
25286 # Patch to produce padding in the first line of short code blocks.
25287 # This is part of an update to fix cases b562 .. b983.
25288 # This is needed to compensate for a change which was made in 'sub
25289 # starting_one_line_block' to prevent blinkers. Previously, that sub
25290 # would not look at the total block size and rely on sub
25291 # break_long_lines to break up long blocks. Consequently, the
25292 # first line of those batches would end in the opening block brace of a
25293 # sort/map/grep/eval block. When this was changed to immediately check
25294 # for blocks which were too long, the opening block brace would go out
25295 # in a single batch, and the block contents would go out as the next
25296 # batch. This caused the logic in this routine which decides if the
25297 # first line should be padded to be incorrect. To fix this, we set a
25298 # flag if the previous batch ended in an opening sort/map/grep/eval
25299 # block brace, and use it to adjust the logic to compensate.
25301 # For example, the following would have previously been a single batch
25302 # but now is two batches. We want to pad the line starting in '$dir':
25303 # my (@indices) = # batch n-1 (prev batch n)
25304 # sort { # batch n-1 (prev batch n)
25305 # $dir eq 'left' # batch n
25306 # ? $cells[$a] <=> $cells[$b] # batch n
25307 # : $cells[$b] <=> $cells[$a]; # batch n
25308 # } ( 0 .. $#cells ); # batch n
25310 my $rLL = $self->[_rLL_];
25311 my $rblock_type_of_seqno = $self->[_rblock_type_of_seqno_];
25313 my $is_short_block;
25314 if ( $K_to_go[0] > 0 ) {
25315 my $Kp = $K_to_go[0] - 1;
25316 if ( $Kp > 0 && $rLL->[$Kp]->[_TYPE_] eq 'b' ) {
25319 if ( $Kp > 0 && $rLL->[$Kp]->[_TYPE_] eq '#' ) {
25321 if ( $Kp > 0 && $rLL->[$Kp]->[_TYPE_] eq 'b' ) {
25325 my $seqno = $rLL->[$Kp]->[_TYPE_SEQUENCE_];
25327 my $block_type = $rblock_type_of_seqno->{$seqno};
25329 $is_short_block = $is_sort_map_grep_eval{$block_type};
25330 $is_short_block ||= $want_one_line_block{$block_type};
25335 # looking at each line of this batch..
25336 foreach my $line ( 0 .. $max_line - 1 ) {
25338 # see if the next line begins with a logical operator
25339 $ibeg = $ri_first->[$line];
25340 $iend = $ri_last->[$line];
25341 $ibeg_next = $ri_first->[ $line + 1 ];
25342 $tok_next = $tokens_to_go[$ibeg_next];
25343 $type_next = $types_to_go[$ibeg_next];
25345 $has_leading_op_next = ( $tok_next =~ /^\w/ )
25346 ? $is_chain_operator{$tok_next} # + - * / : ? && ||
25347 : $is_chain_operator{$type_next}; # and, or
25349 next unless ($has_leading_op_next);
25351 # next line must not be at lesser depth
25353 if ( $nesting_depth_to_go[$ibeg] >
25354 $nesting_depth_to_go[$ibeg_next] );
25356 # identify the token in this line to be padded on the left
25359 # handle lines at same depth...
25360 if ( $nesting_depth_to_go[$ibeg] ==
25361 $nesting_depth_to_go[$ibeg_next] )
25364 # if this is not first line of the batch ...
25367 # and we have leading operator..
25368 next if $has_leading_op;
25370 # Introduce padding if..
25371 # 1. the previous line is at lesser depth, or
25372 # 2. the previous line ends in an assignment
25373 # 3. the previous line ends in a 'return'
25374 # 4. the previous line ends in a comma
25375 # Example 1: previous line at lesser depth
25376 # if ( ( $Year < 1601 ) # <- we are here but
25377 # || ( $Year > 2899 ) # list has not yet
25378 # || ( $EndYear < 1601 ) # collapsed vertically
25379 # || ( $EndYear > 2899 ) )
25382 # Example 2: previous line ending in assignment:
25384 # $year % 4 ? 0 # <- We are here
25385 # : $year % 100 ? 1
25386 # : $year % 400 ? 0
25389 # Example 3: previous line ending in comma:
25396 # be sure levels agree (never indent after an indented 'if')
25398 if ( $levels_to_go[$ibeg] ne $levels_to_go[$ibeg_next] );
25400 # allow padding on first line after a comma but only if:
25401 # (1) this is line 2 and
25402 # (2) there are at more than three lines and
25403 # (3) lines 3 and 4 have the same leading operator
25404 # These rules try to prevent padding within a long
25405 # comma-separated list.
25407 if ( $types_to_go[$iendm] eq ','
25411 my $ibeg_next_next = $ri_first->[ $line + 2 ];
25412 my $tok_next_next = $tokens_to_go[$ibeg_next_next];
25413 $ok_comma = $tok_next_next eq $tok_next;
25418 $is_assignment{ $types_to_go[$iendm] }
25420 || ( $nesting_depth_to_go[$ibegm] <
25421 $nesting_depth_to_go[$ibeg] )
25422 || ( $types_to_go[$iendm] eq 'k'
25423 && $tokens_to_go[$iendm] eq 'return' )
25426 # we will add padding before the first token
25430 # for first line of the batch..
25433 # WARNING: Never indent if first line is starting in a
25434 # continued quote, which would change the quote.
25435 next if $starting_in_quote;
25437 # if this is text after closing '}'
25438 # then look for an interior token to pad
25439 if ( $types_to_go[$ibeg] eq '}' ) {
25443 # otherwise, we might pad if it looks really good
25444 elsif ($is_short_block) {
25449 # we might pad token $ibeg, so be sure that it
25450 # is at the same depth as the next line.
25452 if ( $nesting_depth_to_go[$ibeg] !=
25453 $nesting_depth_to_go[$ibeg_next] );
25455 # We can pad on line 1 of a statement if at least 3
25456 # lines will be aligned. Otherwise, it
25457 # can look very confusing.
25459 # We have to be careful not to pad if there are too few
25460 # lines. The current rule is:
25461 # (1) in general we require at least 3 consecutive lines
25462 # with the same leading chain operator token,
25463 # (2) but an exception is that we only require two lines
25464 # with leading colons if there are no more lines. For example,
25465 # the first $i in the following snippet would get padding
25466 # by the second rule:
25468 # $i == 1 ? ( "First", "Color" )
25469 # : $i == 2 ? ( "Then", "Rarity" )
25470 # : ( "Then", "Name" );
25472 next if ( $max_line <= 1 );
25474 my $leading_token = $tokens_to_go[$ibeg_next];
25477 # never indent line 1 of a '.' series because
25478 # previous line is most likely at same level.
25479 # TODO: we should also look at the leading_spaces
25480 # of the last output line and skip if it is same
25482 next if ( $leading_token eq '.' );
25485 foreach my $l ( 2 .. 3 ) {
25486 last if ( $line + $l > $max_line );
25488 my $ibeg_next_next = $ri_first->[ $line + $l ];
25490 if ( $tokens_to_go[$ibeg_next_next] eq
25492 $tokens_differ = 1;
25495 next if ($tokens_differ);
25496 next if ( $count < 3 && $leading_token ne ':' );
25502 # find interior token to pad if necessary
25503 if ( !defined($ipad) ) {
25505 foreach my $i ( $ibeg .. $iend - 1 ) {
25507 # find any unclosed container
25509 unless ( $type_sequence_to_go[$i]
25510 && $mate_index_to_go[$i] > $iend );
25512 # find next nonblank token to pad
25513 $ipad = $inext_to_go[$i];
25516 last if ( !$ipad || $ipad > $iend );
25519 # We cannot pad the first leading token of a file because
25520 # it could cause a bug in which the starting indentation
25521 # level is guessed incorrectly each time the code is run
25522 # though perltidy, thus causing the code to march off to
25523 # the right. For example, the following snippet would have
25526 ## ov_method mycan( $package, '(""' ), $package
25527 ## or ov_method mycan( $package, '(0+' ), $package
25528 ## or ov_method mycan( $package, '(bool' ), $package
25529 ## or ov_method mycan( $package, '(nomethod' ), $package;
25531 # If this snippet is within a block this won't happen
25532 # unless the user just processes the snippet alone within
25533 # an editor. In that case either the user will see and
25534 # fix the problem or it will be corrected next time the
25535 # entire file is processed with perltidy.
25536 my $this_batch = $self->[_this_batch_];
25537 my $peak_batch_size = $this_batch->[_peak_batch_size_];
25538 next if ( $ipad == 0 && $peak_batch_size <= 1 );
25540 # next line must not be at greater depth
25541 my $iend_next = $ri_last->[ $line + 1 ];
25543 if ( $nesting_depth_to_go[ $iend_next + 1 ] >
25544 $nesting_depth_to_go[$ipad] );
25546 # lines must be somewhat similar to be padded..
25547 my $inext_next = $inext_to_go[$ibeg_next];
25548 my $type = $types_to_go[$ipad];
25550 # see if there are multiple continuation lines
25551 my $logical_continuation_lines = 1;
25552 if ( $line + 2 <= $max_line ) {
25553 my $leading_token = $tokens_to_go[$ibeg_next];
25554 my $ibeg_next_next = $ri_first->[ $line + 2 ];
25555 if ( $tokens_to_go[$ibeg_next_next] eq $leading_token
25556 && $nesting_depth_to_go[$ibeg_next] eq
25557 $nesting_depth_to_go[$ibeg_next_next] )
25559 $logical_continuation_lines++;
25563 # see if leading types match
25564 my $types_match = $types_to_go[$inext_next] eq $type;
25565 my $matches_without_bang;
25567 # if first line has leading ! then compare the following token
25568 if ( !$types_match && $type eq '!' ) {
25569 $types_match = $matches_without_bang =
25570 $types_to_go[$inext_next] eq $types_to_go[ $ipad + 1 ];
25574 # either we have multiple continuation lines to follow
25575 # and we are not padding the first token
25577 $logical_continuation_lines > 1
25578 && ( $ipad > 0 || $is_short_block )
25587 # and keywords must match if keyword
25590 && $tokens_to_go[$ipad] ne $tokens_to_go[$inext_next]
25596 #----------------------begin special checks--------------
25599 # A check is needed before we can make the pad.
25600 # If we are in a list with some long items, we want each
25601 # item to stand out. So in the following example, the
25602 # first line beginning with '$casefold->' would look good
25603 # padded to align with the next line, but then it
25604 # would be indented more than the last line, so we
25608 # $casefold->{code} eq '0041'
25609 # && $casefold->{status} eq 'C'
25610 # && $casefold->{mapping} eq '0061',
25615 # It would be faster, and almost as good, to use a comma
25616 # count, and not pad if comma_count > 1 and the previous
25617 # line did not end with a comma.
25621 my $ibg = $ri_first->[ $line + 1 ];
25622 my $depth = $nesting_depth_to_go[ $ibg + 1 ];
25624 # just use simplified formula for leading spaces to avoid
25625 # needless sub calls
25626 my $lsp = $levels_to_go[$ibg] + $ci_levels_to_go[$ibg];
25628 # look at each line beyond the next ..
25630 foreach my $ltest ( $line + 2 .. $max_line ) {
25632 my $ibeg_t = $ri_first->[$l];
25634 # quit looking at the end of this container
25636 if ( $nesting_depth_to_go[ $ibeg_t + 1 ] < $depth )
25637 || ( $nesting_depth_to_go[$ibeg_t] < $depth );
25639 # cannot do the pad if a later line would be
25641 if ( $levels_to_go[$ibeg_t] + $ci_levels_to_go[$ibeg_t] <
25649 # don't pad if we end in a broken list
25650 if ( $l == $max_line ) {
25651 my $i2 = $ri_last->[$l];
25652 if ( $types_to_go[$i2] eq '#' ) {
25653 my $i1 = $ri_first->[$l];
25654 next if terminal_type_i( $i1, $i2 ) eq ',';
25659 # a minus may introduce a quoted variable, and we will
25660 # add the pad only if this line begins with a bare word,
25661 # such as for the word 'Button' here:
25663 # Button => "Print letter \"~$_\"",
25664 # -command => [ sub { print "$_[0]\n" }, $_ ],
25665 # -accelerator => "Meta+$_"
25668 # On the other hand, if 'Button' is quoted, it looks best
25671 # 'Button' => "Print letter \"~$_\"",
25672 # -command => [ sub { print "$_[0]\n" }, $_ ],
25673 # -accelerator => "Meta+$_"
25675 if ( $types_to_go[$ibeg_next] eq 'm' ) {
25676 $ok_to_pad = 0 if $types_to_go[$ibeg] eq 'Q';
25679 next unless $ok_to_pad;
25681 #----------------------end special check---------------
25683 my $length_1 = total_line_length( $ibeg, $ipad - 1 );
25684 my $length_2 = total_line_length( $ibeg_next, $inext_next - 1 );
25685 $pad_spaces = $length_2 - $length_1;
25687 # If the first line has a leading ! and the second does
25688 # not, then remove one space to try to align the next
25689 # leading characters, which are often the same. For example:
25691 # || $ts == $self->Holder
25692 # || $self->Holder->Type eq "Arena" )
25694 # This usually helps readability, but if there are subsequent
25695 # ! operators things will still get messed up. For example:
25697 # if ( !exists $Net::DNS::typesbyname{$qtype}
25698 # && exists $Net::DNS::classesbyname{$qtype}
25699 # && !exists $Net::DNS::classesbyname{$qclass}
25700 # && exists $Net::DNS::typesbyname{$qclass} )
25701 # We can't fix that.
25702 if ($matches_without_bang) { $pad_spaces-- }
25704 # make sure this won't change if -lp is used
25705 my $indentation_1 = $leading_spaces_to_go[$ibeg];
25706 if ( ref($indentation_1)
25707 && $indentation_1->get_recoverable_spaces() == 0 )
25709 my $indentation_2 = $leading_spaces_to_go[$ibeg_next];
25710 if ( ref($indentation_2)
25711 && $indentation_2->get_recoverable_spaces() != 0 )
25717 # we might be able to handle a pad of -1 by removing a blank
25719 if ( $pad_spaces < 0 ) {
25721 # Deactivated for -kpit due to conflict. This block deletes
25722 # a space in an attempt to improve alignment in some cases,
25723 # but it may conflict with user spacing requests. For now
25724 # it is just deactivated if the -kpit option is used.
25725 if ( $pad_spaces == -1 ) {
25727 && $types_to_go[ $ipad - 1 ] eq 'b'
25728 && !%keyword_paren_inner_tightness )
25730 $self->pad_token( $ipad - 1, $pad_spaces );
25736 # now apply any padding for alignment
25737 if ( $ipad >= 0 && $pad_spaces ) {
25739 my $length_t = total_line_length( $ibeg, $iend );
25740 if ( $pad_spaces + $length_t <=
25741 $maximum_line_length_at_level[ $levels_to_go[$ibeg] ] )
25743 $self->pad_token( $ipad, $pad_spaces );
25751 $has_leading_op = $has_leading_op_next;
25752 } ## end of loop over lines
25754 } ## end sub set_logical_padding
25755 } ## end closure set_logical_padding
25759 # insert $pad_spaces before token number $ipad
25760 my ( $self, $ipad, $pad_spaces ) = @_;
25761 my $rLL = $self->[_rLL_];
25762 my $KK = $K_to_go[$ipad];
25763 my $tok = $rLL->[$KK]->[_TOKEN_];
25764 my $tok_len = $rLL->[$KK]->[_TOKEN_LENGTH_];
25766 if ( $pad_spaces > 0 ) {
25767 $tok = SPACE x $pad_spaces . $tok;
25768 $tok_len += $pad_spaces;
25770 elsif ( $pad_spaces == 0 ) {
25773 elsif ( $pad_spaces == -1 && $tokens_to_go[$ipad] eq SPACE ) {
25774 $tok = EMPTY_STRING;
25781 && Fault("unexpected request for pad spaces = $pad_spaces\n");
25785 $tok = $rLL->[$KK]->[_TOKEN_] = $tok;
25786 $tok_len = $rLL->[$KK]->[_TOKEN_LENGTH_] = $tok_len;
25788 $token_lengths_to_go[$ipad] += $pad_spaces;
25789 $tokens_to_go[$ipad] = $tok;
25791 foreach my $i ( $ipad .. $max_index_to_go ) {
25792 $summed_lengths_to_go[ $i + 1 ] += $pad_spaces;
25795 } ## end sub pad_token
25799 # Remove one indentation space from unbroken containers marked with
25800 # 'K_extra_space'. These are mostly two-line lists with short names
25801 # formatted with -xlp -pt=2.
25803 # Before this fix (extra space in line 2):
25804 # is($module->VERSION, $expected,
25805 # "$main_module->VERSION matches $module->VERSION ($expected)");
25808 # is($module->VERSION, $expected,
25809 # "$main_module->VERSION matches $module->VERSION ($expected)");
25812 # - This fixes issue git #106
25813 # - This must be called after 'set_logical_padding'.
25814 # - This is currently only applied to -xlp. It would also work for -lp
25815 # but that style is essentially frozen.
25817 my ( $self, $ri_first, $ri_last ) = @_;
25819 # Must be 2 or more lines
25820 return unless ( @{$ri_first} > 1 );
25822 # Pull indentation object from start of second line
25823 my $ibeg_1 = $ri_first->[1];
25824 my $lp_object = $leading_spaces_to_go[$ibeg_1];
25825 return if ( !ref($lp_object) );
25827 # This only applies to an indentation object with a marked token
25828 my $K_extra_space = $lp_object->get_K_extra_space();
25829 return unless ($K_extra_space);
25831 # Look for the marked token within the first line of this batch
25832 my $ibeg_0 = $ri_first->[0];
25833 my $iend_0 = $ri_last->[0];
25834 my $ii = $ibeg_0 + $K_extra_space - $K_to_go[$ibeg_0];
25835 return if ( $ii <= $ibeg_0 || $ii > $iend_0 );
25837 # Skip padded tokens, they have already been aligned
25838 my $tok = $tokens_to_go[$ii];
25839 return if ( substr( $tok, 0, 1 ) eq SPACE );
25841 # Skip 'if'-like statements, this does not improve them
25843 if ( $types_to_go[$ibeg_0] eq 'k'
25844 && $is_if_unless_elsif{ $tokens_to_go[$ibeg_0] } );
25846 # Looks okay, reduce indentation by 1 space if possible
25847 my $spaces = $lp_object->get_spaces();
25848 if ( $spaces > 0 ) {
25849 $lp_object->decrease_SPACES(1);
25855 { ## begin closure make_alignment_patterns
25860 my %is_my_local_our;
25863 my %is_binary_type;
25864 my %is_binary_keyword;
25869 # Note: %block_type_map is now global to enable the -gal=s option
25871 # map certain keywords to the same 'if' class to align
25872 # long if/elsif sequences. [elsif.pl]
25878 'default' => 'given',
25879 'case' => 'switch',
25881 # treat an 'undef' similar to numbers and quotes
25885 # map certain operators to the same class for pattern matching
25900 # leading keywords which to skip for efficiency when making parenless
25902 my @q = qw( my local our return );
25903 @{is_my_local_our}{@q} = (1) x scalar(@q);
25905 # leading keywords where we should just join one token to form
25908 @{is_use_like}{@q} = (1) x scalar(@q);
25910 # leading token types which may be used to make a container name
25912 @{is_kwU}{@q} = (1) x scalar(@q);
25914 # token types which prevent using leading word as a container name
25916 x / : % . | ^ < = > || >= != *= => !~ == && |= .= -= =~ += <= %= ^= x= ~~ ** << /=
25917 &= // >> ~. &. |. ^.
25918 **= <<= >>= &&= ||= //= <=> !~~ &.= |.= ^.= <<~
25921 @{is_binary_type}{@q} = (1) x scalar(@q);
25923 # token keywords which prevent using leading word as a container name
25924 @q = qw(and or err eq ne cmp);
25925 @is_binary_keyword{@q} = (1) x scalar(@q);
25927 # Some common function calls whose args can be aligned. These do not
25928 # give good alignments if the lengths differ significantly.
25930 'unlike' => 'like',
25932 ##'is_deeply' => 'is', # poor; names lengths too different
25937 sub make_alignment_patterns {
25939 my ( $self, $ibeg, $iend, $ralignment_type_to_go, $alignment_count,
25943 #------------------------------------------------------------------
25944 # This sub creates arrays of vertical alignment info for one output
25946 #------------------------------------------------------------------
25948 # Input parameters:
25949 # $ibeg, $iend - index range of this line in the _to_go arrays
25950 # $ralignment_type_to_go - alignment type of tokens, like '=', if any
25951 # $alignment_count - number of alignment tokens in the line
25952 # $ralignment_hash - this contains all of the alignments for this
25953 # line. It is not yet used but is available for future coding in
25954 # case there is a need to do a preliminary scan of alignment tokens.
25956 # The arrays which are created contain strings that can be tested by
25957 # the vertical aligner to see if consecutive lines can be aligned
25960 # The four arrays are indexed on the vertical
25961 # alignment fields and are:
25962 # @tokens - a list of any vertical alignment tokens for this line.
25963 # These are tokens, such as '=' '&&' '#' etc which
25964 # we want to might align vertically. These are
25965 # decorated with various information such as
25966 # nesting depth to prevent unwanted vertical
25967 # alignment matches.
25968 # @fields - the actual text of the line between the vertical alignment
25970 # @patterns - a modified list of token types, one for each alignment
25971 # field. These should normally each match before alignment is
25972 # allowed, even when the alignment tokens match.
25973 # @field_lengths - the display width of each field
25977 if ( defined($ralignment_hash) ) {
25978 $new_count = keys %{$ralignment_hash};
25980 my $old_count = $alignment_count;
25981 $old_count = 0 unless ($old_count);
25982 if ( $new_count != $old_count ) {
25983 my $K = $K_to_go[$ibeg];
25984 my $rLL = $self->[_rLL_];
25985 my $lnl = $rLL->[$K]->[_LINE_INDEX_];
25987 "alignment hash token count gives count=$new_count but old count is $old_count near line=$lnl\n"
25992 # -------------------------------------
25993 # Shortcut for lines without alignments
25994 # -------------------------------------
25995 if ( !$alignment_count ) {
25997 my $rfield_lengths = [ $summed_lengths_to_go[ $iend + 1 ] -
25998 $summed_lengths_to_go[$ibeg] ];
26001 if ( $ibeg == $iend ) {
26002 $rfields = [ $tokens_to_go[$ibeg] ];
26003 $rpatterns = [ $types_to_go[$ibeg] ];
26007 [ join( EMPTY_STRING, @tokens_to_go[ $ibeg .. $iend ] ) ];
26009 [ join( EMPTY_STRING, @types_to_go[ $ibeg .. $iend ] ) ];
26011 return [ $rtokens, $rfields, $rpatterns, $rfield_lengths ];
26014 my $i_start = $ibeg;
26016 my $i_depth_prev = $i_start;
26017 my $depth_prev = $depth;
26018 my %container_name = ( 0 => EMPTY_STRING );
26023 my @field_lengths = ();
26025 #-------------------------------------------------------------
26026 # Make a container name for any uncontained commas, issue c089
26027 #-------------------------------------------------------------
26028 # This is a generalization of the fix for rt136416 which was a
26029 # specialized patch just for 'use Module' statements.
26030 # We restrict this to semicolon-terminated statements; that way
26031 # we know that the top level commas are not in a list container.
26032 if ( $ibeg == 0 && $iend == $max_index_to_go ) {
26033 my $iterm = $max_index_to_go;
26034 if ( $types_to_go[$iterm] eq '#' ) {
26035 $iterm = $iprev_to_go[$iterm];
26038 # Alignment lines ending like '=> sub {'; fixes issue c093
26039 my $term_type_ok = $types_to_go[$iterm] eq ';';
26041 $tokens_to_go[$iterm] eq '{' && $block_type_to_go[$iterm];
26043 if ( $iterm > $ibeg
26045 && !$is_my_local_our{ $tokens_to_go[$ibeg] }
26046 && $levels_to_go[$ibeg] eq $levels_to_go[$iterm] )
26048 $container_name{'0'} =
26049 make_uncontained_comma_name( $iterm, $ibeg, $iend );
26053 #--------------------------------
26054 # Begin main loop over all tokens
26055 #--------------------------------
26056 my $j = 0; # field index
26058 $patterns[0] = EMPTY_STRING;
26060 for my $i ( $ibeg .. $iend ) {
26062 #-------------------------------------------------------------
26063 # Part 1: keep track of containers balanced on this line only.
26064 #-------------------------------------------------------------
26065 # These are used below to prevent unwanted cross-line alignments.
26066 # Unbalanced containers already avoid aligning across
26067 # container boundaries.
26068 my $type = $types_to_go[$i];
26069 if ( $type_sequence_to_go[$i] ) {
26070 my $token = $tokens_to_go[$i];
26071 if ( $is_opening_token{$token} ) {
26073 # if container is balanced on this line...
26074 my $i_mate = $mate_index_to_go[$i];
26075 if ( $i_mate > $i && $i_mate <= $iend ) {
26076 $i_depth_prev = $i;
26077 $depth_prev = $depth;
26080 # Append the previous token name to make the container name
26081 # more unique. This name will also be given to any commas
26082 # within this container, and it helps avoid undesirable
26083 # alignments of different types of containers.
26085 # Containers beginning with { and [ are given those names
26086 # for uniqueness. That way commas in different containers
26087 # will not match. Here is an example of what this prevents:
26088 # a => [ 1, 2, 3 ],
26089 # b => { b1 => 4, b2 => 5 },
26090 # Here is another example of what we avoid by labeling the
26093 # is_d( [ $a, $a ], [ $b, $c ] );
26094 # is_d( { foo => $a, bar => $a }, { foo => $b, bar => $c } );
26095 # is_d( [ \$a, \$a ], [ \$b, \$c ] );
26098 $token eq '(' ? $self->make_paren_name($i) : $token;
26100 # name cannot be '.', so change to something else if so
26101 if ( $name eq '.' ) { $name = 'dot' }
26103 $container_name{$depth} = "+" . $name;
26105 # Make the container name even more unique if necessary.
26106 # If we are not vertically aligning this opening paren,
26107 # append a character count to avoid bad alignment since
26108 # it usually looks bad to align commas within containers
26109 # for which the opening parens do not align. Here
26110 # is an example very BAD alignment of commas (because
26111 # the atan2 functions are not all aligned):
26113 # $X * $RTYSQP1 * atan2( $X, $RTYSQP1 ) +
26114 # $Y * $RTXSQP1 * atan2( $Y, $RTXSQP1 ) -
26115 # $X * atan2( $X, 1 ) -
26116 # $Y * atan2( $Y, 1 );
26118 # On the other hand, it is usually okay to align commas
26119 # if opening parens align, such as:
26120 # glVertex3d( $cx + $s * $xs, $cy, $z );
26121 # glVertex3d( $cx, $cy + $s * $ys, $z );
26122 # glVertex3d( $cx - $s * $xs, $cy, $z );
26123 # glVertex3d( $cx, $cy - $s * $ys, $z );
26125 # To distinguish between these situations, we append
26126 # the length of the line from the previous matching
26127 # token, or beginning of line, to the function name.
26128 # This will allow the vertical aligner to reject
26129 # undesirable matches.
26131 # if we are not aligning on this paren...
26132 if ( !$ralignment_type_to_go->[$i] ) {
26134 my $len = length_tag( $i, $ibeg, $i_start );
26136 # tack this length onto the container name to try
26137 # to make a unique token name
26138 $container_name{$depth} .= "-" . $len;
26139 } ## end if ( !$ralignment_type_to_go...)
26140 } ## end if ( $i_mate > $i && $i_mate...)
26141 } ## end if ( $is_opening_token...)
26143 elsif ( $is_closing_type{$token} ) {
26144 $i_depth_prev = $i;
26145 $depth_prev = $depth;
26146 $depth-- if $depth > 0;
26148 } ## end if ( $type_sequence_to_go...)
26150 #------------------------------------------------------------
26151 # Part 2: if we find a new synchronization token, we are done
26153 #------------------------------------------------------------
26154 if ( $i > $i_start && $ralignment_type_to_go->[$i] ) {
26156 my $tok = my $raw_tok = $ralignment_type_to_go->[$i];
26158 # map similar items
26159 my $tok_map = $operator_map{$tok};
26160 $tok = $tok_map if ($tok_map);
26162 # make separators in different nesting depths unique
26163 # by appending the nesting depth digit.
26164 if ( $raw_tok ne '#' ) {
26165 $tok .= "$nesting_depth_to_go[$i]";
26168 # also decorate commas with any container name to avoid
26169 # unwanted cross-line alignments.
26170 if ( $raw_tok eq ',' || $raw_tok eq '=>' ) {
26172 # If we are at an opening token which increased depth, we have
26173 # to use the name from the previous depth.
26174 my $depth_last = $i == $i_depth_prev ? $depth_prev : $depth;
26176 ( $depth_last < $depth ? $depth_last : $depth );
26177 if ( $container_name{$depth_p} ) {
26178 $tok .= $container_name{$depth_p};
26182 # Patch to avoid aligning leading and trailing if, unless.
26183 # Mark trailing if, unless statements with container names.
26184 # This makes them different from leading if, unless which
26185 # are not so marked at present. If we ever need to name
26186 # them too, we could use ci to distinguish them.
26187 # Example problem to avoid:
26188 # return ( 2, "DBERROR" )
26189 # if ( $retval == 2 );
26190 # if ( scalar @_ ) {
26191 # my ( $a, $b, $c, $d, $e, $f ) = @_;
26193 if ( $raw_tok eq '(' ) {
26194 if ( $ci_levels_to_go[$ibeg]
26195 && $container_name{$depth} =~ /^\+(if|unless)/ )
26197 $tok .= $container_name{$depth};
26201 # Decorate block braces with block types to avoid
26202 # unwanted alignments such as the following:
26203 # foreach ( @{$routput_array} ) { $fh->print($_) }
26204 # eval { $fh->close() };
26205 if ( $raw_tok eq '{' && $block_type_to_go[$i] ) {
26206 my $block_type = $block_type_to_go[$i];
26208 # map certain related block types to allow
26209 # else blocks to align
26210 $block_type = $block_type_map{$block_type}
26211 if ( defined( $block_type_map{$block_type} ) );
26213 # remove sub names to allow one-line sub braces to align
26214 # regardless of name
26215 if ( $block_type =~ /$SUB_PATTERN/ ) { $block_type = 'sub' }
26217 # allow all control-type blocks to align
26218 if ( $block_type =~ /^[A-Z]+$/ ) { $block_type = 'BEGIN' }
26220 $tok .= $block_type;
26223 # Mark multiple copies of certain tokens with the copy number
26224 # This will allow the aligner to decide if they are matched.
26225 # For now, only do this for equals. For example, the two
26226 # equals on the next line will be labeled '=0' and '=0.2'.
26227 # Later, the '=0.2' will be ignored in alignment because it
26230 # $| = $debug = 1 if $opt_d;
26231 # $full_index = 1 if $opt_i;
26233 if ( $raw_tok eq '=' || $raw_tok eq '=>' ) {
26234 $token_count{$tok}++;
26235 if ( $token_count{$tok} > 1 ) {
26236 $tok .= '.' . $token_count{$tok};
26240 # concatenate the text of the consecutive tokens to form
26243 join( EMPTY_STRING, @tokens_to_go[ $i_start .. $i - 1 ] ) );
26245 push @field_lengths,
26246 $summed_lengths_to_go[$i] - $summed_lengths_to_go[$i_start];
26248 # store the alignment token for this field
26249 push( @tokens, $tok );
26251 # get ready for the next batch
26254 $patterns[$j] = EMPTY_STRING;
26255 } ## end if ( new synchronization token
26257 #-----------------------------------------------
26258 # Part 3: continue accumulating the next pattern
26259 #-----------------------------------------------
26261 # for keywords we have to use the actual text
26262 if ( $type eq 'k' ) {
26264 my $tok_fix = $tokens_to_go[$i];
26266 # but map certain keywords to a common string to allow
26268 $tok_fix = $keyword_map{$tok_fix}
26269 if ( defined( $keyword_map{$tok_fix} ) );
26270 $patterns[$j] .= $tok_fix;
26273 elsif ( $type eq 'b' ) {
26274 $patterns[$j] .= $type;
26277 # Mark most things before arrows as a quote to
26278 # get them to line up. Testfile: mixed.pl.
26280 # handle $type =~ /^[wnC]$/
26281 elsif ( $is_w_n_C{$type} ) {
26283 my $type_fix = $type;
26285 if ( $i < $iend - 1 ) {
26286 my $next_type = $types_to_go[ $i + 1 ];
26287 my $i_next_nonblank =
26288 ( ( $next_type eq 'b' ) ? $i + 2 : $i + 1 );
26290 if ( $types_to_go[$i_next_nonblank] eq '=>' ) {
26293 # Patch to ignore leading minus before words,
26294 # by changing pattern 'mQ' into just 'Q',
26295 # so that we can align things like this:
26296 # Button => "Print letter \"~$_\"",
26297 # -command => [ sub { print "$_[0]\n" }, $_ ],
26298 if ( $patterns[$j] eq 'm' ) {
26299 $patterns[$j] = EMPTY_STRING;
26304 # Convert a bareword within braces into a quote for
26305 # matching. This will allow alignment of expressions like
26307 # local ( $SIG{'INT'} ) = IGNORE;
26308 # local ( $SIG{ALRM} ) = 'POSTMAN';
26312 && $types_to_go[ $i - 1 ] eq 'L'
26313 && $types_to_go[ $i + 1 ] eq 'R' )
26318 # patch to make numbers and quotes align
26319 if ( $type eq 'n' ) { $type_fix = 'Q' }
26321 $patterns[$j] .= $type_fix;
26322 } ## end elsif ( $is_w_n_C{$type} )
26324 # ignore any ! in patterns
26325 elsif ( $type eq '!' ) { }
26329 $patterns[$j] .= $type;
26331 # remove any zero-level name at first fat comma
26332 if ( $depth == 0 && $type eq '=>' ) {
26333 $container_name{$depth} = EMPTY_STRING;
26337 } ## end for my $i ( $ibeg .. $iend)
26339 #---------------------------------------------------------------
26340 # End of main loop .. join text of tokens to make the last field
26341 #---------------------------------------------------------------
26343 join( EMPTY_STRING, @tokens_to_go[ $i_start .. $iend ] ) );
26344 push @field_lengths,
26345 $summed_lengths_to_go[ $iend + 1 ] - $summed_lengths_to_go[$i_start];
26347 return [ \@tokens, \@fields, \@patterns, \@field_lengths ];
26348 } ## end sub make_alignment_patterns
26350 sub make_uncontained_comma_name {
26351 my ( $iterm, $ibeg, $iend ) = @_;
26353 # Make a container name by combining all leading barewords,
26354 # keywords and functions.
26355 my $name = EMPTY_STRING;
26360 for ( $ibeg .. $iterm ) {
26361 my $type = $types_to_go[$_];
26363 if ( $type eq 'b' ) {
26368 my $token = $tokens_to_go[$_];
26370 # Give up if we find an opening paren, binary operator or
26371 # comma within or after the proposed container name.
26373 || $is_binary_type{$type}
26374 || $type eq 'k' && $is_binary_keyword{$token} )
26376 $name = EMPTY_STRING;
26380 # The container name is only built of certain types:
26381 last if ( !$is_kwU{$type} );
26383 # Normally it is made of one word, but two words for 'use'
26384 if ( $count == 0 ) {
26386 && $is_use_like{ $tokens_to_go[$_] } )
26394 elsif ( defined($count_max) && $count >= $count_max ) {
26398 if ( defined( $name_map{$token} ) ) {
26399 $token = $name_map{$token};
26402 $name .= SPACE . $token;
26407 # Require a space after the container name token(s)
26409 && defined($ilast_blank)
26410 && $ilast_blank > $iname_end )
26412 $name = substr( $name, 1 );
26415 } ## end sub make_uncontained_comma_name
26419 my ( $i, $ibeg, $i_start ) = @_;
26421 # Generate a line length to be used as a tag for rejecting bad
26422 # alignments. The tag is the length of the line from the previous
26423 # matching token, or beginning of line, to the function name. This
26424 # will allow the vertical aligner to reject undesirable matches.
26426 # The basic method: sum length from previous alignment
26427 my $len = token_sequence_length( $i_start, $i - 1 );
26429 # Minor patch: do not include the length of any '!'.
26430 # Otherwise, commas in the following line will not
26432 # ok( 20, tapprox( ( pdl 2, 3 ), ( pdl 2, 3 ) ) );
26433 # ok( 21, !tapprox( ( pdl 2, 3 ), ( pdl 2, 4 ) ) );
26434 if ( grep { $_ eq '!' } @types_to_go[ $i_start .. $i - 1 ] ) {
26438 if ( $i_start == $ibeg ) {
26440 # For first token, use distance from start of
26441 # line but subtract off the indentation due to
26442 # level. Otherwise, results could vary with
26445 leading_spaces_to_go($ibeg) -
26446 $levels_to_go[$i_start] * $rOpts_indent_columns;
26448 if ( $len < 0 ) { $len = 0 }
26450 } ## end sub length_tag
26452 } ## end closure make_alignment_patterns
26454 sub make_paren_name {
26455 my ( $self, $i ) = @_;
26457 # The token at index $i is a '('.
26458 # Create an alignment name for it to avoid incorrect alignments.
26460 # Start with the name of the previous nonblank token...
26461 my $name = EMPTY_STRING;
26463 return EMPTY_STRING if ( $im < 0 );
26464 if ( $types_to_go[$im] eq 'b' ) { $im--; }
26465 return EMPTY_STRING if ( $im < 0 );
26466 $name = $tokens_to_go[$im];
26468 # Prepend any sub name to an isolated -> to avoid unwanted alignments
26469 # [test case is test8/penco.pl]
26470 if ( $name eq '->' ) {
26472 if ( $im >= 0 && $types_to_go[$im] ne 'b' ) {
26473 $name = $tokens_to_go[$im] . $name;
26477 # Finally, remove any leading arrows
26478 if ( substr( $name, 0, 2 ) eq '->' ) {
26479 $name = substr( $name, 2 );
26482 } ## end sub make_paren_name
26484 { ## begin closure get_final_indentation
26486 my ( $last_indentation_written, $last_unadjusted_indentation,
26487 $last_leading_token );
26489 sub initialize_get_final_indentation {
26490 $last_indentation_written = 0;
26491 $last_unadjusted_indentation = 0;
26492 $last_leading_token = EMPTY_STRING;
26496 sub get_final_indentation {
26507 $rindentation_list,
26509 $starting_in_quote,
26510 $is_static_block_comment,
26514 #--------------------------------------------------------------
26515 # This routine makes any necessary adjustments to get the final
26516 # indentation of a line in the Formatter.
26517 #--------------------------------------------------------------
26519 # It starts with the basic indentation which has been defined for the
26520 # leading token, and then takes into account any options that the user
26521 # has set regarding special indenting and outdenting.
26523 # This routine has to resolve a number of complex interacting issues,
26525 # 1. The various -cti=n type flags, which contain the desired change in
26526 # indentation for lines ending in commas and semicolons, should be
26528 # 2. qw quotes require special processing and do not fit perfectly
26529 # with normal containers,
26530 # 3. formatting with -wn can complicate things, especially with qw
26532 # 4. formatting with the -lp option is complicated, and does not
26533 # work well with qw quotes and with -wn formatting.
26534 # 5. a number of special situations, such as 'cuddled' formatting.
26535 # 6. This routine is mainly concerned with outdenting closing tokens
26536 # but note that there is some overlap with the functions of sub
26537 # undo_ci, which was processed earlier, so care has to be taken to
26538 # keep them coordinated.
26540 # Find the last code token of this line
26541 my $i_terminal = $iend;
26542 my $terminal_type = $types_to_go[$iend];
26543 if ( $terminal_type eq '#' && $i_terminal > $ibeg ) {
26545 $terminal_type = $types_to_go[$i_terminal];
26546 if ( $terminal_type eq 'b' && $i_terminal > $ibeg ) {
26548 $terminal_type = $types_to_go[$i_terminal];
26552 my $is_outdented_line;
26554 my $type_beg = $types_to_go[$ibeg];
26555 my $token_beg = $tokens_to_go[$ibeg];
26556 my $level_beg = $levels_to_go[$ibeg];
26557 my $block_type_beg = $block_type_to_go[$ibeg];
26558 my $leading_spaces_beg = $leading_spaces_to_go[$ibeg];
26559 my $seqno_beg = $type_sequence_to_go[$ibeg];
26560 my $is_closing_type_beg = $is_closing_type{$type_beg};
26562 # QW INDENTATION PATCH 3:
26563 my $seqno_qw_closing;
26564 if ( $type_beg eq 'q' && $ibeg == 0 ) {
26565 my $KK = $K_to_go[$ibeg];
26566 $seqno_qw_closing =
26567 $self->[_rending_multiline_qw_seqno_by_K_]->{$KK};
26570 my $is_semicolon_terminated = $terminal_type eq ';'
26571 && ( $nesting_depth_to_go[$iend] < $nesting_depth_to_go[$ibeg]
26572 || $seqno_qw_closing );
26574 # NOTE: A future improvement would be to make it semicolon terminated
26575 # even if it does not have a semicolon but is followed by a closing
26576 # block brace. This would undo ci even for something like the
26577 # following, in which the final paren does not have a semicolon because
26578 # it is a possible weld location:
26580 # if ($BOLD_MATH) {
26582 # $labels, $comment,
26583 # join( '', '<B>', &make_math( $mode, '', '', $_ ), '</B>' )
26588 # MOJO patch: Set a flag if this lines begins with ')->'
26589 my $leading_paren_arrow = (
26590 $is_closing_type_beg
26591 && $token_beg eq ')'
26593 ( $ibeg < $i_terminal && $types_to_go[ $ibeg + 1 ] eq '->' )
26594 || ( $ibeg < $i_terminal - 1
26595 && $types_to_go[ $ibeg + 1 ] eq 'b'
26596 && $types_to_go[ $ibeg + 2 ] eq '->' )
26600 #---------------------------------------------------------
26601 # Section 1: set a flag and a default indentation
26603 # Most lines are indented according to the initial token.
26604 # But it is common to outdent to the level just after the
26605 # terminal token in certain cases...
26606 # adjust_indentation flag:
26607 # 0 - do not adjust
26609 # 2 - vertically align with opening token
26611 #---------------------------------------------------------
26613 my $adjust_indentation = 0;
26614 my $default_adjust_indentation = 0;
26616 # Parameters needed for option 2, aligning with opening token:
26618 $opening_indentation, $opening_offset,
26619 $is_leading, $opening_exists
26622 #-------------------------------------
26624 # if line starts with a sequenced item
26625 #-------------------------------------
26626 if ( $seqno_beg || $seqno_qw_closing ) {
26628 # This can be tedious so we let a sub do it
26630 $adjust_indentation,
26631 $default_adjust_indentation,
26632 $opening_indentation,
26637 ) = $self->get_closing_token_indentation(
26643 $rindentation_list,
26646 $is_semicolon_terminated,
26652 #--------------------------------------------------------
26654 # if at ');', '};', '>;', and '];' of a terminal qw quote
26655 #--------------------------------------------------------
26657 substr( $rpatterns->[0], 0, 2 ) eq 'qb'
26658 && substr( $rfields->[0], -1, 1 ) eq ';'
26659 ## $rpatterns->[0] =~ /^qb*;$/
26660 && $rfields->[0] =~ /^([\)\}\]\>]);$/
26663 if ( $closing_token_indentation{$1} == 0 ) {
26664 $adjust_indentation = 1;
26667 $adjust_indentation = 3;
26671 #---------------------------------------------------------
26672 # Section 2: set indentation according to flag set above
26674 # Select the indentation object to define leading
26675 # whitespace. If we are outdenting something like '} } );'
26676 # then we want to use one level below the last token
26677 # ($i_terminal) in order to get it to fully outdent through
26679 #---------------------------------------------------------
26682 my $level_end = $levels_to_go[$iend];
26684 #------------------------------------
26685 # Section 2A: adjust_indentation == 0
26686 # No change in indentation
26687 #------------------------------------
26688 if ( $adjust_indentation == 0 ) {
26689 $indentation = $leading_spaces_beg;
26693 #-------------------------------------------------------------------
26694 # Secton 2B: adjust_indentation == 1
26695 # Change the indentation to be that of a different token on the line
26696 #-------------------------------------------------------------------
26697 elsif ( $adjust_indentation == 1 ) {
26699 # Previously, the indentation of the terminal token was used:
26701 # $indentation = $reduced_spaces_to_go[$i_terminal];
26702 # $lev = $levels_to_go[$i_terminal];
26704 # Generalization for MOJO patch:
26705 # Use the lowest level indentation of the tokens on the line.
26706 # For example, here we can use the indentation of the ending ';':
26707 # } until ($selection > 0 and $selection < 10); # ok to use ';'
26708 # But this will not outdent if we use the terminal indentation:
26709 # )->then( sub { # use indentation of the ->, not the {
26710 # Warning: reduced_spaces_to_go[] may be a reference, do not
26711 # do numerical checks with it
26714 $indentation = $reduced_spaces_to_go[$i_ind];
26715 $lev = $levels_to_go[$i_ind];
26716 while ( $i_ind < $i_terminal ) {
26718 if ( $levels_to_go[$i_ind] < $lev ) {
26719 $indentation = $reduced_spaces_to_go[$i_ind];
26720 $lev = $levels_to_go[$i_ind];
26725 #--------------------------------------------------------------
26726 # Secton 2C: adjust_indentation == 2
26727 # Handle indented closing token which aligns with opening token
26728 #--------------------------------------------------------------
26729 elsif ( $adjust_indentation == 2 ) {
26731 # handle option to align closing token with opening token
26734 # calculate spaces needed to align with opening token
26736 get_spaces($opening_indentation) + $opening_offset;
26738 # Indent less than the previous line.
26740 # Problem: For -lp we don't exactly know what it was if there
26741 # were recoverable spaces sent to the aligner. A good solution
26742 # would be to force a flush of the vertical alignment buffer, so
26743 # that we would know. For now, this rule is used for -lp:
26745 # When the last line did not start with a closing token we will
26746 # be optimistic that the aligner will recover everything wanted.
26748 # This rule will prevent us from breaking a hierarchy of closing
26749 # tokens, and in a worst case will leave a closing paren too far
26750 # indented, but this is better than frequently leaving it not
26752 my $last_spaces = get_spaces($last_indentation_written);
26754 if ( ref($last_indentation_written)
26755 && !$is_closing_token{$last_leading_token} )
26758 get_recoverable_spaces($last_indentation_written);
26761 # reset the indentation to the new space count if it works
26762 # only options are all or none: nothing in-between looks good
26765 my $diff = $last_spaces - $space_count;
26767 $indentation = $space_count;
26771 # We need to fix things ... but there is no good way to do it.
26772 # The best solution is for the user to use a longer maximum
26773 # line length. We could get a smooth variation if we just move
26774 # the paren in using
26775 # $space_count -= ( 1 - $diff );
26776 # But unfortunately this can give a rather unbalanced look.
26778 # For -xlp we currently allow a tolerance of one indentation
26779 # level and then revert to a simpler default. This will jump
26780 # suddenly but keeps a balanced look.
26781 if ( $rOpts_extended_line_up_parentheses
26782 && $diff >= -$rOpts_indent_columns
26783 && $space_count > $leading_spaces_beg )
26785 $indentation = $space_count;
26788 # Otherwise revert to defaults
26789 elsif ( $default_adjust_indentation == 0 ) {
26790 $indentation = $leading_spaces_beg;
26792 elsif ( $default_adjust_indentation == 1 ) {
26793 $indentation = $reduced_spaces_to_go[$i_terminal];
26794 $lev = $levels_to_go[$i_terminal];
26799 #-------------------------------------------------------------
26800 # Secton 2D: adjust_indentation == 3
26801 # Full indentation of closing tokens (-icb and -icp or -cti=2)
26802 #-------------------------------------------------------------
26805 # handle -icb (indented closing code block braces)
26806 # Updated method for indented block braces: indent one full level if
26807 # there is no continuation indentation. This will occur for major
26808 # structures such as sub, if, else, but not for things like map
26811 # Note: only code blocks without continuation indentation are
26812 # handled here (if, else, unless, ..). In the following snippet,
26813 # the terminal brace of the sort block will have continuation
26814 # indentation as shown so it will not be handled by the coding
26815 # here. We would have to undo the continuation indentation to do
26816 # this, but it probably looks ok as is. This is a possible future
26817 # update for semicolon terminated lines.
26819 # if ($sortby eq 'date' or $sortby eq 'size') {
26821 # $file_data{$a}{$sortby} <=> $file_data{$b}{$sortby}
26826 if ( $block_type_beg
26827 && $ci_levels_to_go[$i_terminal] == 0 )
26829 my $spaces = get_spaces( $leading_spaces_to_go[$i_terminal] );
26830 $indentation = $spaces + $rOpts_indent_columns;
26832 # NOTE: for -lp we could create a new indentation object, but
26833 # there is probably no need to do it
26836 # handle -icp and any -icb block braces which fall through above
26837 # test such as the 'sort' block mentioned above.
26840 # There are currently two ways to handle -icp...
26841 # One way is to use the indentation of the previous line:
26842 # $indentation = $last_indentation_written;
26844 # The other way is to use the indentation that the previous line
26845 # would have had if it hadn't been adjusted:
26846 $indentation = $last_unadjusted_indentation;
26848 # Current method: use the minimum of the two. This avoids
26849 # inconsistent indentation.
26850 if ( get_spaces($last_indentation_written) <
26851 get_spaces($indentation) )
26853 $indentation = $last_indentation_written;
26857 # use previous indentation but use own level
26858 # to cause list to be flushed properly
26862 #-------------------------------------------------------------
26863 # Remember indentation except for multi-line quotes, which get
26865 #-------------------------------------------------------------
26866 if ( !( $ibeg == 0 && $starting_in_quote ) ) {
26867 $last_indentation_written = $indentation;
26868 $last_unadjusted_indentation = $leading_spaces_beg;
26869 $last_leading_token = $token_beg;
26871 # Patch to make a line which is the end of a qw quote work with the
26872 # -lp option. Make $token_beg look like a closing token as some
26873 # type even if it is not. This variable will become
26874 # $last_leading_token at the end of this loop. Then, if the -lp
26875 # style is selected, and the next line is also a
26876 # closing token, it will not get more indentation than this line.
26877 # We need to do this because qw quotes (at present) only get
26878 # continuation indentation, not one level of indentation, so we
26879 # need to turn off the -lp indentation.
26881 # ... a picture is worth a thousand words:
26883 # perltidy -wn -gnu (Without this patch):
26885 # $seqio = $gb->get_Stream_by_batch([qw(J00522 AF303112
26889 # perltidy -wn -gnu (With this patch):
26891 # $seqio = $gb->get_Stream_by_batch([qw(J00522 AF303112
26894 if ( $seqno_qw_closing
26895 && ( length($token_beg) > 1 || $token_beg eq '>' ) )
26897 $last_leading_token = ')';
26901 #---------------------------------------------------------------------
26902 # Rule: lines with leading closing tokens should not be outdented more
26903 # than the line which contained the corresponding opening token.
26904 #---------------------------------------------------------------------
26906 # Updated per bug report in alex_bug.pl: we must not
26907 # mess with the indentation of closing logical braces, so
26908 # we must treat something like '} else {' as if it were
26909 # an isolated brace
26910 my $is_isolated_block_brace = $block_type_beg
26911 && ( $i_terminal == $ibeg
26912 || $is_if_elsif_else_unless_while_until_for_foreach{$block_type_beg}
26915 # only do this for a ':; which is aligned with its leading '?'
26916 my $is_unaligned_colon = $type_beg eq ':' && !$is_leading;
26919 defined($opening_indentation)
26920 && !$leading_paren_arrow # MOJO patch
26921 && !$is_isolated_block_brace
26922 && !$is_unaligned_colon
26925 if ( get_spaces($opening_indentation) > get_spaces($indentation) ) {
26926 $indentation = $opening_indentation;
26930 #----------------------------------------------------
26931 # remember the indentation of each line of this batch
26932 #----------------------------------------------------
26933 push @{$rindentation_list}, $indentation;
26935 #---------------------------------------------
26936 # outdent lines with certain leading tokens...
26937 #---------------------------------------------
26940 # must be first word of this batch
26946 # certain leading keywords if requested
26947 $rOpts_outdent_keywords
26948 && $type_beg eq 'k'
26949 && $outdent_keyword{$token_beg}
26951 # or labels if requested
26952 || $rOpts_outdent_labels && $type_beg eq 'J'
26954 # or static block comments if requested
26955 || $is_static_block_comment
26956 && $rOpts_outdent_static_block_comments
26960 my $space_count = leading_spaces_to_go($ibeg);
26961 if ( $space_count > 0 ) {
26962 $space_count -= $rOpts_continuation_indentation;
26963 $is_outdented_line = 1;
26964 if ( $space_count < 0 ) { $space_count = 0 }
26966 # do not promote a spaced static block comment to non-spaced;
26967 # this is not normally necessary but could be for some
26968 # unusual user inputs (such as -ci = -i)
26969 if ( $type_beg eq '#' && $space_count == 0 ) {
26973 $indentation = $space_count;
26983 $is_outdented_line,
26986 } ## end sub get_final_indentation
26988 sub get_closing_token_indentation {
26990 # Determine indentation adjustment for a line with a leading closing
26991 # token - i.e. one of these: ) ] } :
27000 $rindentation_list,
27003 $is_semicolon_terminated,
27008 my $adjust_indentation = 0;
27009 my $default_adjust_indentation = $adjust_indentation;
27010 my $terminal_type = $types_to_go[$i_terminal];
27012 my $type_beg = $types_to_go[$ibeg];
27013 my $token_beg = $tokens_to_go[$ibeg];
27014 my $level_beg = $levels_to_go[$ibeg];
27015 my $block_type_beg = $block_type_to_go[$ibeg];
27016 my $leading_spaces_beg = $leading_spaces_to_go[$ibeg];
27017 my $seqno_beg = $type_sequence_to_go[$ibeg];
27018 my $is_closing_type_beg = $is_closing_type{$type_beg};
27021 $opening_indentation, $opening_offset,
27022 $is_leading, $opening_exists
27025 # Honor any flag to reduce -ci set by the -bbxi=n option
27026 if ( $seqno_beg && $self->[_rwant_reduced_ci_]->{$seqno_beg} ) {
27028 # if this is an opening, it must be alone on the line ...
27029 if ( $is_closing_type{$type_beg} || $ibeg == $i_terminal ) {
27030 $adjust_indentation = 1;
27033 # ... or a single welded unit (fix for b1173)
27034 elsif ($total_weld_count) {
27035 my $K_beg = $K_to_go[$ibeg];
27036 my $Kterm = $K_to_go[$i_terminal];
27037 my $Kterm_test = $self->[_rK_weld_left_]->{$Kterm};
27038 if ( defined($Kterm_test) && $Kterm_test >= $K_beg ) {
27039 $Kterm = $Kterm_test;
27041 if ( $Kterm == $K_beg ) { $adjust_indentation = 1 }
27045 my $ris_bli_container = $self->[_ris_bli_container_];
27046 my $is_bli_beg = $seqno_beg ? $ris_bli_container->{$seqno_beg} : 0;
27048 # Update the $is_bli flag as we go. It is initially 1.
27049 # We note seeing a leading opening brace by setting it to 2.
27050 # If we get to the closing brace without seeing the opening then we
27051 # turn it off. This occurs if the opening brace did not get output
27052 # at the start of a line, so we will then indent the closing brace
27053 # in the default way.
27054 if ( $is_bli_beg && $is_bli_beg == 1 ) {
27055 my $K_opening_container = $self->[_K_opening_container_];
27056 my $K_opening = $K_opening_container->{$seqno_beg};
27057 my $K_beg = $K_to_go[$ibeg];
27058 if ( $K_beg eq $K_opening ) {
27059 $ris_bli_container->{$seqno_beg} = $is_bli_beg = 2;
27061 else { $is_bli_beg = 0 }
27064 # QW PATCH for the combination -lp -wn
27065 # For -lp formatting use $ibeg_weld_fix to get around the problem
27066 # that with -lp type formatting the opening and closing tokens to not
27067 # have sequence numbers.
27068 my $ibeg_weld_fix = $ibeg;
27069 if ( $seqno_qw_closing && $total_weld_count ) {
27070 my $i_plus = $inext_to_go[$ibeg];
27071 if ( $i_plus <= $max_index_to_go ) {
27072 my $K_plus = $K_to_go[$i_plus];
27073 if ( defined( $self->[_rK_weld_left_]->{$K_plus} ) ) {
27074 $ibeg_weld_fix = $i_plus;
27079 # if we are at a closing token of some type..
27080 if ( $is_closing_type_beg || $seqno_qw_closing ) {
27082 my $K_beg = $K_to_go[$ibeg];
27084 # get the indentation of the line containing the corresponding
27087 $opening_indentation, $opening_offset,
27088 $is_leading, $opening_exists
27090 = $self->get_opening_indentation( $ibeg_weld_fix, $ri_first,
27091 $ri_last, $rindentation_list, $seqno_qw_closing );
27093 # Patch for rt144979, part 1. Coordinated with part 2.
27094 # Do not undo ci for a cuddled closing brace control; it
27095 # needs to be treated exactly the same ci as an isolated
27097 my $is_cuddled_closing_brace = $seqno_beg
27098 && $self->[_ris_cuddled_closing_brace_]->{$seqno_beg};
27100 # First set the default behavior:
27103 # default behavior is to outdent closing lines
27104 # of the form: "); }; ]; )->xxx;"
27105 $is_semicolon_terminated
27107 # and 'cuddled parens' of the form: ")->pack(". Bug fix for RT
27108 # #123749]: the TYPES here were incorrectly ')' and '('. The
27109 # corrected TYPES are '}' and '{'. But skip a cuddled block.
27111 $terminal_type eq '{'
27112 && $type_beg eq '}'
27113 && ( $nesting_depth_to_go[$iend] + 1 ==
27114 $nesting_depth_to_go[$ibeg] )
27115 && !$is_cuddled_closing_brace
27118 # remove continuation indentation for any line like
27120 # or without ending '{' and unbalanced, such as
27121 # such as '}->{$operator}'
27125 && ( $types_to_go[$iend] eq '{'
27126 || $levels_to_go[$iend] < $level_beg )
27128 # but not if a cuddled block
27129 && !$is_cuddled_closing_brace
27132 # and when the next line is at a lower indentation level...
27134 # PATCH #1: and only if the style allows undoing continuation
27135 # for all closing token types. We should really wait until
27136 # the indentation of the next line is known and then make
27137 # a decision, but that would require another pass.
27139 # PATCH #2: and not if this token is under -xci control
27140 || ( $level_jump < 0
27141 && !$some_closing_token_indentation
27142 && !$self->[_rseqno_controlling_my_ci_]->{$K_beg} )
27144 # Patch for -wn=2, multiple welded closing tokens
27145 || ( $i_terminal > $ibeg
27146 && $is_closing_type{ $types_to_go[$iend] } )
27148 # Alternate Patch for git #51, isolated closing qw token not
27149 # outdented if no-delete-old-newlines is set. This works, but
27150 # a more general patch elsewhere fixes the real problem: ljump.
27151 # || ( $seqno_qw_closing && $ibeg == $i_terminal )
27155 $adjust_indentation = 1;
27158 # outdent something like '),'
27160 $terminal_type eq ','
27162 # Removed this constraint for -wn
27163 # OLD: allow just one character before the comma
27164 # && $i_terminal == $ibeg + 1
27166 # require LIST environment; otherwise, we may outdent too much -
27167 # this can happen in calls without parentheses (overload.t);
27168 && $self->is_in_list_by_i($i_terminal)
27171 $adjust_indentation = 1;
27174 # undo continuation indentation of a terminal closing token if
27175 # it is the last token before a level decrease. This will allow
27176 # a closing token to line up with its opening counterpart, and
27177 # avoids an indentation jump larger than 1 level.
27178 my $rLL = $self->[_rLL_];
27179 my $Klimit = $self->[_Klimit_];
27180 if ( $i_terminal == $ibeg
27181 && $is_closing_type_beg
27183 && $K_beg < $Klimit )
27185 my $K_plus = $K_beg + 1;
27186 my $type_plus = $rLL->[$K_plus]->[_TYPE_];
27188 if ( $type_plus eq 'b' && $K_plus < $Klimit ) {
27189 $type_plus = $rLL->[ ++$K_plus ]->[_TYPE_];
27192 if ( $type_plus eq '#' && $K_plus < $Klimit ) {
27193 $type_plus = $rLL->[ ++$K_plus ]->[_TYPE_];
27194 if ( $type_plus eq 'b' && $K_plus < $Klimit ) {
27195 $type_plus = $rLL->[ ++$K_plus ]->[_TYPE_];
27198 # Note: we have skipped past just one comment (perhaps a
27199 # side comment). There could be more, and we could easily
27200 # skip past all the rest with the following code, or with a
27201 # while loop. It would be rare to have to do this, and
27202 # those block comments would still be indented, so it would
27203 # to leave them indented. So it seems best to just stop at
27204 # a maximum of one comment.
27205 ##if ($type_plus eq '#') {
27206 ## $K_plus = $self->K_next_code($K_plus);
27210 if ( !$is_bli_beg && defined($K_plus) ) {
27211 my $lev = $level_beg;
27212 my $level_next = $rLL->[$K_plus]->[_LEVEL_];
27214 # and do not undo ci if it was set by the -xci option
27215 $adjust_indentation = 1
27216 if ( $level_next < $lev
27217 && !$self->[_rseqno_controlling_my_ci_]->{$K_beg} );
27220 # Patch for RT #96101, in which closing brace of anonymous subs
27221 # was not outdented. We should look ahead and see if there is
27222 # a level decrease at the next token (i.e., a closing token),
27223 # but right now we do not have that information. For now
27224 # we see if we are in a list, and this works well.
27225 # See test files 'sub*.t' for good test cases.
27226 if ( !$rOpts_indent_closing_brace
27228 && $self->[_ris_asub_block_]->{$seqno_beg}
27229 && $self->is_in_list_by_i($i_terminal) )
27232 $opening_indentation, $opening_offset,
27233 $is_leading, $opening_exists
27235 = $self->get_opening_indentation( $ibeg, $ri_first,
27236 $ri_last, $rindentation_list );
27237 my $indentation = $leading_spaces_beg;
27238 if ( defined($opening_indentation)
27239 && get_spaces($indentation) >
27240 get_spaces($opening_indentation) )
27242 $adjust_indentation = 1;
27247 # YVES patch 1 of 2:
27248 # Undo ci of line with leading closing eval brace,
27249 # but not beyond the indentation of the line with
27250 # the opening brace.
27251 if ( $block_type_beg eq 'eval'
27252 && !ref($leading_spaces_beg)
27253 && !$rOpts_indent_closing_brace )
27256 $opening_indentation, $opening_offset,
27257 $is_leading, $opening_exists
27259 = $self->get_opening_indentation( $ibeg, $ri_first, $ri_last,
27260 $rindentation_list );
27261 my $indentation = $leading_spaces_beg;
27262 if ( defined($opening_indentation)
27263 && get_spaces($indentation) >
27264 get_spaces($opening_indentation) )
27266 $adjust_indentation = 1;
27270 # patch for issue git #40: -bli setting has priority
27271 $adjust_indentation = 0 if ($is_bli_beg);
27273 $default_adjust_indentation = $adjust_indentation;
27275 # Now modify default behavior according to user request:
27276 # handle option to indent non-blocks of the form ); }; ];
27277 # But don't do special indentation to something like ')->pack('
27278 if ( !$block_type_beg ) {
27280 # Note that logical padding has already been applied, so we may
27281 # need to remove some spaces to get a valid hash key.
27282 my $tok = $token_beg;
27283 my $cti = $closing_token_indentation{$tok};
27285 # Fix the value of 'cti' for an isolated non-welded closing qw
27287 if ( $seqno_qw_closing && $ibeg_weld_fix == $ibeg ) {
27289 # A quote delimiter which is not a container will not have
27290 # a cti value defined. In this case use the style of a
27291 # paren. For example
27299 if ( !defined($cti) && length($tok) == 1 ) {
27301 # something other than ')', '}', ']' ; use flag for ')'
27302 $cti = $closing_token_indentation{')'};
27304 # But for now, do not outdent non-container qw
27305 # delimiters because it would would change existing
27307 if ( $tok ne '>' ) { $cti = 3 }
27310 # A non-welded closing qw cannot currently use -cti=1
27311 # because that option requires a sequence number to find
27312 # the opening indentation, and qw quote delimiters are not
27314 if ( defined($cti) && $cti == 1 ) { $cti = 0 }
27317 if ( !defined($cti) ) {
27319 # $cti may not be defined for several reasons.
27320 # -padding may have been applied so the character
27322 # - we may have welded to a closing quote token.
27323 # Here is an example (perltidy -wn):
27324 # __PACKAGE__->load_components( qw(
27328 $adjust_indentation = 0;
27331 elsif ( $cti == 1 ) {
27332 if ( $i_terminal <= $ibeg + 1
27333 || $is_semicolon_terminated )
27335 $adjust_indentation = 2;
27338 $adjust_indentation = 0;
27341 elsif ( $cti == 2 ) {
27342 if ($is_semicolon_terminated) {
27343 $adjust_indentation = 3;
27346 $adjust_indentation = 0;
27349 elsif ( $cti == 3 ) {
27350 $adjust_indentation = 3;
27354 # handle option to indent blocks
27357 $rOpts_indent_closing_brace
27359 $i_terminal == $ibeg # isolated terminal '}'
27360 || $is_semicolon_terminated
27364 $adjust_indentation = 3;
27367 } ## end if ( $is_closing_type_beg || $seqno_qw_closing )
27369 # if line begins with a ':', align it with any
27370 # previous line leading with corresponding ?
27371 elsif ( $type_beg eq ':' ) {
27373 $opening_indentation, $opening_offset,
27374 $is_leading, $opening_exists
27376 = $self->get_opening_indentation( $ibeg, $ri_first, $ri_last,
27377 $rindentation_list );
27378 if ($is_leading) { $adjust_indentation = 2; }
27383 $adjust_indentation,
27384 $default_adjust_indentation,
27385 $opening_indentation,
27392 } ## end closure get_final_indentation
27394 sub get_opening_indentation {
27396 # get the indentation of the line which output the opening token
27397 # corresponding to a given closing token in the current output batch.
27400 # $i_closing - index in this line of a closing token ')' '}' or ']'
27402 # $ri_first - reference to list of the first index $i for each output
27403 # line in this batch
27404 # $ri_last - reference to list of the last index $i for each output line
27406 # $rindentation_list - reference to a list containing the indentation
27407 # used for each line.
27408 # $qw_seqno - optional sequence number to use if normal seqno not defined
27409 # (NOTE: would be more general to just look this up from index i)
27412 # -the indentation of the line which contained the opening token
27413 # which matches the token at index $i_opening
27414 # -and its offset (number of columns) from the start of the line
27416 my ( $self, $i_closing, $ri_first, $ri_last, $rindentation_list, $qw_seqno )
27419 # first, see if the opening token is in the current batch
27420 my $i_opening = $mate_index_to_go[$i_closing];
27421 my ( $indent, $offset, $is_leading, $exists );
27423 if ( defined($i_opening) && $i_opening >= 0 ) {
27425 # it is..look up the indentation
27426 ( $indent, $offset, $is_leading ) =
27427 lookup_opening_indentation( $i_opening, $ri_first, $ri_last,
27428 $rindentation_list );
27431 # if not, it should have been stored in the hash by a previous batch
27433 my $seqno = $type_sequence_to_go[$i_closing];
27434 $seqno = $qw_seqno unless ($seqno);
27435 ( $indent, $offset, $is_leading, $exists ) =
27436 get_saved_opening_indentation($seqno);
27438 return ( $indent, $offset, $is_leading, $exists );
27439 } ## end sub get_opening_indentation
27441 sub examine_vertical_tightness_flags {
27444 # For efficiency, we will set a flag to skip all calls to sub
27445 # 'set_vertical_tightness_flags' if vertical tightness is not possible with
27446 # the user input parameters. If vertical tightness is possible, we will
27447 # simply leave the flag undefined and return.
27449 # Vertical tightness is never possible with --freeze-whitespace
27450 if ($rOpts_freeze_whitespace) {
27451 $self->[_no_vertical_tightness_flags_] = 1;
27455 # This sub is coordinated with sub set_vertical_tightness_flags.
27456 # The Section numbers in the following comments are the sections
27457 # in sub set_vertical_tightness_flags:
27459 # Examine controls for Section 1a:
27460 return if ($rOpts_line_up_parentheses);
27462 foreach my $key ( keys %opening_vertical_tightness ) {
27463 return if ( $opening_vertical_tightness{$key} );
27466 # Examine controls for Section 1b:
27467 foreach my $key ( keys %closing_vertical_tightness ) {
27468 return if ( $closing_vertical_tightness{$key} );
27471 # Examine controls for Section 1c:
27472 foreach my $key ( keys %opening_token_right ) {
27473 return if ( $opening_token_right{$key} );
27476 # Examine controls for Section 1d:
27477 foreach my $key ( keys %stack_opening_token ) {
27478 return if ( $stack_opening_token{$key} );
27480 foreach my $key ( keys %stack_closing_token ) {
27481 return if ( $stack_closing_token{$key} );
27484 # Examine controls for Section 2:
27485 return if ($rOpts_block_brace_vertical_tightness);
27487 # Examine controls for Section 3:
27488 return if ($rOpts_stack_closing_block_brace);
27490 # None of the controls used for vertical tightness are set, so
27491 # we can skip all calls to sub set_vertical_tightness_flags
27492 $self->[_no_vertical_tightness_flags_] = 1;
27496 sub set_vertical_tightness_flags {
27498 my ( $self, $n, $n_last_line, $ibeg, $iend, $ri_first, $ri_last,
27499 $ending_in_quote, $closing_side_comment )
27502 # Define vertical tightness controls for the nth line of a batch.
27503 # Note: do not call this sub for a block comment or if
27504 # $rOpts_freeze_whitespace is set.
27506 # These parameters are passed to the vertical aligner to indicated
27507 # if we should combine this line with the next line to achieve the
27508 # desired vertical tightness. This was previously an array but
27509 # has been converted to a hash:
27514 # 0 _vt_type: 1=opening non-block 2=closing non-block
27515 # 3=opening block brace 4=closing block brace
27517 # 1a _vt_opening_flag: 1=no multiple steps, 2=multiple steps ok
27518 # 1b _vt_closing_flag: spaces of padding to use if closing
27519 # 2 _vt_seqno: sequence number of container
27520 # 3 _vt_valid flag: do not append if this flag is false. Will be
27521 # true if appropriate -vt flag is set. Otherwise, Will be
27522 # made true only for 2 line container in parens with -lp
27523 # 4 _vt_seqno_beg: sequence number of first token of line
27524 # 5 _vt_seqno_end: sequence number of last token of line
27525 # 6 _vt_min_lines: min number of lines for joining opening cache,
27527 # 7 _vt_max_lines: max number of lines for joining opening cache,
27530 # The vertical tightness mechanism can add whitespace, so whitespace can
27531 # continually increase if we allowed it when the -fws flag is set.
27532 # See case b499 for an example.
27534 # Define these values...
27536 my $vt_opening_flag = 0;
27537 my $vt_closing_flag = 0;
27539 my $vt_valid_flag = 0;
27540 my $vt_seqno_beg = 0;
27541 my $vt_seqno_end = 0;
27542 my $vt_min_lines = 0;
27543 my $vt_max_lines = 0;
27545 # Uses these global parameters:
27546 # $rOpts_block_brace_tightness
27547 # $rOpts_block_brace_vertical_tightness
27548 # $rOpts_stack_closing_block_brace
27549 # $rOpts_line_up_parentheses
27550 # %opening_vertical_tightness
27551 # %closing_vertical_tightness
27552 # %opening_token_right
27553 # %stack_closing_token
27554 # %stack_opening_token
27556 #--------------------------------------------------------------
27557 # Vertical Tightness Flags Section 1:
27558 # Handle Lines 1 .. n-1 but not the last line
27559 # For non-BLOCK tokens, we will need to examine the next line
27560 # too, so we won't consider the last line.
27561 #--------------------------------------------------------------
27562 if ( $n < $n_last_line ) {
27564 #--------------------------------------------------------------
27565 # Vertical Tightness Flags Section 1a:
27566 # Look for Type 1, last token of this line is a non-block opening token
27567 #--------------------------------------------------------------
27568 my $ibeg_next = $ri_first->[ $n + 1 ];
27569 my $token_end = $tokens_to_go[$iend];
27570 my $iend_next = $ri_last->[ $n + 1 ];
27573 $type_sequence_to_go[$iend]
27574 && !$block_type_to_go[$iend]
27575 && $is_opening_token{$token_end}
27577 $opening_vertical_tightness{$token_end} > 0
27579 # allow 2-line method call to be closed up
27580 || ( $rOpts_line_up_parentheses
27581 && $token_end eq '('
27582 && $self->[_rlp_object_by_seqno_]
27583 ->{ $type_sequence_to_go[$iend] }
27585 && $types_to_go[ $iend - 1 ] ne 'b' )
27589 # avoid multiple jumps in nesting depth in one line if
27591 my $ovt = $opening_vertical_tightness{$token_end};
27593 # Turn off the -vt flag if the next line ends in a weld.
27594 # This avoids an instability with one-line welds (fixes b1183).
27595 my $type_end_next = $types_to_go[$iend_next];
27597 if ( $self->[_rK_weld_left_]->{ $K_to_go[$iend_next] }
27598 && $is_closing_type{$type_end_next} );
27600 # The flag '_rwant_container_open_' avoids conflict of -bom and -pt=1
27601 # or -pt=2; fixes b1270. See similar patch above for $cvt.
27602 my $seqno = $type_sequence_to_go[$iend];
27604 && $self->[_rwant_container_open_]->{$seqno} )
27609 # The flag '_rmax_vertical_tightness_' avoids welding conflicts.
27610 if ( defined( $self->[_rmax_vertical_tightness_]->{$seqno} ) ) {
27612 min( $ovt, $self->[_rmax_vertical_tightness_]->{$seqno} );
27617 && ( $nesting_depth_to_go[ $iend_next + 1 ] !=
27618 $nesting_depth_to_go[$ibeg_next] )
27622 # If -vt flag has not been set, mark this as invalid
27623 # and aligner will validate it if it sees the closing paren
27625 my $valid_flag = $ovt;
27628 $vt_opening_flag = $ovt;
27629 $vt_seqno = $type_sequence_to_go[$iend];
27630 $vt_valid_flag = $valid_flag;
27634 #--------------------------------------------------------------
27635 # Vertical Tightness Flags Section 1b:
27636 # Look for Type 2, first token of next line is a non-block closing
27637 # token .. and be sure this line does not have a side comment
27638 #--------------------------------------------------------------
27639 my $token_next = $tokens_to_go[$ibeg_next];
27640 if ( $type_sequence_to_go[$ibeg_next]
27641 && !$block_type_to_go[$ibeg_next]
27642 && $is_closing_token{$token_next}
27643 && $types_to_go[$iend] ne '#' ) # for safety, shouldn't happen!
27645 my $cvt = $closing_vertical_tightness{$token_next};
27647 # Avoid conflict of -bom and -pvt=1 or -pvt=2, fixes b977, b1303
27648 # See similar patch above for $ovt.
27649 my $seqno = $type_sequence_to_go[$ibeg_next];
27650 if ( $cvt && $self->[_rwant_container_open_]->{$seqno} ) {
27654 # Implement cvt=3: like cvt=0 for assigned structures, like cvt=1
27655 # otherwise. Added for rt136417.
27657 $cvt = $self->[_ris_assigned_structure_]->{$seqno} ? 0 : 1;
27660 # The unusual combination -pvtc=2 -dws -naws can be unstable.
27661 # This fixes b1282, b1283. This can be moved to set_options.
27663 && $rOpts_delete_old_whitespace
27664 && !$rOpts_add_whitespace )
27669 # Fix for b1379, b1380, b1381, b1382, b1384 part 2,
27670 # instablility with adding and deleting trailing commas:
27671 # Reducing -cvt=2 to =1 fixes stability for -wtc=b in b1379,1380.
27672 # Reducing -cvt>0 to =0 fixes stability for -wtc=b in b1381,1382.
27673 # Reducing -cvt>0 to =0 fixes stability for -wtc=m in b1384
27675 && $self->[_ris_bare_trailing_comma_by_seqno_]->{$seqno} )
27682 # Never append a trailing line like ')->pack(' because it
27683 # will throw off later alignment. So this line must start at a
27684 # deeper level than the next line (fix1 for welding, git #45).
27686 $nesting_depth_to_go[$ibeg_next] >=
27687 $nesting_depth_to_go[ $iend_next + 1 ] + 1
27692 !$self->is_in_list_by_i($ibeg_next)
27696 # allow closing up 2-line method calls
27697 || ( $rOpts_line_up_parentheses
27698 && $token_next eq ')'
27699 && $self->[_rlp_object_by_seqno_]
27700 ->{ $type_sequence_to_go[$ibeg_next] } )
27707 # decide which trailing closing tokens to append..
27709 if ( $cvt == 2 || $iend_next == $ibeg_next ) { $ok = 1 }
27711 my $str = join( EMPTY_STRING,
27712 @types_to_go[ $ibeg_next + 1 .. $ibeg_next + 2 ] );
27714 # append closing token if followed by comment or ';'
27715 # or another closing token (fix2 for welding, git #45)
27716 if ( $str =~ /^b?[\)\]\}R#;]/ ) { $ok = 1 }
27720 my $valid_flag = $cvt;
27724 # Fix for b1187 and b1188: Blinking can occur if we allow
27725 # welded tokens to re-form into one-line blocks during
27726 # vertical alignment when -lp used. So for this case we
27727 # set the minimum number of lines to be 1 instead of 0.
27728 # The maximum should be 1 if -vtc is not used. If -vtc is
27729 # used, we turn the valid
27730 # flag off and set the maximum to 0. This is equivalent to
27731 # using a large number.
27732 my $seqno_ibeg_next = $type_sequence_to_go[$ibeg_next];
27733 if ( $rOpts_line_up_parentheses
27734 && $total_weld_count
27735 && $self->[_rlp_object_by_seqno_]->{$seqno_ibeg_next}
27736 && $self->is_welded_at_seqno($seqno_ibeg_next) )
27739 $max_lines = $cvt ? 0 : 1;
27744 $vt_closing_flag = $tightness{$token_next} == 2 ? 0 : 1;
27745 $vt_seqno = $type_sequence_to_go[$ibeg_next];
27746 $vt_valid_flag = $valid_flag;
27747 $vt_min_lines = $min_lines;
27748 $vt_max_lines = $max_lines;
27753 #--------------------------------------------------------------
27754 # Vertical Tightness Flags Section 1c:
27755 # Implement the Opening Token Right flag (Type 2)..
27756 # If requested, move an isolated trailing opening token to the end of
27757 # the previous line which ended in a comma. We could do this
27758 # in sub recombine_breakpoints but that would cause problems
27759 # with -lp formatting. The problem is that indentation will
27760 # quickly move far to the right in nested expressions. By
27761 # doing it after indentation has been set, we avoid changes
27762 # to the indentation. Actual movement of the token takes place
27763 # in sub valign_output_step_B.
27765 # Note added 4 May 2021: the man page suggests that the -otr flags
27766 # are mainly for opening tokens following commas. But this seems
27767 # to have been generalized long ago to include other situations.
27768 # I checked the coding back to 2012 and it is essentially the same
27769 # as here, so it is best to leave this unchanged for now.
27770 #--------------------------------------------------------------
27772 $opening_token_right{ $tokens_to_go[$ibeg_next] }
27774 # previous line is not opening
27775 # (use -sot to combine with it)
27776 && !$is_opening_token{$token_end}
27778 # previous line ended in one of these
27779 # (add other cases if necessary; '=>' and '.' are not necessary
27780 && !$block_type_to_go[$ibeg_next]
27782 # this is a line with just an opening token
27783 && ( $iend_next == $ibeg_next
27784 || $iend_next == $ibeg_next + 2
27785 && $types_to_go[$iend_next] eq '#' )
27787 # Fix for case b1060 when both -baoo and -otr are set:
27788 # to avoid blinking, honor the -baoo flag over the -otr flag.
27789 && $token_end ne '||' && $token_end ne '&&'
27791 # Keep break after '=' if -lp. Fixes b964 b1040 b1062 b1083 b1089.
27792 # Generalized from '=' to $is_assignment to fix b1375.
27794 $is_assignment{ $types_to_go[$iend] }
27795 && $rOpts_line_up_parentheses
27796 && $self->[_rlp_object_by_seqno_]
27797 ->{ $type_sequence_to_go[$ibeg_next] }
27800 # looks bad if we align vertically with the wrong container
27801 && $tokens_to_go[$ibeg] ne $tokens_to_go[$ibeg_next]
27804 my $spaces = ( $types_to_go[ $ibeg_next - 1 ] eq 'b' ) ? 1 : 0;
27807 $vt_closing_flag = $spaces;
27808 $vt_seqno = $type_sequence_to_go[$ibeg_next];
27809 $vt_valid_flag = 1;
27812 #--------------------------------------------------------------
27813 # Vertical Tightness Flags Section 1d:
27814 # Stacking of opening and closing tokens (Type 2)
27815 #--------------------------------------------------------------
27817 my $token_beg_next = $tokens_to_go[$ibeg_next];
27819 # patch to make something like 'qw(' behave like an opening paren
27821 if ( $types_to_go[$ibeg_next] eq 'q' ) {
27822 if ( $token_beg_next =~ /^qw\s*([\[\(\{])$/ ) {
27823 $token_beg_next = $1;
27827 if ( $is_closing_token{$token_end}
27828 && $is_closing_token{$token_beg_next} )
27831 # avoid instability of combo -bom and -sct; b1179
27832 my $seq_next = $type_sequence_to_go[$ibeg_next];
27833 $stackable = $stack_closing_token{$token_beg_next}
27834 unless ( $block_type_to_go[$ibeg_next]
27835 || $seq_next && $self->[_rwant_container_open_]->{$seq_next} );
27837 elsif ($is_opening_token{$token_end}
27838 && $is_opening_token{$token_beg_next} )
27840 $stackable = $stack_opening_token{$token_beg_next}
27841 unless ( $block_type_to_go[$ibeg_next] )
27842 ; # shouldn't happen; just checking
27847 my $is_semicolon_terminated;
27848 if ( $n + 1 == $n_last_line ) {
27849 my ( $terminal_type, $i_terminal ) =
27850 terminal_type_i( $ibeg_next, $iend_next );
27851 $is_semicolon_terminated = $terminal_type eq ';'
27852 && $nesting_depth_to_go[$iend_next] <
27853 $nesting_depth_to_go[$ibeg_next];
27856 # this must be a line with just an opening token
27857 # or end in a semicolon
27859 $is_semicolon_terminated
27860 || ( $iend_next == $ibeg_next
27861 || $iend_next == $ibeg_next + 2
27862 && $types_to_go[$iend_next] eq '#' )
27865 my $spaces = ( $types_to_go[ $ibeg_next - 1 ] eq 'b' ) ? 1 : 0;
27868 $vt_closing_flag = $spaces;
27869 $vt_seqno = $type_sequence_to_go[$ibeg_next];
27870 $vt_valid_flag = 1;
27876 #--------------------------------------------------------------
27877 # Vertical Tightness Flags Section 2:
27878 # Handle type 3, opening block braces on last line of the batch
27879 # Check for a last line with isolated opening BLOCK curly
27880 #--------------------------------------------------------------
27881 elsif ($rOpts_block_brace_vertical_tightness
27883 && $types_to_go[$iend] eq '{'
27884 && $block_type_to_go[$iend] =~
27885 /$block_brace_vertical_tightness_pattern/ )
27888 $vt_opening_flag = $rOpts_block_brace_vertical_tightness;
27890 $vt_valid_flag = 1;
27893 #--------------------------------------------------------------
27894 # Vertical Tightness Flags Section 3:
27895 # Handle type 4, a closing block brace on the last line of the batch Check
27896 # for a last line with isolated closing BLOCK curly
27897 # Patch: added a check for any new closing side comment which the
27898 # -csc option may generate. If it exists, there will be a side comment
27899 # so we cannot combine with a brace on the next line. This issue
27900 # occurs for the combination -scbb and -csc is used.
27901 #--------------------------------------------------------------
27902 elsif ($rOpts_stack_closing_block_brace
27904 && $block_type_to_go[$iend]
27905 && $types_to_go[$iend] eq '}'
27906 && ( !$closing_side_comment || $n < $n_last_line ) )
27908 my $spaces = $rOpts_block_brace_tightness == 2 ? 0 : 1;
27911 $vt_closing_flag = $spaces;
27912 $vt_seqno = $type_sequence_to_go[$iend];
27913 $vt_valid_flag = 1;
27917 # get the sequence numbers of the ends of this line
27918 $vt_seqno_beg = $type_sequence_to_go[$ibeg];
27919 if ( !$vt_seqno_beg && $types_to_go[$ibeg] eq 'q' ) {
27920 $vt_seqno_beg = $self->get_seqno( $ibeg, $ending_in_quote );
27923 $vt_seqno_end = $type_sequence_to_go[$iend];
27924 if ( !$vt_seqno_end && $types_to_go[$iend] eq 'q' ) {
27925 $vt_seqno_end = $self->get_seqno( $iend, $ending_in_quote );
27928 my $rvertical_tightness_flags = {
27929 _vt_type => $vt_type,
27930 _vt_opening_flag => $vt_opening_flag,
27931 _vt_closing_flag => $vt_closing_flag,
27932 _vt_seqno => $vt_seqno,
27933 _vt_valid_flag => $vt_valid_flag,
27934 _vt_seqno_beg => $vt_seqno_beg,
27935 _vt_seqno_end => $vt_seqno_end,
27936 _vt_min_lines => $vt_min_lines,
27937 _vt_max_lines => $vt_max_lines,
27940 return ($rvertical_tightness_flags);
27941 } ## end sub set_vertical_tightness_flags
27943 ##########################################################
27944 # CODE SECTION 14: Code for creating closing side comments
27945 ##########################################################
27947 { ## begin closure accumulate_csc_text
27949 # These routines are called once per batch when the --closing-side-comments flag
27952 my %block_leading_text;
27953 my %block_opening_line_number;
27954 my $csc_new_statement_ok;
27955 my $csc_last_label;
27956 my %csc_block_label;
27957 my $accumulating_text_for_block;
27958 my $leading_block_text;
27959 my $rleading_block_if_elsif_text;
27960 my $leading_block_text_level;
27961 my $leading_block_text_length_exceeded;
27962 my $leading_block_text_line_length;
27963 my $leading_block_text_line_number;
27965 sub initialize_csc_vars {
27966 %block_leading_text = ();
27967 %block_opening_line_number = ();
27968 $csc_new_statement_ok = 1;
27969 $csc_last_label = EMPTY_STRING;
27970 %csc_block_label = ();
27971 $rleading_block_if_elsif_text = [];
27972 $accumulating_text_for_block = EMPTY_STRING;
27973 reset_block_text_accumulator();
27975 } ## end sub initialize_csc_vars
27977 sub reset_block_text_accumulator {
27979 # save text after 'if' and 'elsif' to append after 'else'
27980 if ($accumulating_text_for_block) {
27982 ## ( $accumulating_text_for_block =~ /^(if|elsif)$/ ) {
27983 if ( $is_if_elsif{$accumulating_text_for_block} ) {
27984 push @{$rleading_block_if_elsif_text}, $leading_block_text;
27987 $accumulating_text_for_block = EMPTY_STRING;
27988 $leading_block_text = EMPTY_STRING;
27989 $leading_block_text_level = 0;
27990 $leading_block_text_length_exceeded = 0;
27991 $leading_block_text_line_number = 0;
27992 $leading_block_text_line_length = 0;
27994 } ## end sub reset_block_text_accumulator
27996 sub set_block_text_accumulator {
27997 my ( $self, $i ) = @_;
27998 $accumulating_text_for_block = $tokens_to_go[$i];
27999 if ( $accumulating_text_for_block !~ /^els/ ) {
28000 $rleading_block_if_elsif_text = [];
28002 $leading_block_text = EMPTY_STRING;
28003 $leading_block_text_level = $levels_to_go[$i];
28004 $leading_block_text_line_number = $self->get_output_line_number();
28005 $leading_block_text_length_exceeded = 0;
28007 # this will contain the column number of the last character
28008 # of the closing side comment
28009 $leading_block_text_line_length =
28010 length($csc_last_label) +
28011 length($accumulating_text_for_block) +
28012 length( $rOpts->{'closing-side-comment-prefix'} ) +
28013 $leading_block_text_level * $rOpts_indent_columns + 3;
28015 } ## end sub set_block_text_accumulator
28017 sub accumulate_block_text {
28018 my ( $self, $i ) = @_;
28020 # accumulate leading text for -csc, ignoring any side comments
28021 if ( $accumulating_text_for_block
28022 && !$leading_block_text_length_exceeded
28023 && $types_to_go[$i] ne '#' )
28026 my $added_length = $token_lengths_to_go[$i];
28027 $added_length += 1 if $i == 0;
28028 my $new_line_length =
28029 $leading_block_text_line_length + $added_length;
28031 # we can add this text if we don't exceed some limits..
28034 # we must not have already exceeded the text length limit
28035 length($leading_block_text) <
28036 $rOpts_closing_side_comment_maximum_text
28039 # the new total line length must be below the line length limit
28040 # or the new length must be below the text length limit
28041 # (ie, we may allow one token to exceed the text length limit)
28044 $maximum_line_length_at_level[$leading_block_text_level]
28046 || length($leading_block_text) + $added_length <
28047 $rOpts_closing_side_comment_maximum_text
28050 # UNLESS: we are adding a closing paren before the brace we seek.
28051 # This is an attempt to avoid situations where the ... to be
28052 # added are longer than the omitted right paren, as in:
28054 # foreach my $item (@a_rather_long_variable_name_here) {
28056 # } ## end foreach my $item (@a_rather_long_variable_name_here...
28059 $tokens_to_go[$i] eq ')'
28062 $i + 1 <= $max_index_to_go
28063 && $block_type_to_go[ $i + 1 ] eq
28064 $accumulating_text_for_block
28066 || ( $i + 2 <= $max_index_to_go
28067 && $block_type_to_go[ $i + 2 ] eq
28068 $accumulating_text_for_block )
28074 # add an extra space at each newline
28075 if ( $i == 0 && $types_to_go[$i] ne 'b' ) {
28076 $leading_block_text .= SPACE;
28079 # add the token text
28080 $leading_block_text .= $tokens_to_go[$i];
28081 $leading_block_text_line_length = $new_line_length;
28084 # show that text was truncated if necessary
28085 elsif ( $types_to_go[$i] ne 'b' ) {
28086 $leading_block_text_length_exceeded = 1;
28087 $leading_block_text .= '...';
28091 } ## end sub accumulate_block_text
28093 sub accumulate_csc_text {
28097 # called once per output buffer when -csc is used. Accumulates
28098 # the text placed after certain closing block braces.
28099 # Defines and returns the following for this buffer:
28101 my $block_leading_text =
28102 EMPTY_STRING; # the leading text of the last '}'
28103 my $rblock_leading_if_elsif_text;
28104 my $i_block_leading_text =
28105 -1; # index of token owning block_leading_text
28106 my $block_line_count = 100; # how many lines the block spans
28107 my $terminal_type = 'b'; # type of last nonblank token
28108 my $i_terminal = 0; # index of last nonblank token
28109 my $terminal_block_type = EMPTY_STRING;
28111 # update most recent statement label
28112 $csc_last_label = EMPTY_STRING unless ($csc_last_label);
28113 if ( $types_to_go[0] eq 'J' ) { $csc_last_label = $tokens_to_go[0] }
28114 my $block_label = $csc_last_label;
28116 # Loop over all tokens of this batch
28117 for my $i ( 0 .. $max_index_to_go ) {
28118 my $type = $types_to_go[$i];
28119 my $block_type = $block_type_to_go[$i];
28120 my $token = $tokens_to_go[$i];
28122 # remember last nonblank token type
28123 if ( $type ne '#' && $type ne 'b' ) {
28124 $terminal_type = $type;
28125 $terminal_block_type = $block_type;
28129 my $type_sequence = $type_sequence_to_go[$i];
28130 if ( $block_type && $type_sequence ) {
28132 if ( $token eq '}' ) {
28134 # restore any leading text saved when we entered this block
28135 if ( defined( $block_leading_text{$type_sequence} ) ) {
28136 ( $block_leading_text, $rblock_leading_if_elsif_text )
28137 = @{ $block_leading_text{$type_sequence} };
28138 $i_block_leading_text = $i;
28139 delete $block_leading_text{$type_sequence};
28140 $rleading_block_if_elsif_text =
28141 $rblock_leading_if_elsif_text;
28144 if ( defined( $csc_block_label{$type_sequence} ) ) {
28145 $block_label = $csc_block_label{$type_sequence};
28146 delete $csc_block_label{$type_sequence};
28149 # if we run into a '}' then we probably started accumulating
28150 # at something like a trailing 'if' clause..no harm done.
28151 if ( $accumulating_text_for_block
28152 && $levels_to_go[$i] <= $leading_block_text_level )
28154 my $lev = $levels_to_go[$i];
28155 reset_block_text_accumulator();
28158 if ( defined( $block_opening_line_number{$type_sequence} ) )
28160 my $output_line_number =
28161 $self->get_output_line_number();
28162 $block_line_count =
28163 $output_line_number -
28164 $block_opening_line_number{$type_sequence} + 1;
28165 delete $block_opening_line_number{$type_sequence};
28169 # Error: block opening line undefined for this line..
28170 # This shouldn't be possible, but it is not a
28171 # significant problem.
28175 elsif ( $token eq '{' ) {
28177 my $line_number = $self->get_output_line_number();
28178 $block_opening_line_number{$type_sequence} = $line_number;
28180 # set a label for this block, except for
28181 # a bare block which already has the label
28182 # A label can only be used on the next {
28183 if ( $block_type =~ /:$/ ) {
28184 $csc_last_label = EMPTY_STRING;
28186 $csc_block_label{$type_sequence} = $csc_last_label;
28187 $csc_last_label = EMPTY_STRING;
28189 if ( $accumulating_text_for_block
28190 && $levels_to_go[$i] == $leading_block_text_level )
28193 if ( $accumulating_text_for_block eq $block_type ) {
28195 # save any leading text before we enter this block
28196 $block_leading_text{$type_sequence} = [
28197 $leading_block_text,
28198 $rleading_block_if_elsif_text
28200 $block_opening_line_number{$type_sequence} =
28201 $leading_block_text_line_number;
28202 reset_block_text_accumulator();
28206 # shouldn't happen, but not a serious error.
28207 # We were accumulating -csc text for block type
28208 # $accumulating_text_for_block and unexpectedly
28209 # encountered a '{' for block type $block_type.
28216 && $csc_new_statement_ok
28217 && $is_if_elsif_else_unless_while_until_for_foreach{$token}
28218 && $token =~ /$closing_side_comment_list_pattern/ )
28220 $self->set_block_text_accumulator($i);
28224 # note: ignoring type 'q' because of tricks being played
28225 # with 'q' for hanging side comments
28226 if ( $type ne 'b' && $type ne '#' && $type ne 'q' ) {
28227 $csc_new_statement_ok =
28228 ( $block_type || $type eq 'J' || $type eq ';' );
28231 && $accumulating_text_for_block
28232 && $levels_to_go[$i] == $leading_block_text_level )
28234 reset_block_text_accumulator();
28237 $self->accumulate_block_text($i);
28242 # Treat an 'else' block specially by adding preceding 'if' and
28243 # 'elsif' text. Otherwise, the 'end else' is not helpful,
28244 # especially for cuddled-else formatting.
28245 if ( $terminal_block_type =~ /^els/ && $rblock_leading_if_elsif_text ) {
28246 $block_leading_text =
28247 $self->make_else_csc_text( $i_terminal, $terminal_block_type,
28248 $block_leading_text, $rblock_leading_if_elsif_text );
28251 # if this line ends in a label then remember it for the next pass
28252 $csc_last_label = EMPTY_STRING;
28253 if ( $terminal_type eq 'J' ) {
28254 $csc_last_label = $tokens_to_go[$i_terminal];
28257 return ( $terminal_type, $i_terminal, $i_block_leading_text,
28258 $block_leading_text, $block_line_count, $block_label );
28259 } ## end sub accumulate_csc_text
28261 sub make_else_csc_text {
28263 # create additional -csc text for an 'else' and optionally 'elsif',
28264 # depending on the value of switch
28266 # = 0 add 'if' text to trailing else
28267 # = 1 same as 0 plus:
28268 # add 'if' to 'elsif's if can fit in line length
28269 # add last 'elsif' to trailing else if can fit in one line
28270 # = 2 same as 1 but do not check if exceed line length
28272 # $rif_elsif_text = a reference to a list of all previous closing
28273 # side comments created for this if block
28275 my ( $self, $i_terminal, $block_type, $block_leading_text,
28278 my $csc_text = $block_leading_text;
28280 if ( $block_type eq 'elsif'
28281 && $rOpts_closing_side_comment_else_flag == 0 )
28286 my $count = @{$rif_elsif_text};
28287 return $csc_text unless ($count);
28289 my $if_text = '[ if' . $rif_elsif_text->[0];
28291 # always show the leading 'if' text on 'else'
28292 if ( $block_type eq 'else' ) {
28293 $csc_text .= $if_text;
28296 # see if that's all
28297 if ( $rOpts_closing_side_comment_else_flag == 0 ) {
28301 my $last_elsif_text = EMPTY_STRING;
28302 if ( $count > 1 ) {
28303 $last_elsif_text = ' [elsif' . $rif_elsif_text->[ $count - 1 ];
28304 if ( $count > 2 ) { $last_elsif_text = ' [...' . $last_elsif_text; }
28307 # tentatively append one more item
28308 my $saved_text = $csc_text;
28309 if ( $block_type eq 'else' ) {
28310 $csc_text .= $last_elsif_text;
28313 $csc_text .= SPACE . $if_text;
28316 # all done if no length checks requested
28317 if ( $rOpts_closing_side_comment_else_flag == 2 ) {
28321 # undo it if line length exceeded
28323 length($csc_text) +
28324 length($block_type) +
28325 length( $rOpts->{'closing-side-comment-prefix'} ) +
28326 $levels_to_go[$i_terminal] * $rOpts_indent_columns + 3;
28328 $length > $maximum_line_length_at_level[$leading_block_text_level] )
28330 $csc_text = $saved_text;
28333 } ## end sub make_else_csc_text
28334 } ## end closure accumulate_csc_text
28336 { ## begin closure balance_csc_text
28338 # Some additional routines for handling the --closing-side-comments option
28353 sub balance_csc_text {
28355 # Append characters to balance a closing side comment so that editors
28356 # such as vim can correctly jump through code.
28358 # input = ## end foreach my $foo ( sort { $b ...
28359 # output = ## end foreach my $foo ( sort { $b ...})
28361 # NOTE: This routine does not currently filter out structures within
28362 # quoted text because the bounce algorithms in text editors do not
28363 # necessarily do this either (a version of vim was checked and
28364 # did not do this).
28366 # Some complex examples which will cause trouble for some editors:
28367 # while ( $mask_string =~ /\{[^{]*?\}/g ) {
28368 # if ( $mask_str =~ /\}\s*els[^\{\}]+\{$/ ) {
28369 # if ( $1 eq '{' ) {
28370 # test file test1/braces.pl has many such examples.
28374 # loop to examine characters one-by-one, RIGHT to LEFT and
28375 # build a balancing ending, LEFT to RIGHT.
28376 foreach my $pos ( reverse( 0 .. length($csc) - 1 ) ) {
28378 my $char = substr( $csc, $pos, 1 );
28380 # ignore everything except structural characters
28381 next unless ( $matching_char{$char} );
28383 # pop most recently appended character
28384 my $top = chop($csc);
28386 # push it back plus the mate to the newest character
28387 # unless they balance each other.
28388 $csc = $csc . $top . $matching_char{$char} unless $top eq $char;
28391 # return the balanced string
28393 } ## end sub balance_csc_text
28394 } ## end closure balance_csc_text
28396 sub add_closing_side_comment {
28398 my ( $self, $ri_first, $ri_last ) = @_;
28399 my $rLL = $self->[_rLL_];
28401 # add closing side comments after closing block braces if -csc used
28402 my ( $closing_side_comment, $cscw_block_comment );
28404 #---------------------------------------------------------------
28405 # Step 1: loop through all tokens of this line to accumulate
28406 # the text needed to create the closing side comments. Also see
28407 # how the line ends.
28408 #---------------------------------------------------------------
28410 my ( $terminal_type, $i_terminal, $i_block_leading_text,
28411 $block_leading_text, $block_line_count, $block_label )
28412 = $self->accumulate_csc_text();
28414 #---------------------------------------------------------------
28415 # Step 2: make the closing side comment if this ends a block
28416 #---------------------------------------------------------------
28417 my $have_side_comment = $types_to_go[$max_index_to_go] eq '#';
28419 # if this line might end in a block closure..
28421 $terminal_type eq '}'
28423 # Fix 1 for c091, this is only for blocks
28424 && $block_type_to_go[$i_terminal]
28429 # the block is long enough
28430 ( $block_line_count >= $rOpts->{'closing-side-comment-interval'} )
28432 # or there is an existing comment to check
28433 || ( $have_side_comment
28434 && $rOpts->{'closing-side-comment-warnings'} )
28437 # .. and if this is one of the types of interest
28438 && $block_type_to_go[$i_terminal] =~
28439 /$closing_side_comment_list_pattern/
28441 # .. but not an anonymous sub
28442 # These are not normally of interest, and their closing braces are
28443 # often followed by commas or semicolons anyway. This also avoids
28444 # possible erratic output due to line numbering inconsistencies
28445 # in the cases where their closing braces terminate a line.
28446 && $block_type_to_go[$i_terminal] ne 'sub'
28448 # ..and the corresponding opening brace must is not in this batch
28449 # (because we do not need to tag one-line blocks, although this
28450 # should also be caught with a positive -csci value)
28451 && $mate_index_to_go[$i_terminal] < 0
28456 # this is the last token (line doesn't have a side comment)
28457 !$have_side_comment
28459 # or the old side comment is a closing side comment
28460 || $tokens_to_go[$max_index_to_go] =~
28461 /$closing_side_comment_prefix_pattern/
28466 # then make the closing side comment text
28467 if ($block_label) { $block_label .= SPACE }
28469 "$rOpts->{'closing-side-comment-prefix'} $block_label$block_type_to_go[$i_terminal]";
28471 # append any extra descriptive text collected above
28472 if ( $i_block_leading_text == $i_terminal ) {
28473 $token .= $block_leading_text;
28476 $token = balance_csc_text($token)
28477 if $rOpts->{'closing-side-comments-balanced'};
28479 $token =~ s/\s*$//; # trim any trailing whitespace
28481 # handle case of existing closing side comment
28482 if ($have_side_comment) {
28484 # warn if requested and tokens differ significantly
28485 if ( $rOpts->{'closing-side-comment-warnings'} ) {
28486 my $old_csc = $tokens_to_go[$max_index_to_go];
28487 my $new_csc = $token;
28488 $new_csc =~ s/\s+//g; # trim all whitespace
28489 $old_csc =~ s/\s+//g; # trim all whitespace
28490 $new_csc =~ s/[\]\)\}\s]*$//; # trim trailing structures
28491 $old_csc =~ s/[\]\)\}\s]*$//; # trim trailing structures
28492 $new_csc =~ s/(\.\.\.)$//; # trim trailing '...'
28493 my $new_trailing_dots = $1;
28494 $old_csc =~ s/(\.\.\.)\s*$//; # trim trailing '...'
28496 # Patch to handle multiple closing side comments at
28497 # else and elsif's. These have become too complicated
28498 # to check, so if we see an indication of
28499 # '[ if' or '[ # elsif', then assume they were made
28501 if ( $block_type_to_go[$i_terminal] eq 'else' ) {
28502 if ( $old_csc =~ /\[\s*elsif/ ) { $old_csc = $new_csc }
28504 elsif ( $block_type_to_go[$i_terminal] eq 'elsif' ) {
28505 if ( $old_csc =~ /\[\s*if/ ) { $old_csc = $new_csc }
28508 # if old comment is contained in new comment,
28509 # only compare the common part.
28510 if ( length($new_csc) > length($old_csc) ) {
28511 $new_csc = substr( $new_csc, 0, length($old_csc) );
28514 # if the new comment is shorter and has been limited,
28515 # only compare the common part.
28516 if ( length($new_csc) < length($old_csc)
28517 && $new_trailing_dots )
28519 $old_csc = substr( $old_csc, 0, length($new_csc) );
28522 # any remaining difference?
28523 if ( $new_csc ne $old_csc ) {
28525 # just leave the old comment if we are below the threshold
28526 # for creating side comments
28527 if ( $block_line_count <
28528 $rOpts->{'closing-side-comment-interval'} )
28533 # otherwise we'll make a note of it
28537 "perltidy -cscw replaced: $tokens_to_go[$max_index_to_go]\n"
28540 # save the old side comment in a new trailing block
28542 my $timestamp = EMPTY_STRING;
28543 if ( $rOpts->{'timestamp'} ) {
28544 my ( $day, $month, $year ) = (localtime)[ 3, 4, 5 ];
28547 $timestamp = "$year-$month-$day";
28549 $cscw_block_comment =
28550 "## perltidy -cscw $timestamp: $tokens_to_go[$max_index_to_go]";
28551 ## "## perltidy -cscw $year-$month-$day: $tokens_to_go[$max_index_to_go]";
28555 # No differences.. we can safely delete old comment if we
28556 # are below the threshold
28557 elsif ( $block_line_count <
28558 $rOpts->{'closing-side-comment-interval'} )
28560 # Since the line breaks have already been set, we have
28561 # to remove the token from the _to_go array and also
28562 # from the line range (this fixes issue c081).
28563 # Note that we can only get here if -cscw has been set
28564 # because otherwise the old comment is already deleted.
28566 my $ibeg = $ri_first->[-1];
28567 my $iend = $ri_last->[-1];
28569 && $iend == $max_index_to_go
28570 && $types_to_go[$max_index_to_go] eq '#' )
28573 $max_index_to_go--;
28575 && $types_to_go[$max_index_to_go] eq 'b' )
28578 $max_index_to_go--;
28580 $ri_last->[-1] = $iend;
28585 # switch to the new csc (unless we deleted it!)
28588 my $len_tok = length($token); # NOTE: length no longer important
28590 $len_tok - $token_lengths_to_go[$max_index_to_go];
28592 $tokens_to_go[$max_index_to_go] = $token;
28593 $token_lengths_to_go[$max_index_to_go] = $len_tok;
28594 my $K = $K_to_go[$max_index_to_go];
28595 $rLL->[$K]->[_TOKEN_] = $token;
28596 $rLL->[$K]->[_TOKEN_LENGTH_] = $len_tok;
28597 $summed_lengths_to_go[ $max_index_to_go + 1 ] += $added_len;
28601 # handle case of NO existing closing side comment
28604 # To avoid inserting a new token in the token arrays, we
28605 # will just return the new side comment so that it can be
28606 # inserted just before it is needed in the call to the
28607 # vertical aligner.
28608 $closing_side_comment = $token;
28611 return ( $closing_side_comment, $cscw_block_comment );
28612 } ## end sub add_closing_side_comment
28614 ############################
28615 # CODE SECTION 15: Summarize
28616 ############################
28620 # This is the last routine called when a file is formatted.
28621 # Flush buffer and write any informative messages
28622 my ( $self, $severe_error ) = @_;
28625 my $file_writer_object = $self->[_file_writer_object_];
28626 $file_writer_object->decrement_output_line_number()
28627 ; # fix up line number since it was incremented
28628 we_are_at_the_last_line();
28630 my $max_depth = $self->[_maximum_BLOCK_level_];
28631 my $at_line = $self->[_maximum_BLOCK_level_at_line_];
28632 write_logfile_entry(
28633 "Maximum leading structural depth is $max_depth in input at line $at_line\n"
28636 my $added_semicolon_count = $self->[_added_semicolon_count_];
28637 my $first_added_semicolon_at = $self->[_first_added_semicolon_at_];
28638 my $last_added_semicolon_at = $self->[_last_added_semicolon_at_];
28640 if ( $added_semicolon_count > 0 ) {
28641 my $first = ( $added_semicolon_count > 1 ) ? "First" : EMPTY_STRING;
28643 ( $added_semicolon_count > 1 ) ? "semicolons were" : "semicolon was";
28644 write_logfile_entry("$added_semicolon_count $what added:\n");
28645 write_logfile_entry(
28646 " $first at input line $first_added_semicolon_at\n");
28648 if ( $added_semicolon_count > 1 ) {
28649 write_logfile_entry(
28650 " Last at input line $last_added_semicolon_at\n");
28652 write_logfile_entry(" (Use -nasc to prevent semicolon addition)\n");
28653 write_logfile_entry("\n");
28656 my $deleted_semicolon_count = $self->[_deleted_semicolon_count_];
28657 my $first_deleted_semicolon_at = $self->[_first_deleted_semicolon_at_];
28658 my $last_deleted_semicolon_at = $self->[_last_deleted_semicolon_at_];
28659 if ( $deleted_semicolon_count > 0 ) {
28660 my $first = ( $deleted_semicolon_count > 1 ) ? "First" : EMPTY_STRING;
28662 ( $deleted_semicolon_count > 1 )
28663 ? "semicolons were"
28665 write_logfile_entry(
28666 "$deleted_semicolon_count unnecessary $what deleted:\n");
28667 write_logfile_entry(
28668 " $first at input line $first_deleted_semicolon_at\n");
28670 if ( $deleted_semicolon_count > 1 ) {
28671 write_logfile_entry(
28672 " Last at input line $last_deleted_semicolon_at\n");
28674 write_logfile_entry(" (Use -ndsm to prevent semicolon deletion)\n");
28675 write_logfile_entry("\n");
28678 my $embedded_tab_count = $self->[_embedded_tab_count_];
28679 my $first_embedded_tab_at = $self->[_first_embedded_tab_at_];
28680 my $last_embedded_tab_at = $self->[_last_embedded_tab_at_];
28681 if ( $embedded_tab_count > 0 ) {
28682 my $first = ( $embedded_tab_count > 1 ) ? "First" : EMPTY_STRING;
28684 ( $embedded_tab_count > 1 )
28685 ? "quotes or patterns"
28686 : "quote or pattern";
28687 write_logfile_entry("$embedded_tab_count $what had embedded tabs:\n");
28688 write_logfile_entry(
28689 "This means the display of this script could vary with device or software\n"
28691 write_logfile_entry(" $first at input line $first_embedded_tab_at\n");
28693 if ( $embedded_tab_count > 1 ) {
28694 write_logfile_entry(
28695 " Last at input line $last_embedded_tab_at\n");
28697 write_logfile_entry("\n");
28700 my $first_tabbing_disagreement = $self->[_first_tabbing_disagreement_];
28701 my $last_tabbing_disagreement = $self->[_last_tabbing_disagreement_];
28702 my $tabbing_disagreement_count = $self->[_tabbing_disagreement_count_];
28703 my $in_tabbing_disagreement = $self->[_in_tabbing_disagreement_];
28705 if ($first_tabbing_disagreement) {
28706 write_logfile_entry(
28707 "First indentation disagreement seen at input line $first_tabbing_disagreement\n"
28711 my $first_btd = $self->[_first_brace_tabbing_disagreement_];
28714 "First closing brace indentation disagreement started at input line $first_btd\n";
28715 write_logfile_entry($msg);
28717 # leave a hint in the .ERR file if there was a brace error
28718 if ( get_saw_brace_error() ) { warning("NOTE: $msg") }
28721 my $in_btd = $self->[_in_brace_tabbing_disagreement_];
28724 "Ending with brace indentation disagreement which started at input line $in_btd\n";
28725 write_logfile_entry($msg);
28727 # leave a hint in the .ERR file if there was a brace error
28728 if ( get_saw_brace_error() ) { warning("NOTE: $msg") }
28731 if ($in_tabbing_disagreement) {
28733 "Ending with indentation disagreement which started at input line $in_tabbing_disagreement\n";
28734 write_logfile_entry($msg);
28738 if ($last_tabbing_disagreement) {
28740 write_logfile_entry(
28741 "Last indentation disagreement seen at input line $last_tabbing_disagreement\n"
28745 write_logfile_entry("No indentation disagreement seen\n");
28749 if ($first_tabbing_disagreement) {
28750 write_logfile_entry(
28751 "Note: Indentation disagreement detection is not accurate for outdenting and -lp.\n"
28754 write_logfile_entry("\n");
28756 my $vao = $self->[_vertical_aligner_object_];
28757 $vao->report_anything_unusual();
28759 $file_writer_object->report_line_length_errors();
28761 # Define the formatter self-check for convergence.
28762 $self->[_converged_] =
28764 || $file_writer_object->get_convergence_check()
28765 || $rOpts->{'indent-only'};
28768 } ## end sub wrapup
28770 } ## end package Perl::Tidy::Formatter