1 ####################################################################
3 # The Perl::Tidy::Formatter package adds indentation, whitespace, and
4 # line breaks to the token stream
6 #####################################################################
9 # CODE SECTION 1: Preliminary code, global definitions and sub new
11 # CODE SECTION 2: Some Basic Utilities
12 # CODE SECTION 3: Check and process options
14 # CODE SECTION 4: Receive lines from the tokenizer
16 # CODE SECTION 5: Pre-process the entire file
17 # sub finish_formatting
18 # CODE SECTION 6: Process line-by-line
19 # sub process_all_lines
20 # CODE SECTION 7: Process lines of code
21 # process_line_of_CODE
22 # CODE SECTION 8: Utilities for setting breakpoints
23 # sub set_forced_breakpoint
24 # CODE SECTION 9: Process batches of code
25 # sub grind_batch_of_CODE
26 # CODE SECTION 10: Code to break long statements
27 # sub break_long_lines
28 # CODE SECTION 11: Code to break long lists
30 # CODE SECTION 12: Code for setting indentation
31 # CODE SECTION 13: Preparing batch of lines for vertical alignment
32 # sub convey_batch_to_vertical_aligner
33 # CODE SECTION 14: Code for creating closing side comments
34 # sub add_closing_side_comment
35 # CODE SECTION 15: Summarize
38 #######################################################################
39 # CODE SECTION 1: Preliminary code and global definitions up to sub new
40 #######################################################################
42 package Perl::Tidy::Formatter;
46 # DEVEL_MODE gets switched on during automated testing for extra checking
47 use constant DEVEL_MODE => 0;
48 use constant EMPTY_STRING => q{};
49 use constant SPACE => q{ };
51 { #<<< A non-indenting brace to contain all lexical variables
54 use English qw( -no_match_vars );
55 use List::Util qw( min max ); # min, max are in Perl 5.8
56 our $VERSION = '20230309';
58 # The Tokenizer will be loaded with the Formatter
59 ##use Perl::Tidy::Tokenizer; # for is_keyword()
63 # Catch any undefined sub calls so that we are sure to get
64 # some diagnostic information. This sub should never be called
65 # except for a programming error.
67 return if ( $AUTOLOAD =~ /\bDESTROY$/ );
68 my ( $pkg, $fname, $lno ) = caller();
69 my $my_package = __PACKAGE__;
71 ======================================================================
72 Error detected in package '$my_package', version $VERSION
73 Received unexpected AUTOLOAD call for sub '$AUTOLOAD'
74 Called from package: '$pkg'
75 Called from File '$fname' at line '$lno'
76 This error is probably due to a recent programming change
77 ======================================================================
84 $self->_decrement_count();
90 Perl::Tidy::Die($msg);
91 croak "unexpected return from Perl::Tidy::Die";
96 Perl::Tidy::Warn($msg);
103 # This routine is called for errors that really should not occur
104 # except if there has been a bug introduced by a recent program change.
105 # Please add comments at calls to Fault to explain why the call
106 # should not occur, and where to look to fix it.
107 my ( $package0, $filename0, $line0, $subroutine0 ) = caller(0);
108 my ( $package1, $filename1, $line1, $subroutine1 ) = caller(1);
109 my ( $package2, $filename2, $line2, $subroutine2 ) = caller(2);
110 my $pkg = __PACKAGE__;
112 my $input_stream_name = get_input_stream_name();
115 ==============================================================================
116 While operating on input stream with name: '$input_stream_name'
117 A fault was detected at line $line0 of sub '$subroutine1'
119 which was called from line $line1 of sub '$subroutine2'
121 This is probably an error introduced by a recent programming change.
122 $pkg reports VERSION='$VERSION'.
123 ==============================================================================
126 # We shouldn't get here, but this return is to keep Perl-Critic from
134 # This is the same as Fault except that it calls Warn instead of Die
136 my ( $package0, $filename0, $line0, $subroutine0 ) = caller(0);
137 my ( $package1, $filename1, $line1, $subroutine1 ) = caller(1);
138 my ( $package2, $filename2, $line2, $subroutine2 ) = caller(2);
139 my $input_stream_name = get_input_stream_name();
142 ==============================================================================
143 While operating on input stream with name: '$input_stream_name'
144 A fault was detected at line $line0 of sub '$subroutine1'
146 which was called from line $line1 of sub '$subroutine2'
148 This is probably an error introduced by a recent programming change.
149 Perl::Tidy::Formatter.pm reports VERSION='$VERSION'.
150 ==============================================================================
154 } ## end sub Fault_Warn
158 Perl::Tidy::Exit($msg);
159 croak "unexpected return from Perl::Tidy::Exit";
162 # Global variables ...
165 #-----------------------------------------------------------------
166 # Section 1: Global variables which are either always constant or
167 # are constant after being configured by user-supplied
168 # parameters. They remain constant as a file is being processed.
169 #-----------------------------------------------------------------
171 # INITIALIZER: sub check_options
174 # short-cut option variables
175 # INITIALIZER: sub initialize_global_option_vars
177 $rOpts_add_whitespace,
178 $rOpts_add_trailing_commas,
179 $rOpts_blank_lines_after_opening_block,
180 $rOpts_block_brace_tightness,
181 $rOpts_block_brace_vertical_tightness,
182 $rOpts_brace_follower_vertical_tightness,
183 $rOpts_break_after_labels,
184 $rOpts_break_at_old_attribute_breakpoints,
185 $rOpts_break_at_old_comma_breakpoints,
186 $rOpts_break_at_old_keyword_breakpoints,
187 $rOpts_break_at_old_logical_breakpoints,
188 $rOpts_break_at_old_semicolon_breakpoints,
189 $rOpts_break_at_old_ternary_breakpoints,
190 $rOpts_break_open_compact_parens,
191 $rOpts_closing_side_comments,
192 $rOpts_closing_side_comment_else_flag,
193 $rOpts_closing_side_comment_maximum_text,
194 $rOpts_comma_arrow_breakpoints,
195 $rOpts_continuation_indentation,
196 $rOpts_cuddled_paren_brace,
197 $rOpts_delete_closing_side_comments,
198 $rOpts_delete_old_whitespace,
199 $rOpts_delete_side_comments,
200 $rOpts_delete_trailing_commas,
201 $rOpts_delete_weld_interfering_commas,
202 $rOpts_extended_continuation_indentation,
203 $rOpts_format_skipping,
204 $rOpts_freeze_whitespace,
205 $rOpts_function_paren_vertical_alignment,
206 $rOpts_fuzzy_line_length,
207 $rOpts_ignore_old_breakpoints,
208 $rOpts_ignore_side_comment_lengths,
209 $rOpts_indent_closing_brace,
210 $rOpts_indent_columns,
212 $rOpts_keep_interior_semicolons,
213 $rOpts_line_up_parentheses,
214 $rOpts_logical_padding,
215 $rOpts_maximum_consecutive_blank_lines,
216 $rOpts_maximum_fields_per_table,
217 $rOpts_maximum_line_length,
218 $rOpts_one_line_block_semicolons,
219 $rOpts_opening_brace_always_on_right,
220 $rOpts_outdent_keywords,
221 $rOpts_outdent_labels,
222 $rOpts_outdent_long_comments,
223 $rOpts_outdent_long_quotes,
224 $rOpts_outdent_static_block_comments,
226 $rOpts_short_concatenation_item_length,
227 $rOpts_space_prototype_paren,
228 $rOpts_stack_closing_block_brace,
229 $rOpts_static_block_comments,
230 $rOpts_tee_block_comments,
232 $rOpts_tee_side_comments,
233 $rOpts_variable_maximum_line_length,
235 $rOpts_valign_side_comments,
236 $rOpts_whitespace_cycle,
237 $rOpts_extended_line_up_parentheses,
240 # INITIALIZER: BEGIN block
243 %is_if_unless_and_or_last_next_redo_return,
244 %is_if_elsif_else_unless_while_until_for_foreach,
245 %is_if_unless_while_until_for_foreach,
246 %is_last_next_redo_return,
250 %is_if_unless_elsif_else,
254 %is_block_without_semicolon,
255 %ok_to_add_semicolon_for_block_type,
261 %is_equal_or_fat_comma,
263 %is_opening_sequence_token,
264 %is_closing_sequence_token,
266 %is_container_label_type,
267 %is_die_confess_croak_warn,
269 %is_soft_keep_break_type,
270 %is_indirect_object_taker,
273 %is_anon_sub_brace_follower,
274 %is_anon_sub_1_brace_follower,
275 %is_other_brace_follower,
277 # INITIALIZER: sub check_options
278 $controlled_comma_style,
279 %keep_break_before_type,
280 %keep_break_after_type,
282 %keyword_paren_inner_tightness,
283 %container_indentation_options,
285 %line_up_parentheses_control_hash,
286 $line_up_parentheses_control_is_lxpl,
288 # These can be modified by grep-alias-list
289 # INITIALIZER: sub initialize_grep_and_friends
291 %is_sort_map_grep_eval,
292 %is_sort_map_grep_eval_do,
294 %is_keyword_returning_list,
295 %block_type_map, # initialized in BEGIN, but may be changed
296 %want_one_line_block, # may be changed in prepare_cuddled_block_types
298 # INITIALIZER: sub prepare_cuddled_block_types
299 $rcuddled_block_types,
301 # INITIALIZER: sub initialize_whitespace_hashes
306 # INITIALIZER: sub initialize_bond_strength_hashes
307 %right_bond_strength,
310 # INITIALIZER: sub initialize_token_break_preferences
312 %break_before_container_types,
314 # INITIALIZER: sub initialize_space_after_keyword
315 %space_after_keyword,
317 # INITIALIZED BY initialize_global_option_vars
318 %opening_vertical_tightness,
319 %closing_vertical_tightness,
320 %closing_token_indentation,
321 $some_closing_token_indentation,
322 %opening_token_right,
323 %stack_opening_token,
324 %stack_closing_token,
326 # INITIALIZER: sub initialize_weld_nested_exclusion_rules
327 %weld_nested_exclusion_rules,
329 # INITIALIZER: sub initialize_weld_fat_comma_rules
330 %weld_fat_comma_rules,
332 # INITIALIZER: sub initialize_trailing_comma_rules
333 %trailing_comma_rules,
335 # regex patterns for text identification.
336 # Most can be configured by user parameters.
337 # Most are initialized in a sub make_**_pattern during configuration.
339 # INITIALIZER: sub make_sub_matching_pattern
343 # INITIALIZER: make_static_block_comment_pattern
344 $static_block_comment_pattern,
346 # INITIALIZER: sub make_static_side_comment_pattern
347 $static_side_comment_pattern,
349 # INITIALIZER: make_format_skipping_pattern
350 $format_skipping_pattern_begin,
351 $format_skipping_pattern_end,
353 # INITIALIZER: sub make_non_indenting_brace_pattern
354 $non_indenting_brace_pattern,
356 # INITIALIZER: sub make_bl_pattern
357 $bl_exclusion_pattern,
359 # INITIALIZER: make_bl_pattern
362 # INITIALIZER: sub make_bli_pattern
363 $bli_exclusion_pattern,
365 # INITIALIZER: sub make_bli_pattern
368 # INITIALIZER: sub make_block_brace_vertical_tightness_pattern
369 $block_brace_vertical_tightness_pattern,
371 # INITIALIZER: sub make_blank_line_pattern
372 $blank_lines_after_opening_block_pattern,
373 $blank_lines_before_closing_block_pattern,
375 # INITIALIZER: sub make_keyword_group_list_pattern
376 $keyword_group_list_pattern,
377 $keyword_group_list_comment_pattern,
379 # INITIALIZER: sub make_closing_side_comment_prefix
380 $closing_side_comment_prefix_pattern,
382 # INITIALIZER: sub make_closing_side_comment_list_pattern
383 $closing_side_comment_list_pattern,
385 # Table to efficiently find indentation and max line length
387 # INITIALIZER: sub initialize_line_length_vars
388 @maximum_line_length_at_level,
389 @maximum_text_length_at_level,
394 # Total number of sequence items in a weld, for quick checks
395 # INITIALIZER: weld_containers
398 #--------------------------------------------------------
399 # Section 2: Work arrays for the current batch of tokens.
400 #--------------------------------------------------------
402 # These are re-initialized for each batch of code
403 # INITIALIZER: sub initialize_batch_variables
406 @type_sequence_to_go,
407 @forced_breakpoint_to_go,
408 @token_lengths_to_go,
409 @summed_lengths_to_go,
411 @leading_spaces_to_go,
412 @reduced_spaces_to_go,
415 @nesting_depth_to_go,
417 @old_breakpoint_to_go,
424 # forced breakpoint variables associated with each batch of code
425 $forced_breakpoint_count,
426 $forced_breakpoint_undo_count,
427 $index_max_forced_break,
432 # Index names for token variables.
433 # Do not combine with other BEGIN blocks (c101).
437 _CUMULATIVE_LENGTH_ => $i++,
438 _LINE_INDEX_ => $i++,
439 _KNEXT_SEQ_ITEM_ => $i++,
442 _TOKEN_LENGTH_ => $i++,
444 _TYPE_SEQUENCE_ => $i++,
446 # Number of token variables; must be last in list:
453 # Index names for $self variables.
454 # Do not combine with other BEGIN blocks (c101).
460 _rdepth_of_opening_seqno_ => $i++,
462 _Iss_opening_ => $i++,
463 _Iss_closing_ => $i++,
464 _rblock_type_of_seqno_ => $i++,
465 _ris_asub_block_ => $i++,
466 _ris_sub_block_ => $i++,
467 _K_opening_container_ => $i++,
468 _K_closing_container_ => $i++,
469 _K_opening_ternary_ => $i++,
470 _K_closing_ternary_ => $i++,
471 _K_first_seq_item_ => $i++,
472 _rtype_count_by_seqno_ => $i++,
473 _ris_function_call_paren_ => $i++,
474 _rlec_count_by_seqno_ => $i++,
475 _ris_broken_container_ => $i++,
476 _ris_permanently_broken_ => $i++,
477 _rblank_and_comment_count_ => $i++,
479 _rhas_broken_list_ => $i++,
480 _rhas_broken_list_with_lec_ => $i++,
481 _rfirst_comma_line_index_ => $i++,
482 _rhas_code_block_ => $i++,
483 _rhas_broken_code_block_ => $i++,
484 _rhas_ternary_ => $i++,
485 _ris_excluded_lp_container_ => $i++,
486 _rlp_object_by_seqno_ => $i++,
487 _rwant_reduced_ci_ => $i++,
488 _rno_xci_by_seqno_ => $i++,
489 _rbrace_left_ => $i++,
490 _ris_bli_container_ => $i++,
491 _rparent_of_seqno_ => $i++,
492 _rchildren_of_seqno_ => $i++,
493 _ris_list_by_seqno_ => $i++,
494 _ris_cuddled_closing_brace_ => $i++,
495 _rbreak_container_ => $i++,
496 _rshort_nested_ => $i++,
497 _length_function_ => $i++,
498 _is_encoded_data_ => $i++,
500 _sink_object_ => $i++,
501 _file_writer_object_ => $i++,
502 _vertical_aligner_object_ => $i++,
503 _logger_object_ => $i++,
504 _radjusted_levels_ => $i++,
505 _this_batch_ => $i++,
507 _ris_special_identifier_token_ => $i++,
508 _last_output_short_opening_token_ => $i++,
510 _last_line_leading_type_ => $i++,
511 _last_line_leading_level_ => $i++,
513 _added_semicolon_count_ => $i++,
514 _first_added_semicolon_at_ => $i++,
515 _last_added_semicolon_at_ => $i++,
517 _deleted_semicolon_count_ => $i++,
518 _first_deleted_semicolon_at_ => $i++,
519 _last_deleted_semicolon_at_ => $i++,
521 _embedded_tab_count_ => $i++,
522 _first_embedded_tab_at_ => $i++,
523 _last_embedded_tab_at_ => $i++,
525 _first_tabbing_disagreement_ => $i++,
526 _last_tabbing_disagreement_ => $i++,
527 _tabbing_disagreement_count_ => $i++,
528 _in_tabbing_disagreement_ => $i++,
529 _first_brace_tabbing_disagreement_ => $i++,
530 _in_brace_tabbing_disagreement_ => $i++,
532 _saw_VERSION_in_this_file_ => $i++,
533 _saw_END_or_DATA_ => $i++,
535 _rK_weld_left_ => $i++,
536 _rK_weld_right_ => $i++,
537 _rweld_len_right_at_K_ => $i++,
539 _rspecial_side_comment_type_ => $i++,
541 _rseqno_controlling_my_ci_ => $i++,
542 _ris_seqno_controlling_ci_ => $i++,
543 _save_logfile_ => $i++,
544 _maximum_level_ => $i++,
545 _maximum_level_at_line_ => $i++,
546 _maximum_BLOCK_level_ => $i++,
547 _maximum_BLOCK_level_at_line_ => $i++,
549 _rKrange_code_without_comments_ => $i++,
550 _rbreak_before_Kfirst_ => $i++,
551 _rbreak_after_Klast_ => $i++,
554 _rstarting_multiline_qw_seqno_by_K_ => $i++,
555 _rending_multiline_qw_seqno_by_K_ => $i++,
556 _rKrange_multiline_qw_by_seqno_ => $i++,
557 _rmultiline_qw_has_extra_level_ => $i++,
559 _rcollapsed_length_by_seqno_ => $i++,
560 _rbreak_before_container_by_seqno_ => $i++,
561 _roverride_cab3_ => $i++,
562 _ris_assigned_structure_ => $i++,
563 _ris_short_broken_eval_block_ => $i++,
564 _ris_bare_trailing_comma_by_seqno_ => $i++,
566 _rseqno_non_indenting_brace_by_ix_ => $i++,
567 _rmax_vertical_tightness_ => $i++,
569 _no_vertical_tightness_flags_ => $i++,
571 _LAST_SELF_INDEX_ => $i - 1,
577 # Index names for batch variables.
578 # Do not combine with other BEGIN blocks (c101).
579 # These are stored in _this_batch_, which is a sub-array of $self.
582 _starting_in_quote_ => $i++,
583 _ending_in_quote_ => $i++,
584 _is_static_block_comment_ => $i++,
587 _do_not_pad_ => $i++,
588 _peak_batch_size_ => $i++,
589 _batch_count_ => $i++,
590 _rix_seqno_controlling_ci_ => $i++,
591 _batch_CODE_type_ => $i++,
592 _ri_starting_one_line_block_ => $i++,
593 _runmatched_opening_indexes_ => $i++,
599 # Sequence number assigned to the root of sequence tree.
600 # The minimum of the actual sequences numbers is 4, so we can use 1
601 use constant SEQ_ROOT => 1;
603 # Codes for insertion and deletion of blanks
604 use constant DELETE => 0;
605 use constant STABLE => 1;
606 use constant INSERT => 2;
609 use constant WS_YES => 1;
610 use constant WS_OPTIONAL => 0;
611 use constant WS_NO => -1;
613 # Token bond strengths.
614 use constant NO_BREAK => 10_000;
615 use constant VERY_STRONG => 100;
616 use constant STRONG => 2.1;
617 use constant NOMINAL => 1.1;
618 use constant WEAK => 0.8;
619 use constant VERY_WEAK => 0.55;
621 # values for testing indexes in output array
622 use constant UNDEFINED_INDEX => -1;
624 # Maximum number of little messages; probably need not be changed.
625 use constant MAX_NAG_MESSAGES => 6;
627 # This is the decimal range of printable characters in ASCII. It is used to
628 # make quick preliminary checks before resorting to using a regex.
629 use constant ORD_PRINTABLE_MIN => 33;
630 use constant ORD_PRINTABLE_MAX => 126;
632 # Initialize constant hashes ...
636 = **= += *= &= <<= &&=
641 @is_assignment{@q} = (1) x scalar(@q);
643 # a hash needed by break_lists for efficiency:
644 push @q, qw{ ; < > ~ f };
645 @is_non_list_type{@q} = (1) x scalar(@q);
647 @q = qw(is if unless and or err last next redo return);
648 @is_if_unless_and_or_last_next_redo_return{@q} = (1) x scalar(@q);
650 # These block types may have text between the keyword and opening
651 # curly. Note: 'else' does not, but must be included to allow trailing
652 # if/elsif text to be appended.
653 # patch for SWITCH/CASE: added 'case' and 'when'
654 @q = qw(if elsif else unless while until for foreach case when catch);
655 @is_if_elsif_else_unless_while_until_for_foreach{@q} =
658 @q = qw(if unless while until for foreach);
659 @is_if_unless_while_until_for_foreach{@q} =
662 @q = qw(last next redo return);
663 @is_last_next_redo_return{@q} = (1) x scalar(@q);
665 # Map related block names into a common name to allow vertical alignment
666 # used by sub make_alignment_patterns. Note: this is normally unchanged,
667 # but it contains 'grep' and can be re-initialized in
668 # sub initialize_grep_and_friends in a testing mode.
681 @is_if_unless{@q} = (1) x scalar(@q);
684 @is_if_elsif{@q} = (1) x scalar(@q);
686 @q = qw(if unless elsif);
687 @is_if_unless_elsif{@q} = (1) x scalar(@q);
689 @q = qw(if unless elsif else);
690 @is_if_unless_elsif_else{@q} = (1) x scalar(@q);
693 @is_elsif_else{@q} = (1) x scalar(@q);
696 @is_and_or{@q} = (1) x scalar(@q);
698 # Identify certain operators which often occur in chains.
699 # Note: the minus (-) causes a side effect of padding of the first line in
700 # something like this (by sub set_logical_padding):
701 # Checkbutton => 'Transmission checked',
702 # -variable => \$TRANS
703 # This usually improves appearance so it seems ok.
704 @q = qw(&& || and or : ? . + - * /);
705 @is_chain_operator{@q} = (1) x scalar(@q);
707 # Operators that the user can request break before or after.
708 # Note that some are keywords
709 @all_operators = qw(% + - * / x != == >= <= =~ !~ < > | &
710 = **= += *= &= <<= &&= -= /= |= >>= ||= //= .= %= ^= x=
711 . : ? && || and or err xor
714 # We can remove semicolons after blocks preceded by these keywords
716 qw(BEGIN END CHECK INIT AUTOLOAD DESTROY UNITCHECK continue if elsif else
717 unless while until for foreach given when default);
718 @is_block_without_semicolon{@q} = (1) x scalar(@q);
720 # We will allow semicolons to be added within these block types
721 # as well as sub and package blocks.
723 # 1. Note that these keywords are omitted:
724 # switch case given when default sort map grep
725 # 2. It is also ok to add for sub and package blocks and a labeled block
726 # 3. But not okay for other perltidy types including:
728 # 4. Test files: blktype.t, blktype1.t, semicolon.t
730 qw( BEGIN END CHECK INIT AUTOLOAD DESTROY UNITCHECK continue if elsif else
731 unless do while until eval for foreach );
732 @ok_to_add_semicolon_for_block_type{@q} = (1) x scalar(@q);
734 # 'L' is token for opening { at hash key
736 @is_opening_type{@q} = (1) x scalar(@q);
738 # 'R' is token for closing } at hash key
740 @is_closing_type{@q} = (1) x scalar(@q);
743 @is_opening_token{@q} = (1) x scalar(@q);
746 @is_closing_token{@q} = (1) x scalar(@q);
749 @is_ternary{@q} = (1) x scalar(@q);
752 @is_opening_sequence_token{@q} = (1) x scalar(@q);
755 @is_closing_sequence_token{@q} = (1) x scalar(@q);
769 # a hash needed by sub break_lists for labeling containers
770 @q = qw( k => && || ? : . );
771 @is_container_label_type{@q} = (1) x scalar(@q);
773 @q = qw( die confess croak warn );
774 @is_die_confess_croak_warn{@q} = (1) x scalar(@q);
776 @q = qw( my our local );
777 @is_my_our_local{@q} = (1) x scalar(@q);
779 # Braces -bbht etc must follow these. Note: experimentation with
780 # including a simple comma shows that it adds little and can lead
781 # to poor formatting in complex lists.
783 @is_equal_or_fat_comma{@q} = (1) x scalar(@q);
787 @is_counted_type{@q} = (1) x scalar(@q);
789 # Tokens where --keep-old-break-xxx flags make soft breaks instead
790 # of hard breaks. See b1433 and b1436.
791 # NOTE: $type is used as the hash key for now; if other container tokens
792 # are added it might be necessary to use a token/type mixture.
793 @q = qw# -> ? : && || + - / * #;
794 @is_soft_keep_break_type{@q} = (1) x scalar(@q);
796 # these functions allow an identifier in the indirect object slot
797 @q = qw( print printf sort exec system say);
798 @is_indirect_object_taker{@q} = (1) x scalar(@q);
800 # Define here tokens which may follow the closing brace of a do statement
801 # on the same line, as in:
802 # } while ( $something);
803 my @dof = qw(until while unless if ; : );
805 @is_do_follower{@dof} = (1) x scalar(@dof);
807 # what can follow a multi-line anonymous sub definition closing curly:
808 my @asf = qw# ; : => or and && || ~~ !~~ ) #;
810 @is_anon_sub_brace_follower{@asf} = (1) x scalar(@asf);
812 # what can follow a one-line anonymous sub closing curly:
813 # one-line anonymous subs also have ']' here...
814 # see tk3.t and PP.pm
815 my @asf1 = qw# ; : => or and && || ) ] ~~ !~~ #;
817 @is_anon_sub_1_brace_follower{@asf1} = (1) x scalar(@asf1);
819 # What can follow a closing curly of a block
820 # which is not an if/elsif/else/do/sort/map/grep/eval/sub
821 # Testfiles: 'Toolbar.pm', 'Menubar.pm', bless.t, '3rules.pl'
822 my @obf = qw# ; : => or and && || ) #;
824 @is_other_brace_follower{@obf} = (1) x scalar(@obf);
828 { ## begin closure to count instances
830 # methods to count instances
832 sub get_count { return $_count; }
833 sub _increment_count { return ++$_count }
834 sub _decrement_count { return --$_count }
835 } ## end closure to count instances
839 my ( $class, @args ) = @_;
841 # we are given an object with a write_line() method to take lines
843 sink_object => undef,
844 diagnostics_object => undef,
845 logger_object => undef,
846 length_function => sub { return length( $_[0] ) },
847 is_encoded_data => EMPTY_STRING,
850 my %args = ( %defaults, @args );
852 my $length_function = $args{length_function};
853 my $is_encoded_data = $args{is_encoded_data};
854 my $fh_tee = $args{fh_tee};
855 my $logger_object = $args{logger_object};
856 my $diagnostics_object = $args{diagnostics_object};
858 # we create another object with a get_line() and peek_ahead() method
859 my $sink_object = $args{sink_object};
860 my $file_writer_object =
861 Perl::Tidy::FileWriter->new( $sink_object, $rOpts, $logger_object );
863 # initialize closure variables...
864 set_logger_object($logger_object);
865 set_diagnostics_object($diagnostics_object);
866 initialize_lp_vars();
867 initialize_csc_vars();
868 initialize_break_lists();
869 initialize_undo_ci();
870 initialize_process_line_of_CODE();
871 initialize_grind_batch_of_CODE();
872 initialize_get_final_indentation();
873 initialize_postponed_breakpoint();
874 initialize_batch_variables();
875 initialize_write_line();
877 my $vertical_aligner_object = Perl::Tidy::VerticalAligner->new(
879 file_writer_object => $file_writer_object,
880 logger_object => $logger_object,
881 diagnostics_object => $diagnostics_object,
882 length_function => $length_function,
885 write_logfile_entry("\nStarting tokenization pass...\n");
887 if ( $rOpts->{'entab-leading-whitespace'} ) {
889 "Leading whitespace will be entabbed with $rOpts->{'entab-leading-whitespace'} spaces per tab\n"
892 elsif ( $rOpts->{'tabs'} ) {
893 write_logfile_entry("Indentation will be with a tab character\n");
897 "Indentation will be with $rOpts->{'indent-columns'} spaces\n");
900 # Initialize the $self array reference.
901 # To add an item, first add a constant index in the BEGIN block above.
904 # Basic data structures...
905 $self->[_rlines_] = []; # = ref to array of lines of the file
907 # 'rLL' = reference to the continuous liner array of all tokens in a file.
908 # 'LL' stands for 'Linked List'. Using a linked list was a disaster, but
909 # 'LL' stuck because it is easy to type. The 'rLL' array is updated
910 # by sub 'respace_tokens' during reformatting. The indexes in 'rLL' begin
911 # with '$K' by convention.
913 $self->[_Klimit_] = undef; # = maximum K index for rLL.
915 # Indexes into the rLL list
916 $self->[_K_opening_container_] = {};
917 $self->[_K_closing_container_] = {};
918 $self->[_K_opening_ternary_] = {};
919 $self->[_K_closing_ternary_] = {};
920 $self->[_K_first_seq_item_] = undef; # K of first token with a sequence #
922 # 'rSS' is the 'Signed Sequence' list, a continuous list of all sequence
923 # numbers with + or - indicating opening or closing. This list represents
924 # the entire container tree and is invariant under reformatting. It can be
925 # used to quickly travel through the tree. Indexes in the rSS array begin
926 # with '$I' by convention. The 'Iss' arrays give the indexes in this list
927 # of opening and closing sequence numbers.
929 $self->[_Iss_opening_] = [];
930 $self->[_Iss_closing_] = [];
932 # Arrays to help traverse the tree
933 $self->[_rdepth_of_opening_seqno_] = [];
934 $self->[_rblock_type_of_seqno_] = {};
935 $self->[_ris_asub_block_] = {};
936 $self->[_ris_sub_block_] = {};
938 # Mostly list characteristics and processing flags
939 $self->[_rtype_count_by_seqno_] = {};
940 $self->[_ris_function_call_paren_] = {};
941 $self->[_rlec_count_by_seqno_] = {};
942 $self->[_ris_broken_container_] = {};
943 $self->[_ris_permanently_broken_] = {};
944 $self->[_rblank_and_comment_count_] = {};
945 $self->[_rhas_list_] = {};
946 $self->[_rhas_broken_list_] = {};
947 $self->[_rhas_broken_list_with_lec_] = {};
948 $self->[_rfirst_comma_line_index_] = {};
949 $self->[_rhas_code_block_] = {};
950 $self->[_rhas_broken_code_block_] = {};
951 $self->[_rhas_ternary_] = {};
952 $self->[_ris_excluded_lp_container_] = {};
953 $self->[_rlp_object_by_seqno_] = {};
954 $self->[_rwant_reduced_ci_] = {};
955 $self->[_rno_xci_by_seqno_] = {};
956 $self->[_rbrace_left_] = {};
957 $self->[_ris_bli_container_] = {};
958 $self->[_rparent_of_seqno_] = {};
959 $self->[_rchildren_of_seqno_] = {};
960 $self->[_ris_list_by_seqno_] = {};
961 $self->[_ris_cuddled_closing_brace_] = {};
963 $self->[_rbreak_container_] = {}; # prevent one-line blocks
964 $self->[_rshort_nested_] = {}; # blocks not forced open
965 $self->[_length_function_] = $length_function;
966 $self->[_is_encoded_data_] = $is_encoded_data;
969 $self->[_fh_tee_] = $fh_tee;
970 $self->[_sink_object_] = $sink_object;
971 $self->[_file_writer_object_] = $file_writer_object;
972 $self->[_vertical_aligner_object_] = $vertical_aligner_object;
973 $self->[_logger_object_] = $logger_object;
975 # Reference to the batch being processed
976 $self->[_this_batch_] = [];
978 # Memory of processed text...
979 $self->[_ris_special_identifier_token_] = {};
980 $self->[_last_line_leading_level_] = 0;
981 $self->[_last_line_leading_type_] = '#';
982 $self->[_last_output_short_opening_token_] = 0;
983 $self->[_added_semicolon_count_] = 0;
984 $self->[_first_added_semicolon_at_] = 0;
985 $self->[_last_added_semicolon_at_] = 0;
986 $self->[_deleted_semicolon_count_] = 0;
987 $self->[_first_deleted_semicolon_at_] = 0;
988 $self->[_last_deleted_semicolon_at_] = 0;
989 $self->[_embedded_tab_count_] = 0;
990 $self->[_first_embedded_tab_at_] = 0;
991 $self->[_last_embedded_tab_at_] = 0;
992 $self->[_first_tabbing_disagreement_] = 0;
993 $self->[_last_tabbing_disagreement_] = 0;
994 $self->[_tabbing_disagreement_count_] = 0;
995 $self->[_in_tabbing_disagreement_] = 0;
996 $self->[_saw_VERSION_in_this_file_] = !$rOpts->{'pass-version-line'};
997 $self->[_saw_END_or_DATA_] = 0;
998 $self->[_first_brace_tabbing_disagreement_] = undef;
999 $self->[_in_brace_tabbing_disagreement_] = undef;
1001 # Hashes related to container welding...
1002 $self->[_radjusted_levels_] = [];
1004 # Weld data structures
1005 $self->[_rK_weld_left_] = {};
1006 $self->[_rK_weld_right_] = {};
1007 $self->[_rweld_len_right_at_K_] = {};
1010 $self->[_rseqno_controlling_my_ci_] = {};
1011 $self->[_ris_seqno_controlling_ci_] = {};
1013 $self->[_rspecial_side_comment_type_] = {};
1014 $self->[_maximum_level_] = 0;
1015 $self->[_maximum_level_at_line_] = 0;
1016 $self->[_maximum_BLOCK_level_] = 0;
1017 $self->[_maximum_BLOCK_level_at_line_] = 0;
1019 $self->[_rKrange_code_without_comments_] = [];
1020 $self->[_rbreak_before_Kfirst_] = {};
1021 $self->[_rbreak_after_Klast_] = {};
1022 $self->[_converged_] = 0;
1025 $self->[_rstarting_multiline_qw_seqno_by_K_] = {};
1026 $self->[_rending_multiline_qw_seqno_by_K_] = {};
1027 $self->[_rKrange_multiline_qw_by_seqno_] = {};
1028 $self->[_rmultiline_qw_has_extra_level_] = {};
1030 $self->[_rcollapsed_length_by_seqno_] = {};
1031 $self->[_rbreak_before_container_by_seqno_] = {};
1032 $self->[_roverride_cab3_] = {};
1033 $self->[_ris_assigned_structure_] = {};
1034 $self->[_ris_short_broken_eval_block_] = {};
1035 $self->[_ris_bare_trailing_comma_by_seqno_] = {};
1037 $self->[_rseqno_non_indenting_brace_by_ix_] = {};
1038 $self->[_rmax_vertical_tightness_] = {};
1040 $self->[_no_vertical_tightness_flags_] = 0;
1042 # This flag will be updated later by a call to get_save_logfile()
1043 $self->[_save_logfile_] = defined($logger_object);
1045 # Be sure all variables in $self have been initialized above. To find the
1046 # correspondence of index numbers and array names, copy a list to a file
1047 # and use the unix 'nl' command to number lines 1..
1050 foreach ( 0 .. _LAST_SELF_INDEX_ ) {
1051 if ( !exists( $self->[$_] ) ) {
1052 push @non_existant, $_;
1055 if (@non_existant) {
1056 Fault("These indexes in self not initialized: (@non_existant)\n");
1060 bless $self, $class;
1062 # Safety check..this is not a class yet
1063 if ( _increment_count() > 1 ) {
1065 "Attempt to create more than 1 object in $class, which is not a true class yet\n";
1070 ######################################
1071 # CODE SECTION 2: Some Basic Utilities
1072 ######################################
1076 # Verify that the rLL array has not been auto-vivified
1077 my ( $self, $msg ) = @_;
1078 my $rLL = $self->[_rLL_];
1079 my $Klimit = $self->[_Klimit_];
1081 if ( ( defined($Klimit) && $Klimit != $num - 1 )
1082 || ( !defined($Klimit) && $num > 0 ) )
1085 # This fault can occur if the array has been accessed for an index
1086 # greater than $Klimit, which is the last token index. Just accessing
1087 # the array above index $Klimit, not setting a value, can cause @rLL to
1088 # increase beyond $Klimit. If this occurs, the problem can be located
1089 # by making calls to this routine at different locations in
1090 # sub 'finish_formatting'.
1091 $Klimit = 'undef' if ( !defined($Klimit) );
1092 $msg = EMPTY_STRING unless $msg;
1093 Fault("$msg ERROR: rLL has num=$num but Klimit='$Klimit'\n");
1096 } ## end sub check_rLL
1099 my ( $rtest, $rvalid, $msg, $exact_match ) = @_;
1101 # Check the keys of a hash:
1102 # $rtest = ref to hash to test
1103 # $rvalid = ref to hash with valid keys
1105 # $msg = a message to write in case of error
1106 # $exact_match defines the type of check:
1107 # = false: test hash must not have unknown key
1108 # = true: test hash must have exactly same keys as known hash
1110 grep { !exists $rvalid->{$_} } keys %{$rtest};
1112 grep { !exists $rtest->{$_} } keys %{$rvalid};
1113 my $error = @unknown_keys;
1114 if ($exact_match) { $error ||= @missing_keys }
1116 local $LIST_SEPARATOR = ')(';
1117 my @expected_keys = sort keys %{$rvalid};
1118 @unknown_keys = sort @unknown_keys;
1120 ------------------------------------------------------------------------
1121 Program error detected checking hash keys
1123 Expected keys: (@expected_keys)
1124 Unknown key(s): (@unknown_keys)
1125 Missing key(s): (@missing_keys)
1126 ------------------------------------------------------------------------
1130 } ## end sub check_keys
1132 sub check_token_array {
1135 # Check for errors in the array of tokens. This is only called
1136 # when the DEVEL_MODE flag is set, so this Fault will only occur
1137 # during code development.
1138 my $rLL = $self->[_rLL_];
1139 foreach my $KK ( 0 .. @{$rLL} - 1 ) {
1140 my $nvars = @{ $rLL->[$KK] };
1141 if ( $nvars != _NVARS ) {
1143 my $type = $rLL->[$KK]->[_TYPE_];
1144 $type = '*' unless defined($type);
1146 # The number of variables per token node is _NVARS and was set when
1147 # the array indexes were generated. So if the number of variables
1148 # is different we have done something wrong, like not store all of
1149 # them in sub 'write_line' when they were received from the
1152 "number of vars for node $KK, type '$type', is $nvars but should be $NVARS"
1155 foreach my $var ( _TOKEN_, _TYPE_ ) {
1156 if ( !defined( $rLL->[$KK]->[$var] ) ) {
1157 my $iline = $rLL->[$KK]->[_LINE_INDEX_];
1159 # This is a simple check that each token has some basic
1160 # variables. In other words, that there are no holes in the
1161 # array of tokens. Sub 'write_line' pushes tokens into the
1162 # $rLL array, so this should guarantee no gaps.
1163 Fault("Undefined variable $var for K=$KK, line=$iline\n");
1168 } ## end sub check_token_array
1170 { ## begin closure check_line_hashes
1172 # This code checks that no autovivification occurs in the 'line' hash
1174 my %valid_line_hash;
1178 # These keys are defined for each line in the formatter
1179 # Each line must have exactly these quantities
1180 my @valid_line_keys = qw(
1183 _guessed_indentation_level
1190 _square_bracket_depth
1192 _ended_in_blank_token
1201 @valid_line_hash{@valid_line_keys} = (1) x scalar(@valid_line_keys);
1204 sub check_line_hashes {
1206 my $rlines = $self->[_rlines_];
1207 foreach my $rline ( @{$rlines} ) {
1208 my $iline = $rline->{_line_number};
1209 my $line_type = $rline->{_line_type};
1210 check_keys( $rline, \%valid_line_hash,
1211 "Checkpoint: line number =$iline, line_type=$line_type", 1 );
1214 } ## end sub check_line_hashes
1215 } ## end closure check_line_hashes
1217 { ## begin closure for logger routines
1220 # Called once per file to initialize the logger object
1221 sub set_logger_object {
1222 $logger_object = shift;
1226 sub get_logger_object {
1227 return $logger_object;
1230 sub get_input_stream_name {
1231 my $input_stream_name = EMPTY_STRING;
1232 if ($logger_object) {
1233 $input_stream_name = $logger_object->get_input_stream_name();
1235 return $input_stream_name;
1236 } ## end sub get_input_stream_name
1238 # interface to Perl::Tidy::Logger routines
1241 if ($logger_object) { $logger_object->warning($msg); }
1247 if ($logger_object) {
1248 $logger_object->complain($msg);
1251 } ## end sub complain
1253 sub write_logfile_entry {
1255 if ($logger_object) {
1256 $logger_object->write_logfile_entry(@msg);
1259 } ## end sub write_logfile_entry
1261 sub get_saw_brace_error {
1262 if ($logger_object) {
1263 return $logger_object->get_saw_brace_error();
1266 } ## end sub get_saw_brace_error
1268 sub we_are_at_the_last_line {
1269 if ($logger_object) {
1270 $logger_object->we_are_at_the_last_line();
1273 } ## end sub we_are_at_the_last_line
1275 } ## end closure for logger routines
1277 { ## begin closure for diagnostics routines
1278 my $diagnostics_object;
1280 # Called once per file to initialize the diagnostics object
1281 sub set_diagnostics_object {
1282 $diagnostics_object = shift;
1286 sub write_diagnostics {
1288 if ($diagnostics_object) {
1289 $diagnostics_object->write_diagnostics($msg);
1292 } ## end sub write_diagnostics
1293 } ## end closure for diagnostics routines
1295 sub get_convergence_check {
1297 return $self->[_converged_];
1300 sub get_output_line_number {
1302 my $vao = $self->[_vertical_aligner_object_];
1303 return $vao->get_output_line_number();
1306 sub want_blank_line {
1309 my $file_writer_object = $self->[_file_writer_object_];
1310 $file_writer_object->want_blank_line();
1312 } ## end sub want_blank_line
1314 sub write_unindented_line {
1315 my ( $self, $line ) = @_;
1317 my $file_writer_object = $self->[_file_writer_object_];
1318 $file_writer_object->write_line($line);
1320 } ## end sub write_unindented_line
1322 sub consecutive_nonblank_lines {
1324 my $file_writer_object = $self->[_file_writer_object_];
1325 my $vao = $self->[_vertical_aligner_object_];
1326 return $file_writer_object->get_consecutive_nonblank_lines() +
1327 $vao->get_cached_line_count();
1328 } ## end sub consecutive_nonblank_lines
1332 # given a string containing words separated by whitespace,
1333 # return the list of words
1338 return split( /\s+/, $str );
1339 } ## end sub split_words
1341 ###########################################
1342 # CODE SECTION 3: Check and process options
1343 ###########################################
1347 # This routine is called to check the user-supplied run parameters
1348 # and to configure the control hashes to them.
1351 $controlled_comma_style = 0;
1353 initialize_whitespace_hashes();
1354 initialize_bond_strength_hashes();
1356 # This function must be called early to get hashes with grep initialized
1357 initialize_grep_and_friends();
1359 # Make needed regex patterns for matching text.
1360 # NOTE: sub_matching_patterns must be made first because later patterns use
1361 # them; see RT #133130.
1362 make_sub_matching_pattern(); # must be first pattern made
1363 make_static_block_comment_pattern();
1364 make_static_side_comment_pattern();
1365 make_closing_side_comment_prefix();
1366 make_closing_side_comment_list_pattern();
1367 $format_skipping_pattern_begin =
1368 make_format_skipping_pattern( 'format-skipping-begin', '#<<<' );
1369 $format_skipping_pattern_end =
1370 make_format_skipping_pattern( 'format-skipping-end', '#>>>' );
1371 make_non_indenting_brace_pattern();
1373 # If closing side comments ARE selected, then we can safely
1374 # delete old closing side comments unless closing side comment
1375 # warnings are requested. This is a good idea because it will
1376 # eliminate any old csc's which fall below the line count threshold.
1377 # We cannot do this if warnings are turned on, though, because we
1378 # might delete some text which has been added. So that must
1379 # be handled when comments are created. And we cannot do this
1380 # with -io because -csc will be skipped altogether.
1381 if ( $rOpts->{'closing-side-comments'} ) {
1382 if ( !$rOpts->{'closing-side-comment-warnings'}
1383 && !$rOpts->{'indent-only'} )
1385 $rOpts->{'delete-closing-side-comments'} = 1;
1389 # If closing side comments ARE NOT selected, but warnings ARE
1390 # selected and we ARE DELETING csc's, then we will pretend to be
1391 # adding with a huge interval. This will force the comments to be
1392 # generated for comparison with the old comments, but not added.
1393 elsif ( $rOpts->{'closing-side-comment-warnings'} ) {
1394 if ( $rOpts->{'delete-closing-side-comments'} ) {
1395 $rOpts->{'delete-closing-side-comments'} = 0;
1396 $rOpts->{'closing-side-comments'} = 1;
1397 $rOpts->{'closing-side-comment-interval'} = 100_000_000;
1405 make_block_brace_vertical_tightness_pattern();
1407 make_blank_line_pattern();
1409 make_keyword_group_list_pattern();
1411 prepare_cuddled_block_types();
1413 if ( $rOpts->{'dump-cuddled-block-list'} ) {
1414 dump_cuddled_block_list(*STDOUT);
1419 if ( $rOpts->{'extended-line-up-parentheses'} ) {
1420 $rOpts->{'line-up-parentheses'} ||= 1;
1423 if ( $rOpts->{'line-up-parentheses'} ) {
1425 if ( $rOpts->{'indent-only'}
1426 || !$rOpts->{'add-newlines'}
1427 || !$rOpts->{'delete-old-newlines'} )
1430 -----------------------------------------------------------------------
1431 Conflict: -lp conflicts with -io, -fnl, -nanl, or -ndnl; ignoring -lp
1433 The -lp indentation logic requires that perltidy be able to coordinate
1434 arbitrarily large numbers of line breakpoints. This isn't possible
1436 -----------------------------------------------------------------------
1438 $rOpts->{'line-up-parentheses'} = 0;
1439 $rOpts->{'extended-line-up-parentheses'} = 0;
1442 if ( $rOpts->{'whitespace-cycle'} ) {
1444 Conflict: -wc cannot currently be used with the -lp option; ignoring -wc
1446 $rOpts->{'whitespace-cycle'} = 0;
1450 # At present, tabs are not compatible with the line-up-parentheses style
1451 # (it would be possible to entab the total leading whitespace
1452 # just prior to writing the line, if desired).
1453 if ( $rOpts->{'line-up-parentheses'} && $rOpts->{'tabs'} ) {
1455 Conflict: -t (tabs) cannot be used with the -lp option; ignoring -t; see -et.
1457 $rOpts->{'tabs'} = 0;
1460 # Likewise, tabs are not compatible with outdenting..
1461 if ( $rOpts->{'outdent-keywords'} && $rOpts->{'tabs'} ) {
1463 Conflict: -t (tabs) cannot be used with the -okw options; ignoring -t; see -et.
1465 $rOpts->{'tabs'} = 0;
1468 if ( $rOpts->{'outdent-labels'} && $rOpts->{'tabs'} ) {
1470 Conflict: -t (tabs) cannot be used with the -ola option; ignoring -t; see -et.
1472 $rOpts->{'tabs'} = 0;
1475 if ( !$rOpts->{'space-for-semicolon'} ) {
1476 $want_left_space{'f'} = -1;
1479 if ( $rOpts->{'space-terminal-semicolon'} ) {
1480 $want_left_space{';'} = 1;
1483 # We should put an upper bound on any -sil=n value. Otherwise enormous
1484 # files could be created by mistake.
1485 for ( $rOpts->{'starting-indentation-level'} ) {
1486 if ( $_ && $_ > 100 ) {
1488 The value --starting-indentation-level=$_ is very large; a mistake? resetting to 0;
1494 # Require -msp > 0 to avoid future parsing problems (issue c147)
1495 for ( $rOpts->{'minimum-space-to-comment'} ) {
1496 if ( !$_ || $_ <= 0 ) { $_ = 1 }
1499 # implement outdenting preferences for keywords
1500 %outdent_keyword = ();
1501 my @okw = split_words( $rOpts->{'outdent-keyword-list'} );
1503 @okw = qw(next last redo goto return); # defaults
1506 # FUTURE: if not a keyword, assume that it is an identifier
1508 if ( $Perl::Tidy::Tokenizer::is_keyword{$_} ) {
1509 $outdent_keyword{$_} = 1;
1512 Warn("ignoring '$_' in -okwl list; not a perl keyword");
1516 # setup hash for -kpit option
1517 %keyword_paren_inner_tightness = ();
1518 my $kpit_value = $rOpts->{'keyword-paren-inner-tightness'};
1519 if ( defined($kpit_value) && $kpit_value != 1 ) {
1521 split_words( $rOpts->{'keyword-paren-inner-tightness-list'} );
1523 @kpit = qw(if elsif unless while until for foreach); # defaults
1526 # we will allow keywords and user-defined identifiers
1528 $keyword_paren_inner_tightness{$_} = $kpit_value;
1532 # implement user whitespace preferences
1533 if ( my @q = split_words( $rOpts->{'want-left-space'} ) ) {
1534 @want_left_space{@q} = (1) x scalar(@q);
1537 if ( my @q = split_words( $rOpts->{'want-right-space'} ) ) {
1538 @want_right_space{@q} = (1) x scalar(@q);
1541 if ( my @q = split_words( $rOpts->{'nowant-left-space'} ) ) {
1542 @want_left_space{@q} = (-1) x scalar(@q);
1545 if ( my @q = split_words( $rOpts->{'nowant-right-space'} ) ) {
1546 @want_right_space{@q} = (-1) x scalar(@q);
1548 if ( $rOpts->{'dump-want-left-space'} ) {
1549 dump_want_left_space(*STDOUT);
1553 if ( $rOpts->{'dump-want-right-space'} ) {
1554 dump_want_right_space(*STDOUT);
1558 initialize_space_after_keyword();
1560 initialize_token_break_preferences();
1562 #--------------------------------------------------------------
1563 # The combination -lp -iob -vmll -bbx=2 can be unstable (b1266)
1564 #--------------------------------------------------------------
1565 # The -vmll and -lp parameters do not really work well together.
1566 # To avoid instabilities, we will change any -bbx=2 to -bbx=1 (stable).
1567 # NOTE: we could make this more precise by looking at any exclusion
1568 # flags for -lp, and allowing -bbx=2 for excluded types.
1569 if ( $rOpts->{'variable-maximum-line-length'}
1570 && $rOpts->{'ignore-old-breakpoints'}
1571 && $rOpts->{'line-up-parentheses'} )
1574 foreach my $key ( keys %break_before_container_types ) {
1575 if ( $break_before_container_types{$key} == 2 ) {
1576 $break_before_container_types{$key} = 1;
1577 push @changed, $key;
1582 # we could write a warning here
1586 #-----------------------------------------------------------
1587 # The combination -lp -vmll can be unstable if -ci<2 (b1267)
1588 #-----------------------------------------------------------
1589 # The -vmll and -lp parameters do not really work well together.
1590 # This is a very crude fix for an unusual parameter combination.
1591 if ( $rOpts->{'variable-maximum-line-length'}
1592 && $rOpts->{'line-up-parentheses'}
1593 && $rOpts->{'continuation-indentation'} < 2 )
1595 $rOpts->{'continuation-indentation'} = 2;
1596 ##Warn("Increased -ci=n to n=2 for stability with -lp and -vmll\n");
1599 #-----------------------------------------------------------
1600 # The combination -lp -vmll -atc -dtc can be unstable
1601 #-----------------------------------------------------------
1602 # This fixes b1386 b1387 b1388 which had -wtc='b'
1603 # Updated to to include any -wtc to fix b1426
1604 if ( $rOpts->{'variable-maximum-line-length'}
1605 && $rOpts->{'line-up-parentheses'}
1606 && $rOpts->{'add-trailing-commas'}
1607 && $rOpts->{'delete-trailing-commas'}
1608 && $rOpts->{'want-trailing-commas'} )
1610 $rOpts->{'delete-trailing-commas'} = 0;
1611 ## Issuing a warning message causes trouble with test cases, and this combo is
1612 ## so rare that it is unlikely to not occur in practice. So skip warning.
1614 ##"The combination -vmll -lp -atc -dtc can be unstable; turning off -dtc\n"
1618 %container_indentation_options = ();
1620 [ 'break-before-hash-brace-and-indent', '{' ],
1621 [ 'break-before-square-bracket-and-indent', '[' ],
1622 [ 'break-before-paren-and-indent', '(' ],
1625 my ( $key, $tok ) = @{$pair};
1626 my $opt = $rOpts->{$key};
1627 if ( defined($opt) && $opt > 0 && $break_before_container_types{$tok} )
1630 # (1) -lp is not compatible with opt=2, silently set to opt=0
1631 # (2) opt=0 and 2 give same result if -i=-ci; but opt=0 is faster
1632 # (3) set opt=0 if -i < -ci (can be unstable, case b1355)
1635 $rOpts->{'line-up-parentheses'}
1636 || ( $rOpts->{'indent-columns'} <=
1637 $rOpts->{'continuation-indentation'} )
1643 $container_indentation_options{$tok} = $opt;
1647 $right_bond_strength{'{'} = WEAK;
1648 $left_bond_strength{'{'} = VERY_STRONG;
1650 # make -l=0 equal to -l=infinite
1651 if ( !$rOpts->{'maximum-line-length'} ) {
1652 $rOpts->{'maximum-line-length'} = 1_000_000;
1655 # make -lbl=0 equal to -lbl=infinite
1656 if ( !$rOpts->{'long-block-line-count'} ) {
1657 $rOpts->{'long-block-line-count'} = 1_000_000;
1660 # hashes used to simplify setting whitespace
1662 '{' => $rOpts->{'brace-tightness'},
1663 '}' => $rOpts->{'brace-tightness'},
1664 '(' => $rOpts->{'paren-tightness'},
1665 ')' => $rOpts->{'paren-tightness'},
1666 '[' => $rOpts->{'square-bracket-tightness'},
1667 ']' => $rOpts->{'square-bracket-tightness'},
1670 if ( $rOpts->{'ignore-old-breakpoints'} ) {
1673 if ( $rOpts->{'break-at-old-method-breakpoints'} ) {
1674 $rOpts->{'break-at-old-method-breakpoints'} = 0;
1675 push @conflicts, '--break-at-old-method-breakpoints (-bom)';
1677 if ( $rOpts->{'break-at-old-comma-breakpoints'} ) {
1678 $rOpts->{'break-at-old-comma-breakpoints'} = 0;
1679 push @conflicts, '--break-at-old-comma-breakpoints (-boc)';
1681 if ( $rOpts->{'break-at-old-semicolon-breakpoints'} ) {
1682 $rOpts->{'break-at-old-semicolon-breakpoints'} = 0;
1683 push @conflicts, '--break-at-old-semicolon-breakpoints (-bos)';
1685 if ( $rOpts->{'keep-old-breakpoints-before'} ) {
1686 $rOpts->{'keep-old-breakpoints-before'} = EMPTY_STRING;
1687 push @conflicts, '--keep-old-breakpoints-before (-kbb)';
1689 if ( $rOpts->{'keep-old-breakpoints-after'} ) {
1690 $rOpts->{'keep-old-breakpoints-after'} = EMPTY_STRING;
1691 push @conflicts, '--keep-old-breakpoints-after (-kba)';
1695 my $msg = join( "\n ",
1696 " Conflict: These conflicts with --ignore-old-breakponts (-iob) will be turned off:",
1702 # Note: These additional parameters are made inactive by -iob.
1703 # They are silently turned off here because they are on by default.
1704 # We would generate unexpected warnings if we issued a warning.
1705 $rOpts->{'break-at-old-keyword-breakpoints'} = 0;
1706 $rOpts->{'break-at-old-logical-breakpoints'} = 0;
1707 $rOpts->{'break-at-old-ternary-breakpoints'} = 0;
1708 $rOpts->{'break-at-old-attribute-breakpoints'} = 0;
1711 %keep_break_before_type = ();
1712 initialize_keep_old_breakpoints( $rOpts->{'keep-old-breakpoints-before'},
1713 'kbb', \%keep_break_before_type );
1715 %keep_break_after_type = ();
1716 initialize_keep_old_breakpoints( $rOpts->{'keep-old-breakpoints-after'},
1717 'kba', \%keep_break_after_type );
1719 # Modify %keep_break_before and %keep_break_after to avoid conflicts
1720 # with %want_break_before; fixes b1436.
1721 # This became necessary after breaks for some tokens were converted
1722 # from hard to soft (see b1433).
1723 # We could do this for all tokens, but to minimize changes to existing
1724 # code we currently only do this for the soft break tokens.
1725 foreach my $key ( keys %keep_break_before_type ) {
1726 if ( defined( $want_break_before{$key} )
1727 && !$want_break_before{$key}
1728 && $is_soft_keep_break_type{$key} )
1730 $keep_break_after_type{$key} = $keep_break_before_type{$key};
1731 delete $keep_break_before_type{$key};
1734 foreach my $key ( keys %keep_break_after_type ) {
1735 if ( defined( $want_break_before{$key} )
1736 && $want_break_before{$key}
1737 && $is_soft_keep_break_type{$key} )
1739 $keep_break_before_type{$key} = $keep_break_after_type{$key};
1740 delete $keep_break_after_type{$key};
1744 $controlled_comma_style ||= $keep_break_before_type{','};
1745 $controlled_comma_style ||= $keep_break_after_type{','};
1747 initialize_global_option_vars();
1749 initialize_line_length_vars(); # after 'initialize_global_option_vars'
1751 initialize_trailing_comma_rules(); # after 'initialize_line_length_vars'
1753 initialize_weld_nested_exclusion_rules();
1755 initialize_weld_fat_comma_rules();
1757 %line_up_parentheses_control_hash = ();
1758 $line_up_parentheses_control_is_lxpl = 1;
1759 my $lpxl = $rOpts->{'line-up-parentheses-exclusion-list'};
1760 my $lpil = $rOpts->{'line-up-parentheses-inclusion-list'};
1761 if ( $lpxl && $lpil ) {
1763 You entered values for both -lpxl=s and -lpil=s; the -lpil list will be ignored
1767 $line_up_parentheses_control_is_lxpl = 1;
1768 initialize_line_up_parentheses_control_hash(
1769 $rOpts->{'line-up-parentheses-exclusion-list'}, 'lpxl' );
1772 $line_up_parentheses_control_is_lxpl = 0;
1773 initialize_line_up_parentheses_control_hash(
1774 $rOpts->{'line-up-parentheses-inclusion-list'}, 'lpil' );
1778 } ## end sub check_options
1780 use constant ALIGN_GREP_ALIASES => 0;
1782 sub initialize_grep_and_friends {
1784 # Initialize or re-initialize hashes with 'grep' and grep aliases. This
1785 # must be done after each set of options because new grep aliases may be
1788 # re-initialize the hashes ... this is critical!
1789 %is_sort_map_grep = ();
1791 my @q = qw(sort map grep);
1792 @is_sort_map_grep{@q} = (1) x scalar(@q);
1794 my $olbxl = $rOpts->{'one-line-block-exclusion-list'};
1795 my %is_olb_exclusion_word;
1796 if ( defined($olbxl) ) {
1797 my @list = split_words($olbxl);
1799 @is_olb_exclusion_word{@list} = (1) x scalar(@list);
1803 # Make the list of block types which may be re-formed into one line.
1804 # They will be modified with the grep-alias-list below and
1805 # by sub 'prepare_cuddled_block_types'.
1806 # Note that it is essential to always re-initialize the hash here:
1807 %want_one_line_block = ();
1808 if ( !$is_olb_exclusion_word{'*'} ) {
1809 foreach (qw(sort map grep eval)) {
1810 if ( !$is_olb_exclusion_word{$_} ) { $want_one_line_block{$_} = 1 }
1814 # Note that any 'grep-alias-list' string has been preprocessed to be a
1815 # trimmed, space-separated list.
1816 my $str = $rOpts->{'grep-alias-list'};
1817 my @grep_aliases = split /\s+/, $str;
1819 if (@grep_aliases) {
1821 @{is_sort_map_grep}{@grep_aliases} = (1) x scalar(@grep_aliases);
1823 if ( $want_one_line_block{'grep'} ) {
1824 @{want_one_line_block}{@grep_aliases} = (1) x scalar(@grep_aliases);
1828 ##@q = qw(sort map grep eval);
1829 %is_sort_map_grep_eval = %is_sort_map_grep;
1830 $is_sort_map_grep_eval{'eval'} = 1;
1832 ##@q = qw(sort map grep eval do);
1833 %is_sort_map_grep_eval_do = %is_sort_map_grep_eval;
1834 $is_sort_map_grep_eval_do{'do'} = 1;
1836 # These block types can take ci. This is used by the -xci option.
1837 # Note that the 'sub' in this list is an anonymous sub. To be more correct
1838 # we could remove sub and use ASUB pattern to also handle a
1839 # prototype/signature. But that would slow things down and would probably
1841 ##@q = qw( do sub eval sort map grep );
1842 %is_block_with_ci = %is_sort_map_grep_eval_do;
1843 $is_block_with_ci{'sub'} = 1;
1845 %is_keyword_returning_list = ();
1854 push @q, @grep_aliases;
1855 @is_keyword_returning_list{@q} = (1) x scalar(@q);
1857 # This code enables vertical alignment of grep aliases for testing. It has
1858 # not been found to be beneficial, so it is off by default. But it is
1859 # useful for precise testing of the grep alias coding.
1860 if (ALIGN_GREP_ALIASES) {
1872 $block_type_map{$_} = 'map' unless ( $_ eq 'map' );
1876 } ## end sub initialize_grep_and_friends
1878 sub initialize_weld_nested_exclusion_rules {
1879 %weld_nested_exclusion_rules = ();
1881 my $opt_name = 'weld-nested-exclusion-list';
1882 my $str = $rOpts->{$opt_name};
1883 return unless ($str);
1886 return unless ($str);
1888 # There are four container tokens.
1896 # We are parsing an exclusion list for nested welds. The list is a string
1897 # with spaces separating any number of items. Each item consists of three
1898 # pieces of information:
1899 # <optional position> <optional type> <type of container>
1900 # < ^ or . > < k or K > < ( [ { >
1902 # The last character is the required container type and must be one of:
1904 # [ = square bracket
1907 # An optional leading position indicator:
1908 # ^ means the leading token position in the weld
1909 # . means a secondary token position in the weld
1910 # no position indicator means all positions match
1912 # An optional alphanumeric character between the position and container
1913 # token selects to which the rule applies:
1915 # K = any non-keyword
1917 # F = not a function call
1918 # w = function or keyword
1919 # W = not a function or keyword
1920 # no letter means any preceding type matches
1923 # ^( - the weld must not start with a paren
1924 # .( - the second and later tokens may not be parens
1925 # ( - no parens in weld
1926 # ^K( - exclude a leading paren not preceded by a keyword
1927 # .k( - exclude a secondary paren preceded by a keyword
1928 # [ { - exclude all brackets and braces
1930 my @items = split /\s+/, $str;
1933 foreach my $item (@items) {
1934 my $item_save = $item;
1935 my $tok = chop($item);
1936 my $key = $token_keys{$tok};
1937 if ( !defined($key) ) {
1938 $msg1 .= " '$item_save'";
1941 if ( !defined( $weld_nested_exclusion_rules{$key} ) ) {
1942 $weld_nested_exclusion_rules{$key} = [];
1944 my $rflags = $weld_nested_exclusion_rules{$key};
1946 # A 'q' means do not weld quotes
1947 if ( $tok eq 'q' ) {
1956 if ( $item =~ /^([\^\.])?([kKfFwW])?$/ ) {
1958 $select = $2 if ($2);
1961 $msg1 .= " '$item_save'";
1967 if ( $pos eq '^' || $pos eq '*' ) {
1968 if ( defined( $rflags->[0] ) && $rflags->[0] ne $select ) {
1971 $rflags->[0] = $select;
1973 if ( $pos eq '.' || $pos eq '*' ) {
1974 if ( defined( $rflags->[1] ) && $rflags->[1] ne $select ) {
1977 $rflags->[1] = $select;
1979 if ($err) { $msg2 .= " '$item_save'"; }
1983 Unexpecting symbol(s) encountered in --$opt_name will be ignored:
1989 Multiple specifications were encountered in the --weld-nested-exclusion-list for:
1991 Only the last will be used.
1995 } ## end sub initialize_weld_nested_exclusion_rules
1997 sub initialize_weld_fat_comma_rules {
1999 # Initialize a hash controlling which opening token types can be
2000 # welded around a fat comma
2001 %weld_fat_comma_rules = ();
2003 # The -wfc flag turns on welding of '=>' after an opening paren
2004 if ( $rOpts->{'weld-fat-comma'} ) { $weld_fat_comma_rules{'('} = 1 }
2006 # This could be generalized in the future by introducing a parameter
2007 # -weld-fat-comma-after=str (-wfca=str), where str contains any of:
2009 # to indicate which opening parens may weld to a subsequent '=>'
2011 # The flag -wfc would then be equivalent to -wfca='('
2013 # This has not been done because it is not yet clear how useful
2014 # this generalization would be.
2016 } ## end sub initialize_weld_fat_comma_rules
2018 sub initialize_line_up_parentheses_control_hash {
2019 my ( $str, $opt_name ) = @_;
2020 return unless ($str);
2023 return unless ($str);
2025 # The format is space separated items, where each item must consist of a
2026 # string with a token type preceded by an optional text token and followed
2030 # = (flag1)(key)(flag2), where
2035 my @items = split /\s+/, $str;
2038 foreach my $item (@items) {
2039 my $item_save = $item;
2040 my ( $flag1, $key, $flag2 );
2041 if ( $item =~ /^([^\(\]\{]*)?([\(\{\[])(\d)?$/ ) {
2047 $msg1 .= " '$item_save'";
2051 if ( !defined($key) ) {
2052 $msg1 .= " '$item_save'";
2056 # Check for valid flag1
2057 if ( !defined($flag1) ) { $flag1 = '*' }
2058 elsif ( $flag1 !~ /^[kKfFwW\*]$/ ) {
2059 $msg1 .= " '$item_save'";
2063 # Check for valid flag2
2064 # 0 or blank: ignore container contents
2065 # 1 all containers with sublists match
2066 # 2 all containers with sublists, code blocks or ternary operators match
2067 # ... this could be extended in the future
2068 if ( !defined($flag2) ) { $flag2 = 0 }
2069 elsif ( $flag2 !~ /^[012]$/ ) {
2070 $msg1 .= " '$item_save'";
2074 if ( !defined( $line_up_parentheses_control_hash{$key} ) ) {
2075 $line_up_parentheses_control_hash{$key} = [ $flag1, $flag2 ];
2079 # check for multiple conflicting specifications
2080 my $rflags = $line_up_parentheses_control_hash{$key};
2082 if ( defined( $rflags->[0] ) && $rflags->[0] ne $flag1 ) {
2084 $rflags->[0] = $flag1;
2086 if ( defined( $rflags->[1] ) && $rflags->[1] ne $flag2 ) {
2088 $rflags->[1] = $flag2;
2090 $msg2 .= " '$item_save'" if ($err);
2095 Unexpecting symbol(s) encountered in --$opt_name will be ignored:
2101 Multiple specifications were encountered in the $opt_name at:
2103 Only the last will be used.
2107 # Speedup: we can turn off -lp if it is not actually used
2108 if ($line_up_parentheses_control_is_lxpl) {
2110 foreach my $key (qw# ( { [ #) {
2111 my $rflags = $line_up_parentheses_control_hash{$key};
2112 if ( defined($rflags) ) {
2113 my ( $flag1, $flag2 ) = @{$rflags};
2114 if ( $flag1 && $flag1 ne '*' ) { $all_off = 0; last }
2115 if ($flag2) { $all_off = 0; last }
2119 $rOpts->{'line-up-parentheses'} = EMPTY_STRING;
2124 } ## end sub initialize_line_up_parentheses_control_hash
2126 sub initialize_space_after_keyword {
2128 # default keywords for which space is introduced before an opening paren
2129 # (at present, including them messes up vertical alignment)
2130 my @sak = qw(my local our and or xor err eq ne if else elsif until
2131 unless while for foreach return switch case given when catch);
2132 %space_after_keyword = map { $_ => 1 } @sak;
2134 # first remove any or all of these if desired
2135 if ( my @q = split_words( $rOpts->{'nospace-after-keyword'} ) ) {
2137 # -nsak='*' selects all the above keywords
2138 if ( @q == 1 && $q[0] eq '*' ) { @q = keys(%space_after_keyword) }
2139 @space_after_keyword{@q} = (0) x scalar(@q);
2142 # then allow user to add to these defaults
2143 if ( my @q = split_words( $rOpts->{'space-after-keyword'} ) ) {
2144 @space_after_keyword{@q} = (1) x scalar(@q);
2148 } ## end sub initialize_space_after_keyword
2150 sub initialize_token_break_preferences {
2152 # implement user break preferences
2153 my $break_after = sub {
2155 foreach my $tok (@toks) {
2156 if ( $tok eq '?' ) { $tok = ':' } # patch to coordinate ?/:
2157 if ( $tok eq ',' ) { $controlled_comma_style = 1 }
2158 my $lbs = $left_bond_strength{$tok};
2159 my $rbs = $right_bond_strength{$tok};
2160 if ( defined($lbs) && defined($rbs) && $lbs < $rbs ) {
2161 ( $right_bond_strength{$tok}, $left_bond_strength{$tok} ) =
2168 my $break_before = sub {
2170 foreach my $tok (@toks) {
2171 if ( $tok eq ',' ) { $controlled_comma_style = 1 }
2172 my $lbs = $left_bond_strength{$tok};
2173 my $rbs = $right_bond_strength{$tok};
2174 if ( defined($lbs) && defined($rbs) && $rbs < $lbs ) {
2175 ( $right_bond_strength{$tok}, $left_bond_strength{$tok} ) =
2182 $break_after->(@all_operators) if ( $rOpts->{'break-after-all-operators'} );
2183 $break_before->(@all_operators)
2184 if ( $rOpts->{'break-before-all-operators'} );
2186 $break_after->( split_words( $rOpts->{'want-break-after'} ) );
2187 $break_before->( split_words( $rOpts->{'want-break-before'} ) );
2189 # make note if breaks are before certain key types
2190 %want_break_before = ();
2191 foreach my $tok ( @all_operators, ',' ) {
2192 $want_break_before{$tok} =
2193 $left_bond_strength{$tok} < $right_bond_strength{$tok};
2196 # Coordinate ?/: breaks, which must be similar
2197 # The small strength 0.01 which is added is 1% of the strength of one
2198 # indentation level and seems to work okay.
2199 if ( !$want_break_before{':'} ) {
2200 $want_break_before{'?'} = $want_break_before{':'};
2201 $right_bond_strength{'?'} = $right_bond_strength{':'} + 0.01;
2202 $left_bond_strength{'?'} = NO_BREAK;
2205 # Only make a hash entry for the next parameters if values are defined.
2206 # That allows a quick check to be made later.
2207 %break_before_container_types = ();
2208 for ( $rOpts->{'break-before-hash-brace'} ) {
2209 $break_before_container_types{'{'} = $_ if $_ && $_ > 0;
2211 for ( $rOpts->{'break-before-square-bracket'} ) {
2212 $break_before_container_types{'['} = $_ if $_ && $_ > 0;
2214 for ( $rOpts->{'break-before-paren'} ) {
2215 $break_before_container_types{'('} = $_ if $_ && $_ > 0;
2218 } ## end sub initialize_token_break_preferences
2220 use constant DEBUG_KB => 0;
2222 sub initialize_keep_old_breakpoints {
2223 my ( $str, $short_name, $rkeep_break_hash ) = @_;
2227 my @list = split_words($str);
2228 if ( DEBUG_KB && @list ) {
2229 local $LIST_SEPARATOR = SPACE;
2231 DEBUG_KB entering for '$short_name' with str=$str\n";
2236 # Ignore kbb='(' and '[' and '{': can cause unstable math formatting
2237 # (issues b1346, b1347, b1348) and likewise ignore kba=')' and ']' and '}'
2238 # Also always ignore ? and : (b1440 and b1433-b1439)
2239 if ( $short_name eq 'kbb' ) {
2240 @list = grep { !m/[\(\[\{\?\:]/ } @list;
2242 elsif ( $short_name eq 'kba' ) {
2243 @list = grep { !m/[\)\]\}\?\:]/ } @list;
2246 # pull out any any leading container code, like f( or *{
2247 # For example: 'f(' becomes flags hash entry '(' => 'f'
2248 foreach my $item (@list) {
2249 if ( $item =~ /^( [ \w\* ] )( [ \{\(\[\}\)\] ] )$/x ) {
2256 foreach my $type (@list) {
2257 if ( !Perl::Tidy::Tokenizer::is_valid_token_type($type) ) {
2258 push @unknown_types, $type;
2262 if (@unknown_types) {
2263 my $num = @unknown_types;
2264 local $LIST_SEPARATOR = SPACE;
2266 $num unrecognized token types were input with --$short_name :
2271 @{$rkeep_break_hash}{@list} = (1) x scalar(@list);
2273 foreach my $key ( keys %flags ) {
2274 my $flag = $flags{$key};
2276 if ( length($flag) != 1 ) {
2278 Multiple entries given for '$key' in '$short_name'
2281 elsif ( ( $key eq '(' || $key eq ')' ) && $flag !~ /^[kKfFwW\*]$/ ) {
2283 Unknown flag '$flag' given for '$key' in '$short_name'
2286 elsif ( ( $key eq '}' || $key eq '}' ) && $flag !~ /^[bB\*]$/ ) {
2288 Unknown flag '$flag' given for '$key' in '$short_name'
2292 $rkeep_break_hash->{$key} = $flag;
2295 if ( DEBUG_KB && @list ) {
2297 local $LIST_SEPARATOR = SPACE;
2300 DEBUG_KB -$short_name flag: $str
2309 } ## end sub initialize_keep_old_breakpoints
2311 sub initialize_global_option_vars {
2313 #------------------------------------------------------------
2314 # Make global vars for frequently used options for efficiency
2315 #------------------------------------------------------------
2317 $rOpts_add_newlines = $rOpts->{'add-newlines'};
2318 $rOpts_add_trailing_commas = $rOpts->{'add-trailing-commas'};
2319 $rOpts_add_whitespace = $rOpts->{'add-whitespace'};
2320 $rOpts_blank_lines_after_opening_block =
2321 $rOpts->{'blank-lines-after-opening-block'};
2322 $rOpts_block_brace_tightness = $rOpts->{'block-brace-tightness'};
2323 $rOpts_block_brace_vertical_tightness =
2324 $rOpts->{'block-brace-vertical-tightness'};
2325 $rOpts_brace_follower_vertical_tightness =
2326 $rOpts->{'brace-follower-vertical-tightness'};
2327 $rOpts_break_after_labels = $rOpts->{'break-after-labels'};
2328 $rOpts_break_at_old_attribute_breakpoints =
2329 $rOpts->{'break-at-old-attribute-breakpoints'};
2330 $rOpts_break_at_old_comma_breakpoints =
2331 $rOpts->{'break-at-old-comma-breakpoints'};
2332 $rOpts_break_at_old_keyword_breakpoints =
2333 $rOpts->{'break-at-old-keyword-breakpoints'};
2334 $rOpts_break_at_old_logical_breakpoints =
2335 $rOpts->{'break-at-old-logical-breakpoints'};
2336 $rOpts_break_at_old_semicolon_breakpoints =
2337 $rOpts->{'break-at-old-semicolon-breakpoints'};
2338 $rOpts_break_at_old_ternary_breakpoints =
2339 $rOpts->{'break-at-old-ternary-breakpoints'};
2340 $rOpts_break_open_compact_parens = $rOpts->{'break-open-compact-parens'};
2341 $rOpts_closing_side_comments = $rOpts->{'closing-side-comments'};
2342 $rOpts_closing_side_comment_else_flag =
2343 $rOpts->{'closing-side-comment-else-flag'};
2344 $rOpts_closing_side_comment_maximum_text =
2345 $rOpts->{'closing-side-comment-maximum-text'};
2346 $rOpts_comma_arrow_breakpoints = $rOpts->{'comma-arrow-breakpoints'};
2347 $rOpts_continuation_indentation = $rOpts->{'continuation-indentation'};
2348 $rOpts_cuddled_paren_brace = $rOpts->{'cuddled-paren-brace'};
2349 $rOpts_delete_closing_side_comments =
2350 $rOpts->{'delete-closing-side-comments'};
2351 $rOpts_delete_old_whitespace = $rOpts->{'delete-old-whitespace'};
2352 $rOpts_extended_continuation_indentation =
2353 $rOpts->{'extended-continuation-indentation'};
2354 $rOpts_delete_side_comments = $rOpts->{'delete-side-comments'};
2355 $rOpts_delete_trailing_commas = $rOpts->{'delete-trailing-commas'};
2356 $rOpts_delete_weld_interfering_commas =
2357 $rOpts->{'delete-weld-interfering-commas'};
2358 $rOpts_format_skipping = $rOpts->{'format-skipping'};
2359 $rOpts_freeze_whitespace = $rOpts->{'freeze-whitespace'};
2360 $rOpts_function_paren_vertical_alignment =
2361 $rOpts->{'function-paren-vertical-alignment'};
2362 $rOpts_fuzzy_line_length = $rOpts->{'fuzzy-line-length'};
2363 $rOpts_ignore_old_breakpoints = $rOpts->{'ignore-old-breakpoints'};
2364 $rOpts_ignore_side_comment_lengths =
2365 $rOpts->{'ignore-side-comment-lengths'};
2366 $rOpts_indent_closing_brace = $rOpts->{'indent-closing-brace'};
2367 $rOpts_indent_columns = $rOpts->{'indent-columns'};
2368 $rOpts_indent_only = $rOpts->{'indent-only'};
2369 $rOpts_keep_interior_semicolons = $rOpts->{'keep-interior-semicolons'};
2370 $rOpts_line_up_parentheses = $rOpts->{'line-up-parentheses'};
2371 $rOpts_extended_line_up_parentheses =
2372 $rOpts->{'extended-line-up-parentheses'};
2373 $rOpts_logical_padding = $rOpts->{'logical-padding'};
2374 $rOpts_maximum_consecutive_blank_lines =
2375 $rOpts->{'maximum-consecutive-blank-lines'};
2376 $rOpts_maximum_fields_per_table = $rOpts->{'maximum-fields-per-table'};
2377 $rOpts_maximum_line_length = $rOpts->{'maximum-line-length'};
2378 $rOpts_one_line_block_semicolons = $rOpts->{'one-line-block-semicolons'};
2379 $rOpts_opening_brace_always_on_right =
2380 $rOpts->{'opening-brace-always-on-right'};
2381 $rOpts_outdent_keywords = $rOpts->{'outdent-keywords'};
2382 $rOpts_outdent_labels = $rOpts->{'outdent-labels'};
2383 $rOpts_outdent_long_comments = $rOpts->{'outdent-long-comments'};
2384 $rOpts_outdent_long_quotes = $rOpts->{'outdent-long-quotes'};
2385 $rOpts_outdent_static_block_comments =
2386 $rOpts->{'outdent-static-block-comments'};
2387 $rOpts_recombine = $rOpts->{'recombine'};
2388 $rOpts_short_concatenation_item_length =
2389 $rOpts->{'short-concatenation-item-length'};
2390 $rOpts_space_prototype_paren = $rOpts->{'space-prototype-paren'};
2391 $rOpts_stack_closing_block_brace = $rOpts->{'stack-closing-block-brace'};
2392 $rOpts_static_block_comments = $rOpts->{'static-block-comments'};
2393 $rOpts_tee_block_comments = $rOpts->{'tee-block-comments'};
2394 $rOpts_tee_pod = $rOpts->{'tee-pod'};
2395 $rOpts_tee_side_comments = $rOpts->{'tee-side-comments'};
2396 $rOpts_valign_code = $rOpts->{'valign-code'};
2397 $rOpts_valign_side_comments = $rOpts->{'valign-side-comments'};
2398 $rOpts_variable_maximum_line_length =
2399 $rOpts->{'variable-maximum-line-length'};
2401 # Note that both opening and closing tokens can access the opening
2402 # and closing flags of their container types.
2403 %opening_vertical_tightness = (
2404 '(' => $rOpts->{'paren-vertical-tightness'},
2405 '{' => $rOpts->{'brace-vertical-tightness'},
2406 '[' => $rOpts->{'square-bracket-vertical-tightness'},
2407 ')' => $rOpts->{'paren-vertical-tightness'},
2408 '}' => $rOpts->{'brace-vertical-tightness'},
2409 ']' => $rOpts->{'square-bracket-vertical-tightness'},
2412 %closing_vertical_tightness = (
2413 '(' => $rOpts->{'paren-vertical-tightness-closing'},
2414 '{' => $rOpts->{'brace-vertical-tightness-closing'},
2415 '[' => $rOpts->{'square-bracket-vertical-tightness-closing'},
2416 ')' => $rOpts->{'paren-vertical-tightness-closing'},
2417 '}' => $rOpts->{'brace-vertical-tightness-closing'},
2418 ']' => $rOpts->{'square-bracket-vertical-tightness-closing'},
2421 # assume flag for '>' same as ')' for closing qw quotes
2422 %closing_token_indentation = (
2423 ')' => $rOpts->{'closing-paren-indentation'},
2424 '}' => $rOpts->{'closing-brace-indentation'},
2425 ']' => $rOpts->{'closing-square-bracket-indentation'},
2426 '>' => $rOpts->{'closing-paren-indentation'},
2429 # flag indicating if any closing tokens are indented
2430 $some_closing_token_indentation =
2431 $rOpts->{'closing-paren-indentation'}
2432 || $rOpts->{'closing-brace-indentation'}
2433 || $rOpts->{'closing-square-bracket-indentation'}
2434 || $rOpts->{'indent-closing-brace'};
2436 %opening_token_right = (
2437 '(' => $rOpts->{'opening-paren-right'},
2438 '{' => $rOpts->{'opening-hash-brace-right'},
2439 '[' => $rOpts->{'opening-square-bracket-right'},
2442 %stack_opening_token = (
2443 '(' => $rOpts->{'stack-opening-paren'},
2444 '{' => $rOpts->{'stack-opening-hash-brace'},
2445 '[' => $rOpts->{'stack-opening-square-bracket'},
2448 %stack_closing_token = (
2449 ')' => $rOpts->{'stack-closing-paren'},
2450 '}' => $rOpts->{'stack-closing-hash-brace'},
2451 ']' => $rOpts->{'stack-closing-square-bracket'},
2454 } ## end sub initialize_global_option_vars
2456 sub initialize_line_length_vars {
2458 # Create a table of maximum line length vs level for later efficient use.
2459 # We will make the tables very long to be sure it will not be exceeded.
2460 # But we have to choose a fixed length. A check will be made at the start
2461 # of sub 'finish_formatting' to be sure it is not exceeded. Note, some of
2462 # my standard test problems have indentation levels of about 150, so this
2463 # should be fairly large. If the choice of a maximum level ever becomes
2464 # an issue then these table values could be returned in a sub with a simple
2465 # memoization scheme.
2467 # Also create a table of the maximum spaces available for text due to the
2468 # level only. If a line has continuation indentation, then that space must
2469 # be subtracted from the table value. This table is used for preliminary
2470 # estimates in welding, extended_ci, BBX, and marking short blocks.
2471 use constant LEVEL_TABLE_MAX => 1000;
2474 foreach my $level ( 0 .. LEVEL_TABLE_MAX ) {
2475 my $indent = $level * $rOpts_indent_columns;
2476 $maximum_line_length_at_level[$level] = $rOpts_maximum_line_length;
2477 $maximum_text_length_at_level[$level] =
2478 $rOpts_maximum_line_length - $indent;
2481 # Correct the maximum_text_length table if the -wc=n flag is used
2482 $rOpts_whitespace_cycle = $rOpts->{'whitespace-cycle'};
2483 if ($rOpts_whitespace_cycle) {
2484 if ( $rOpts_whitespace_cycle > 0 ) {
2485 foreach my $level ( 0 .. LEVEL_TABLE_MAX ) {
2486 my $level_mod = $level % $rOpts_whitespace_cycle;
2487 my $indent = $level_mod * $rOpts_indent_columns;
2488 $maximum_text_length_at_level[$level] =
2489 $rOpts_maximum_line_length - $indent;
2493 $rOpts_whitespace_cycle = $rOpts->{'whitespace-cycle'} = 0;
2497 # Correct the tables if the -vmll flag is used. These values override the
2499 if ($rOpts_variable_maximum_line_length) {
2500 foreach my $level ( 0 .. LEVEL_TABLE_MAX ) {
2501 $maximum_text_length_at_level[$level] = $rOpts_maximum_line_length;
2502 $maximum_line_length_at_level[$level] =
2503 $rOpts_maximum_line_length + $level * $rOpts_indent_columns;
2507 # Define two measures of indentation level, alpha and beta, at which some
2508 # formatting features come under stress and need to start shutting down.
2509 # Some combination of the two will be used to shut down different
2510 # formatting features.
2511 # Put a reasonable upper limit on stress level (say 100) in case the
2512 # whitespace-cycle variable is used.
2513 my $stress_level_limit = min( 100, LEVEL_TABLE_MAX );
2515 # Find stress_level_alpha, targeted at very short maximum line lengths.
2516 $stress_level_alpha = $stress_level_limit + 1;
2517 foreach my $level_test ( 0 .. $stress_level_limit ) {
2518 my $max_len = $maximum_text_length_at_level[ $level_test + 1 ];
2519 my $excess_inside_space =
2521 $rOpts_continuation_indentation -
2522 $rOpts_indent_columns - 8;
2523 if ( $excess_inside_space <= 0 ) {
2524 $stress_level_alpha = $level_test;
2529 # Find stress level beta, a stress level targeted at formatting
2530 # at deep levels near the maximum line length. We start increasing
2531 # from zero and stop at the first level which shows no more space.
2533 # 'const' is a fixed number of spaces for a typical variable.
2534 # Cases b1197-b1204 work ok with const=12 but not with const=8
2536 my $denom = max( 1, $rOpts_indent_columns );
2537 $stress_level_beta = 0;
2538 foreach my $level ( 0 .. $stress_level_limit ) {
2539 my $remaining_cycles = max(
2542 $maximum_text_length_at_level[$level] -
2543 $rOpts_continuation_indentation - $const
2546 last if ( $remaining_cycles <= 3 ); # 2 does not work
2547 $stress_level_beta = $level;
2550 # This is a combined level which works well for turning off formatting
2551 # features in most cases:
2552 $high_stress_level = min( $stress_level_alpha, $stress_level_beta + 2 );
2555 } ## end sub initialize_line_length_vars
2557 sub initialize_trailing_comma_rules {
2559 # Setup control hash for trailing commas
2561 # -wtc=s defines desired trailing comma policy:
2564 # [ both -atc and -dtc ignored ]
2566 # [requires -dtc; -atc ignored]
2568 # [requires -atc; -dtc ignored]
2569 # =m : multiline lists require trailing comma
2570 # if -atc set => will add missing multiline trailing commas
2571 # if -dtc set => will delete trailing single line commas
2572 # =b or 'bare' (multiline) lists require trailing comma
2573 # if -atc set => will add missing bare trailing commas
2574 # if -dtc set => will delete non-bare trailing commas
2575 # =h or 'hash': single column stable bare lists require trailing comma
2576 # if -atc set will add these
2577 # if -dtc set will delete other trailing commas
2579 #-------------------------------------------------------------------
2580 # This routine must be called after the alpha and beta stress levels
2581 # have been defined in sub 'initialize_line_length_vars'.
2582 #-------------------------------------------------------------------
2584 %trailing_comma_rules = ();
2586 my $rvalid_flags = [qw(0 1 * m b h i)];
2588 my $option = $rOpts->{'want-trailing-commas'};
2591 $option =~ s/^\s+//;
2592 $option =~ s/\s+$//;
2595 # We need to use length() here because '0' is a possible option
2596 if ( defined($option) && length($option) ) {
2599 my @q = @{$rvalid_flags};
2601 @is_valid_flag{@q} = (1) x scalar(@q);
2603 # handle single character control, such as -wtc='b'
2604 if ( length($option) == 1 ) {
2605 foreach (qw< ) ] } >) {
2606 $rule_hash{$_} = [ $option, EMPTY_STRING ];
2610 # handle multi-character control(s), such as -wtc='[m' or -wtc='k(m'
2612 my @parts = split /\s+/, $option;
2613 foreach my $part (@parts) {
2614 if ( length($part) >= 2 && length($part) <= 3 ) {
2615 my $val = substr( $part, -1, 1 );
2616 my $key_o = substr( $part, -2, 1 );
2617 if ( $is_opening_token{$key_o} ) {
2618 my $paren_flag = EMPTY_STRING;
2619 if ( length($part) == 3 ) {
2620 $paren_flag = substr( $part, 0, 1 );
2622 my $key = $matching_token{$key_o};
2623 $rule_hash{$key} = [ $val, $paren_flag ];
2626 $error_message .= "Unrecognized term: '$part'\n";
2630 $error_message .= "Unrecognized term: '$part'\n";
2635 # check for valid control characters
2636 if ( !$error_message ) {
2637 foreach my $key ( keys %rule_hash ) {
2638 my $item = $rule_hash{$key};
2639 my ( $val, $paren_flag ) = @{$item};
2640 if ( $val && !$is_valid_flag{$val} ) {
2641 my $valid_str = join( SPACE, @{$rvalid_flags} );
2643 "Unexpected value '$val'; must be one of: $valid_str\n";
2647 if ( $paren_flag !~ /^[kKfFwW]$/ ) {
2649 "Unexpected paren flag '$paren_flag'; must be one of: k K f F w W\n";
2652 if ( $key ne ')' ) {
2654 "paren flag '$paren_flag' is only allowed before a '('\n";
2661 if ($error_message) {
2663 Error parsing --want-trailing-commas='$option':
2668 # Set the control hash if no errors
2670 %trailing_comma_rules = %rule_hash;
2674 # Both adding and deleting commas can lead to instability in extreme cases
2675 if ( $rOpts_add_trailing_commas && $rOpts_delete_trailing_commas ) {
2677 # If the possible instability is significant, then we can turn off
2678 # -dtc as a defensive measure to prevent it.
2680 # We must turn off -dtc for very small values of --whitespace-cycle
2681 # to avoid instability. A minimum value of -wc=3 fixes b1393, but a
2682 # value of 4 is used here for safety. This parameter is seldom used,
2683 # and much larger than this when used, so the cutoff value is not
2685 if ( $rOpts_whitespace_cycle && $rOpts_whitespace_cycle <= 4 ) {
2686 $rOpts_delete_trailing_commas = 0;
2691 } ## end sub initialize_trailing_comma_rules
2693 sub initialize_whitespace_hashes {
2695 # This is called once before formatting begins to initialize these global
2696 # hashes, which control the use of whitespace around tokens:
2701 # %space_after_keyword
2703 # Many token types are identical to the tokens themselves.
2704 # See the tokenizer for a complete list. Here are some special types:
2706 # f = semicolon in for statement
2709 # Note that :: is excluded since it should be contained in an identifier
2710 # Note that '->' is excluded because it never gets space
2711 # parentheses and brackets are excluded since they are handled specially
2712 # curly braces are included but may be overridden by logic, such as
2715 # NEW_TOKENS: create a whitespace rule here. This can be as
2716 # simple as adding your new letter to @spaces_both_sides, for
2719 my @spaces_both_sides = qw#
2720 + - * / % ? = . : x < > | & ^ .. << >> ** && .. || // => += -=
2721 .= %= x= &= |= ^= *= <> <= >= == =~ !~ /= != ... <<= >>= ~~ !~~
2722 &&= ||= //= <=> A k f w F n C Y U G v
2725 my @spaces_left_side = qw<
2726 t ! ~ m p { \ h pp mm Z j
2728 push( @spaces_left_side, '#' ); # avoids warning message
2730 my @spaces_right_side = qw<
2731 ; } ) ] R J ++ -- **=
2733 push( @spaces_right_side, ',' ); # avoids warning message
2735 %want_left_space = ();
2736 %want_right_space = ();
2737 %binary_ws_rules = ();
2739 # Note that we setting defaults here. Later in processing
2740 # the values of %want_left_space and %want_right_space
2741 # may be overridden by any user settings specified by the
2742 # -wls and -wrs parameters. However the binary_whitespace_rules
2743 # are hardwired and have priority.
2744 @want_left_space{@spaces_both_sides} =
2745 (1) x scalar(@spaces_both_sides);
2746 @want_right_space{@spaces_both_sides} =
2747 (1) x scalar(@spaces_both_sides);
2748 @want_left_space{@spaces_left_side} =
2749 (1) x scalar(@spaces_left_side);
2750 @want_right_space{@spaces_left_side} =
2751 (-1) x scalar(@spaces_left_side);
2752 @want_left_space{@spaces_right_side} =
2753 (-1) x scalar(@spaces_right_side);
2754 @want_right_space{@spaces_right_side} =
2755 (1) x scalar(@spaces_right_side);
2756 $want_left_space{'->'} = WS_NO;
2757 $want_right_space{'->'} = WS_NO;
2758 $want_left_space{'**'} = WS_NO;
2759 $want_right_space{'**'} = WS_NO;
2760 $want_right_space{'CORE::'} = WS_NO;
2762 # These binary_ws_rules are hardwired and have priority over the above
2763 # settings. It would be nice to allow adjustment by the user,
2764 # but it would be complicated to specify.
2766 # hash type information must stay tightly bound
2768 $binary_ws_rules{'i'}{'L'} = WS_NO;
2769 $binary_ws_rules{'i'}{'{'} = WS_YES;
2770 $binary_ws_rules{'k'}{'{'} = WS_YES;
2771 $binary_ws_rules{'U'}{'{'} = WS_YES;
2772 $binary_ws_rules{'i'}{'['} = WS_NO;
2773 $binary_ws_rules{'R'}{'L'} = WS_NO;
2774 $binary_ws_rules{'R'}{'{'} = WS_NO;
2775 $binary_ws_rules{'t'}{'L'} = WS_NO;
2776 $binary_ws_rules{'t'}{'{'} = WS_NO;
2777 $binary_ws_rules{'t'}{'='} = WS_OPTIONAL; # for signatures; fixes b1123
2778 $binary_ws_rules{'}'}{'L'} = WS_NO;
2779 $binary_ws_rules{'}'}{'{'} = WS_OPTIONAL; # RT#129850; was WS_NO
2780 $binary_ws_rules{'$'}{'L'} = WS_NO;
2781 $binary_ws_rules{'$'}{'{'} = WS_NO;
2782 $binary_ws_rules{'@'}{'L'} = WS_NO;
2783 $binary_ws_rules{'@'}{'{'} = WS_NO;
2784 $binary_ws_rules{'='}{'L'} = WS_YES;
2785 $binary_ws_rules{'J'}{'J'} = WS_YES;
2787 # the following includes ') {'
2788 # as in : if ( xxx ) { yyy }
2789 $binary_ws_rules{']'}{'L'} = WS_NO;
2790 $binary_ws_rules{']'}{'{'} = WS_NO;
2791 $binary_ws_rules{')'}{'{'} = WS_YES;
2792 $binary_ws_rules{')'}{'['} = WS_NO;
2793 $binary_ws_rules{']'}{'['} = WS_NO;
2794 $binary_ws_rules{']'}{'{'} = WS_NO;
2795 $binary_ws_rules{'}'}{'['} = WS_NO;
2796 $binary_ws_rules{'R'}{'['} = WS_NO;
2798 $binary_ws_rules{']'}{'++'} = WS_NO;
2799 $binary_ws_rules{']'}{'--'} = WS_NO;
2800 $binary_ws_rules{')'}{'++'} = WS_NO;
2801 $binary_ws_rules{')'}{'--'} = WS_NO;
2803 $binary_ws_rules{'R'}{'++'} = WS_NO;
2804 $binary_ws_rules{'R'}{'--'} = WS_NO;
2806 $binary_ws_rules{'i'}{'Q'} = WS_YES;
2807 $binary_ws_rules{'n'}{'('} = WS_YES; # occurs in 'use package n ()'
2809 $binary_ws_rules{'i'}{'('} = WS_NO;
2811 $binary_ws_rules{'w'}{'('} = WS_NO;
2812 $binary_ws_rules{'w'}{'{'} = WS_YES;
2815 } ## end sub initialize_whitespace_hashes
2817 { #<<< begin closure set_whitespace_flags
2819 my %is_special_ws_type;
2825 # The following hash is used to skip over needless if tests.
2826 # Be sure to update it when adding new checks in its block.
2827 my @q = qw(k w C m - Q);
2829 @is_special_ws_type{@q} = (1) x scalar(@q);
2831 # These hashes replace slower regex tests
2833 @is_wCUG{@q} = (1) x scalar(@q);
2836 @is_wi{@q} = (1) x scalar(@q);
2839 use constant DEBUG_WHITE => 0;
2841 # Hashes to set spaces around container tokens according to their
2842 # sequence numbers. These are set as keywords are examined.
2843 # They are controlled by the -kpit and -kpitl flags.
2844 my %opening_container_inside_ws;
2845 my %closing_container_inside_ws;
2847 sub set_whitespace_flags {
2849 # This routine is called once per file to set whitespace flags for that
2850 # file. This routine examines each pair of nonblank tokens and sets a flag
2851 # indicating if white space is needed.
2853 # $rwhitespace_flags->[$j] is a flag indicating whether a white space
2854 # BEFORE token $j is needed, with the following values:
2856 # WS_NO = -1 do not want a space BEFORE token $j
2857 # WS_OPTIONAL= 0 optional space or $j is a whitespace
2858 # WS_YES = 1 want a space BEFORE token $j
2863 my $j_tight_closing_paren = -1;
2864 my $rLL = $self->[_rLL_];
2865 my $jmax = @{$rLL} - 1;
2867 %opening_container_inside_ws = ();
2868 %closing_container_inside_ws = ();
2870 my $rblock_type_of_seqno = $self->[_rblock_type_of_seqno_];
2872 my $rOpts_space_keyword_paren = $rOpts->{'space-keyword-paren'};
2873 my $rOpts_space_backslash_quote = $rOpts->{'space-backslash-quote'};
2874 my $rOpts_space_function_paren = $rOpts->{'space-function-paren'};
2876 my $rwhitespace_flags = [];
2877 my $ris_function_call_paren = {};
2879 return $rwhitespace_flags if ( $jmax < 0 );
2881 my %is_for_foreach = ( 'for' => 1, 'foreach' => 1 );
2883 my $last_token = SPACE;
2884 my $last_type = 'b';
2886 my $rtokh_last = [ @{ $rLL->[0] } ];
2887 $rtokh_last->[_TOKEN_] = $last_token;
2888 $rtokh_last->[_TYPE_] = $last_type;
2889 $rtokh_last->[_TYPE_SEQUENCE_] = EMPTY_STRING;
2890 $rtokh_last->[_LINE_INDEX_] = 0;
2892 my $rtokh_last_last = $rtokh_last;
2894 my ( $ws_1, $ws_2, $ws_3, $ws_4 );
2896 # main loop over all tokens to define the whitespace flags
2897 my $last_type_is_opening;
2898 my ( $token, $type );
2900 foreach my $rtokh ( @{$rLL} ) {
2904 $type = $rtokh->[_TYPE_];
2905 if ( $type eq 'b' ) {
2906 $rwhitespace_flags->[$j] = WS_OPTIONAL;
2910 $token = $rtokh->[_TOKEN_];
2914 #---------------------------------------------------------------
2915 # Whitespace Rules Section 1:
2916 # Handle space on the inside of opening braces.
2917 #---------------------------------------------------------------
2920 if ($last_type_is_opening) {
2922 $last_type_is_opening = 0;
2924 my $seqno = $rtokh->[_TYPE_SEQUENCE_];
2925 my $block_type = $rblock_type_of_seqno->{$seqno};
2926 my $last_seqno = $rtokh_last->[_TYPE_SEQUENCE_];
2927 my $last_block_type = $rblock_type_of_seqno->{$last_seqno};
2929 $j_tight_closing_paren = -1;
2931 # let us keep empty matched braces together: () {} []
2933 if ( $token eq $matching_token{$last_token} ) {
2943 # we're considering the right of an opening brace
2944 # tightness = 0 means always pad inside with space
2945 # tightness = 1 means pad inside if "complex"
2946 # tightness = 2 means never pad inside with space
2949 if ( $last_type eq '{'
2950 && $last_token eq '{'
2951 && $last_block_type )
2953 $tightness = $rOpts_block_brace_tightness;
2955 else { $tightness = $tightness{$last_token} }
2957 #=============================================================
2958 # Patch for test problem <<snippets/fabrice_bug.in>>
2959 # We must always avoid spaces around a bare word beginning
2961 # my $before = ${^PREMATCH};
2962 # Because all of the following cause an error in perl:
2963 # my $before = ${ ^PREMATCH };
2964 # my $before = ${ ^PREMATCH};
2965 # my $before = ${^PREMATCH };
2966 # So if brace tightness flag is -bt=0 we must temporarily reset
2967 # to bt=1. Note that here we must set tightness=1 and not 2 so
2968 # that the closing space is also avoided
2969 # (via the $j_tight_closing_paren flag in coding)
2970 if ( $type eq 'w' && $token =~ /^\^/ ) { $tightness = 1 }
2972 #=============================================================
2974 if ( $tightness <= 0 ) {
2977 elsif ( $tightness > 1 ) {
2982 # find the index of the closing token
2984 $self->[_K_closing_container_]->{$last_seqno};
2986 # If the closing token is less than five characters ahead
2987 # we must take a closer look
2988 if ( defined($j_closing)
2989 && $j_closing - $j < 5
2990 && $rLL->[$j_closing]->[_TYPE_SEQUENCE_] eq
2994 ws_in_container( $j, $j_closing, $rLL, $type, $token,
2996 if ( $ws == WS_NO ) {
2997 $j_tight_closing_paren = $j_closing;
3006 # check for special cases which override the above rules
3007 if ( %opening_container_inside_ws && $last_seqno ) {
3008 my $ws_override = $opening_container_inside_ws{$last_seqno};
3009 if ($ws_override) { $ws = $ws_override }
3012 $ws_4 = $ws_3 = $ws_2 = $ws_1 = $ws
3015 } ## end setting space flag inside opening tokens
3017 #---------------------------------------------------------------
3018 # Whitespace Rules Section 2:
3019 # Special checks for certain types ...
3020 #---------------------------------------------------------------
3021 # The hash '%is_special_ws_type' significantly speeds up this routine,
3022 # but be sure to update it if a new check is added.
3023 # Currently has types: qw(k w C m - Q #)
3024 if ( $is_special_ws_type{$type} ) {
3026 if ( $type eq 'k' ) {
3028 # Keywords 'for', 'foreach' are special cases for -kpit since
3029 # the opening paren does not always immediately follow the
3030 # keyword. So we have to search forward for the paren in this
3031 # case. I have limited the search to 10 tokens ahead, just in
3032 # case somebody has a big file and no opening paren. This
3033 # should be enough for all normal code. Added the level check
3035 if ( $is_for_foreach{$token}
3036 && %keyword_paren_inner_tightness
3037 && defined( $keyword_paren_inner_tightness{$token} )
3040 my $level = $rLL->[$j]->[_LEVEL_];
3042 ## NOTE: we might use the KNEXT variable to avoid this loop
3043 ## but profiling shows that little would be saved
3044 foreach my $inc ( 1 .. 9 ) {
3046 last if ( $jp > $jmax );
3047 last if ( $rLL->[$jp]->[_LEVEL_] != $level ); # b1236
3048 next unless ( $rLL->[$jp]->[_TOKEN_] eq '(' );
3049 my $seqno_p = $rLL->[$jp]->[_TYPE_SEQUENCE_];
3050 set_container_ws_by_keyword( $token, $seqno_p );
3057 elsif ( $type eq '#' ) {
3059 # newline before block comment ($j==0), and
3060 # space before side comment ($j>0), so ..
3063 #---------------------------------
3064 # Nothing more to do for a comment
3065 #---------------------------------
3066 $rwhitespace_flags->[$j] = $ws;
3070 # retain any space between '-' and bare word
3071 elsif ( $type eq 'w' || $type eq 'C' ) {
3072 $ws = WS_OPTIONAL if $last_type eq '-';
3075 # retain any space between '-' and bare word; for example
3076 # avoid space between 'USER' and '-' here: <<snippets/space2.in>>
3077 # $myhash{USER-NAME}='steve';
3078 elsif ( $type eq 'm' || $type eq '-' ) {
3079 $ws = WS_OPTIONAL if ( $last_type eq 'w' );
3082 # space_backslash_quote; RT #123774 <<snippets/rt123774.in>>
3083 # allow a space between a backslash and single or double quote
3084 # to avoid fooling html formatters
3085 elsif ( $last_type eq '\\' && $type eq 'Q' && $token =~ /^[\"\']/ )
3087 if ($rOpts_space_backslash_quote) {
3088 if ( $rOpts_space_backslash_quote == 1 ) {
3091 elsif ( $rOpts_space_backslash_quote == 2 ) { $ws = WS_YES }
3092 else { } # shouldnt happen
3098 } ## end elsif ( $is_special_ws_type{$type} ...
3100 #---------------------------------------------------------------
3101 # Whitespace Rules Section 3:
3102 # Handle space on inside of closing brace pairs.
3103 #---------------------------------------------------------------
3106 elsif ( $is_closing_type{$type} ) {
3108 my $seqno = $rtokh->[_TYPE_SEQUENCE_];
3109 if ( $j == $j_tight_closing_paren ) {
3111 $j_tight_closing_paren = -1;
3116 if ( !defined($ws) ) {
3119 my $block_type = $rblock_type_of_seqno->{$seqno};
3120 if ( $type eq '}' && $token eq '}' && $block_type ) {
3121 $tightness = $rOpts_block_brace_tightness;
3123 else { $tightness = $tightness{$token} }
3125 $ws = ( $tightness > 1 ) ? WS_NO : WS_YES;
3129 # check for special cases which override the above rules
3130 if ( %closing_container_inside_ws && $seqno ) {
3131 my $ws_override = $closing_container_inside_ws{$seqno};
3132 if ($ws_override) { $ws = $ws_override }
3135 $ws_4 = $ws_3 = $ws_2 = $ws
3137 } ## end setting space flag inside closing tokens
3139 #---------------------------------------------------------------
3140 # Whitespace Rules Section 4:
3141 #---------------------------------------------------------------
3143 elsif ( $is_opening_type{$type} ) {
3145 $last_type_is_opening = 1;
3147 if ( $token eq '(' ) {
3149 my $seqno = $rtokh->[_TYPE_SEQUENCE_];
3151 # This will have to be tweaked as tokenization changes.
3152 # We usually want a space at '} (', for example:
3153 # <<snippets/space1.in>>
3154 # map { 1 * $_; } ( $y, $M, $w, $d, $h, $m, $s );
3157 # &{ $_->[1] }( delete $_[$#_]{ $_->[0] } );
3158 # At present, the above & block is marked as type L/R so this
3159 # case won't go through here.
3160 if ( $last_type eq '}' && $last_token ne ')' ) { $ws = WS_YES }
3162 # NOTE: some older versions of Perl had occasional problems if
3163 # spaces are introduced between keywords or functions and
3164 # opening parens. So the default is not to do this except is
3165 # certain cases. The current Perl seems to tolerate spaces.
3167 # Space between keyword and '('
3168 elsif ( $last_type eq 'k' ) {
3170 unless ( $rOpts_space_keyword_paren
3171 || $space_after_keyword{$last_token} );
3173 # Set inside space flag if requested
3174 set_container_ws_by_keyword( $last_token, $seqno );
3177 # Space between function and '('
3178 # -----------------------------------------------------
3179 # 'w' and 'i' checks for something like:
3180 # myfun( &myfun( ->myfun(
3181 # -----------------------------------------------------
3183 # Note that at this point an identifier may still have a
3184 # leading arrow, but the arrow will be split off during token
3185 # respacing. After that, the token may become a bare word
3186 # without leading arrow. The point is, it is best to mark
3187 # function call parens right here before that happens.
3188 # Patch: added 'C' to prevent blinker, case b934, i.e. 'pi()'
3189 # NOTE: this would be the place to allow spaces between
3190 # repeated parens, like () () (), as in case c017, but I
3191 # decided that would not be a good idea.
3193 # Updated to allow detached '->' from tokenizer (issue c140)
3197 $is_wCUG{$last_type}
3206 # with prefix '->' or '&'
3207 $last_token =~ /^([\&]|->)/
3209 # or preceding token '->' (see b1337; c140)
3210 || $rtokh_last_last->[_TYPE_] eq '->'
3212 # or preceding sub call operator token '&'
3213 || ( $rtokh_last_last->[_TYPE_] eq 't'
3214 && $rtokh_last_last->[_TOKEN_] =~ /^\&\s*$/ )
3220 $rOpts_space_function_paren
3221 ? $self->ws_space_function_paren( $j, $rtokh_last_last )
3224 set_container_ws_by_keyword( $last_token, $seqno );
3225 $ris_function_call_paren->{$seqno} = 1;
3228 # space between something like $i and ( in 'snippets/space2.in'
3229 # for $i ( 0 .. 20 ) {
3230 elsif ( $last_type eq 'i' && $last_token =~ /^[\$\%\@]/ ) {
3234 # allow constant function followed by '()' to retain no space
3235 elsif ($last_type eq 'C'
3236 && $rLL->[ $j + 1 ]->[_TOKEN_] eq ')' )
3242 # patch for SWITCH/CASE: make space at ']{' optional
3243 # since the '{' might begin a case or when block
3244 elsif ( ( $token eq '{' && $type ne 'L' ) && $last_token eq ']' ) {
3248 # keep space between 'sub' and '{' for anonymous sub definition,
3249 # be sure type = 'k' (added for c140)
3250 if ( $type eq '{' ) {
3251 if ( $last_token eq 'sub' && $last_type eq 'k' ) {
3255 # this is needed to avoid no space in '){'
3256 if ( $last_token eq ')' && $token eq '{' ) { $ws = WS_YES }
3258 # avoid any space before the brace or bracket in something like
3259 # @opts{'a','b',...}
3260 if ( $last_type eq 'i' && $last_token =~ /^\@/ ) {
3264 } ## end if ( $is_opening_type{$type} ) {
3266 # always preserve whatever space was used after a possible
3267 # filehandle (except _) or here doc operator
3270 ( $last_type eq 'Z' && $last_token ne '_' )
3271 || $last_type eq 'h'
3273 && $type ne '#' # no longer required due to early exit for '#' above
3282 if ( !defined($ws) ) {
3284 #---------------------------------------------------------------
3285 # Whitespace Rules Section 4:
3286 # Use the binary rule table.
3287 #---------------------------------------------------------------
3288 if ( defined( $binary_ws_rules{$last_type}{$type} ) ) {
3289 $ws = $binary_ws_rules{$last_type}{$type};
3290 $ws_4 = $ws if DEBUG_WHITE;
3293 #---------------------------------------------------------------
3294 # Whitespace Rules Section 5:
3295 # Apply default rules not covered above.
3296 #---------------------------------------------------------------
3298 # If we fall through to here, look at the pre-defined hash tables
3299 # for the two tokens, and:
3300 # if (they are equal) use the common value
3301 # if (either is zero or undef) use the other
3302 # if (either is -1) use it
3317 my $wl = $want_left_space{$type};
3318 my $wr = $want_right_space{$last_type};
3319 if ( !defined($wl) ) {
3320 $ws = defined($wr) ? $wr : 0;
3322 elsif ( !defined($wr) ) {
3327 ( ( $wl == $wr ) || ( $wl == -1 ) || !$wr ) ? $wl : $wr;
3332 # Treat newline as a whitespace. Otherwise, we might combine
3333 # 'Send' and '-recipients' here according to the above rules:
3334 # <<snippets/space3.in>>
3335 # my $msg = new Fax::Send
3336 # -recipients => $to,
3339 && $rtokh->[_LINE_INDEX_] != $rtokh_last->[_LINE_INDEX_] )
3344 $rwhitespace_flags->[$j] = $ws;
3346 # remember non-blank, non-comment tokens
3347 $last_token = $token;
3349 $rtokh_last_last = $rtokh_last;
3350 $rtokh_last = $rtokh;
3352 next if ( !DEBUG_WHITE );
3354 my $str = substr( $last_token, 0, 15 );
3355 $str .= SPACE x ( 16 - length($str) );
3356 if ( !defined($ws_1) ) { $ws_1 = "*" }
3357 if ( !defined($ws_2) ) { $ws_2 = "*" }
3358 if ( !defined($ws_3) ) { $ws_3 = "*" }
3359 if ( !defined($ws_4) ) { $ws_4 = "*" }
3361 "NEW WHITE: i=$j $str $last_type $type $ws_1 : $ws_2 : $ws_3 : $ws_4 : $ws \n";
3363 # reset for next pass
3364 $ws_1 = $ws_2 = $ws_3 = $ws_4 = undef;
3368 if ( $rOpts->{'tight-secret-operators'} ) {
3369 new_secret_operator_whitespace( $rLL, $rwhitespace_flags );
3371 $self->[_ris_function_call_paren_] = $ris_function_call_paren;
3372 return $rwhitespace_flags;
3374 } ## end sub set_whitespace_flags
3376 sub set_container_ws_by_keyword {
3378 my ( $word, $sequence_number ) = @_;
3379 return unless (%keyword_paren_inner_tightness);
3381 # We just saw a keyword (or other function name) followed by an opening
3382 # paren. Now check to see if the following paren should have special
3383 # treatment for its inside space. If so we set a hash value using the
3384 # sequence number as key.
3385 if ( $word && $sequence_number ) {
3386 my $tightness = $keyword_paren_inner_tightness{$word};
3387 if ( defined($tightness) && $tightness != 1 ) {
3388 my $ws_flag = $tightness == 0 ? WS_YES : WS_NO;
3389 $opening_container_inside_ws{$sequence_number} = $ws_flag;
3390 $closing_container_inside_ws{$sequence_number} = $ws_flag;
3394 } ## end sub set_container_ws_by_keyword
3396 sub ws_in_container {
3398 my ( $j, $j_closing, $rLL, $type, $token, $last_token ) = @_;
3401 # $j = index of token following an opening container token
3402 # $type, $token = the type and token at index $j
3403 # $j_closing = closing token of the container
3404 # $last_token = the opening token of the container
3406 # WS_NO if there is just one token in the container (with exceptions)
3409 #------------------------------------
3410 # Look forward for the closing token;
3411 #------------------------------------
3412 if ( $j + 1 > $j_closing ) { return WS_NO }
3414 # Patch to count '-foo' as single token so that
3415 # each of $a{-foo} and $a{foo} and $a{'foo'} do
3416 # not get spaces with default formatting.
3420 && $last_token eq '{'
3421 && $rLL->[ $j + 1 ]->[_TYPE_] eq 'w' );
3423 # Patch to count a sign separated from a number as a single token, as
3424 # in the following line. Otherwise, it takes two steps to converge:
3426 if ( ( $type eq 'm' || $type eq 'p' )
3427 && $j < $j_closing + 1
3428 && $rLL->[ $j + 1 ]->[_TYPE_] eq 'b'
3429 && $rLL->[ $j + 2 ]->[_TYPE_] eq 'n'
3430 && $rLL->[ $j + 2 ]->[_TOKEN_] =~ /^\d/ )
3435 # $j_next is where a closing token should be if the container has
3436 # just a "single" token
3437 if ( $j_here + 1 > $j_closing ) { return WS_NO }
3439 ( $rLL->[ $j_here + 1 ]->[_TYPE_] eq 'b' )
3443 #-----------------------------------------------------------------
3444 # Now decide: if we get to the closing token we will keep it tight
3445 #-----------------------------------------------------------------
3447 $j_next == $j_closing
3449 # OLD PROBLEM: but watch out for this: [ [ ] (misc.t)
3450 # No longer necessary because of the previous check on sequence numbers
3451 ##&& $last_token ne $token
3453 # double diamond is usually spaced
3463 } ## end sub ws_in_container
3465 sub ws_space_function_paren {
3467 my ( $self, $j, $rtokh_last_last ) = @_;
3469 # Called if --space-function-paren is set to see if it might cause
3470 # a problem. The manual warns the user about potential problems with
3471 # this flag. Here we just try to catch one common problem.
3474 # $j = index of '(' after function name
3479 # This was added to fix for issue c166. Ignore -sfp at a possible indirect
3480 # object location. For example, do not convert this:
3481 # print header() ...
3483 # print header () ...
3484 # because in this latter form, header may be taken to be a file handle
3485 # instead of a function call.
3487 # Start with the normal value for -sfp:
3490 # now check to be sure we don't cause a problem:
3491 my $type_ll = $rtokh_last_last->[_TYPE_];
3492 my $tok_ll = $rtokh_last_last->[_TOKEN_];
3494 # NOTE: this is just a minimal check. For example, we might also check
3495 # for something like this:
3496 # print ( header ( ..
3497 if ( $type_ll eq 'k' && $is_indirect_object_taker{$tok_ll} ) {
3503 } ## end sub ws_space_function_paren
3505 } ## end closure set_whitespace_flags
3507 sub dump_want_left_space {
3509 local $LIST_SEPARATOR = "\n";
3511 These values are the main control of whitespace to the left of a token type;
3512 They may be altered with the -wls parameter.
3513 For a list of token types, use perltidy --dump-token-types (-dtt)
3514 1 means the token wants a space to its left
3515 -1 means the token does not want a space to its left
3516 ------------------------------------------------------------------------
3518 foreach my $key ( sort keys %want_left_space ) {
3519 $fh->print("$key\t$want_left_space{$key}\n");
3522 } ## end sub dump_want_left_space
3524 sub dump_want_right_space {
3526 local $LIST_SEPARATOR = "\n";
3528 These values are the main control of whitespace to the right of a token type;
3529 They may be altered with the -wrs parameter.
3530 For a list of token types, use perltidy --dump-token-types (-dtt)
3531 1 means the token wants a space to its right
3532 -1 means the token does not want a space to its right
3533 ------------------------------------------------------------------------
3535 foreach my $key ( sort keys %want_right_space ) {
3536 $fh->print("$key\t$want_right_space{$key}\n");
3539 } ## end sub dump_want_right_space
3541 { ## begin closure is_essential_whitespace
3543 my %is_sort_grep_map;
3547 my %essential_whitespace_filter_l1;
3548 my %essential_whitespace_filter_r1;
3549 my %essential_whitespace_filter_l2;
3550 my %essential_whitespace_filter_r2;
3551 my %is_type_with_space_before_bareword;
3552 my %is_special_variable_char;
3558 # NOTE: This hash is like the global %is_sort_map_grep, but it ignores
3559 # grep aliases on purpose, since here we are looking parens, not braces
3560 @q = qw(sort grep map);
3561 @is_sort_grep_map{@q} = (1) x scalar(@q);
3563 @q = qw(for foreach);
3564 @is_for_foreach{@q} = (1) x scalar(@q);
3567 .. :: << >> ** && || // -> => += -= .= %= &= |= ^= *= <>
3568 <= >= == =~ !~ != ++ -- /= x= ~~ ~. |. &. ^.
3570 @is_digraph{@q} = (1) x scalar(@q);
3572 @q = qw( ... **= <<= >>= &&= ||= //= <=> !~~ &.= |.= ^.= <<~);
3573 @is_trigraph{@q} = (1) x scalar(@q);
3575 # These are used as a speedup filters for sub is_essential_whitespace.
3578 # These left side token types USUALLY do not require a space:
3579 @q = qw( ; { } [ ] L R );
3583 @essential_whitespace_filter_l1{@q} = (1) x scalar(@q);
3585 # BUT some might if followed by these right token types
3586 @q = qw( pp mm << <<= h );
3587 @essential_whitespace_filter_r1{@q} = (1) x scalar(@q);
3590 # These right side filters usually do not require a space
3594 @essential_whitespace_filter_r2{@q} = (1) x scalar(@q);
3596 # BUT some might if followed by these left token types
3598 @essential_whitespace_filter_l2{@q} = (1) x scalar(@q);
3600 # Keep a space between certain types and any bareword:
3601 # Q: keep a space between a quote and a bareword to prevent the
3602 # bareword from becoming a quote modifier.
3603 # &: do not remove space between an '&' and a bare word because
3604 # it may turn into a function evaluation, like here
3605 # between '&' and 'O_ACCMODE', producing a syntax error [File.pm]
3606 # $opts{rdonly} = (($opts{mode} & O_ACCMODE) == O_RDONLY);
3608 @is_type_with_space_before_bareword{@q} = (1) x scalar(@q);
3610 # These are the only characters which can (currently) form special
3611 # variables, like $^W: (issue c066, c068).
3613 qw{ ? A B C D E F G H I J K L M N O P Q R S T U V W X Y Z [ \ ] ^ _ };
3614 @{is_special_variable_char}{@q} = (1) x scalar(@q);
3618 sub is_essential_whitespace {
3620 # Essential whitespace means whitespace which cannot be safely deleted
3621 # without risking the introduction of a syntax error.
3622 # We are given three tokens and their types:
3623 # ($tokenl, $typel) is the token to the left of the space in question
3624 # ($tokenr, $typer) is the token to the right of the space in question
3625 # ($tokenll, $typell) is previous nonblank token to the left of $tokenl
3627 # Note1: This routine should almost never need to be changed. It is
3628 # for avoiding syntax problems rather than for formatting.
3630 # Note2: The -mangle option causes large numbers of calls to this
3631 # routine and therefore is a good test. So if a change is made, be sure
3632 # to use nytprof to profile with both old and reviesed coding using the
3633 # -mangle option and check differences.
3635 my ( $tokenll, $typell, $tokenl, $typel, $tokenr, $typer ) = @_;
3637 # This is potentially a very slow routine but the following quick
3638 # filters typically catch and handle over 90% of the calls.
3640 # Filter 1: usually no space required after common types ; , [ ] { } ( )
3642 if ( $essential_whitespace_filter_l1{$typel}
3643 && !$essential_whitespace_filter_r1{$typer} );
3645 # Filter 2: usually no space before common types ; ,
3647 if ( $essential_whitespace_filter_r2{$typer}
3648 && !$essential_whitespace_filter_l2{$typel} );
3650 # Filter 3: Handle side comments: a space is only essential if the left
3651 # token ends in '$' For example, we do not want to create $#foo below:
3660 # Also, I prefer not to put a ? and # together because ? used to be
3661 # a pattern delimiter and spacing was used if guessing was needed.
3663 if ( $typer eq '#' ) {
3667 && ( $typel eq '?' || substr( $tokenl, -1 ) eq '$' ) );
3671 my $tokenr_is_bareword = $tokenr =~ /^\w/ && $tokenr !~ /^\d/;
3672 my $tokenr_is_open_paren = $tokenr eq '(';
3673 my $token_joined = $tokenl . $tokenr;
3674 my $tokenl_is_dash = $tokenl eq '-';
3678 # never combine two bare words or numbers
3679 # examples: and ::ok(1)
3681 # for bla::bla:: abc
3682 # example is "%overload:: and" in files Dumpvalue.pm or colonbug.pl
3683 # $input eq"quit" to make $inputeq"quit"
3684 # my $size=-s::SINK if $file; <==OK but we won't do it
3685 # don't join something like: for bla::bla:: abc
3686 # example is "%overload:: and" in files Dumpvalue.pm or colonbug.pl
3687 ( ( $tokenl =~ /([\'\w]|\:\:)$/ && $typel ne 'CORE::' )
3688 && ( $tokenr =~ /^([\'\w]|\:\:)/ ) )
3690 # do not combine a number with a concatenation dot
3691 # example: pom.caputo:
3692 # $vt100_compatible ? "\e[0;0H" : ('-' x 78 . "\n");
3693 || $typel eq 'n' && $tokenr eq '.'
3694 || $typer eq 'n' && $tokenl eq '.'
3696 # cases of a space before a bareword...
3698 $tokenr_is_bareword && (
3700 # do not join a minus with a bare word, because you might form
3701 # a file test operator. Example from Complex.pm:
3702 # if (CORE::abs($z - i) < $eps);
3703 # "z-i" would be taken as a file test.
3704 $tokenl_is_dash && length($tokenr) == 1
3706 # and something like this could become ambiguous without space
3708 # use constant III=>1;
3712 || $tokenl_is_dash && $typer =~ /^[wC]$/
3714 # keep space between types Q & and a bareword
3715 || $is_type_with_space_before_bareword{$typel}
3717 # +-: binary plus and minus before a bareword could get
3718 # converted into unary plus and minus on next pass through the
3719 # tokenizer. This can lead to blinkers: cases b660 b670 b780
3720 # b781 b787 b788 b790 So we keep a space unless the +/- clearly
3721 # follows an operator
3722 || ( ( $typel eq '+' || $typel eq '-' )
3723 && $typell !~ /^[niC\)\}\]R]$/ )
3725 # keep a space between a token ending in '$' and any word;
3726 # this caused trouble: "die @$ if $@"
3727 || $typel eq 'i' && substr( $tokenl, -1, 1 ) eq '$'
3729 # don't combine $$ or $# with any alphanumeric
3730 # (testfile mangle.t with --mangle)
3735 ) ## end $tokenr_is_bareword
3738 # '= -' should not become =- or you will get a warning
3740 # || ($tokenr eq '-')
3742 # do not join a bare word with a minus, like between 'Send' and
3743 # '-recipients' here <<snippets/space3.in>>
3744 # my $msg = new Fax::Send
3745 # -recipients => $to,
3747 # This is the safest thing to do. If we had the token to the right of
3748 # the minus we could do a better check.
3750 # And do not combine a bareword and a quote, like this:
3751 # oops "Your login, $Bad_Login, is not valid";
3752 # It can cause a syntax error if oops is a sub
3753 || $typel eq 'w' && ( $tokenr eq '-' || $typer eq 'Q' )
3755 # perl is very fussy about spaces before <<
3756 || substr( $tokenr, 0, 2 ) eq '<<'
3758 # avoid combining tokens to create new meanings. Example:
3759 # $a+ +$b must not become $a++$b
3760 || ( $is_digraph{$token_joined} )
3761 || $is_trigraph{$token_joined}
3763 # another example: do not combine these two &'s:
3764 # allow_options & &OPT_EXECCGI
3765 || $is_digraph{ $tokenl . substr( $tokenr, 0, 1 ) }
3767 # retain any space after possible filehandle
3768 # (testfiles prnterr1.t with --extrude and mangle.t with --mangle)
3771 # Added 'Y' here 16 Jan 2021 to prevent -mangle option from removing
3772 # space after type Y. Otherwise, it will get parsed as type 'Z' later
3773 # and any space would have to be added back manually if desired.
3776 # Perl is sensitive to whitespace after the + here:
3777 # $b = xvals $a + 0.1 * yvals $a;
3778 || $typell eq 'Z' && $typel =~ /^[\/\?\+\-\*]$/
3781 $tokenr_is_open_paren && (
3783 # keep paren separate in 'use Foo::Bar ()'
3784 ( $typel eq 'w' && $typell eq 'k' && $tokenll eq 'use' )
3786 # OLD: keep any space between filehandle and paren:
3787 # file mangle.t with --mangle:
3788 # NEW: this test is no longer necessary here (moved above)
3791 # must have space between grep and left paren; "grep(" will fail
3792 || $is_sort_grep_map{$tokenl}
3794 # don't stick numbers next to left parens, as in:
3795 #use Mail::Internet 1.28 (); (see Entity.pm, Head.pm, Test.pm)
3798 ) ## end $tokenr_is_open_paren
3800 # retain any space after here doc operator ( hereerr.t)
3803 # be careful with a space around ++ and --, to avoid ambiguity as to
3804 # which token it applies
3805 || ( $typer eq 'pp' || $typer eq 'mm' ) && $tokenl !~ /^[\;\{\(\[]/
3806 || ( $typel eq '++' || $typel eq '--' )
3807 && $tokenr !~ /^[\;\}\)\]]/
3809 # need space after foreach my; for example, this will fail in
3810 # older versions of Perl:
3811 # foreach my$ft(@filetypes)...
3815 && substr( $tokenr, 0, 1 ) eq '$'
3818 && $is_for_foreach{$tokenll}
3821 # Keep space after like $^ if needed to avoid forming a different
3822 # special variable (issue c068). For example:
3823 # my $aa = $^ ? "none" : "ok";
3825 && length($tokenl) == 2
3826 && substr( $tokenl, 1, 1 ) eq '^'
3827 && $is_special_variable_char{ substr( $tokenr, 0, 1 ) } )
3829 # We must be sure that a space between a ? and a quoted string
3830 # remains if the space before the ? remains. [Loca.pm, lockarea]
3832 # $b=join $comma ? ',' : ':', @_; # ok
3833 # $b=join $comma?',' : ':', @_; # ok!
3834 # $b=join $comma ?',' : ':', @_; # error!
3835 # Not really required:
3836 ## || ( ( $typel eq '?' ) && ( $typer eq 'Q' ) )
3838 # Space stacked labels...
3839 # Not really required: Perl seems to accept non-spaced labels.
3840 ## || $typel eq 'J' && $typer eq 'J'
3842 ; # the value of this long logic sequence is the result we want
3844 } ## end sub is_essential_whitespace
3845 } ## end closure is_essential_whitespace
3847 { ## begin closure new_secret_operator_whitespace
3849 my %secret_operators;
3850 my %is_leading_secret_token;
3854 # token lists for perl secret operators as compiled by Philippe Bruhat
3855 # at: https://metacpan.org/module/perlsecret
3856 %secret_operators = (
3857 'Goatse' => [qw#= ( ) =#], #=( )=
3858 'Venus1' => [qw#0 +#], # 0+
3859 'Venus2' => [qw#+ 0#], # +0
3860 'Enterprise' => [qw#) x ! !#], # ()x!!
3861 'Kite1' => [qw#~ ~ <>#], # ~~<>
3862 'Kite2' => [qw#~~ <>#], # ~~<>
3863 'Winking Fat Comma' => [ ( ',', '=>' ) ], # ,=>
3864 'Bang bang ' => [qw#! !#], # !!
3867 # The following operators and constants are not included because they
3868 # are normally kept tight by perltidy:
3872 # Make a lookup table indexed by the first token of each operator:
3873 # first token => [list, list, ...]
3874 foreach my $value ( values(%secret_operators) ) {
3875 my $tok = $value->[0];
3876 push @{ $is_leading_secret_token{$tok} }, $value;
3880 sub new_secret_operator_whitespace {
3882 my ( $rlong_array, $rwhitespace_flags ) = @_;
3884 # Loop over all tokens in this line
3885 my ( $token, $type );
3886 my $jmax = @{$rlong_array} - 1;
3887 foreach my $j ( 0 .. $jmax ) {
3889 $token = $rlong_array->[$j]->[_TOKEN_];
3890 $type = $rlong_array->[$j]->[_TYPE_];
3892 # Skip unless this token might start a secret operator
3893 next if ( $type eq 'b' );
3894 next unless ( $is_leading_secret_token{$token} );
3896 # Loop over all secret operators with this leading token
3897 foreach my $rpattern ( @{ $is_leading_secret_token{$token} } ) {
3899 foreach my $tok ( @{$rpattern} ) {
3904 && $rlong_array->[$jend]->[_TYPE_] eq 'b' );
3906 || $tok ne $rlong_array->[$jend]->[_TOKEN_] )
3915 # set flags to prevent spaces within this operator
3916 foreach my $jj ( $j + 1 .. $jend ) {
3917 $rwhitespace_flags->[$jj] = WS_NO;
3922 } ## End Loop over all operators
3923 } ## End loop over all tokens
3925 } ## end sub new_secret_operator_whitespace
3926 } ## end closure new_secret_operator_whitespace
3928 { ## begin closure set_bond_strengths
3930 # These routines and variables are involved in deciding where to break very
3933 my %is_good_keyword_breakpoint;
3935 my %is_container_token;
3937 my %binary_bond_strength_nospace;
3938 my %binary_bond_strength;
3947 sub initialize_bond_strength_hashes {
3950 @q = qw(if unless while until for foreach);
3951 @is_good_keyword_breakpoint{@q} = (1) x scalar(@q);
3953 @q = qw(lt gt le ge);
3954 @is_lt_gt_le_ge{@q} = (1) x scalar(@q);
3956 @q = qw/ ( [ { } ] ) /;
3957 @is_container_token{@q} = (1) x scalar(@q);
3959 # The decision about where to break a line depends upon a "bond
3960 # strength" between tokens. The LOWER the bond strength, the MORE
3961 # likely a break. A bond strength may be any value but to simplify
3962 # things there are several pre-defined strength levels:
3964 # NO_BREAK => 10000;
3965 # VERY_STRONG => 100;
3969 # VERY_WEAK => 0.55;
3971 # The strength values are based on trial-and-error, and need to be
3972 # tweaked occasionally to get desired results. Some comments:
3974 # 1. Only relative strengths are important. small differences
3975 # in strengths can make big formatting differences.
3976 # 2. Each indentation level adds one unit of bond strength.
3977 # 3. A value of NO_BREAK makes an unbreakable bond
3978 # 4. A value of VERY_WEAK is the strength of a ','
3979 # 5. Values below NOMINAL are considered ok break points.
3980 # 6. Values above NOMINAL are considered poor break points.
3982 # The bond strengths should roughly follow precedence order where
3983 # possible. If you make changes, please check the results very
3984 # carefully on a variety of scripts. Testing with the -extrude
3985 # options is particularly helpful in exercising all of the rules.
3987 # Wherever possible, bond strengths are defined in the following
3988 # tables. There are two main stages to setting bond strengths and
3989 # two types of tables:
3991 # The first stage involves looking at each token individually and
3992 # defining left and right bond strengths, according to if we want
3993 # to break to the left or right side, and how good a break point it
3994 # is. For example tokens like =, ||, && make good break points and
3995 # will have low strengths, but one might want to break on either
3996 # side to put them at the end of one line or beginning of the next.
3998 # The second stage involves looking at certain pairs of tokens and
3999 # defining a bond strength for that particular pair. This second
4000 # stage has priority.
4002 #---------------------------------------------------------------
4003 # Bond Strength BEGIN Section 1.
4004 # Set left and right bond strengths of individual tokens.
4005 #---------------------------------------------------------------
4007 # NOTE: NO_BREAK's set in this section first are HINTS which will
4008 # probably not be honored. Essential NO_BREAKS's should be set in
4009 # BEGIN Section 2 or hardwired in the NO_BREAK coding near the end
4010 # of this subroutine.
4012 # Note that we are setting defaults in this section. The user
4013 # cannot change bond strengths but can cause the left and right
4014 # bond strengths of any token type to be swapped through the use of
4015 # the -wba and -wbb flags. In this way the user can determine if a
4016 # breakpoint token should appear at the end of one line or the
4017 # beginning of the next line.
4019 %right_bond_strength = ();
4020 %left_bond_strength = ();
4021 %binary_bond_strength_nospace = ();
4022 %binary_bond_strength = ();
4026 # The hash keys in this section are token types, plus the text of
4027 # certain keywords like 'or', 'and'.
4029 # no break around possible filehandle
4030 $left_bond_strength{'Z'} = NO_BREAK;
4031 $right_bond_strength{'Z'} = NO_BREAK;
4033 # never put a bare word on a new line:
4034 # example print (STDERR, "bla"); will fail with break after (
4035 $left_bond_strength{'w'} = NO_BREAK;
4037 # blanks always have infinite strength to force breaks after
4039 $right_bond_strength{'b'} = NO_BREAK;
4041 # try not to break on exponentiation
4042 @q = qw# ** .. ... <=> #;
4043 @left_bond_strength{@q} = (STRONG) x scalar(@q);
4044 @right_bond_strength{@q} = (STRONG) x scalar(@q);
4046 # The comma-arrow has very low precedence but not a good break point
4047 $left_bond_strength{'=>'} = NO_BREAK;
4048 $right_bond_strength{'=>'} = NOMINAL;
4050 # ok to break after label
4051 $left_bond_strength{'J'} = NO_BREAK;
4052 $right_bond_strength{'J'} = NOMINAL;
4053 $left_bond_strength{'j'} = STRONG;
4054 $right_bond_strength{'j'} = STRONG;
4055 $left_bond_strength{'A'} = STRONG;
4056 $right_bond_strength{'A'} = STRONG;
4058 $left_bond_strength{'->'} = STRONG;
4059 $right_bond_strength{'->'} = VERY_STRONG;
4061 $left_bond_strength{'CORE::'} = NOMINAL;
4062 $right_bond_strength{'CORE::'} = NO_BREAK;
4064 # breaking AFTER modulus operator is ok:
4066 @left_bond_strength{@q} = (STRONG) x scalar(@q);
4067 @right_bond_strength{@q} =
4068 ( 0.1 * NOMINAL + 0.9 * STRONG ) x scalar(@q);
4070 # Break AFTER math operators * and /
4072 @left_bond_strength{@q} = (STRONG) x scalar(@q);
4073 @right_bond_strength{@q} = (NOMINAL) x scalar(@q);
4075 # Break AFTER weakest math operators + and -
4076 # Make them weaker than * but a bit stronger than '.'
4078 @left_bond_strength{@q} = (STRONG) x scalar(@q);
4079 @right_bond_strength{@q} =
4080 ( 0.91 * NOMINAL + 0.09 * WEAK ) x scalar(@q);
4082 # Define left strength of unary plus and minus (fixes case b511)
4083 $left_bond_strength{p} = $left_bond_strength{'+'};
4084 $left_bond_strength{m} = $left_bond_strength{'-'};
4086 # And make right strength of unary plus and minus very high.
4087 # Fixes cases b670 b790
4088 $right_bond_strength{p} = NO_BREAK;
4089 $right_bond_strength{m} = NO_BREAK;
4091 # breaking BEFORE these is just ok:
4093 @right_bond_strength{@q} = (STRONG) x scalar(@q);
4094 @left_bond_strength{@q} = (NOMINAL) x scalar(@q);
4096 # breaking before the string concatenation operator seems best
4097 # because it can be hard to see at the end of a line
4098 $right_bond_strength{'.'} = STRONG;
4099 $left_bond_strength{'.'} = 0.9 * NOMINAL + 0.1 * WEAK;
4102 @left_bond_strength{@q} = (STRONG) x scalar(@q);
4103 @right_bond_strength{@q} = (NOMINAL) x scalar(@q);
4105 # make these a little weaker than nominal so that they get
4106 # favored for end-of-line characters
4107 @q = qw< != == =~ !~ ~~ !~~ >;
4108 @left_bond_strength{@q} = (STRONG) x scalar(@q);
4109 @right_bond_strength{@q} =
4110 ( 0.9 * NOMINAL + 0.1 * WEAK ) x scalar(@q);
4113 @q = qw# < > | & >= <= #;
4114 @left_bond_strength{@q} = (VERY_STRONG) x scalar(@q);
4115 @right_bond_strength{@q} =
4116 ( 0.8 * NOMINAL + 0.2 * WEAK ) x scalar(@q);
4118 # breaking either before or after a quote is ok
4119 # but bias for breaking before a quote
4120 $left_bond_strength{'Q'} = NOMINAL;
4121 $right_bond_strength{'Q'} = NOMINAL + 0.02;
4122 $left_bond_strength{'q'} = NOMINAL;
4123 $right_bond_strength{'q'} = NOMINAL;
4125 # starting a line with a keyword is usually ok
4126 $left_bond_strength{'k'} = NOMINAL;
4128 # we usually want to bond a keyword strongly to what immediately
4129 # follows, rather than leaving it stranded at the end of a line
4130 $right_bond_strength{'k'} = STRONG;
4132 $left_bond_strength{'G'} = NOMINAL;
4133 $right_bond_strength{'G'} = STRONG;
4135 # assignment operators
4137 = **= += *= &= <<= &&=
4138 -= /= |= >>= ||= //=
4143 # Default is to break AFTER various assignment operators
4144 @left_bond_strength{@q} = (STRONG) x scalar(@q);
4145 @right_bond_strength{@q} =
4146 ( 0.4 * WEAK + 0.6 * VERY_WEAK ) x scalar(@q);
4148 # Default is to break BEFORE '&&' and '||' and '//'
4149 # set strength of '||' to same as '=' so that chains like
4150 # $a = $b || $c || $d will break before the first '||'
4151 $right_bond_strength{'||'} = NOMINAL;
4152 $left_bond_strength{'||'} = $right_bond_strength{'='};
4154 # same thing for '//'
4155 $right_bond_strength{'//'} = NOMINAL;
4156 $left_bond_strength{'//'} = $right_bond_strength{'='};
4158 # set strength of && a little higher than ||
4159 $right_bond_strength{'&&'} = NOMINAL;
4160 $left_bond_strength{'&&'} = $left_bond_strength{'||'} + 0.1;
4162 $left_bond_strength{';'} = VERY_STRONG;
4163 $right_bond_strength{';'} = VERY_WEAK;
4164 $left_bond_strength{'f'} = VERY_STRONG;
4166 # make right strength of for ';' a little less than '='
4167 # to make for contents break after the ';' to avoid this:
4168 # for ( $j = $number_of_fields - 1 ; $j < $item_count ; $j +=
4169 # $number_of_fields )
4170 # and make it weaker than ',' and 'and' too
4171 $right_bond_strength{'f'} = VERY_WEAK - 0.03;
4173 # The strengths of ?/: should be somewhere between
4174 # an '=' and a quote (NOMINAL),
4175 # make strength of ':' slightly less than '?' to help
4176 # break long chains of ? : after the colons
4177 $left_bond_strength{':'} = 0.4 * WEAK + 0.6 * NOMINAL;
4178 $right_bond_strength{':'} = NO_BREAK;
4179 $left_bond_strength{'?'} = $left_bond_strength{':'} + 0.01;
4180 $right_bond_strength{'?'} = NO_BREAK;
4182 $left_bond_strength{','} = VERY_STRONG;
4183 $right_bond_strength{','} = VERY_WEAK;
4185 # remaining digraphs and trigraphs not defined above
4186 @q = qw( :: <> ++ --);
4187 @left_bond_strength{@q} = (WEAK) x scalar(@q);
4188 @right_bond_strength{@q} = (STRONG) x scalar(@q);
4190 # Set bond strengths of certain keywords
4191 # make 'or', 'err', 'and' slightly weaker than a ','
4192 $left_bond_strength{'and'} = VERY_WEAK - 0.01;
4193 $left_bond_strength{'or'} = VERY_WEAK - 0.02;
4194 $left_bond_strength{'err'} = VERY_WEAK - 0.02;
4195 $left_bond_strength{'xor'} = VERY_WEAK - 0.01;
4196 $right_bond_strength{'and'} = NOMINAL;
4197 $right_bond_strength{'or'} = NOMINAL;
4198 $right_bond_strength{'err'} = NOMINAL;
4199 $right_bond_strength{'xor'} = NOMINAL;
4201 #---------------------------------------------------------------
4202 # Bond Strength BEGIN Section 2.
4203 # Set binary rules for bond strengths between certain token types.
4204 #---------------------------------------------------------------
4206 # We have a little problem making tables which apply to the
4207 # container tokens. Here is a list of container tokens and
4210 # type tokens // meaning
4211 # { {, [, ( // indent
4212 # } }, ], ) // outdent
4213 # [ [ // left non-structural [ (enclosing an array index)
4214 # ] ] // right non-structural square bracket
4215 # ( ( // left non-structural paren
4216 # ) ) // right non-structural paren
4217 # L { // left non-structural curly brace (enclosing a key)
4218 # R } // right non-structural curly brace
4220 # Some rules apply to token types and some to just the token
4221 # itself. We solve the problem by combining type and token into a
4222 # new hash key for the container types.
4224 # If a rule applies to a token 'type' then we need to make rules
4225 # for each of these 'type.token' combinations:
4236 # If a rule applies to a token then we need to make rules for
4237 # these 'type.token' combinations:
4246 # allow long lines before final { in an if statement, as in:
4251 # Otherwise, the line before the { tends to be too short.
4253 $binary_bond_strength{'))'}{'{{'} = VERY_WEAK + 0.03;
4254 $binary_bond_strength{'(('}{'{{'} = NOMINAL;
4256 # break on something like '} (', but keep this stronger than a ','
4257 # example is in 'howe.pl'
4258 $binary_bond_strength{'R}'}{'(('} = 0.8 * VERY_WEAK + 0.2 * WEAK;
4259 $binary_bond_strength{'}}'}{'(('} = 0.8 * VERY_WEAK + 0.2 * WEAK;
4261 # keep matrix and hash indices together
4262 # but make them a little below STRONG to allow breaking open
4263 # something like {'some-word'}{'some-very-long-word'} at the }{
4265 $binary_bond_strength{']]'}{'[['} = 0.9 * STRONG + 0.1 * NOMINAL;
4266 $binary_bond_strength{']]'}{'L{'} = 0.9 * STRONG + 0.1 * NOMINAL;
4267 $binary_bond_strength{'R}'}{'[['} = 0.9 * STRONG + 0.1 * NOMINAL;
4268 $binary_bond_strength{'R}'}{'L{'} = 0.9 * STRONG + 0.1 * NOMINAL;
4270 # increase strength to the point where a break in the following
4271 # will be after the opening paren rather than at the arrow:
4273 $binary_bond_strength{'i'}{'->'} = 1.45 * STRONG;
4275 # Added for c140 to make 'w ->' and 'i ->' behave the same
4276 $binary_bond_strength{'w'}{'->'} = 1.45 * STRONG;
4278 # Note that the following alternative strength would make the break at the
4279 # '->' rather than opening the '('. Both have advantages and disadvantages.
4280 # $binary_bond_strength{'i'}{'->'} = 0.5*STRONG + 0.5 * NOMINAL; #
4282 $binary_bond_strength{'))'}{'->'} = 0.1 * STRONG + 0.9 * NOMINAL;
4283 $binary_bond_strength{']]'}{'->'} = 0.1 * STRONG + 0.9 * NOMINAL;
4284 $binary_bond_strength{'})'}{'->'} = 0.1 * STRONG + 0.9 * NOMINAL;
4285 $binary_bond_strength{'}]'}{'->'} = 0.1 * STRONG + 0.9 * NOMINAL;
4286 $binary_bond_strength{'}}'}{'->'} = 0.1 * STRONG + 0.9 * NOMINAL;
4287 $binary_bond_strength{'R}'}{'->'} = 0.1 * STRONG + 0.9 * NOMINAL;
4289 $binary_bond_strength{'))'}{'[['} = 0.2 * STRONG + 0.8 * NOMINAL;
4290 $binary_bond_strength{'})'}{'[['} = 0.2 * STRONG + 0.8 * NOMINAL;
4291 $binary_bond_strength{'))'}{'{['} = 0.2 * STRONG + 0.8 * NOMINAL;
4292 $binary_bond_strength{'})'}{'{['} = 0.2 * STRONG + 0.8 * NOMINAL;
4294 #---------------------------------------------------------------
4295 # Binary NO_BREAK rules
4296 #---------------------------------------------------------------
4298 # use strict requires that bare word and => not be separated
4299 $binary_bond_strength{'C'}{'=>'} = NO_BREAK;
4300 $binary_bond_strength{'U'}{'=>'} = NO_BREAK;
4302 # Never break between a bareword and a following paren because
4303 # perl may give an error. For example, if a break is placed
4304 # between 'to_filehandle' and its '(' the following line will
4305 # give a syntax error [Carp.pm]: my( $no) =fileno(
4306 # to_filehandle( $in)) ;
4307 $binary_bond_strength{'C'}{'(('} = NO_BREAK;
4308 $binary_bond_strength{'C'}{'{('} = NO_BREAK;
4309 $binary_bond_strength{'U'}{'(('} = NO_BREAK;
4310 $binary_bond_strength{'U'}{'{('} = NO_BREAK;
4312 # use strict requires that bare word within braces not start new
4314 $binary_bond_strength{'L{'}{'w'} = NO_BREAK;
4316 $binary_bond_strength{'w'}{'R}'} = NO_BREAK;
4318 # The following two rules prevent a syntax error caused by breaking up
4319 # a construction like '{-y}'. The '-' quotes the 'y' and prevents
4320 # it from being taken as a transliteration. We have to keep
4321 # token types 'L m w' together to prevent this error.
4322 $binary_bond_strength{'L{'}{'m'} = NO_BREAK;
4323 $binary_bond_strength_nospace{'m'}{'w'} = NO_BREAK;
4325 # keep 'bareword-' together, but only if there is no space between
4326 # the word and dash. Do not keep together if there is a space.
4327 # example 'use perl6-alpha'
4328 $binary_bond_strength_nospace{'w'}{'m'} = NO_BREAK;
4330 # use strict requires that bare word and => not be separated
4331 $binary_bond_strength{'w'}{'=>'} = NO_BREAK;
4333 # use strict does not allow separating type info from trailing { }
4334 # testfile is readmail.pl
4335 $binary_bond_strength{'t'}{'L{'} = NO_BREAK;
4336 $binary_bond_strength{'i'}{'L{'} = NO_BREAK;
4338 # As a defensive measure, do not break between a '(' and a
4339 # filehandle. In some cases, this can cause an error. For
4340 # example, the following program works:
4347 # But this program fails:
4355 # This is normally only a problem with the 'extrude' option
4356 $binary_bond_strength{'(('}{'Y'} = NO_BREAK;
4357 $binary_bond_strength{'{('}{'Y'} = NO_BREAK;
4359 # never break between sub name and opening paren
4360 $binary_bond_strength{'w'}{'(('} = NO_BREAK;
4361 $binary_bond_strength{'w'}{'{('} = NO_BREAK;
4363 # keep '}' together with ';'
4364 $binary_bond_strength{'}}'}{';'} = NO_BREAK;
4366 # Breaking before a ++ can cause perl to guess wrong. For
4367 # example the following line will cause a syntax error
4368 # with -extrude if we break between '$i' and '++' [fixstyle2]
4369 # print( ( $i++ & 1 ) ? $_ : ( $change{$_} || $_ ) );
4370 $nobreak_lhs{'++'} = NO_BREAK;
4372 # Do not break before a possible file handle
4373 $nobreak_lhs{'Z'} = NO_BREAK;
4375 # use strict hates bare words on any new line. For
4376 # example, a break before the underscore here provokes the
4377 # wrath of use strict:
4378 # if ( -r $fn && ( -s _ || $AllowZeroFilesize)) {
4379 $nobreak_rhs{'F'} = NO_BREAK;
4380 $nobreak_rhs{'CORE::'} = NO_BREAK;
4382 # To prevent the tokenizer from switching between types 'w' and 'G' we
4383 # need to avoid breaking between type 'G' and the following code block
4384 # brace. Fixes case b929.
4385 $nobreak_rhs{G} = NO_BREAK;
4387 #---------------------------------------------------------------
4388 # Bond Strength BEGIN Section 3.
4389 # Define tables and values for applying a small bias to the above
4391 #---------------------------------------------------------------
4392 # Adding a small 'bias' to strengths is a simple way to make a line
4393 # break at the first of a sequence of identical terms. For
4394 # example, to force long string of conditional operators to break
4395 # with each line ending in a ':', we can add a small number to the
4396 # bond strength of each ':' (colon.t)
4397 @bias_tokens = qw( : && || f and or . ); # tokens which get bias
4398 %bias_hash = map { $_ => 0 } @bias_tokens;
4399 $delta_bias = 0.0001; # a very small strength level
4402 } ## end sub initialize_bond_strength_hashes
4404 use constant DEBUG_BOND => 0;
4406 sub set_bond_strengths {
4410 #-----------------------------------------------------------------
4411 # Define a 'bond strength' for each token pair in an output batch.
4412 # See comments above for definition of bond strength.
4413 #-----------------------------------------------------------------
4415 my $rbond_strength_to_go = [];
4417 my $rLL = $self->[_rLL_];
4418 my $rK_weld_right = $self->[_rK_weld_right_];
4419 my $rK_weld_left = $self->[_rK_weld_left_];
4420 my $ris_list_by_seqno = $self->[_ris_list_by_seqno_];
4422 # patch-its always ok to break at end of line
4423 $nobreak_to_go[$max_index_to_go] = 0;
4425 # we start a new set of bias values for each line
4428 my $code_bias = -.01; # bias for closing block braces
4432 my $token_length = 1;
4434 my $last_nonblank_type = $type;
4435 my $last_nonblank_token = $token;
4436 my $list_str = $left_bond_strength{'?'};
4438 my ( $bond_str_1, $bond_str_2, $bond_str_3, $bond_str_4 );
4440 my ( $block_type, $i_next, $i_next_nonblank, $next_nonblank_token,
4441 $next_nonblank_type, $next_token, $next_type,
4442 $total_nesting_depth, );
4444 # main loop to compute bond strengths between each pair of tokens
4445 foreach my $i ( 0 .. $max_index_to_go ) {
4447 if ( $type ne 'b' ) {
4448 $last_nonblank_type = $type;
4449 $last_nonblank_token = $token;
4451 $type = $types_to_go[$i];
4453 # strength on both sides of a blank is the same
4454 if ( $type eq 'b' && $last_type ne 'b' ) {
4455 $rbond_strength_to_go->[$i] = $rbond_strength_to_go->[ $i - 1 ];
4456 $nobreak_to_go[$i] ||= $nobreak_to_go[ $i - 1 ]; # fix for b1257
4460 $token = $tokens_to_go[$i];
4461 $token_length = $token_lengths_to_go[$i];
4462 $block_type = $block_type_to_go[$i];
4464 $next_type = $types_to_go[$i_next];
4465 $next_token = $tokens_to_go[$i_next];
4466 $total_nesting_depth = $nesting_depth_to_go[$i_next];
4467 $i_next_nonblank = ( ( $next_type eq 'b' ) ? $i + 2 : $i + 1 );
4468 $next_nonblank_type = $types_to_go[$i_next_nonblank];
4469 $next_nonblank_token = $tokens_to_go[$i_next_nonblank];
4471 my $seqno = $type_sequence_to_go[$i];
4472 my $next_nonblank_seqno = $type_sequence_to_go[$i_next_nonblank];
4474 # We are computing the strength of the bond between the current
4475 # token and the NEXT token.
4477 #---------------------------------------------------------------
4478 # Bond Strength Section 1:
4479 # First Approximation.
4480 # Use minimum of individual left and right tabulated bond
4482 #---------------------------------------------------------------
4483 my $bsr = $right_bond_strength{$type};
4484 my $bsl = $left_bond_strength{$next_nonblank_type};
4486 # define right bond strengths of certain keywords
4487 if ( $type eq 'k' && defined( $right_bond_strength{$token} ) ) {
4488 $bsr = $right_bond_strength{$token};
4490 elsif ( $token eq 'ne' or $token eq 'eq' ) {
4494 # set terminal bond strength to the nominal value
4495 # this will cause good preceding breaks to be retained
4496 if ( $i_next_nonblank > $max_index_to_go ) {
4499 # But weaken the bond at a 'missing terminal comma'. If an
4500 # optional comma is missing at the end of a broken list, use
4501 # the strength of a comma anyway to make formatting the same as
4502 # if it were there. Fixes issue c133.
4503 if ( !defined($bsr) || $bsr > VERY_WEAK ) {
4504 my $seqno_px = $parent_seqno_to_go[$max_index_to_go];
4505 if ( $ris_list_by_seqno->{$seqno_px} ) {
4506 my $KK = $K_to_go[$max_index_to_go];
4507 my $Kn = $self->K_next_nonblank($KK);
4508 my $seqno_n = $rLL->[$Kn]->[_TYPE_SEQUENCE_];
4509 if ( $seqno_n && $seqno_n eq $seqno_px ) {
4516 # define right bond strengths of certain keywords
4517 if ( $next_nonblank_type eq 'k'
4518 && defined( $left_bond_strength{$next_nonblank_token} ) )
4520 $bsl = $left_bond_strength{$next_nonblank_token};
4522 elsif ($next_nonblank_token eq 'ne'
4523 or $next_nonblank_token eq 'eq' )
4527 elsif ( $is_lt_gt_le_ge{$next_nonblank_token} ) {
4528 $bsl = 0.9 * NOMINAL + 0.1 * STRONG;
4531 # Use the minimum of the left and right strengths. Note: it might
4532 # seem that we would want to keep a NO_BREAK if either token has
4533 # this value. This didn't work, for example because in an arrow
4534 # list, it prevents the comma from separating from the following
4535 # bare word (which is probably quoted by its arrow). So necessary
4536 # NO_BREAK's have to be handled as special cases in the final
4538 if ( !defined($bsr) ) { $bsr = VERY_STRONG }
4539 if ( !defined($bsl) ) { $bsl = VERY_STRONG }
4540 my $bond_str = ( $bsr < $bsl ) ? $bsr : $bsl;
4541 $bond_str_1 = $bond_str if (DEBUG_BOND);
4543 #---------------------------------------------------------------
4544 # Bond Strength Section 2:
4545 # Apply hardwired rules..
4546 #---------------------------------------------------------------
4548 # Patch to put terminal or clauses on a new line: Weaken the bond
4549 # at an || followed by die or similar keyword to make the terminal
4550 # or clause fall on a new line, like this:
4553 # || die "Cannot add broadcast: No class identifier found";
4555 # Otherwise the break will be at the previous '=' since the || and
4556 # = have the same starting strength and the or is biased, like
4560 # shift || die "Cannot add broadcast: No class identifier found";
4562 # In any case if the user places a break at either the = or the ||
4563 # it should remain there.
4564 if ( $type eq '||' || $type eq 'k' && $token eq 'or' ) {
4566 # /^(die|confess|croak|warn)$/
4567 if ( $is_die_confess_croak_warn{$next_nonblank_token} ) {
4568 if ( $want_break_before{$token} && $i > 0 ) {
4569 $rbond_strength_to_go->[ $i - 1 ] -= $delta_bias;
4571 # keep bond strength of a token and its following blank
4573 if ( $types_to_go[ $i - 1 ] eq 'b' && $i > 2 ) {
4574 $rbond_strength_to_go->[ $i - 2 ] -= $delta_bias;
4578 $bond_str -= $delta_bias;
4583 # good to break after end of code blocks
4584 if ( $type eq '}' && $block_type && $next_nonblank_type ne ';' ) {
4586 $bond_str = 0.5 * WEAK + 0.5 * VERY_WEAK + $code_bias;
4587 $code_bias += $delta_bias;
4590 if ( $type eq 'k' ) {
4592 # allow certain control keywords to stand out
4593 if ( $next_nonblank_type eq 'k'
4594 && $is_last_next_redo_return{$token} )
4596 $bond_str = 0.45 * WEAK + 0.55 * VERY_WEAK;
4599 # Don't break after keyword my. This is a quick fix for a
4600 # rare problem with perl. An example is this line from file
4603 # foreach my $question( Debian::DebConf::ConfigDb::gettree(
4604 # $this->{'question'} ) )
4606 if ( $token eq 'my' ) {
4607 $bond_str = NO_BREAK;
4612 if ( $next_nonblank_type eq 'k' && $type ne 'CORE::' ) {
4614 if ( $is_keyword_returning_list{$next_nonblank_token} ) {
4615 $bond_str = $list_str if ( $bond_str > $list_str );
4618 # keywords like 'unless', 'if', etc, within statements
4620 if ( $is_good_keyword_breakpoint{$next_nonblank_token} ) {
4621 $bond_str = VERY_WEAK / 1.05;
4625 # try not to break before a comma-arrow
4626 elsif ( $next_nonblank_type eq '=>' ) {
4627 if ( $bond_str < STRONG ) { $bond_str = STRONG }
4630 #---------------------------------------------------------------
4631 # Additional hardwired NOBREAK rules
4632 #---------------------------------------------------------------
4634 # map1.t -- correct for a quirk in perl
4636 && $next_nonblank_type eq 'i'
4637 && $last_nonblank_type eq 'k'
4638 && $is_sort_map_grep{$last_nonblank_token} )
4640 # /^(sort|map|grep)$/ )
4642 $bond_str = NO_BREAK;
4645 # extrude.t: do not break before paren at:
4647 if ( $last_nonblank_type eq 'F' && $next_nonblank_token eq '(' ) {
4648 $bond_str = NO_BREAK;
4651 # OLD COMMENT: In older version of perl, use strict can cause
4652 # problems with breaks before bare words following opening parens.
4653 # For example, this will fail under older versions if a break is
4654 # made between '(' and 'MAIL':
4656 # use strict; open( MAIL, "a long filename or command"); close MAIL;
4658 # NEW COMMENT: Third fix for b1213:
4659 # This option does not seem to be needed any longer, and it can
4660 # cause instabilities. It can be turned off, but to minimize
4661 # changes to existing formatting it is retained only in the case
4662 # where the previous token was 'open' and there was no line break.
4663 # Even this could eventually be removed if it causes instability.
4664 if ( $type eq '{' ) {
4667 && $next_nonblank_type eq 'w'
4668 && $last_nonblank_type eq 'k'
4669 && $last_nonblank_token eq 'open'
4670 && !$old_breakpoint_to_go[$i] )
4672 $bond_str = NO_BREAK;
4676 # Do not break between a possible filehandle and a ? or / and do
4677 # not introduce a break after it if there is no blank
4679 elsif ( $type eq 'Z' ) {
4684 # if there is no blank and we do not want one. Examples:
4685 # print $x++ # do not break after $x
4686 # print HTML"HELLO" # break ok after HTML
4689 && defined( $want_left_space{$next_type} )
4690 && $want_left_space{$next_type} == WS_NO
4693 # or we might be followed by the start of a quote,
4694 # and this is not an existing breakpoint; fixes c039.
4695 || !$old_breakpoint_to_go[$i]
4696 && substr( $next_nonblank_token, 0, 1 ) eq '/'
4700 $bond_str = NO_BREAK;
4704 # Breaking before a ? before a quote can cause trouble if
4705 # they are not separated by a blank.
4706 # Example: a syntax error occurs if you break before the ? here
4707 # my$logic=join$all?' && ':' || ',@regexps;
4708 # From: Professional_Perl_Programming_Code/multifind.pl
4709 if ( $next_nonblank_type eq '?' ) {
4710 $bond_str = NO_BREAK
4711 if ( $types_to_go[ $i_next_nonblank + 1 ] eq 'Q' );
4714 # Breaking before a . followed by a number
4715 # can cause trouble if there is no intervening space
4716 # Example: a syntax error occurs if you break before the .2 here
4717 # $str .= pack($endian.2, ensurrogate($ord));
4718 # From: perl58/Unicode.pm
4719 elsif ( $next_nonblank_type eq '.' ) {
4720 $bond_str = NO_BREAK
4721 if ( $types_to_go[ $i_next_nonblank + 1 ] eq 'n' );
4725 elsif ( $type eq 'w' ) {
4726 $bond_str = NO_BREAK
4727 if ( !$old_breakpoint_to_go[$i]
4728 && substr( $next_nonblank_token, 0, 1 ) eq '/'
4729 && $next_nonblank_type ne '//' );
4732 $bond_str_2 = $bond_str if (DEBUG_BOND);
4734 #---------------------------------------------------------------
4735 # End of hardwired rules
4736 #---------------------------------------------------------------
4738 #---------------------------------------------------------------
4739 # Bond Strength Section 3:
4740 # Apply table rules. These have priority over the above
4742 #---------------------------------------------------------------
4744 my $tabulated_bond_str;
4746 my $rtype = $next_nonblank_type;
4747 if ( $seqno && $is_container_token{$token} ) {
4748 $ltype = $type . $token;
4751 if ( $next_nonblank_seqno
4752 && $is_container_token{$next_nonblank_token} )
4754 $rtype = $next_nonblank_type . $next_nonblank_token;
4756 # Alternate Fix #1 for issue b1299. This version makes the
4757 # decision as soon as possible. See Alternate Fix #2 also.
4758 # Do not separate a bareword identifier from its paren: b1299
4759 # This is currently needed for stability because if the bareword
4760 # gets separated from a preceding '->' and following '(' then
4761 # the tokenizer may switch from type 'i' to type 'w'. This
4762 # patch will prevent this by keeping it adjacent to its '('.
4763 ## if ( $next_nonblank_token eq '('
4765 ## && substr( $token, 0, 1 ) =~ /^\w$/ )
4771 # apply binary rules which apply regardless of space between tokens
4772 if ( $binary_bond_strength{$ltype}{$rtype} ) {
4773 $bond_str = $binary_bond_strength{$ltype}{$rtype};
4774 $tabulated_bond_str = $bond_str;
4777 # apply binary rules which apply only if no space between tokens
4778 if ( $binary_bond_strength_nospace{$ltype}{$next_type} ) {
4779 $bond_str = $binary_bond_strength{$ltype}{$next_type};
4780 $tabulated_bond_str = $bond_str;
4783 if ( $nobreak_rhs{$ltype} || $nobreak_lhs{$rtype} ) {
4784 $bond_str = NO_BREAK;
4785 $tabulated_bond_str = $bond_str;
4788 $bond_str_3 = $bond_str if (DEBUG_BOND);
4790 # If the hardwired rules conflict with the tabulated bond
4791 # strength then there is an inconsistency that should be fixed
4793 && $tabulated_bond_str
4795 && $bond_str_1 != $bond_str_2
4796 && $bond_str_2 != $tabulated_bond_str
4799 "BOND_TABLES: ltype=$ltype rtype=$rtype $bond_str_1->$bond_str_2->$bond_str_3\n";
4802 #-----------------------------------------------------------------
4803 # Bond Strength Section 4:
4804 # Modify strengths of certain tokens which often occur in sequence
4805 # by adding a small bias to each one in turn so that the breaks
4806 # occur from left to right.
4808 # Note that we only changing strengths by small amounts here,
4809 # and usually increasing, so we should not be altering any NO_BREAKs.
4810 # Other routines which check for NO_BREAKs will use a tolerance
4811 # of one to avoid any problem.
4812 #-----------------------------------------------------------------
4814 # The bias tables use special keys:
4815 # $type - if not keyword
4816 # $token - if keyword, but map some keywords together
4818 $type eq 'k' ? $token eq 'err' ? 'or' : $token : $type;
4820 $next_nonblank_type eq 'k'
4821 ? $next_nonblank_token eq 'err'
4823 : $next_nonblank_token
4824 : $next_nonblank_type;
4827 if ( defined( $bias{$left_key} ) ) {
4828 if ( !$want_break_before{$left_key} ) {
4829 $bias{$left_key} += $delta_bias;
4830 $bond_str += $bias{$left_key};
4835 if ( defined( $bias{$right_key} ) ) {
4836 if ( $want_break_before{$right_key} ) {
4838 # for leading '.' align all but 'short' quotes; the idea
4839 # is to not place something like "\n" on a single line.
4840 if ( $right_key eq '.' ) {
4842 $last_nonblank_type eq '.'
4843 && ( $token_length <=
4844 $rOpts_short_concatenation_item_length )
4845 && ( !$is_closing_token{$token} )
4848 $bias{$right_key} += $delta_bias;
4852 $bias{$right_key} += $delta_bias;
4854 $bond_str += $bias{$right_key};
4858 $bond_str_4 = $bond_str if (DEBUG_BOND);
4860 #---------------------------------------------------------------
4861 # Bond Strength Section 5:
4862 # Fifth Approximation.
4863 # Take nesting depth into account by adding the nesting depth
4864 # to the bond strength.
4865 #---------------------------------------------------------------
4868 if ( defined($bond_str) && !$nobreak_to_go[$i] ) {
4869 if ( $total_nesting_depth > 0 ) {
4870 $strength = $bond_str + $total_nesting_depth;
4873 $strength = $bond_str;
4877 $strength = NO_BREAK;
4879 # For critical code such as lines with here targets we must
4880 # be absolutely sure that we do not allow a break. So for
4881 # these the nobreak flag exceeds 1 as a signal. Otherwise we
4882 # can run into trouble when small tolerances are added.
4884 if ( $nobreak_to_go[$i] && $nobreak_to_go[$i] > 1 );
4887 #---------------------------------------------------------------
4888 # Bond Strength Section 6:
4889 # Sixth Approximation. Welds.
4890 #---------------------------------------------------------------
4892 # Do not allow a break within welds
4893 if ( $total_weld_count && $seqno ) {
4894 my $KK = $K_to_go[$i];
4895 if ( $rK_weld_right->{$KK} ) {
4896 $strength = NO_BREAK;
4899 # But encourage breaking after opening welded tokens
4900 elsif ($rK_weld_left->{$KK}
4901 && $is_opening_token{$token} )
4907 # always break after side comment
4908 if ( $type eq '#' ) { $strength = 0 }
4910 $rbond_strength_to_go->[$i] = $strength;
4912 # Fix for case c001: be sure NO_BREAK's are enforced by later
4913 # routines, except at a '?' because '?' as quote delimiter is
4915 if ( $strength >= NO_BREAK && $next_nonblank_type ne '?' ) {
4916 $nobreak_to_go[$i] ||= 1;
4920 my $str = substr( $token, 0, 15 );
4921 $str .= SPACE x ( 16 - length($str) );
4923 "BOND: i=$i $str $type $next_nonblank_type depth=$total_nesting_depth strength=$bond_str_1 -> $bond_str_2 -> $bond_str_3 -> $bond_str_4 $bond_str -> $strength \n";
4925 # reset for next pass
4926 $bond_str_1 = $bond_str_2 = $bond_str_3 = $bond_str_4 = undef;
4930 return $rbond_strength_to_go;
4931 } ## end sub set_bond_strengths
4932 } ## end closure set_bond_strengths
4936 # See if a pattern will compile. We have to use a string eval here,
4937 # but it should be safe because the pattern has been constructed
4940 my $ok = eval "'##'=~/$pattern/";
4941 return !defined($ok) || $EVAL_ERROR;
4942 } ## end sub bad_pattern
4944 { ## begin closure prepare_cuddled_block_types
4948 # Add keywords here which really should not be cuddled
4950 my @q = qw(if unless for foreach while);
4951 @no_cuddle{@q} = (1) x scalar(@q);
4954 sub prepare_cuddled_block_types {
4956 # the cuddled-else style, if used, is controlled by a hash that
4959 # Include keywords here which should not be cuddled
4961 my $cuddled_string = EMPTY_STRING;
4962 if ( $rOpts->{'cuddled-else'} ) {
4965 $cuddled_string = 'elsif else continue catch finally'
4966 unless ( $rOpts->{'cuddled-block-list-exclusive'} );
4968 # This is the old equivalent but more complex version
4969 # $cuddled_string = 'if-elsif-else unless-elsif-else -continue ';
4971 # Add users other blocks to be cuddled
4972 my $cuddled_block_list = $rOpts->{'cuddled-block-list'};
4973 if ($cuddled_block_list) {
4974 $cuddled_string .= SPACE . $cuddled_block_list;
4979 # If we have a cuddled string of the form
4980 # 'try-catch-finally'
4982 # we want to prepare a hash of the form
4984 # $rcuddled_block_types = {
4991 # use -dcbl to dump this hash
4993 # Multiple such strings are input as a space or comma separated list
4995 # If we get two lists with the same leading type, such as
4996 # -cbl = "-try-catch-finally -try-catch-otherwise"
4997 # then they will get merged as follows:
4998 # $rcuddled_block_types = {
5005 # This will allow either type of chain to be followed.
5007 $cuddled_string =~ s/,/ /g; # allow space or comma separated lists
5008 my @cuddled_strings = split /\s+/, $cuddled_string;
5010 $rcuddled_block_types = {};
5012 # process each dash-separated string...
5013 my $string_count = 0;
5014 foreach my $string (@cuddled_strings) {
5015 next unless $string;
5016 my @words = split /-+/, $string; # allow multiple dashes
5018 # we could look for and report possible errors here...
5019 next unless ( @words > 0 );
5021 # allow either '-continue' or *-continue' for arbitrary starting type
5024 # a single word without dashes is a secondary block type
5026 $start = shift @words;
5029 # always make an entry for the leading word. If none follow, this
5030 # will still prevent a wildcard from matching this word.
5031 if ( !defined( $rcuddled_block_types->{$start} ) ) {
5032 $rcuddled_block_types->{$start} = {};
5035 # The count gives the original word order in case we ever want it.
5038 foreach my $word (@words) {
5040 if ( $no_cuddle{$word} ) {
5042 "## Ignoring keyword '$word' in -cbl; does not seem right\n"
5047 $rcuddled_block_types->{$start}->{$word} =
5048 1; #"$string_count.$word_count";
5050 # git#9: Remove this word from the list of desired one-line
5052 $want_one_line_block{$word} = 0;
5056 } ## end sub prepare_cuddled_block_types
5057 } ## end closure prepare_cuddled_block_types
5059 sub dump_cuddled_block_list {
5062 # ORIGINAL METHOD: Here is the format of the cuddled block type hash
5063 # which controls this routine
5064 # my $rcuddled_block_types = {
5075 # SIMPLIFIED METHOD: the simplified method uses a wildcard for
5076 # the starting block type and puts all cuddled blocks together:
5077 # my $rcuddled_block_types = {
5086 # Both methods work, but the simplified method has proven to be adequate and
5089 my $cuddled_string = $rOpts->{'cuddled-block-list'};
5090 $cuddled_string = EMPTY_STRING unless $cuddled_string;
5092 my $flags = EMPTY_STRING;
5093 $flags .= "-ce" if ( $rOpts->{'cuddled-else'} );
5094 $flags .= " -cbl='$cuddled_string'";
5096 unless ( $rOpts->{'cuddled-else'} ) {
5097 $flags .= "\nNote: You must specify -ce to generate a cuddled hash";
5101 ------------------------------------------------------------------------
5102 Hash of cuddled block types prepared for a run with these parameters:
5104 ------------------------------------------------------------------------
5108 $fh->print( Dumper($rcuddled_block_types) );
5111 ------------------------------------------------------------------------
5114 } ## end sub dump_cuddled_block_list
5116 sub make_static_block_comment_pattern {
5118 # create the pattern used to identify static block comments
5119 $static_block_comment_pattern = '^\s*##';
5121 # allow the user to change it
5122 if ( $rOpts->{'static-block-comment-prefix'} ) {
5123 my $prefix = $rOpts->{'static-block-comment-prefix'};
5124 $prefix =~ s/^\s*//;
5125 my $pattern = $prefix;
5127 # user may give leading caret to force matching left comments only
5128 if ( $prefix !~ /^\^#/ ) {
5129 if ( $prefix !~ /^#/ ) {
5131 "ERROR: the -sbcp prefix is '$prefix' but must begin with '#' or '^#'\n"
5134 $pattern = '^\s*' . $prefix;
5136 if ( bad_pattern($pattern) ) {
5138 "ERROR: the -sbc prefix '$prefix' causes the invalid regex '$pattern'\n"
5141 $static_block_comment_pattern = $pattern;
5144 } ## end sub make_static_block_comment_pattern
5146 sub make_format_skipping_pattern {
5147 my ( $opt_name, $default ) = @_;
5148 my $param = $rOpts->{$opt_name};
5149 unless ($param) { $param = $default }
5151 if ( $param !~ /^#/ ) {
5152 Die("ERROR: the $opt_name parameter '$param' must begin with '#'\n");
5154 my $pattern = '^' . $param . '\s';
5155 if ( bad_pattern($pattern) ) {
5157 "ERROR: the $opt_name parameter '$param' causes the invalid regex '$pattern'\n"
5161 } ## end sub make_format_skipping_pattern
5163 sub make_non_indenting_brace_pattern {
5165 # Create the pattern used to identify static side comments.
5166 # Note that we are ending the pattern in a \s. This will allow
5167 # the pattern to be followed by a space and some text, or a newline.
5168 # The pattern is used in sub 'non_indenting_braces'
5169 $non_indenting_brace_pattern = '^#<<<\s';
5171 # allow the user to change it
5172 if ( $rOpts->{'non-indenting-brace-prefix'} ) {
5173 my $prefix = $rOpts->{'non-indenting-brace-prefix'};
5174 $prefix =~ s/^\s*//;
5175 if ( $prefix !~ /^#/ ) {
5176 Die("ERROR: the -nibp parameter '$prefix' must begin with '#'\n");
5178 my $pattern = '^' . $prefix . '\s';
5179 if ( bad_pattern($pattern) ) {
5181 "ERROR: the -nibp prefix '$prefix' causes the invalid regex '$pattern'\n"
5184 $non_indenting_brace_pattern = $pattern;
5187 } ## end sub make_non_indenting_brace_pattern
5189 sub make_closing_side_comment_list_pattern {
5191 # turn any input list into a regex for recognizing selected block types
5192 $closing_side_comment_list_pattern = '^\w+';
5193 if ( defined( $rOpts->{'closing-side-comment-list'} )
5194 && $rOpts->{'closing-side-comment-list'} )
5196 $closing_side_comment_list_pattern =
5197 make_block_pattern( '-cscl', $rOpts->{'closing-side-comment-list'} );
5200 } ## end sub make_closing_side_comment_list_pattern
5202 sub make_sub_matching_pattern {
5204 # Patterns for standardizing matches to block types for regular subs and
5205 # anonymous subs. Examples
5206 # 'sub process' is a named sub
5207 # 'sub ::m' is a named sub
5208 # 'sub' is an anonymous sub
5209 # 'sub:' is a label, not a sub
5210 # 'sub :' is a label, not a sub ( block type will be <sub:> )
5211 # sub'_ is a named sub ( block type will be <sub '_> )
5212 # 'substr' is a keyword
5213 # So note that named subs always have a space after 'sub'
5214 $SUB_PATTERN = '^sub\s'; # match normal sub
5215 $ASUB_PATTERN = '^sub$'; # match anonymous sub
5217 # Note (see also RT #133130): These patterns are used by
5218 # sub make_block_pattern, which is used for making most patterns.
5219 # So this sub needs to be called before other pattern-making routines.
5221 if ( $rOpts->{'sub-alias-list'} ) {
5223 # Note that any 'sub-alias-list' has been preprocessed to
5224 # be a trimmed, space-separated list which includes 'sub'
5225 # for example, it might be 'sub method fun'
5226 my $sub_alias_list = $rOpts->{'sub-alias-list'};
5227 $sub_alias_list =~ s/\s+/\|/g;
5228 $SUB_PATTERN =~ s/sub/\($sub_alias_list\)/;
5229 $ASUB_PATTERN =~ s/sub/\($sub_alias_list\)/;
5232 } ## end sub make_sub_matching_pattern
5234 sub make_bl_pattern {
5236 # Set defaults lists to retain historical default behavior for -bl:
5237 my $bl_list_string = '*';
5238 my $bl_exclusion_list_string = 'sort map grep eval asub';
5240 if ( defined( $rOpts->{'brace-left-list'} )
5241 && $rOpts->{'brace-left-list'} )
5243 $bl_list_string = $rOpts->{'brace-left-list'};
5245 if ( $bl_list_string =~ /\bsub\b/ ) {
5246 $rOpts->{'opening-sub-brace-on-new-line'} ||=
5247 $rOpts->{'opening-brace-on-new-line'};
5249 if ( $bl_list_string =~ /\basub\b/ ) {
5250 $rOpts->{'opening-anonymous-sub-brace-on-new-line'} ||=
5251 $rOpts->{'opening-brace-on-new-line'};
5254 $bl_pattern = make_block_pattern( '-bll', $bl_list_string );
5256 # for -bl, a list with '*' turns on -sbl and -asbl
5257 if ( $bl_pattern =~ /\.\*/ ) {
5258 $rOpts->{'opening-sub-brace-on-new-line'} ||=
5259 $rOpts->{'opening-brace-on-new-line'};
5260 $rOpts->{'opening-anonymous-sub-brace-on-new-line'} ||=
5261 $rOpts->{'opening-anonymous-brace-on-new-line'};
5264 if ( defined( $rOpts->{'brace-left-exclusion-list'} )
5265 && $rOpts->{'brace-left-exclusion-list'} )
5267 $bl_exclusion_list_string = $rOpts->{'brace-left-exclusion-list'};
5268 if ( $bl_exclusion_list_string =~ /\bsub\b/ ) {
5269 $rOpts->{'opening-sub-brace-on-new-line'} = 0;
5271 if ( $bl_exclusion_list_string =~ /\basub\b/ ) {
5272 $rOpts->{'opening-anonymous-sub-brace-on-new-line'} = 0;
5276 $bl_exclusion_pattern =
5277 make_block_pattern( '-blxl', $bl_exclusion_list_string );
5279 } ## end sub make_bl_pattern
5281 sub make_bli_pattern {
5283 # default list of block types for which -bli would apply
5284 my $bli_list_string = 'if else elsif unless while for foreach do : sub';
5285 my $bli_exclusion_list_string = SPACE;
5287 if ( defined( $rOpts->{'brace-left-and-indent-list'} )
5288 && $rOpts->{'brace-left-and-indent-list'} )
5290 $bli_list_string = $rOpts->{'brace-left-and-indent-list'};
5293 $bli_pattern = make_block_pattern( '-blil', $bli_list_string );
5295 if ( defined( $rOpts->{'brace-left-and-indent-exclusion-list'} )
5296 && $rOpts->{'brace-left-and-indent-exclusion-list'} )
5298 $bli_exclusion_list_string =
5299 $rOpts->{'brace-left-and-indent-exclusion-list'};
5301 $bli_exclusion_pattern =
5302 make_block_pattern( '-blixl', $bli_exclusion_list_string );
5304 } ## end sub make_bli_pattern
5306 sub make_keyword_group_list_pattern {
5308 # turn any input list into a regex for recognizing selected block types.
5309 # Here are the defaults:
5310 $keyword_group_list_pattern = '^(our|local|my|use|require|)$';
5311 $keyword_group_list_comment_pattern = EMPTY_STRING;
5312 if ( defined( $rOpts->{'keyword-group-blanks-list'} )
5313 && $rOpts->{'keyword-group-blanks-list'} )
5315 my @words = split /\s+/, $rOpts->{'keyword-group-blanks-list'};
5318 foreach my $word (@words) {
5319 if ( $word eq 'BC' || $word eq 'SBC' ) {
5320 push @comment_list, $word;
5321 if ( $word eq 'SBC' ) { push @comment_list, 'SBCX' }
5324 push @keyword_list, $word;
5327 $keyword_group_list_pattern =
5328 make_block_pattern( '-kgbl', $rOpts->{'keyword-group-blanks-list'} );
5329 $keyword_group_list_comment_pattern =
5330 make_block_pattern( '-kgbl', join( SPACE, @comment_list ) );
5333 } ## end sub make_keyword_group_list_pattern
5335 sub make_block_brace_vertical_tightness_pattern {
5337 # turn any input list into a regex for recognizing selected block types
5338 $block_brace_vertical_tightness_pattern =
5339 '^((if|else|elsif|unless|while|for|foreach|do|\w+:)$|sub)';
5340 if ( defined( $rOpts->{'block-brace-vertical-tightness-list'} )
5341 && $rOpts->{'block-brace-vertical-tightness-list'} )
5343 $block_brace_vertical_tightness_pattern =
5344 make_block_pattern( '-bbvtl',
5345 $rOpts->{'block-brace-vertical-tightness-list'} );
5348 } ## end sub make_block_brace_vertical_tightness_pattern
5350 sub make_blank_line_pattern {
5352 $blank_lines_before_closing_block_pattern = $SUB_PATTERN;
5353 my $key = 'blank-lines-before-closing-block-list';
5354 if ( defined( $rOpts->{$key} ) && $rOpts->{$key} ) {
5355 $blank_lines_before_closing_block_pattern =
5356 make_block_pattern( '-blbcl', $rOpts->{$key} );
5359 $blank_lines_after_opening_block_pattern = $SUB_PATTERN;
5360 $key = 'blank-lines-after-opening-block-list';
5361 if ( defined( $rOpts->{$key} ) && $rOpts->{$key} ) {
5362 $blank_lines_after_opening_block_pattern =
5363 make_block_pattern( '-blaol', $rOpts->{$key} );
5366 } ## end sub make_blank_line_pattern
5368 sub make_block_pattern {
5370 # given a string of block-type keywords, return a regex to match them
5371 # The only tricky part is that labels are indicated with a single ':'
5372 # and the 'sub' token text may have additional text after it (name of
5377 # input string: "if else elsif unless while for foreach do : sub";
5378 # pattern: '^((if|else|elsif|unless|while|for|foreach|do|\w+:)$|sub)';
5382 # To distinguish between anonymous subs and named subs, use 'sub' to
5383 # indicate a named sub, and 'asub' to indicate an anonymous sub
5385 my ( $abbrev, $string ) = @_;
5386 my @list = split_words($string);
5390 if ( $i eq '*' ) { my $pattern = '^.*'; return $pattern }
5393 if ( $i eq 'sub' ) {
5395 elsif ( $i eq 'asub' ) {
5397 elsif ( $i eq ';' ) {
5400 elsif ( $i eq '{' ) {
5403 elsif ( $i eq ':' ) {
5404 push @words, '\w+:';
5406 elsif ( $i =~ /^\w/ ) {
5410 Warn("unrecognized block type $i after $abbrev, ignoring\n");
5414 # Fix 2 for c091, prevent the pattern from matching an empty string
5415 # '1 ' is an impossible block name.
5416 if ( !@words ) { push @words, "1 " }
5418 my $pattern = '(' . join( '|', @words ) . ')$';
5419 my $sub_patterns = EMPTY_STRING;
5420 if ( $seen{'sub'} ) {
5421 $sub_patterns .= '|' . $SUB_PATTERN;
5423 if ( $seen{'asub'} ) {
5424 $sub_patterns .= '|' . $ASUB_PATTERN;
5426 if ($sub_patterns) {
5427 $pattern = '(' . $pattern . $sub_patterns . ')';
5429 $pattern = '^' . $pattern;
5431 } ## end sub make_block_pattern
5433 sub make_static_side_comment_pattern {
5435 # create the pattern used to identify static side comments
5436 $static_side_comment_pattern = '^##';
5438 # allow the user to change it
5439 if ( $rOpts->{'static-side-comment-prefix'} ) {
5440 my $prefix = $rOpts->{'static-side-comment-prefix'};
5441 $prefix =~ s/^\s*//;
5442 my $pattern = '^' . $prefix;
5443 if ( bad_pattern($pattern) ) {
5445 "ERROR: the -sscp prefix '$prefix' causes the invalid regex '$pattern'\n"
5448 $static_side_comment_pattern = $pattern;
5451 } ## end sub make_static_side_comment_pattern
5453 sub make_closing_side_comment_prefix {
5455 # Be sure we have a valid closing side comment prefix
5456 my $csc_prefix = $rOpts->{'closing-side-comment-prefix'};
5457 my $csc_prefix_pattern;
5458 if ( !defined($csc_prefix) ) {
5459 $csc_prefix = '## end';
5460 $csc_prefix_pattern = '^##\s+end';
5463 my $test_csc_prefix = $csc_prefix;
5464 if ( $test_csc_prefix !~ /^#/ ) {
5465 $test_csc_prefix = '#' . $test_csc_prefix;
5468 # make a regex to recognize the prefix
5469 my $test_csc_prefix_pattern = $test_csc_prefix;
5471 # escape any special characters
5472 $test_csc_prefix_pattern =~ s/([^#\s\w])/\\$1/g;
5474 $test_csc_prefix_pattern = '^' . $test_csc_prefix_pattern;
5476 # allow exact number of intermediate spaces to vary
5477 $test_csc_prefix_pattern =~ s/\s+/\\s\+/g;
5479 # make sure we have a good pattern
5480 # if we fail this we probably have an error in escaping
5483 if ( bad_pattern($test_csc_prefix_pattern) ) {
5485 # shouldn't happen..must have screwed up escaping, above
5488 Program Error: the -cscp prefix '$csc_prefix' caused the invalid regex '$csc_prefix_pattern'
5492 # just warn and keep going with defaults
5494 "Error: the -cscp prefix '$csc_prefix' caused the invalid regex '$csc_prefix_pattern'\n"
5496 Warn("Please consider using a simpler -cscp prefix\n");
5497 Warn("Using default -cscp instead; please check output\n");
5500 $csc_prefix = $test_csc_prefix;
5501 $csc_prefix_pattern = $test_csc_prefix_pattern;
5504 $rOpts->{'closing-side-comment-prefix'} = $csc_prefix;
5505 $closing_side_comment_prefix_pattern = $csc_prefix_pattern;
5507 } ## end sub make_closing_side_comment_prefix
5509 ##################################################
5510 # CODE SECTION 4: receive lines from the tokenizer
5511 ##################################################
5513 { ## begin closure write_line
5517 # Variables used by sub check_sequence_numbers:
5519 my %saw_opening_seqno;
5520 my %saw_closing_seqno;
5523 sub initialize_write_line {
5525 $nesting_depth = undef;
5527 $last_seqno = SEQ_ROOT;
5528 %saw_opening_seqno = ();
5529 %saw_closing_seqno = ();
5532 } ## end sub initialize_write_line
5534 sub check_sequence_numbers {
5536 # Routine for checking sequence numbers. This only needs to be
5537 # done occasionally in DEVEL_MODE to be sure everything is working
5539 my ( $rtokens, $rtoken_type, $rtype_sequence, $input_line_no ) = @_;
5540 my $jmax = @{$rtokens} - 1;
5541 return unless ( $jmax >= 0 );
5542 foreach my $j ( 0 .. $jmax ) {
5543 my $seqno = $rtype_sequence->[$j];
5544 my $token = $rtokens->[$j];
5545 my $type = $rtoken_type->[$j];
5546 $seqno = EMPTY_STRING unless ( defined($seqno) );
5548 "Error at j=$j, line number $input_line_no, seqno='$seqno', type='$type', tok='$token':\n";
5552 # Sequence numbers are generated for opening tokens, so every opening
5553 # token should be sequenced. Closing tokens will be unsequenced
5554 # if they do not have a matching opening token.
5555 if ( $is_opening_sequence_token{$token}
5561 $err_msg Unexpected opening token without sequence number
5568 # Save starting seqno to identify sequence method:
5569 # New method starts with 2 and has continuous numbering
5570 # Old method starts with >2 and may have gaps
5571 if ( !defined($initial_seqno) ) { $initial_seqno = $seqno }
5573 if ( $is_opening_sequence_token{$token} ) {
5575 # New method should have continuous numbering
5576 if ( $initial_seqno == 2 && $seqno != $last_seqno + 1 ) {
5579 $err_msg Unexpected opening sequence number: previous seqno=$last_seqno, but seqno= $seqno
5583 $last_seqno = $seqno;
5585 # Numbers must be unique
5586 if ( $saw_opening_seqno{$seqno} ) {
5587 my $lno = $saw_opening_seqno{$seqno};
5590 $err_msg Already saw an opening tokens at line $lno with this sequence number
5594 $saw_opening_seqno{$seqno} = $input_line_no;
5597 # only one closing item per seqno
5598 elsif ( $is_closing_sequence_token{$token} ) {
5599 if ( $saw_closing_seqno{$seqno} ) {
5600 my $lno = $saw_closing_seqno{$seqno};
5603 $err_msg Already saw a closing token with this seqno at line $lno
5607 $saw_closing_seqno{$seqno} = $input_line_no;
5609 # Every closing seqno must have an opening seqno
5610 if ( !$saw_opening_seqno{$seqno} ) {
5613 $err_msg Saw a closing token but no opening token with this seqno
5619 # Sequenced items must be opening or closing
5623 $err_msg Unexpected token type with a sequence number
5630 } ## end sub check_sequence_numbers
5632 sub store_block_type {
5633 my ( $self, $block_type, $seqno ) = @_;
5635 return if ( !$block_type );
5637 $self->[_rblock_type_of_seqno_]->{$seqno} = $block_type;
5639 if ( $block_type =~ /$ASUB_PATTERN/ ) {
5640 $self->[_ris_asub_block_]->{$seqno} = 1;
5642 elsif ( $block_type =~ /$SUB_PATTERN/ ) {
5643 $self->[_ris_sub_block_]->{$seqno} = 1;
5646 } ## end sub store_block_type
5650 # This routine receives lines one-by-one from the tokenizer and stores
5651 # them in a format suitable for further processing. After the last
5652 # line has been sent, the tokenizer will call sub 'finish_formatting'
5653 # to do the actual formatting.
5655 my ( $self, $line_of_tokens_old ) = @_;
5657 my $rLL = $self->[_rLL_];
5658 my $line_of_tokens = {};
5663 _guessed_indentation_level
5669 _square_bracket_depth
5674 $line_of_tokens->{$_} = $line_of_tokens_old->{$_};
5677 my $line_type = $line_of_tokens_old->{_line_type};
5680 my $Klimit = $self->[_Klimit_];
5683 # Handle line of non-code
5684 if ( $line_type ne 'CODE' ) {
5685 $tee_output ||= $rOpts_tee_pod
5686 && substr( $line_type, 0, 3 ) eq 'POD';
5688 $line_of_tokens->{_level_0} = 0;
5689 $line_of_tokens->{_ci_level_0} = 0;
5690 $line_of_tokens->{_nesting_blocks_0} = EMPTY_STRING;
5691 $line_of_tokens->{_nesting_tokens_0} = EMPTY_STRING;
5692 $line_of_tokens->{_ended_in_blank_token} = undef;
5696 # Handle line of code
5699 my $rtokens = $line_of_tokens_old->{_rtokens};
5700 my $jmax = @{$rtokens} - 1;
5704 $Kfirst = defined($Klimit) ? $Klimit + 1 : 0;
5706 #----------------------------
5707 # get the tokens on this line
5708 #----------------------------
5709 $self->write_line_inner_loop( $line_of_tokens_old,
5712 # update Klimit for added tokens
5713 $Klimit = @{$rLL} - 1;
5715 } ## end if ( $jmax >= 0 )
5719 $line_of_tokens->{_level_0} = 0;
5720 $line_of_tokens->{_ci_level_0} = 0;
5721 $line_of_tokens->{_nesting_blocks_0} = EMPTY_STRING;
5722 $line_of_tokens->{_nesting_tokens_0} = EMPTY_STRING;
5723 $line_of_tokens->{_ended_in_blank_token} = undef;
5728 $rOpts_tee_block_comments
5730 && $rLL->[$Kfirst]->[_TYPE_] eq '#';
5733 $rOpts_tee_side_comments
5735 && $Klimit > $Kfirst
5736 && $rLL->[$Klimit]->[_TYPE_] eq '#';
5738 } ## end if ( $line_type eq 'CODE')
5740 # Finish storing line variables
5741 $line_of_tokens->{_rK_range} = [ $Kfirst, $Klimit ];
5742 $self->[_Klimit_] = $Klimit;
5743 my $rlines = $self->[_rlines_];
5744 push @{$rlines}, $line_of_tokens;
5747 my $fh_tee = $self->[_fh_tee_];
5748 my $line_text = $line_of_tokens_old->{_line_text};
5749 $fh_tee->print($line_text) if ($fh_tee);
5753 } ## end sub write_line
5755 sub write_line_inner_loop {
5756 my ( $self, $line_of_tokens_old, $line_of_tokens ) = @_;
5758 #---------------------------------------------------------------------
5759 # Copy the tokens on one line received from the tokenizer to their new
5760 # storage locations.
5761 #---------------------------------------------------------------------
5764 # $line_of_tokens_old = line received from tokenizer
5765 # $line_of_tokens = line of tokens being formed for formatter
5767 my $rtokens = $line_of_tokens_old->{_rtokens};
5768 my $jmax = @{$rtokens} - 1;
5771 # safety check; shouldn't happen
5772 DEVEL_MODE && Fault("unexpected jmax=$jmax\n");
5776 my $line_index = $line_of_tokens_old->{_line_number} - 1;
5777 my $rtoken_type = $line_of_tokens_old->{_rtoken_type};
5778 my $rblock_type = $line_of_tokens_old->{_rblock_type};
5779 my $rtype_sequence = $line_of_tokens_old->{_rtype_sequence};
5780 my $rlevels = $line_of_tokens_old->{_rlevels};
5781 my $rci_levels = $line_of_tokens_old->{_rci_levels};
5783 my $rLL = $self->[_rLL_];
5784 my $rSS = $self->[_rSS_];
5785 my $rdepth_of_opening_seqno = $self->[_rdepth_of_opening_seqno_];
5788 && check_sequence_numbers( $rtokens, $rtoken_type,
5789 $rtype_sequence, $line_index + 1 );
5791 # Find the starting nesting depth ...
5792 # It must be the value of variable 'level' of the first token
5793 # because the nesting depth is used as a token tag in the
5794 # vertical aligner and is compared to actual levels.
5795 # So vertical alignment problems will occur with any other
5797 if ( !defined($nesting_depth) ) {
5798 $nesting_depth = $rlevels->[0];
5799 $nesting_depth = 0 if ( $nesting_depth < 0 );
5800 $rdepth_of_opening_seqno->[SEQ_ROOT] = $nesting_depth - 1;
5805 # NOTE: coding efficiency is critical in this loop over all tokens
5806 foreach my $token ( @{$rtokens} ) {
5808 # Do not clip the 'level' variable yet. We will do this
5809 # later, in sub 'store_token_to_go'. The reason is that in
5810 # files with level errors, the logic in 'weld_cuddled_else'
5811 # uses a stack logic that will give bad welds if we clip
5814 ## if ( $rlevels->[$j] < 0 ) { $rlevels->[$j] = 0 }
5816 my $seqno = EMPTY_STRING;
5818 # Handle tokens with sequence numbers ...
5819 # note the ++ increment hidden here for efficiency
5820 if ( $rtype_sequence->[ ++$j ] ) {
5821 $seqno = $rtype_sequence->[$j];
5823 if ( $is_opening_token{$token} ) {
5824 $self->[_K_opening_container_]->{$seqno} = @{$rLL};
5825 $rdepth_of_opening_seqno->[$seqno] = $nesting_depth;
5828 # Save a sequenced block type at its opening token.
5829 # Note that unsequenced block types can occur in
5830 # unbalanced code with errors but are ignored here.
5831 $self->store_block_type( $rblock_type->[$j], $seqno )
5832 if ( $rblock_type->[$j] );
5834 elsif ( $is_closing_token{$token} ) {
5836 # The opening depth should always be defined, and
5837 # it should equal $nesting_depth-1. To protect
5838 # against unforseen error conditions, however, we
5839 # will check this and fix things if necessary. For
5840 # a test case see issue c055.
5841 my $opening_depth = $rdepth_of_opening_seqno->[$seqno];
5842 if ( !defined($opening_depth) ) {
5843 $opening_depth = $nesting_depth - 1;
5844 $opening_depth = 0 if ( $opening_depth < 0 );
5845 $rdepth_of_opening_seqno->[$seqno] = $opening_depth;
5847 # This is not fatal but should not happen. The
5848 # tokenizer generates sequence numbers
5849 # incrementally upon encountering each new
5850 # opening token, so every positive sequence
5851 # number should correspond to an opening token.
5852 DEVEL_MODE && Fault(<<EOM);
5853 No opening token seen for closing token = '$token' at seq=$seqno at depth=$opening_depth
5856 $self->[_K_closing_container_]->{$seqno} = @{$rLL};
5857 $nesting_depth = $opening_depth;
5860 elsif ( $token eq '?' ) {
5862 elsif ( $token eq ':' ) {
5866 # The only sequenced types output by the tokenizer are
5867 # the opening & closing containers and the ternary
5868 # types. So we would only get here if the tokenizer has
5869 # been changed to mark some other tokens with sequence
5870 # numbers, or if an error has been introduced in a
5871 # hash such as %is_opening_container
5873 DEVEL_MODE && Fault(<<EOM);
5874 Unexpected sequenced token '$token' of type '$rtoken_type->[$j]', sequence=$seqno arrived from tokenizer.
5875 Expecting only opening or closing container tokens or ternary tokens with sequence numbers.
5880 $self->[_Iss_opening_]->[$seqno] = @{$rSS};
5882 # For efficiency, we find the maximum level of
5883 # opening tokens of any type. The actual maximum
5884 # level will be that of their contents which is 1
5885 # greater. That will be fixed in sub
5886 # 'finish_formatting'.
5887 my $level = $rlevels->[$j];
5888 if ( $level > $self->[_maximum_level_] ) {
5889 $self->[_maximum_level_] = $level;
5890 $self->[_maximum_level_at_line_] = $line_index + 1;
5893 else { $self->[_Iss_closing_]->[$seqno] = @{$rSS} }
5894 push @{$rSS}, $sign * $seqno;
5918 push @{$rLL}, \@tokary;
5921 # Need to remember if we can trim the input line
5922 $line_of_tokens->{_ended_in_blank_token} = $rtoken_type->[$jmax] eq 'b';
5924 # Values needed by Logger
5925 $line_of_tokens->{_level_0} = $rlevels->[0];
5926 $line_of_tokens->{_ci_level_0} = $rci_levels->[0];
5927 $line_of_tokens->{_nesting_blocks_0} =
5928 $line_of_tokens_old->{_nesting_blocks_0};
5929 $line_of_tokens->{_nesting_tokens_0} =
5930 $line_of_tokens_old->{_nesting_tokens_0};
5934 } ## end sub write_line_inner_loop
5936 } ## end closure write_line
5938 #############################################
5939 # CODE SECTION 5: Pre-process the entire file
5940 #############################################
5942 sub finish_formatting {
5944 my ( $self, $severe_error ) = @_;
5946 # The file has been tokenized and is ready to be formatted.
5947 # All of the relevant data is stored in $self, ready to go.
5950 # true if input file was copied verbatim due to errors
5953 # Some of the code in sub break_lists is not robust enough to process code
5954 # with arbitrary brace errors. The simplest fix is to just return the file
5955 # verbatim if there are brace errors. This fixes issue c160.
5956 $severe_error ||= get_saw_brace_error();
5958 # Check the maximum level. If it is extremely large we will give up and
5959 # output the file verbatim. Note that the actual maximum level is 1
5960 # greater than the saved value, so we fix that here.
5961 $self->[_maximum_level_] += 1;
5962 my $maximum_level = $self->[_maximum_level_];
5963 my $maximum_table_index = $#maximum_line_length_at_level;
5964 if ( !$severe_error && $maximum_level >= $maximum_table_index ) {
5965 $severe_error ||= 1;
5967 The maximum indentation level, $maximum_level, exceeds the builtin limit of $maximum_table_index.
5968 Something may be wrong; formatting will be skipped.
5972 # Dump any requested block summary data
5973 if ( $rOpts->{'dump-block-summary'} ) {
5974 if ($severe_error) { Exit(1) }
5975 $self->dump_block_summary();
5979 # output file verbatim if severe error or no formatting requested
5980 if ( $severe_error || $rOpts->{notidy} ) {
5981 $self->dump_verbatim();
5982 $self->wrapup($severe_error);
5986 # Update the 'save_logfile' flag based to include any tokenization errors.
5987 # We can save time by skipping logfile calls if it is not going to be saved.
5988 my $logger_object = $self->[_logger_object_];
5989 if ($logger_object) {
5990 my $save_logfile = $logger_object->get_save_logfile();
5991 $self->[_save_logfile_] = $save_logfile;
5992 my $file_writer_object = $self->[_file_writer_object_];
5993 $file_writer_object->set_save_logfile($save_logfile);
5997 my $rix_side_comments = $self->set_CODE_type();
5999 $self->find_non_indenting_braces($rix_side_comments);
6001 # Handle any requested side comment deletions. It is easier to get
6002 # this done here rather than farther down the pipeline because IO
6003 # lines take a different route, and because lines with deleted HSC
6004 # become BL lines. We have already handled any tee requests in sub
6005 # getline, so it is safe to delete side comments now.
6006 $self->delete_side_comments($rix_side_comments)
6007 if ( $rOpts_delete_side_comments
6008 || $rOpts_delete_closing_side_comments );
6011 # Verify that the line hash does not have any unknown keys.
6012 $self->check_line_hashes() if (DEVEL_MODE);
6015 # Make a pass through all tokens, adding or deleting any whitespace as
6016 # required. Also make any other changes, such as adding semicolons.
6017 # All token changes must be made here so that the token data structure
6018 # remains fixed for the rest of this iteration.
6019 my ( $error, $rqw_lines ) = $self->respace_tokens();
6021 $self->dump_verbatim();
6026 $self->find_multiline_qw($rqw_lines);
6029 $self->examine_vertical_tightness_flags();
6031 $self->set_excluded_lp_containers();
6033 $self->keep_old_line_breaks();
6035 # Implement any welding needed for the -wn or -cb options
6036 $self->weld_containers();
6038 # Collect info needed to implement the -xlp style
6039 $self->xlp_collapsed_lengths()
6040 if ( $rOpts_line_up_parentheses && $rOpts_extended_line_up_parentheses );
6042 # Locate small nested blocks which should not be broken
6043 $self->mark_short_nested_blocks();
6045 $self->special_indentation_adjustments();
6047 # Verify that the main token array looks OK. If this ever causes a fault
6048 # then place similar checks before the sub calls above to localize the
6050 $self->check_rLL("Before 'process_all_lines'") if (DEVEL_MODE);
6052 # Finishes formatting and write the result to the line sink.
6053 # Eventually this call should just change the 'rlines' data according to the
6054 # new line breaks and then return so that we can do an internal iteration
6055 # before continuing with the next stages of formatting.
6056 $self->process_all_lines();
6058 # A final routine to tie up any loose ends
6061 } ## end sub finish_formatting
6066 my @q = qw( for foreach while do until );
6067 @{is_loop_type}{@q} = (1) x scalar(@q);
6070 sub find_level_info {
6072 # Find level ranges and total variations of all code blocks in this file.
6075 # ref to hash with block info, with seqno as key (see below)
6079 # The array _rSS_ has the complete container tree for this file.
6080 my $rSS = $self->[_rSS_];
6082 # We will be ignoring everything except code block containers
6083 my $rblock_type_of_seqno = $self->[_rblock_type_of_seqno_];
6089 foreach my $sseq ( @{$rSS} ) {
6090 my $stack_depth = @stack;
6091 my $seq_next = $sseq > 0 ? $sseq : -$sseq;
6093 next if ( !$rblock_type_of_seqno->{$seq_next} );
6098 foreach my $seq (@stack) {
6099 $item = $level_info{$seq};
6100 if ( $item->{maximum_depth} < $stack_depth ) {
6101 $item->{maximum_depth} = $stack_depth;
6103 $item->{block_count}++;
6106 push @stack, $seq_next;
6107 my $block_type = $rblock_type_of_seqno->{$seq_next};
6109 # If this block is a loop nested within a loop, then we
6110 # will mark it as an 'inner_loop'. This is a useful
6111 # complexity measure.
6112 my $is_inner_loop = 0;
6113 if ( $is_loop_type{$block_type} && defined($item) ) {
6114 $is_inner_loop = $is_loop_type{ $item->{block_type} };
6117 $level_info{$seq_next} = {
6118 starting_depth => $stack_depth,
6119 maximum_depth => $stack_depth,
6121 block_type => $block_type,
6122 is_inner_loop => $is_inner_loop,
6126 my $seq_test = pop @stack;
6129 if ( $seq_test != $seq_next ) {
6131 # Shouldn't happen - the $rSS array must have an error
6132 DEVEL_MODE && Fault("stack error finding total depths\n");
6139 return \%level_info;
6140 } ## end sub find_level_info
6142 sub find_loop_label {
6144 my ( $self, $seqno ) = @_;
6147 # $seqno = sequence number of a block of code for a loop
6149 # $label = the loop label text, if any, or an empty string
6151 my $rLL = $self->[_rLL_];
6152 my $rlines = $self->[_rlines_];
6153 my $K_opening_container = $self->[_K_opening_container_];
6155 my $label = EMPTY_STRING;
6156 my $K_opening = $K_opening_container->{$seqno};
6158 # backup to the line with the opening paren, if any, in case the
6159 # keyword is on a different line
6160 my $Kp = $self->K_previous_code($K_opening);
6161 return $label unless ( defined($Kp) );
6162 if ( $rLL->[$Kp]->[_TOKEN_] eq ')' ) {
6163 $seqno = $rLL->[$Kp]->[_TYPE_SEQUENCE_];
6164 $K_opening = $K_opening_container->{$seqno};
6167 return $label unless ( defined($K_opening) );
6168 my $lx_open = $rLL->[$K_opening]->[_LINE_INDEX_];
6170 # look for a lable within a few lines; allow a couple of blank lines
6171 foreach my $lx ( reverse( $lx_open - 3 .. $lx_open ) ) {
6172 last if ( $lx < 0 );
6173 my $line_of_tokens = $rlines->[$lx];
6174 my $line_type = $line_of_tokens->{_line_type};
6176 # stop search on a non-code line
6177 last if ( $line_type ne 'CODE' );
6179 my $rK_range = $line_of_tokens->{_rK_range};
6180 my ( $Kfirst, $Klast ) = @{$rK_range};
6183 next if ( !defined($Kfirst) );
6186 if ( $rLL->[$Kfirst]->[_TYPE_] eq 'J' ) {
6187 $label = $rLL->[$Kfirst]->[_TOKEN_];
6191 # quit the search if we are above the starting line
6192 last if ( $lx < $lx_open );
6196 } ## end sub find_loop_label
6198 { ## closure find_mccabe_count
6199 my %is_mccabe_logic_keyword;
6200 my %is_mccabe_logic_operator;
6203 my @q = (qw( && || ||= &&= ? <<= >>= ));
6204 @is_mccabe_logic_operator{@q} = (1) x scalar(@q);
6206 @q = (qw( and or xor if else elsif unless until while for foreach ));
6207 @is_mccabe_logic_keyword{@q} = (1) x scalar(@q);
6210 sub find_mccabe_count {
6213 # Find the cumulative mccabe count to each token
6214 # Return '$rmccabe_count_sum' = ref to array with cumulative
6215 # mccabe count to each token $K
6217 # NOTE: This sub currently follows the definitions in Perl::Critic
6219 my $rmccabe_count_sum;
6220 my $rLL = $self->[_rLL_];
6222 my $Klimit = $self->[_Klimit_];
6223 foreach my $KK ( 0 .. $Klimit ) {
6224 $rmccabe_count_sum->{$KK} = $count;
6225 my $type = $rLL->[$KK]->[_TYPE_];
6226 if ( $type eq 'k' ) {
6227 my $token = $rLL->[$KK]->[_TOKEN_];
6228 if ( $is_mccabe_logic_keyword{$token} ) { $count++ }
6230 elsif ( $is_mccabe_logic_operator{$type} ) {
6234 $rmccabe_count_sum->{ $Klimit + 1 } = $count;
6235 return $rmccabe_count_sum;
6236 } ## end sub find_mccabe_count
6237 } ## end closure find_mccabe_count
6239 sub find_code_line_count {
6242 # Find the cumulative number of lines of code, excluding blanks,
6244 # Return '$rcode_line_count' = ref to array with cumulative
6245 # code line count for each input line number.
6247 my $rcode_line_count;
6248 my $rLL = $self->[_rLL_];
6249 my $rlines = $self->[_rlines_];
6251 my $code_line_count = 0;
6253 # loop over all lines
6254 foreach my $line_of_tokens ( @{$rlines} ) {
6257 # what type of line?
6258 my $line_type = $line_of_tokens->{_line_type};
6260 # if 'CODE' it must be non-blank and non-comment
6261 if ( $line_type eq 'CODE' ) {
6262 my $rK_range = $line_of_tokens->{_rK_range};
6263 my ( $Kfirst, $Klast ) = @{$rK_range};
6265 if ( defined($Kfirst) ) {
6268 my $jmax = defined($Kfirst) ? $Klast - $Kfirst : -1;
6269 if ( $jmax > 0 || $rLL->[$Klast]->[_TYPE_] ne '#' ) {
6271 # ok, it is a non-comment
6277 # Count all other special line types except pod;
6278 # For a list of line types see sub 'process_all_lines'
6279 elsif ( $line_type !~ /^POD/ ) { $code_line_count++ }
6281 # Store the cumulative count using the input line index
6282 $rcode_line_count->[$ix_line] = $code_line_count;
6284 return $rcode_line_count;
6285 } ## end sub find_code_line_count
6287 sub find_selected_packages {
6289 my ( $self, $rdump_block_types ) = @_;
6291 # returns a list of all package statements in a file if requested
6293 unless ( $rdump_block_types->{'*'}
6294 || $rdump_block_types->{'package'}
6295 || $rdump_block_types->{'class'} )
6300 my $rLL = $self->[_rLL_];
6301 my $Klimit = $self->[_Klimit_];
6302 my $rlines = $self->[_rlines_];
6304 my $K_closing_container = $self->[_K_closing_container_];
6307 foreach my $KK ( 0 .. $Klimit ) {
6308 my $item = $rLL->[$KK];
6309 my $type = $item->[_TYPE_];
6310 if ( $type ne 'i' ) {
6313 my $token = $item->[_TOKEN_];
6314 if ( substr( $token, 0, 7 ) eq 'package' && $token =~ /^package\s/
6315 || substr( $token, 0, 5 ) eq 'class' && $token =~ /^class\s/ )
6318 $token =~ s/\s+/ /g;
6319 my ( $keyword, $name ) = split /\s+/, $token, 2;
6321 my $lx_start = $item->[_LINE_INDEX_];
6322 my $level = $item->[_LEVEL_];
6323 my $parent_seqno = $self->parent_seqno_by_K($KK);
6325 # Skip a class BLOCK because it will be handled as a block
6326 if ( $keyword eq 'class' ) {
6327 my $line_of_tokens = $rlines->[$lx_start];
6328 my $rK_range = $line_of_tokens->{_rK_range};
6329 my ( $K_first, $K_last ) = @{$rK_range};
6330 if ( $rLL->[$K_last]->[_TYPE_] eq '#' ) {
6331 $K_last = $self->K_previous_code($K_last);
6333 if ( defined($K_last) ) {
6334 my $seqno_class = $rLL->[$K_last]->[_TYPE_SEQUENCE_];
6335 my $block_type_next =
6336 $self->[_rblock_type_of_seqno_]->{$seqno_class};
6338 # these block types are currently marked 'package'
6339 # but may be 'class' in the future, so allow both.
6340 if ( defined($block_type_next)
6341 && $block_type_next =~ /^(class|package)\b/ )
6348 my $K_closing = $Klimit;
6349 if ( $parent_seqno != SEQ_ROOT ) {
6350 my $Kc = $K_closing_container->{$parent_seqno};
6351 if ( defined($Kc) ) {
6356 # This package ends any previous package at this level
6357 if ( defined( my $ix = $package_sweep[$level] ) ) {
6358 my $rpk = $package_list[$ix];
6359 my $Kc = $rpk->{K_closing};
6361 $rpk->{K_closing} = $KK - 1;
6364 $package_sweep[$level] = @package_list;
6366 # max_change and block_count are not currently reported 'package'
6369 line_start => $lx_start + 1,
6371 K_closing => $Klimit,
6381 return \@package_list;
6382 } ## end sub find_selected_packages
6384 sub find_selected_blocks {
6386 my ( $self, $rdump_block_types ) = @_;
6388 # Find blocks needed for --dump-block-summary
6390 # $rslected_blocks = ref to a list of information on the selected blocks
6392 my $rLL = $self->[_rLL_];
6393 my $rlines = $self->[_rlines_];
6394 my $rblock_type_of_seqno = $self->[_rblock_type_of_seqno_];
6395 my $K_opening_container = $self->[_K_opening_container_];
6396 my $K_closing_container = $self->[_K_closing_container_];
6397 my $ris_asub_block = $self->[_ris_asub_block_];
6398 my $ris_sub_block = $self->[_ris_sub_block_];
6400 my $dump_all_types = $rdump_block_types->{'*'};
6402 # Get level variation info for code blocks
6403 my $rlevel_info = $self->find_level_info();
6405 my @selected_blocks;
6407 #---------------------------------------------------
6408 # BEGIN loop over all blocks to find selected blocks
6409 #---------------------------------------------------
6410 foreach my $seqno ( keys %{$rblock_type_of_seqno} ) {
6413 my $name = EMPTY_STRING;
6414 my $block_type = $rblock_type_of_seqno->{$seqno};
6415 my $K_opening = $K_opening_container->{$seqno};
6416 my $K_closing = $K_closing_container->{$seqno};
6417 my $level = $rLL->[$K_opening]->[_LEVEL_];
6419 my $lx_open = $rLL->[$K_opening]->[_LINE_INDEX_];
6420 my $line_of_tokens = $rlines->[$lx_open];
6421 my $rK_range = $line_of_tokens->{_rK_range};
6422 my ( $Kfirst, $Klast ) = @{$rK_range};
6423 if ( !defined($Kfirst) || !defined($Klast) || $Kfirst > $K_opening ) {
6424 my $line_type = $line_of_tokens->{_line_type};
6427 my $CODE_type = $line_of_tokens->{_code_type};
6428 DEVEL_MODE && Fault(<<EOM);
6429 unexpected line_type=$line_type at line $lx_open, code type=$CODE_type
6434 my ( $max_change, $block_count, $inner_loop_plus ) =
6435 ( 0, 0, EMPTY_STRING );
6436 my $item = $rlevel_info->{$seqno};
6437 if ( defined($item) ) {
6438 my $starting_depth = $item->{starting_depth};
6439 my $maximum_depth = $item->{maximum_depth};
6440 $block_count = $item->{block_count};
6441 $max_change = $maximum_depth - $starting_depth + 1;
6443 # this is a '+' character if this block is an inner loops
6444 $inner_loop_plus = $item->{is_inner_loop} ? '+' : EMPTY_STRING;
6447 # Skip closures unless type 'closure' is explicitely requested
6448 if ( ( $block_type eq '}' || $block_type eq ';' )
6449 && $rdump_block_types->{'closure'} )
6454 # Both 'sub' and 'asub' select an anonymous sub.
6455 # This allows anonymous subs to be explicitely selected
6457 $ris_asub_block->{$seqno}
6458 && ( $dump_all_types
6459 || $rdump_block_types->{'sub'}
6460 || $rdump_block_types->{'asub'} )
6465 # Look back to try to find some kind of name, such as
6466 # my $var = sub { - var is type 'i'
6467 # var => sub { - var is type 'w'
6468 # -var => sub { - var is type 'w'
6469 # 'var' => sub { - var is type 'Q'
6470 my ( $saw_equals, $saw_fat_comma, $blank_count );
6471 foreach my $KK ( reverse( $Kfirst .. $K_opening - 1 ) ) {
6472 my $token_type = $rLL->[$KK]->[_TYPE_];
6473 if ( $token_type eq 'b' ) { $blank_count++; next }
6474 if ( $token_type eq '=>' ) { $saw_fat_comma++; next }
6475 if ( $token_type eq '=' ) { $saw_equals++; next }
6476 if ( $token_type eq 'i' && $saw_equals
6477 || ( $token_type eq 'w' || $token_type eq 'Q' )
6480 $name = $rLL->[$KK]->[_TOKEN_];
6485 elsif ( $ris_sub_block->{$seqno}
6486 && ( $dump_all_types || $rdump_block_types->{'sub'} ) )
6492 # 'sub setidentifier($)' => 'setidentifier'
6493 # 'method setidentifier($)' => 'setidentifier'
6494 my @parts = split /\s+/, $block_type;
6499 $block_type =~ /^(package|class)\b/
6500 && ( $dump_all_types
6501 || $rdump_block_types->{'package'}
6502 || $rdump_block_types->{'class'} )
6506 my @parts = split /\s+/, $block_type;
6511 $is_loop_type{$block_type}
6512 && ( $dump_all_types
6513 || $rdump_block_types->{$block_type}
6514 || $rdump_block_types->{ $block_type . $inner_loop_plus }
6515 || $rdump_block_types->{$inner_loop_plus} )
6518 $type = $block_type . $inner_loop_plus;
6520 elsif ( $dump_all_types || $rdump_block_types->{$block_type} ) {
6521 if ( $is_loop_type{$block_type} ) {
6522 $name = $self->find_loop_label($seqno);
6524 $type = $block_type;
6530 push @selected_blocks,
6532 K_opening => $K_opening,
6533 K_closing => $K_closing,
6534 line_start => $lx_open + 1,
6538 max_change => $max_change,
6539 block_count => $block_count,
6541 } ## END loop to get info for selected blocks
6542 return \@selected_blocks;
6543 } ## end sub find_selected_blocks
6545 sub dump_block_summary {
6548 # Dump information about selected code blocks to STDOUT
6549 # This sub is called when
6550 # --dump-block-summary (-dbs) is set.
6552 # The following controls are available:
6553 # --dump-block-types=s (-dbt=s), where s is a list of block types
6554 # (if else elsif for foreach while do ... sub) ; default is 'sub'
6555 # --dump-block-minimum-lines=n (-dbml=n), where n is the minimum
6556 # number of lines for a block to be included; default is 20.
6558 my $rOpts_dump_block_types = $rOpts->{'dump-block-types'};
6559 if ( !defined($rOpts_dump_block_types) ) { $rOpts_dump_block_types = 'sub' }
6560 $rOpts_dump_block_types =~ s/^\s+//;
6561 $rOpts_dump_block_types =~ s/\s+$//;
6562 my @list = split /\s+/, $rOpts_dump_block_types;
6563 my %dump_block_types;
6564 @{dump_block_types}{@list} = (1) x scalar(@list);
6567 my $rselected_blocks = $self->find_selected_blocks( \%dump_block_types );
6570 my $rpackage_list = $self->find_selected_packages( \%dump_block_types );
6572 return if ( !@{$rselected_blocks} && !@{$rpackage_list} );
6574 my $input_stream_name = get_input_stream_name();
6576 # Get code line count
6577 my $rcode_line_count = $self->find_code_line_count();
6580 my $rmccabe_count_sum = $self->find_mccabe_count();
6582 my $rOpts_dump_block_minimum_lines = $rOpts->{'dump-block-minimum-lines'};
6583 if ( !defined($rOpts_dump_block_minimum_lines) ) {
6584 $rOpts_dump_block_minimum_lines = 20;
6587 my $rLL = $self->[_rLL_];
6589 # merge blocks and packages, add various counts, filter and print to STDOUT
6590 my $routput_lines = [];
6591 foreach my $item ( @{$rselected_blocks}, @{$rpackage_list} ) {
6593 my $K_opening = $item->{K_opening};
6594 my $K_closing = $item->{K_closing};
6596 # define total number of lines
6597 my $lx_open = $rLL->[$K_opening]->[_LINE_INDEX_];
6598 my $lx_close = $rLL->[$K_closing]->[_LINE_INDEX_];
6599 my $line_count = $lx_close - $lx_open + 1;
6601 # define total number of lines of code excluding blanks, comments, pod
6602 my $code_lines_open = $rcode_line_count->[$lx_open];
6603 my $code_lines_close = $rcode_line_count->[$lx_close];
6605 if ( defined($code_lines_open) && defined($code_lines_close) ) {
6606 $code_lines = $code_lines_close - $code_lines_open + 1;
6609 # filter out blocks below the selected code line limit
6610 if ( $code_lines < $rOpts_dump_block_minimum_lines ) {
6614 # add mccabe_count for this block
6615 my $mccabe_closing = $rmccabe_count_sum->{ $K_closing + 1 };
6616 my $mccabe_opening = $rmccabe_count_sum->{$K_opening};
6617 my $mccabe_count = 1; # add 1 to match Perl::Critic
6618 if ( defined($mccabe_opening) && defined($mccabe_closing) ) {
6619 $mccabe_count += $mccabe_closing - $mccabe_opening;
6622 # Store the final set of print variables
6623 push @{$routput_lines}, [
6626 $item->{line_start},
6632 $item->{max_change},
6633 $item->{block_count},
6639 return unless @{$routput_lines};
6641 # Sort blocks and packages on starting line number
6642 my @sorted_lines = sort { $a->[1] <=> $b->[1] } @{$routput_lines};
6645 "file,line,line_count,code_lines,type,name,level,max_change,block_count,mccabe_count\n";
6647 foreach my $rline_vars (@sorted_lines) {
6648 my $line = join( ",", @{$rline_vars} ) . "\n";
6652 } ## end sub dump_block_summary
6657 # Examine each line of code and set a flag '$CODE_type' to describe it.
6658 # Also return a list of lines with side comments.
6660 my $rLL = $self->[_rLL_];
6661 my $rlines = $self->[_rlines_];
6663 my $rOpts_format_skipping_begin = $rOpts->{'format-skipping-begin'};
6664 my $rOpts_format_skipping_end = $rOpts->{'format-skipping-end'};
6665 my $rOpts_static_block_comment_prefix =
6666 $rOpts->{'static-block-comment-prefix'};
6668 # Remember indexes of lines with side comments
6669 my @ix_side_comments;
6671 my $In_format_skipping_section = 0;
6672 my $Saw_VERSION_in_this_file = 0;
6673 my $has_side_comment = 0;
6674 my ( $Kfirst, $Klast );
6677 # Loop to set CODE_type
6679 # Possible CODE_types
6680 # 'VB' = Verbatim - line goes out verbatim (a quote)
6681 # 'FS' = Format Skipping - line goes out verbatim
6683 # 'HSC' = Hanging Side Comment - fix this hanging side comment
6684 # 'SBCX'= Static Block Comment Without Leading Space
6685 # 'SBC' = Static Block Comment
6686 # 'BC' = Block Comment - an ordinary full line comment
6687 # 'IO' = Indent Only - line goes out unchanged except for indentation
6688 # 'NIN' = No Internal Newlines - line does not get broken
6689 # 'VER' = VERSION statement
6690 # '' = ordinary line of code with no restrictions
6693 foreach my $line_of_tokens ( @{$rlines} ) {
6695 my $line_type = $line_of_tokens->{_line_type};
6697 my $Last_line_had_side_comment = $has_side_comment;
6698 if ($has_side_comment) {
6699 push @ix_side_comments, $ix_line - 1;
6700 $has_side_comment = 0;
6703 my $last_CODE_type = $CODE_type;
6704 $CODE_type = EMPTY_STRING;
6706 if ( $line_type ne 'CODE' ) {
6710 my $Klast_prev = $Klast;
6712 my $rK_range = $line_of_tokens->{_rK_range};
6713 ( $Kfirst, $Klast ) = @{$rK_range};
6715 my $input_line = $line_of_tokens->{_line_text};
6716 my $jmax = defined($Kfirst) ? $Klast - $Kfirst : -1;
6718 my $is_block_comment = 0;
6719 if ( $jmax >= 0 && $rLL->[$Klast]->[_TYPE_] eq '#' ) {
6720 if ( $jmax == 0 ) { $is_block_comment = 1; }
6721 else { $has_side_comment = 1 }
6724 # Write line verbatim if we are in a formatting skip section
6725 if ($In_format_skipping_section) {
6727 # Note: extra space appended to comment simplifies pattern matching
6731 # optional fast pre-check
6732 && ( substr( $rLL->[$Kfirst]->[_TOKEN_], 0, 4 ) eq '#>>>'
6733 || $rOpts_format_skipping_end )
6735 && ( $rLL->[$Kfirst]->[_TOKEN_] . SPACE ) =~
6736 /$format_skipping_pattern_end/
6739 $In_format_skipping_section = 0;
6740 my $input_line_no = $line_of_tokens->{_line_number};
6741 write_logfile_entry(
6742 "Line $input_line_no: Exiting format-skipping section\n");
6748 # Check for a continued quote..
6749 if ( $line_of_tokens->{_starting_in_quote} ) {
6751 # A line which is entirely a quote or pattern must go out
6752 # verbatim. Note: the \n is contained in $input_line.
6754 if ( $self->[_save_logfile_] && $input_line =~ /\t/ ) {
6755 my $input_line_number = $line_of_tokens->{_line_number};
6756 $self->note_embedded_tab($input_line_number);
6763 # See if we are entering a formatting skip section
6767 # optional fast pre-check
6768 && ( substr( $rLL->[$Kfirst]->[_TOKEN_], 0, 4 ) eq '#<<<'
6769 || $rOpts_format_skipping_begin )
6771 && $rOpts_format_skipping
6772 && ( $rLL->[$Kfirst]->[_TOKEN_] . SPACE ) =~
6773 /$format_skipping_pattern_begin/
6776 $In_format_skipping_section = 1;
6777 my $input_line_no = $line_of_tokens->{_line_number};
6778 write_logfile_entry(
6779 "Line $input_line_no: Entering format-skipping section\n");
6784 # ignore trailing blank tokens (they will get deleted later)
6785 if ( $jmax > 0 && $rLL->[$Klast]->[_TYPE_] eq 'b' ) {
6796 if ($is_block_comment) {
6798 # see if this is a static block comment (starts with ## by default)
6799 my $is_static_block_comment = 0;
6800 my $no_leading_space = substr( $input_line, 0, 1 ) eq '#';
6803 # optional fast pre-check
6805 substr( $rLL->[$Kfirst]->[_TOKEN_], 0, 2 ) eq '##'
6806 || $rOpts_static_block_comment_prefix
6809 && $rOpts_static_block_comments
6810 && $input_line =~ /$static_block_comment_pattern/
6813 $is_static_block_comment = 1;
6816 # Check for comments which are line directives
6817 # Treat exactly as static block comments without leading space
6818 # reference: perlsyn, near end, section Plain Old Comments (Not!)
6819 # example: '# line 42 "new_filename.plx"'
6822 && $input_line =~ /^\# \s*
6824 (?:\s("?)([^"]+)\2)? \s*
6828 $is_static_block_comment = 1;
6831 # look for hanging side comment ...
6833 $Last_line_had_side_comment # last line had side comment
6834 && !$no_leading_space # there is some leading space
6836 $is_static_block_comment # do not make static comment hanging
6840 # continuing an existing HSC chain?
6841 if ( $last_CODE_type eq 'HSC' ) {
6842 $has_side_comment = 1;
6847 # starting a new HSC chain?
6850 $rOpts->{'hanging-side-comments'} # user is allowing
6851 # hanging side comments
6854 && ( defined($Klast_prev) && $Klast_prev > 1 )
6856 # and the previous side comment was not static (issue c070)
6858 $rOpts->{'static-side-comments'}
6859 && $rLL->[$Klast_prev]->[_TOKEN_] =~
6860 /$static_side_comment_pattern/
6866 # and it is not a closing side comment (issue c070).
6867 my $K_penult = $Klast_prev - 1;
6868 $K_penult -= 1 if ( $rLL->[$K_penult]->[_TYPE_] eq 'b' );
6870 ( $rLL->[$K_penult]->[_TOKEN_] eq '}'
6871 && $rLL->[$K_penult]->[_TYPE_] eq '}'
6872 && $rLL->[$Klast_prev]->[_TOKEN_] =~
6873 /$closing_side_comment_prefix_pattern/ );
6875 if ( !$follows_csc ) {
6876 $has_side_comment = 1;
6883 if ($is_static_block_comment) {
6884 $CODE_type = $no_leading_space ? 'SBCX' : 'SBC';
6887 elsif ($Last_line_had_side_comment
6888 && !$rOpts_maximum_consecutive_blank_lines
6889 && $rLL->[$Kfirst]->[_LEVEL_] > 0 )
6891 # Emergency fix to keep a block comment from becoming a hanging
6892 # side comment. This fix is for the case that blank lines
6893 # cannot be inserted. There is related code in sub
6894 # 'process_line_of_CODE'
6895 $CODE_type = 'SBCX';
6904 # End of comments. Handle a line of normal code:
6906 if ($rOpts_indent_only) {
6911 if ( !$rOpts_add_newlines ) {
6916 # Patch needed for MakeMaker. Do not break a statement
6917 # in which $VERSION may be calculated. See MakeMaker.pm;
6918 # this is based on the coding in it.
6919 # The first line of a file that matches this will be eval'd:
6920 # /([\$*])(([\w\:\']*)\bVERSION)\b.*\=/
6922 # *VERSION = \'1.01';
6923 # ( $VERSION ) = '$Revision: 1.74 $ ' =~ /\$Revision:\s+([^\s]+)/;
6924 # We will pass such a line straight through without breaking
6925 # it unless -npvl is used.
6927 # Patch for problem reported in RT #81866, where files
6928 # had been flattened into a single line and couldn't be
6929 # tidied without -npvl. There are two parts to this patch:
6930 # First, it is not done for a really long line (80 tokens for now).
6931 # Second, we will only allow up to one semicolon
6932 # before the VERSION. We need to allow at least one semicolon
6933 # for statements like this:
6934 # require Exporter; our $VERSION = $Exporter::VERSION;
6935 # where both statements must be on a single line for MakeMaker
6937 if ( !$Saw_VERSION_in_this_file
6940 /^[^;]*;?[^;]*([\$*])(([\w\:\']*)\bVERSION)\b.*\=/ )
6942 $Saw_VERSION_in_this_file = 1;
6943 write_logfile_entry("passing VERSION line; -npvl deactivates\n");
6945 # This code type has lower priority than others
6951 $line_of_tokens->{_code_type} = $CODE_type;
6954 if ($has_side_comment) {
6955 push @ix_side_comments, $ix_line;
6958 return \@ix_side_comments;
6959 } ## end sub set_CODE_type
6961 sub find_non_indenting_braces {
6963 my ( $self, $rix_side_comments ) = @_;
6964 return unless ( $rOpts->{'non-indenting-braces'} );
6965 my $rLL = $self->[_rLL_];
6966 return unless ( defined($rLL) && @{$rLL} );
6967 my $rlines = $self->[_rlines_];
6968 my $rblock_type_of_seqno = $self->[_rblock_type_of_seqno_];
6969 my $rseqno_non_indenting_brace_by_ix =
6970 $self->[_rseqno_non_indenting_brace_by_ix_];
6972 foreach my $ix ( @{$rix_side_comments} ) {
6973 my $line_of_tokens = $rlines->[$ix];
6974 my $line_type = $line_of_tokens->{_line_type};
6975 if ( $line_type ne 'CODE' ) {
6978 DEVEL_MODE && Fault("unexpected line_type=$line_type\n");
6981 my $rK_range = $line_of_tokens->{_rK_range};
6982 my ( $Kfirst, $Klast ) = @{$rK_range};
6983 unless ( defined($Kfirst) && $rLL->[$Klast]->[_TYPE_] eq '#' ) {
6986 DEVEL_MODE && Fault("did not get a comment\n");
6989 next unless ( $Klast > $Kfirst ); # maybe HSC
6990 my $token_sc = $rLL->[$Klast]->[_TOKEN_];
6991 my $K_m = $Klast - 1;
6992 my $type_m = $rLL->[$K_m]->[_TYPE_];
6993 if ( $type_m eq 'b' && $K_m > $Kfirst ) {
6995 $type_m = $rLL->[$K_m]->[_TYPE_];
6997 my $seqno_m = $rLL->[$K_m]->[_TYPE_SEQUENCE_];
6999 my $block_type_m = $rblock_type_of_seqno->{$seqno_m};
7001 # The pattern ends in \s but we have removed the newline, so
7002 # we added it back for the match. That way we require an exact
7003 # match to the special string and also allow additional text.
7006 && $is_opening_type{$type_m}
7007 && $token_sc =~ /$non_indenting_brace_pattern/ )
7009 $rseqno_non_indenting_brace_by_ix->{$ix} = $seqno_m;
7014 } ## end sub find_non_indenting_braces
7016 sub delete_side_comments {
7017 my ( $self, $rix_side_comments ) = @_;
7019 # Given a list of indexes of lines with side comments, handle any
7020 # requested side comment deletions.
7022 my $rLL = $self->[_rLL_];
7023 my $rlines = $self->[_rlines_];
7024 my $rblock_type_of_seqno = $self->[_rblock_type_of_seqno_];
7025 my $rseqno_non_indenting_brace_by_ix =
7026 $self->[_rseqno_non_indenting_brace_by_ix_];
7028 foreach my $ix ( @{$rix_side_comments} ) {
7029 my $line_of_tokens = $rlines->[$ix];
7030 my $line_type = $line_of_tokens->{_line_type};
7032 # This fault shouldn't happen because we only saved CODE lines with
7033 # side comments in the TASK 1 loop above.
7034 if ( $line_type ne 'CODE' ) {
7038 Hit unexpected line_type = '$line_type' near line $lno while deleting side comments, should be 'CODE'
7044 my $CODE_type = $line_of_tokens->{_code_type};
7045 my $rK_range = $line_of_tokens->{_rK_range};
7046 my ( $Kfirst, $Klast ) = @{$rK_range};
7048 unless ( defined($Kfirst) && $rLL->[$Klast]->[_TYPE_] eq '#' ) {
7052 Did not find side comment near line $lno while deleting side comments
7058 my $delete_side_comment =
7059 $rOpts_delete_side_comments
7060 && ( $Klast > $Kfirst || $CODE_type eq 'HSC' )
7062 || $CODE_type eq 'HSC'
7063 || $CODE_type eq 'IO'
7064 || $CODE_type eq 'NIN' );
7066 # Do not delete special control side comments
7067 if ( $rseqno_non_indenting_brace_by_ix->{$ix} ) {
7068 $delete_side_comment = 0;
7072 $rOpts_delete_closing_side_comments
7073 && !$delete_side_comment
7076 || $CODE_type eq 'HSC'
7077 || $CODE_type eq 'IO'
7078 || $CODE_type eq 'NIN' )
7081 my $token = $rLL->[$Klast]->[_TOKEN_];
7082 my $K_m = $Klast - 1;
7083 my $type_m = $rLL->[$K_m]->[_TYPE_];
7084 if ( $type_m eq 'b' && $K_m > $Kfirst ) { $K_m-- }
7085 my $seqno_m = $rLL->[$K_m]->[_TYPE_SEQUENCE_];
7087 my $block_type_m = $rblock_type_of_seqno->{$seqno_m};
7089 && $token =~ /$closing_side_comment_prefix_pattern/
7090 && $block_type_m =~ /$closing_side_comment_list_pattern/ )
7092 $delete_side_comment = 1;
7095 } ## end if ( $rOpts_delete_closing_side_comments...)
7097 if ($delete_side_comment) {
7099 # We are actually just changing the side comment to a blank.
7100 # This may produce multiple blanks in a row, but sub respace_tokens
7101 # will check for this and fix it.
7102 $rLL->[$Klast]->[_TYPE_] = 'b';
7103 $rLL->[$Klast]->[_TOKEN_] = SPACE;
7105 # The -io option outputs the line text, so we have to update
7106 # the line text so that the comment does not reappear.
7107 if ( $CODE_type eq 'IO' ) {
7108 my $line = EMPTY_STRING;
7109 foreach my $KK ( $Kfirst .. $Klast - 1 ) {
7110 $line .= $rLL->[$KK]->[_TOKEN_];
7113 $line_of_tokens->{_line_text} = $line . "\n";
7116 # If we delete a hanging side comment the line becomes blank.
7117 if ( $CODE_type eq 'HSC' ) { $line_of_tokens->{_code_type} = 'BL' }
7121 } ## end sub delete_side_comments
7125 my $rlines = $self->[_rlines_];
7126 foreach my $line ( @{$rlines} ) {
7127 my $input_line = $line->{_line_text};
7128 $self->write_unindented_line($input_line);
7131 } ## end sub dump_verbatim
7137 my %is_nonlist_keyword;
7138 my %is_nonlist_type;
7140 my %is_unexpected_equals;
7144 # added 'U' to fix cases b1125 b1126 b1127
7146 @{wU}{@q} = (1) x scalar(@q);
7148 @q = qw(w i q Q G C Z);
7149 @{wiq}{@q} = (1) x scalar(@q);
7152 @{is_wit}{@q} = (1) x scalar(@q);
7155 @{is_sigil}{@q} = (1) x scalar(@q);
7157 # Parens following these keywords will not be marked as lists. Note that
7158 # 'for' is not included and is handled separately, by including 'f' in the
7159 # hash %is_counted_type, since it may or may not be a c-style for loop.
7160 @q = qw( if elsif unless and or );
7161 @is_nonlist_keyword{@q} = (1) x scalar(@q);
7163 # Parens following these types will not be marked as lists
7165 @is_nonlist_type{@q} = (1) x scalar(@q);
7168 @is_s_y_m_slash{@q} = (1) x scalar(@q);
7171 @is_unexpected_equals{@q} = (1) x scalar(@q);
7175 { #<<< begin clousure respace_tokens
7177 my $rLL_new; # This will be the new array of tokens
7179 # These are variables in $self
7181 my $length_function;
7182 my $is_encoded_data;
7184 my $K_closing_ternary;
7185 my $K_opening_ternary;
7186 my $rchildren_of_seqno;
7187 my $rhas_broken_code_block;
7188 my $rhas_broken_list;
7189 my $rhas_broken_list_with_lec;
7190 my $rhas_code_block;
7193 my $ris_assigned_structure;
7194 my $ris_broken_container;
7195 my $ris_excluded_lp_container;
7196 my $ris_list_by_seqno;
7197 my $ris_permanently_broken;
7198 my $rlec_count_by_seqno;
7200 my $rparent_of_seqno;
7201 my $rtype_count_by_seqno;
7202 my $rblock_type_of_seqno;
7204 my $K_opening_container;
7205 my $K_closing_container;
7207 my %K_first_here_doc_by_seqno;
7209 my $last_nonblank_code_type;
7210 my $last_nonblank_code_token;
7211 my $last_nonblank_block_type;
7212 my $last_last_nonblank_code_type;
7213 my $last_last_nonblank_code_token;
7216 my %K_old_opening_by_seqno;
7220 my $cumulative_length;
7222 # Variables holding the current line info
7229 my $rwhitespace_flags;
7231 sub initialize_respace_tokens_closure {
7235 $rLL_new = []; # This is the new array
7237 $rLL = $self->[_rLL_];
7238 $length_function = $self->[_length_function_];
7239 $is_encoded_data = $self->[_is_encoded_data_];
7241 $K_closing_ternary = $self->[_K_closing_ternary_];
7242 $K_opening_ternary = $self->[_K_opening_ternary_];
7243 $rchildren_of_seqno = $self->[_rchildren_of_seqno_];
7244 $rhas_broken_code_block = $self->[_rhas_broken_code_block_];
7245 $rhas_broken_list = $self->[_rhas_broken_list_];
7246 $rhas_broken_list_with_lec = $self->[_rhas_broken_list_with_lec_];
7247 $rhas_code_block = $self->[_rhas_code_block_];
7248 $rhas_list = $self->[_rhas_list_];
7249 $rhas_ternary = $self->[_rhas_ternary_];
7250 $ris_assigned_structure = $self->[_ris_assigned_structure_];
7251 $ris_broken_container = $self->[_ris_broken_container_];
7252 $ris_excluded_lp_container = $self->[_ris_excluded_lp_container_];
7253 $ris_list_by_seqno = $self->[_ris_list_by_seqno_];
7254 $ris_permanently_broken = $self->[_ris_permanently_broken_];
7255 $rlec_count_by_seqno = $self->[_rlec_count_by_seqno_];
7256 $roverride_cab3 = $self->[_roverride_cab3_];
7257 $rparent_of_seqno = $self->[_rparent_of_seqno_];
7258 $rtype_count_by_seqno = $self->[_rtype_count_by_seqno_];
7259 $rblock_type_of_seqno = $self->[_rblock_type_of_seqno_];
7261 %K_first_here_doc_by_seqno = ();
7263 $last_nonblank_code_type = ';';
7264 $last_nonblank_code_token = ';';
7265 $last_nonblank_block_type = EMPTY_STRING;
7266 $last_last_nonblank_code_type = ';';
7267 $last_last_nonblank_code_token = ';';
7270 %K_old_opening_by_seqno = (); # Note: old K index
7272 $depth_next_max = 0;
7274 # we will be setting token lengths as we go
7275 $cumulative_length = 0;
7277 $Ktoken_vars = undef; # the old K value of $rtoken_vars
7278 $Kfirst_old = undef; # min K of old line
7279 $Klast_old = undef; # max K of old line
7280 $Klast_old_code = undef; # K of last token if side comment
7281 $CODE_type = EMPTY_STRING;
7283 # Set the whitespace flags, which indicate the token spacing preference.
7284 $rwhitespace_flags = $self->set_whitespace_flags();
7286 # Note that $K_opening_container and $K_closing_container have values
7287 # defined in sub get_line() for the previous K indexes. They were needed
7288 # in case option 'indent-only' was set, and we didn't get here. We no
7289 # longer need those and will eliminate them now to avoid any possible
7290 # mixing of old and new values. This must be done AFTER the call to
7291 # set_whitespace_flags, which needs these.
7292 $K_opening_container = $self->[_K_opening_container_] = {};
7293 $K_closing_container = $self->[_K_closing_container_] = {};
7297 } ## end sub initialize_respace_tokens_closure
7299 sub respace_tokens {
7303 #--------------------------------------------------------------------------
7304 # This routine is called once per file to do as much formatting as possible
7305 # before new line breaks are set.
7306 #--------------------------------------------------------------------------
7308 # Return parameters:
7309 # Set $severe_error=true if processing must terminate immediately
7310 my ( $severe_error, $rqw_lines );
7312 # We change any spaces in --indent-only mode
7313 if ( $rOpts->{'indent-only'} ) {
7315 # We need to define lengths for -indent-only to avoid undefs, even
7316 # though these values are not actually needed for option --indent-only.
7318 $rLL = $self->[_rLL_];
7319 $length_function = $self->[_length_function_];
7320 $cumulative_length = 0;
7322 foreach my $item ( @{$rLL} ) {
7323 my $token = $item->[_TOKEN_];
7324 my $token_length = $length_function->($token);
7325 $cumulative_length += $token_length;
7326 $item->[_TOKEN_LENGTH_] = $token_length;
7327 $item->[_CUMULATIVE_LENGTH_] = $cumulative_length;
7330 return ( $severe_error, $rqw_lines );
7333 # This routine makes all necessary and possible changes to the tokenization
7334 # after the initial tokenization of the file. This is a tedious routine,
7335 # but basically it consists of inserting and deleting whitespace between
7336 # nonblank tokens according to the selected parameters. In a few cases
7337 # non-space characters are added, deleted or modified.
7339 # The goal of this routine is to create a new token array which only needs
7340 # the definition of new line breaks and padding to complete formatting. In
7341 # a few cases we have to cheat a little to achieve this goal. In
7342 # particular, we may not know if a semicolon will be needed, because it
7343 # depends on how the line breaks go. To handle this, we include the
7344 # semicolon as a 'phantom' which can be displayed as normal or as an empty
7347 # Method: The old tokens are copied one-by-one, with changes, from the old
7348 # linear storage array $rLL to a new array $rLL_new.
7350 # (re-)initialize closure variables for this problem
7351 $self->initialize_respace_tokens_closure();
7353 #--------------------------------
7354 # Main over all lines of the file
7355 #--------------------------------
7356 my $rlines = $self->[_rlines_];
7357 my $line_type = EMPTY_STRING;
7360 foreach my $line_of_tokens ( @{$rlines} ) {
7362 my $input_line_number = $line_of_tokens->{_line_number};
7363 my $last_line_type = $line_type;
7364 $line_type = $line_of_tokens->{_line_type};
7365 next unless ( $line_type eq 'CODE' );
7366 $CODE_type = $line_of_tokens->{_code_type};
7368 if ( $CODE_type eq 'BL' ) {
7369 my $seqno = $seqno_stack{ $depth_next - 1 };
7370 if ( defined($seqno) ) {
7371 $self->[_rblank_and_comment_count_]->{$seqno} += 1;
7372 $self->set_permanently_broken($seqno)
7373 if (!$ris_permanently_broken->{$seqno}
7374 && $rOpts_maximum_consecutive_blank_lines );
7378 my $rK_range = $line_of_tokens->{_rK_range};
7379 my ( $Kfirst, $Klast ) = @{$rK_range};
7380 next unless defined($Kfirst);
7381 ( $Kfirst_old, $Klast_old ) = ( $Kfirst, $Klast );
7382 $Klast_old_code = $Klast_old;
7384 # Be sure an old K value is defined for sub store_token
7385 $Ktoken_vars = $Kfirst;
7387 # Check for correct sequence of token indexes...
7388 # An error here means that sub write_line() did not correctly
7389 # package the tokenized lines as it received them. If we
7390 # get a fault here it has not output a continuous sequence
7391 # of K values. Or a line of CODE may have been mis-marked as
7392 # something else. There is no good way to continue after such an
7394 if ( defined($last_K_out) ) {
7395 if ( $Kfirst != $last_K_out + 1 ) {
7397 "Program Bug: last K out was $last_K_out but Kfirst=$Kfirst"
7400 return ( $severe_error, $rqw_lines );
7405 # The first token should always have been given index 0 by sub
7407 if ( $Kfirst != 0 ) {
7408 Fault("Program Bug: first K is $Kfirst but should be 0");
7411 $last_K_out = $Klast;
7413 # Handle special lines of code
7414 if ( $CODE_type && $CODE_type ne 'NIN' && $CODE_type ne 'VER' ) {
7416 # CODE_types are as follows.
7418 # 'VB' = Verbatim - line goes out verbatim
7419 # 'FS' = Format Skipping - line goes out verbatim, no blanks
7420 # 'IO' = Indent Only - only indentation may be changed
7421 # 'NIN' = No Internal Newlines - line does not get broken
7422 # 'HSC'=Hanging Side Comment - fix this hanging side comment
7423 # 'BC'=Block Comment - an ordinary full line comment
7424 # 'SBC'=Static Block Comment - a block comment which does not get
7426 # 'SBCX'=Static Block Comment Without Leading Space
7427 # 'VER'=VERSION statement
7428 # '' or (undefined) - no restructions
7430 # For a hanging side comment we insert an empty quote before
7431 # the comment so that it becomes a normal side comment and
7432 # will be aligned by the vertical aligner
7433 if ( $CODE_type eq 'HSC' ) {
7435 # Safety Check: This must be a line with one token (a comment)
7436 my $rvars_Kfirst = $rLL->[$Kfirst];
7437 if ( $Kfirst == $Klast && $rvars_Kfirst->[_TYPE_] eq '#' ) {
7439 # Note that even if the flag 'noadd-whitespace' is set, we
7440 # will make an exception here and allow a blank to be
7441 # inserted to push the comment to the right. We can think
7442 # of this as an adjustment of indentation rather than
7443 # whitespace between tokens. This will also prevent the
7444 # hanging side comment from getting converted to a block
7445 # comment if whitespace gets deleted, as for example with
7446 # the -extrude and -mangle options.
7448 copy_token_as_type( $rvars_Kfirst, 'q', EMPTY_STRING );
7449 $self->store_token($rcopy);
7450 $rcopy = copy_token_as_type( $rvars_Kfirst, 'b', SPACE );
7451 $self->store_token($rcopy);
7452 $self->store_token($rvars_Kfirst);
7457 # This line was mis-marked by sub scan_comment. Catch in
7458 # DEVEL_MODE, otherwise try to repair and keep going.
7460 "Program bug. A hanging side comment has been mismarked"
7463 $CODE_type = EMPTY_STRING;
7464 $line_of_tokens->{_code_type} = $CODE_type;
7468 # Copy tokens unchanged
7469 foreach my $KK ( $Kfirst .. $Klast ) {
7471 $self->store_token( $rLL->[$KK] );
7476 # Handle normal line..
7478 # Define index of last token before any side comment for comma counts
7479 my $type_end = $rLL->[$Klast_old_code]->[_TYPE_];
7480 if ( ( $type_end eq '#' || $type_end eq 'b' )
7481 && $Klast_old_code > $Kfirst_old )
7484 if ( $rLL->[$Klast_old_code]->[_TYPE_] eq 'b'
7485 && $Klast_old_code > $Kfirst_old )
7491 # Insert any essential whitespace between lines
7492 # if last line was normal CODE.
7493 # Patch for rt #125012: use K_previous_code rather than '_nonblank'
7494 # because comments may disappear.
7495 # Note that we must do this even if --noadd-whitespace is set
7496 if ( $last_line_type eq 'CODE' ) {
7497 my $type_next = $rLL->[$Kfirst]->[_TYPE_];
7498 my $token_next = $rLL->[$Kfirst]->[_TOKEN_];
7500 is_essential_whitespace(
7501 $last_last_nonblank_code_token,
7502 $last_last_nonblank_code_type,
7503 $last_nonblank_code_token,
7504 $last_nonblank_code_type,
7510 $self->store_space();
7514 #-----------------------------------------------
7515 # Inner loop to respace tokens on a line of code
7516 #-----------------------------------------------
7518 # The inner loop is in a separate sub for clarity
7519 $self->respace_tokens_inner_loop( $Kfirst, $Klast, $input_line_number );
7523 # finalize data structures
7524 $self->respace_post_loop_ops();
7526 # Reset memory to be the new array
7527 $self->[_rLL_] = $rLL_new;
7529 if ( @{$rLL_new} ) { $Klimit = @{$rLL_new} - 1 }
7530 $self->[_Klimit_] = $Klimit;
7532 # During development, verify that the new array still looks okay.
7533 DEVEL_MODE && $self->check_token_array();
7535 # update the token limits of each line
7536 ( $severe_error, $rqw_lines ) = $self->resync_lines_and_tokens();
7538 return ( $severe_error, $rqw_lines );
7539 } ## end sub respace_tokens
7541 sub respace_tokens_inner_loop {
7543 my ( $self, $Kfirst, $Klast, $input_line_number ) = @_;
7545 #-----------------------------------------------------------------
7546 # Loop to copy all tokens on one line, making any spacing changes,
7547 # while also collecting information needed by later subs.
7548 #-----------------------------------------------------------------
7549 foreach my $KK ( $Kfirst .. $Klast ) {
7551 # TODO: consider eliminating this closure var by passing directly to
7552 # store_token following pattern of store_tokens_to_go.
7555 my $rtoken_vars = $rLL->[$KK];
7556 my $type = $rtoken_vars->[_TYPE_];
7558 # Handle a blank space ...
7559 if ( $type eq 'b' ) {
7561 # Delete it if not wanted by whitespace rules
7562 # or we are deleting all whitespace
7563 # Note that whitespace flag is a flag indicating whether a
7564 # white space BEFORE the token is needed
7565 next if ( $KK >= $Klast ); # skip terminal blank
7566 my $Knext = $KK + 1;
7568 if ($rOpts_freeze_whitespace) {
7569 $self->store_token($rtoken_vars);
7573 my $ws = $rwhitespace_flags->[$Knext];
7575 || $rOpts_delete_old_whitespace )
7578 my $token_next = $rLL->[$Knext]->[_TOKEN_];
7579 my $type_next = $rLL->[$Knext]->[_TYPE_];
7581 my $do_not_delete = is_essential_whitespace(
7582 $last_last_nonblank_code_token,
7583 $last_last_nonblank_code_type,
7584 $last_nonblank_code_token,
7585 $last_nonblank_code_type,
7590 # Note that repeated blanks will get filtered out here
7591 next unless ($do_not_delete);
7594 # make it just one character
7595 $rtoken_vars->[_TOKEN_] = SPACE;
7596 $self->store_token($rtoken_vars);
7600 my $token = $rtoken_vars->[_TOKEN_];
7602 # Handle a sequenced token ... i.e. one of ( ) { } [ ] ? :
7603 if ( $rtoken_vars->[_TYPE_SEQUENCE_] ) {
7606 if ( $is_closing_token{$token} ) {
7608 my $type_sequence = $rtoken_vars->[_TYPE_SEQUENCE_];
7609 my $block_type = $rblock_type_of_seqno->{$type_sequence};
7611 #---------------------------------------------
7612 # check for semicolon addition in a code block
7613 #---------------------------------------------
7616 # if not preceded by a ';' ..
7617 if ( $last_nonblank_code_type ne ';' ) {
7619 # tentatively insert a semicolon if appropriate
7620 $self->add_phantom_semicolon($KK)
7621 if $rOpts->{'add-semicolons'};
7625 #----------------------------------------------------------
7626 # check for addition/deletion of a trailing comma in a list
7627 #----------------------------------------------------------
7630 # if this is a list ..
7631 my $rtype_count = $rtype_count_by_seqno->{$type_sequence};
7633 && $rtype_count->{','}
7634 && !$rtype_count->{';'}
7635 && !$rtype_count->{'f'} )
7638 # if NOT preceded by a comma..
7639 if ( $last_nonblank_code_type ne ',' ) {
7641 # insert a comma if requested
7642 if ( $rOpts_add_trailing_commas
7643 && %trailing_comma_rules )
7645 $self->add_trailing_comma( $KK, $Kfirst,
7646 $trailing_comma_rules{$token} );
7650 # if preceded by a comma ..
7653 # delete a trailing comma if requested
7655 if ( $rOpts_delete_trailing_commas
7656 && %trailing_comma_rules )
7659 $self->delete_trailing_comma( $KK, $Kfirst,
7660 $trailing_comma_rules{$token} );
7663 # delete a weld-interfering comma if requested
7665 && $rOpts_delete_weld_interfering_commas
7666 && $is_closing_type{
7667 $last_last_nonblank_code_type} )
7669 $self->delete_weld_interfering_comma($KK);
7677 # Modify certain tokens here for whitespace
7678 # The following is not yet done, but could be:
7680 # ( $type =~ /^[wit]$/ )
7681 elsif ( $is_wit{$type} ) {
7683 # change '$ var' to '$var' etc
7684 # change '@ ' to '@'
7685 # Examples: <<snippets/space1.in>>
7686 my $ord = ord( substr( $token, 1, 1 ) );
7689 # quick test for possible blank at second char
7690 $ord > 0 && ( $ord < ORD_PRINTABLE_MIN
7691 || $ord > ORD_PRINTABLE_MAX )
7694 my ( $sigil, $word ) = split /\s+/, $token, 2;
7696 # $sigil =~ /^[\$\&\%\*\@]$/ )
7697 if ( $is_sigil{$sigil} ) {
7699 $token .= $word if ( defined($word) ); # fix c104
7700 $rtoken_vars->[_TOKEN_] = $token;
7704 # Trim certain spaces in identifiers
7705 if ( $type eq 'i' ) {
7707 if ( $token =~ /$SUB_PATTERN/ ) {
7709 # -spp = 0 : no space before opening prototype paren
7710 # -spp = 1 : stable (follow input spacing)
7711 # -spp = 2 : always space before opening prototype paren
7712 if ( !defined($rOpts_space_prototype_paren)
7713 || $rOpts_space_prototype_paren == 1 )
7717 elsif ( $rOpts_space_prototype_paren == 0 ) {
7718 $token =~ s/\s+\(/\(/;
7720 elsif ( $rOpts_space_prototype_paren == 2 ) {
7724 # one space max, and no tabs
7725 $token =~ s/\s+/ /g;
7726 $rtoken_vars->[_TOKEN_] = $token;
7728 $self->[_ris_special_identifier_token_]->{$token} = 'sub';
7732 # clean up spaces in package identifiers, like
7733 # "package Bob::Dog;"
7734 elsif ( substr( $token, 0, 7 ) eq 'package'
7735 && $token =~ /^package\s/ )
7737 $token =~ s/\s+/ /g;
7738 $rtoken_vars->[_TOKEN_] = $token;
7740 $self->[_ris_special_identifier_token_]->{$token} =
7745 # trim identifiers of trailing blanks which can occur
7746 # under some unusual circumstances, such as if the
7747 # identifier 'witch' has trailing blanks on input here:
7751 # () # prototype may be on new line ...
7753 my $ord_ch = ord( substr( $token, -1, 1 ) );
7756 # quick check for possible ending space
7757 $ord_ch > 0 && ( $ord_ch < ORD_PRINTABLE_MIN
7758 || $ord_ch > ORD_PRINTABLE_MAX )
7761 $token =~ s/\s+$//g;
7762 $rtoken_vars->[_TOKEN_] = $token;
7768 elsif ( $type eq ';' ) {
7770 # Remove unnecessary semicolons, but not after bare
7771 # blocks, where it could be unsafe if the brace is
7774 $rOpts->{'delete-semicolons'}
7777 $last_nonblank_block_type
7778 && $last_nonblank_code_type eq '}'
7780 $is_block_without_semicolon{
7781 $last_nonblank_block_type}
7782 || $last_nonblank_block_type =~ /$SUB_PATTERN/
7783 || $last_nonblank_block_type =~ /^\w+:$/
7786 || $last_nonblank_code_type eq ';'
7791 # This looks like a deletable semicolon, but even if a
7792 # semicolon can be deleted it is not necessarily best to do
7793 # so. We apply these additional rules for deletion:
7794 # - Always ok to delete a ';' at the end of a line
7795 # - Never delete a ';' before a '#' because it would
7796 # promote it to a block comment.
7797 # - If a semicolon is not at the end of line, then only
7798 # delete if it is followed by another semicolon or closing
7799 # token. This includes the comment rule. It may take
7800 # two passes to get to a final state, but it is a little
7801 # safer. For example, keep the first semicolon here:
7802 # eval { sub bubba { ok(0) }; ok(0) } || ok(1);
7803 # It is not required but adds some clarity.
7804 my $ok_to_delete = 1;
7805 if ( $KK < $Klast ) {
7806 my $Kn = $self->K_next_nonblank($KK);
7807 if ( defined($Kn) && $Kn <= $Klast ) {
7808 my $next_nonblank_token_type = $rLL->[$Kn]->[_TYPE_];
7809 $ok_to_delete = $next_nonblank_token_type eq ';'
7810 || $next_nonblank_token_type eq '}';
7814 # do not delete only nonblank token in a file
7816 my $Kp = $self->K_previous_code( undef, $rLL_new );
7817 my $Kn = $self->K_next_nonblank($KK);
7818 $ok_to_delete = defined($Kn) || defined($Kp);
7821 if ($ok_to_delete) {
7822 $self->note_deleted_semicolon($input_line_number);
7826 write_logfile_entry("Extra ';'\n");
7831 # Old patch to add space to something like "x10".
7832 # Note: This is now done in the Tokenizer, but this code remains
7834 elsif ( $type eq 'n' ) {
7835 if ( substr( $token, 0, 1 ) eq 'x' && $token =~ /^x\d+/ ) {
7837 $rtoken_vars->[_TOKEN_] = $token;
7840 Near line $input_line_number, Unexpected need to split a token '$token' - this should now be done by the Tokenizer
7846 # check for a qw quote
7847 elsif ( $type eq 'q' ) {
7849 # trim blanks from right of qw quotes
7850 # (To avoid trimming qw quotes use -ntqw; the tokenizer handles
7853 $rtoken_vars->[_TOKEN_] = $token;
7854 if ( $self->[_save_logfile_] && $token =~ /\t/ ) {
7855 $self->note_embedded_tab($input_line_number);
7857 if ( $rwhitespace_flags->[$KK] == WS_YES
7859 && $rLL_new->[-1]->[_TYPE_] ne 'b'
7860 && $rOpts_add_whitespace )
7862 $self->store_space();
7864 $self->store_token($rtoken_vars);
7866 } ## end if ( $type eq 'q' )
7868 # delete repeated commas if requested
7869 elsif ( $type eq ',' ) {
7870 if ( $last_nonblank_code_type eq ','
7871 && $rOpts->{'delete-repeated-commas'} )
7873 # Could note this deletion as a possible future update:
7874 ## $self->note_deleted_comma($input_line_number);
7878 # remember input line index of first comma if -wtc is used
7879 if (%trailing_comma_rules) {
7880 my $seqno = $seqno_stack{ $depth_next - 1 };
7881 if ( defined($seqno)
7882 && !defined( $self->[_rfirst_comma_line_index_]->{$seqno} )
7885 $self->[_rfirst_comma_line_index_]->{$seqno} =
7886 $rtoken_vars->[_LINE_INDEX_];
7891 # change 'LABEL :' to 'LABEL:'
7892 elsif ( $type eq 'J' ) {
7894 $rtoken_vars->[_TOKEN_] = $token;
7897 # check a quote for problems
7898 elsif ( $type eq 'Q' ) {
7899 $self->check_Q( $KK, $Kfirst, $input_line_number )
7900 if ( $self->[_save_logfile_] );
7903 # Store this token with possible previous blank
7904 if ( $rwhitespace_flags->[$KK] == WS_YES
7906 && $rLL_new->[-1]->[_TYPE_] ne 'b'
7907 && $rOpts_add_whitespace )
7909 $self->store_space();
7911 $self->store_token($rtoken_vars);
7916 } ## end sub respace_tokens_inner_loop
7918 sub respace_post_loop_ops {
7922 # Walk backwards through the tokens, making forward links to sequence items.
7923 if ( @{$rLL_new} ) {
7925 foreach my $KK ( reverse( 0 .. @{$rLL_new} - 1 ) ) {
7926 $rLL_new->[$KK]->[_KNEXT_SEQ_ITEM_] = $KNEXT;
7927 if ( $rLL_new->[$KK]->[_TYPE_SEQUENCE_] ) { $KNEXT = $KK }
7929 $self->[_K_first_seq_item_] = $KNEXT;
7932 # Find and remember lists by sequence number
7934 foreach my $seqno ( keys %{$K_opening_container} ) {
7935 my $K_opening = $K_opening_container->{$seqno};
7936 next unless defined($K_opening);
7938 # code errors may leave undefined closing tokens
7939 my $K_closing = $K_closing_container->{$seqno};
7940 next unless defined($K_closing);
7942 my $lx_open = $rLL_new->[$K_opening]->[_LINE_INDEX_];
7943 my $lx_close = $rLL_new->[$K_closing]->[_LINE_INDEX_];
7944 my $line_diff = $lx_close - $lx_open;
7945 $ris_broken_container->{$seqno} = $line_diff;
7947 # See if this is a list
7949 my $rtype_count = $rtype_count_by_seqno->{$seqno};
7951 my $comma_count = $rtype_count->{','};
7952 my $fat_comma_count = $rtype_count->{'=>'};
7953 my $semicolon_count = $rtype_count->{';'};
7954 if ( $rtype_count->{'f'} ) {
7955 $semicolon_count += $rtype_count->{'f'};
7956 $is_C_style_for{$seqno} = 1;
7959 # We will define a list to be a container with one or more commas
7960 # and no semicolons. Note that we have included the semicolons
7961 # in a 'for' container in the semicolon count to keep c-style for
7962 # statements from being formatted as lists.
7963 if ( ( $comma_count || $fat_comma_count ) && !$semicolon_count ) {
7966 # We need to do one more check for a parenthesized list:
7967 # At an opening paren following certain tokens, such as 'if',
7968 # we do not want to format the contents as a list.
7969 if ( $rLL_new->[$K_opening]->[_TOKEN_] eq '(' ) {
7970 my $Kp = $self->K_previous_code( $K_opening, $rLL_new );
7971 if ( defined($Kp) ) {
7972 my $type_p = $rLL_new->[$Kp]->[_TYPE_];
7973 my $token_p = $rLL_new->[$Kp]->[_TOKEN_];
7976 ? !$is_nonlist_keyword{$token_p}
7977 : !$is_nonlist_type{$type_p};
7983 # Look for a block brace marked as uncertain. If the tokenizer thinks
7984 # its guess is uncertain for the type of a brace following an unknown
7985 # bareword then it adds a trailing space as a signal. We can fix the
7986 # type here now that we have had a better look at the contents of the
7987 # container. This fixes case b1085. To find the corresponding code in
7988 # Tokenizer.pm search for 'b1085' with an editor.
7989 my $block_type = $rblock_type_of_seqno->{$seqno};
7990 if ( $block_type && substr( $block_type, -1, 1 ) eq SPACE ) {
7992 # Always remove the trailing space
7993 $block_type =~ s/\s+$//;
7995 # Try to filter out parenless sub calls
7996 my $Knn1 = $self->K_next_nonblank( $K_opening, $rLL_new );
7998 if ( defined($Knn1) ) {
7999 $Knn2 = $self->K_next_nonblank( $Knn1, $rLL_new );
8001 my $type_nn1 = defined($Knn1) ? $rLL_new->[$Knn1]->[_TYPE_] : 'b';
8002 my $type_nn2 = defined($Knn2) ? $rLL_new->[$Knn2]->[_TYPE_] : 'b';
8004 # if ( $type_nn1 =~ /^[wU]$/ && $type_nn2 =~ /^[wiqQGCZ]$/ ) {
8005 if ( $wU{$type_nn1} && $wiq{$type_nn2} ) {
8009 # Convert to a hash brace if it looks like it holds a list
8012 $block_type = EMPTY_STRING;
8014 $rLL_new->[$K_opening]->[_CI_LEVEL_] = 1;
8015 $rLL_new->[$K_closing]->[_CI_LEVEL_] = 1;
8018 $rblock_type_of_seqno->{$seqno} = $block_type;
8021 # Handle a list container
8022 if ( $is_list && !$block_type ) {
8023 $ris_list_by_seqno->{$seqno} = $seqno;
8024 my $seqno_parent = $rparent_of_seqno->{$seqno};
8026 while ( defined($seqno_parent) && $seqno_parent ne SEQ_ROOT ) {
8029 # for $rhas_list we need to save the minimum depth
8030 if ( !$rhas_list->{$seqno_parent}
8031 || $rhas_list->{$seqno_parent} > $depth )
8033 $rhas_list->{$seqno_parent} = $depth;
8037 $rhas_broken_list->{$seqno_parent} = 1;
8039 # Patch1: We need to mark broken lists with non-terminal
8040 # line-ending commas for the -bbx=2 parameter. This insures
8041 # that the list will stay broken. Otherwise the flag
8042 # -bbx=2 can be unstable. This fixes case b789 and b938.
8044 # Patch2: Updated to also require either one fat comma or
8045 # one more line-ending comma. Fixes cases b1069 b1070
8048 $rlec_count_by_seqno->{$seqno}
8049 && ( $rlec_count_by_seqno->{$seqno} > 1
8050 || $rtype_count_by_seqno->{$seqno}->{'=>'} )
8053 $rhas_broken_list_with_lec->{$seqno_parent} = 1;
8056 $seqno_parent = $rparent_of_seqno->{$seqno_parent};
8060 # Handle code blocks ...
8061 # The -lp option needs to know if a container holds a code block
8062 elsif ( $block_type && $rOpts_line_up_parentheses ) {
8063 my $seqno_parent = $rparent_of_seqno->{$seqno};
8064 while ( defined($seqno_parent) && $seqno_parent ne SEQ_ROOT ) {
8065 $rhas_code_block->{$seqno_parent} = 1;
8066 $rhas_broken_code_block->{$seqno_parent} = $line_diff;
8067 $seqno_parent = $rparent_of_seqno->{$seqno_parent};
8072 # Find containers with ternaries, needed for -lp formatting.
8073 foreach my $seqno ( keys %{$K_opening_ternary} ) {
8074 my $seqno_parent = $rparent_of_seqno->{$seqno};
8075 while ( defined($seqno_parent) && $seqno_parent ne SEQ_ROOT ) {
8076 $rhas_ternary->{$seqno_parent} = 1;
8077 $seqno_parent = $rparent_of_seqno->{$seqno_parent};
8081 # Turn off -lp for containers with here-docs with text within a container,
8082 # since they have their own fixed indentation. Fixes case b1081.
8083 if ($rOpts_line_up_parentheses) {
8084 foreach my $seqno ( keys %K_first_here_doc_by_seqno ) {
8085 my $Kh = $K_first_here_doc_by_seqno{$seqno};
8086 my $Kc = $K_closing_container->{$seqno};
8087 my $line_Kh = $rLL_new->[$Kh]->[_LINE_INDEX_];
8088 my $line_Kc = $rLL_new->[$Kc]->[_LINE_INDEX_];
8089 next if ( $line_Kh == $line_Kc );
8090 $ris_excluded_lp_container->{$seqno} = 1;
8094 # Set a flag to turn off -cab=3 in complex structures. Otherwise,
8095 # instability can occur. When it is overridden the behavior of the closest
8096 # match, -cab=2, will be used instead. This fixes cases b1096 b1113.
8097 if ( $rOpts_comma_arrow_breakpoints == 3 ) {
8098 foreach my $seqno ( keys %{$K_opening_container} ) {
8100 my $rtype_count = $rtype_count_by_seqno->{$seqno};
8101 next unless ( $rtype_count && $rtype_count->{'=>'} );
8103 # override -cab=3 if this contains a sub-list
8104 if ( !defined( $roverride_cab3->{$seqno} ) ) {
8105 if ( $rhas_list->{$seqno} ) {
8106 $roverride_cab3->{$seqno} = 2;
8109 # or if this is a sub-list of its parent container
8111 my $seqno_parent = $rparent_of_seqno->{$seqno};
8112 if ( defined($seqno_parent)
8113 && $ris_list_by_seqno->{$seqno_parent} )
8115 $roverride_cab3->{$seqno} = 2;
8122 # Add -ci to C-style for loops (issue c154)
8123 # This is much easier to do here than in the tokenizer.
8124 foreach my $seqno ( keys %is_C_style_for ) {
8125 my $K_opening = $K_opening_container->{$seqno};
8126 my $K_closing = $K_closing_container->{$seqno};
8127 my $type_last = 'f';
8128 for my $KK ( $K_opening + 1 .. $K_closing - 1 ) {
8129 $rLL_new->[$KK]->[_CI_LEVEL_] = $type_last eq 'f' ? 0 : 1;
8130 my $type = $rLL_new->[$KK]->[_TYPE_];
8131 if ( $type ne 'b' && $type ne '#' ) { $type_last = $type }
8136 } ## end sub respace_post_loop_ops
8138 sub set_permanently_broken {
8139 my ( $self, $seqno ) = @_;
8140 while ( defined($seqno) ) {
8141 $ris_permanently_broken->{$seqno} = 1;
8142 $seqno = $rparent_of_seqno->{$seqno};
8145 } ## end sub set_permanently_broken
8149 my ( $self, $item ) = @_;
8151 #------------------------------------------
8152 # Store one token during respace operations
8153 #------------------------------------------
8156 # $item = ref to a token
8158 # NOTE: this sub is called once per token so coding efficiency is critical.
8160 # The next multiple assignment statements are significantly faster than
8161 # doing them one-by-one.
8176 # Set the token length. Later it may be adjusted again if phantom or
8177 # ignoring side comment lengths.
8179 $is_encoded_data ? $length_function->($token) : length($token);
8182 if ( $type eq 'b' ) {
8184 # Do not output consecutive blanks. This situation should have been
8185 # prevented earlier, but it is worth checking because later routines
8186 # make this assumption.
8187 if ( @{$rLL_new} && $rLL_new->[-1]->[_TYPE_] eq 'b' ) {
8193 elsif ( $type eq '#' ) {
8195 # trim comments if necessary
8196 my $ord = ord( substr( $token, -1, 1 ) );
8199 && ( $ord < ORD_PRINTABLE_MIN
8200 || $ord > ORD_PRINTABLE_MAX )
8201 && $token =~ s/\s+$//
8204 $token_length = $length_function->($token);
8205 $item->[_TOKEN_] = $token;
8208 # Mark length of side comments as just 1 if sc lengths are ignored
8209 if ( $rOpts_ignore_side_comment_lengths
8210 && ( !$CODE_type || $CODE_type eq 'HSC' ) )
8214 my $seqno = $seqno_stack{ $depth_next - 1 };
8215 if ( defined($seqno) ) {
8216 $self->[_rblank_and_comment_count_]->{$seqno} += 1
8217 if ( $CODE_type eq 'BC' );
8218 $self->set_permanently_broken($seqno)
8219 if !$ris_permanently_broken->{$seqno};
8223 # handle non-blanks and non-comments
8228 # check for a sequenced item (i.e., container or ?/:)
8229 if ($type_sequence) {
8231 # This will be the index of this item in the new array
8232 my $KK_new = @{$rLL_new};
8234 if ( $is_opening_token{$token} ) {
8236 $K_opening_container->{$type_sequence} = $KK_new;
8237 $block_type = $rblock_type_of_seqno->{$type_sequence};
8239 # Fix for case b1100: Count a line ending in ', [' as having
8240 # a line-ending comma. Otherwise, these commas can be hidden
8241 # with something like --opening-square-bracket-right
8242 if ( $last_nonblank_code_type eq ','
8243 && $Ktoken_vars == $Klast_old_code
8244 && $Ktoken_vars > $Kfirst_old )
8246 $rlec_count_by_seqno->{$type_sequence}++;
8249 if ( $last_nonblank_code_type eq '='
8250 || $last_nonblank_code_type eq '=>' )
8252 $ris_assigned_structure->{$type_sequence} =
8253 $last_nonblank_code_type;
8256 my $seqno_parent = $seqno_stack{ $depth_next - 1 };
8257 $seqno_parent = SEQ_ROOT unless defined($seqno_parent);
8258 push @{ $rchildren_of_seqno->{$seqno_parent} }, $type_sequence;
8259 $rparent_of_seqno->{$type_sequence} = $seqno_parent;
8260 $seqno_stack{$depth_next} = $type_sequence;
8261 $K_old_opening_by_seqno{$type_sequence} = $Ktoken_vars;
8264 if ( $depth_next > $depth_next_max ) {
8265 $depth_next_max = $depth_next;
8268 elsif ( $is_closing_token{$token} ) {
8270 $K_closing_container->{$type_sequence} = $KK_new;
8271 $block_type = $rblock_type_of_seqno->{$type_sequence};
8273 # Do not include terminal commas in counts
8274 if ( $last_nonblank_code_type eq ','
8275 || $last_nonblank_code_type eq '=>' )
8277 $rtype_count_by_seqno->{$type_sequence}
8278 ->{$last_nonblank_code_type}--;
8280 if ( $Ktoken_vars == $Kfirst_old
8281 && $last_nonblank_code_type eq ','
8282 && $rlec_count_by_seqno->{$type_sequence} )
8284 $rlec_count_by_seqno->{$type_sequence}--;
8288 # Update the stack...
8293 # For ternary, note parent but do not include as child
8294 my $seqno_parent = $seqno_stack{ $depth_next - 1 };
8295 $seqno_parent = SEQ_ROOT unless defined($seqno_parent);
8296 $rparent_of_seqno->{$type_sequence} = $seqno_parent;
8298 # These are not yet used but could be useful
8299 if ( $token eq '?' ) {
8300 $K_opening_ternary->{$type_sequence} = $KK_new;
8302 elsif ( $token eq ':' ) {
8303 $K_closing_ternary->{$type_sequence} = $KK_new;
8307 # We really shouldn't arrive here, just being cautious:
8308 # The only sequenced types output by the tokenizer are the
8309 # opening & closing containers and the ternary types. Each
8310 # of those was checked above. So we would only get here
8311 # if the tokenizer has been changed to mark some other
8312 # tokens with sequence numbers.
8315 "Unexpected token type with sequence number: type='$type', seqno='$type_sequence'"
8322 # Remember the most recent two non-blank, non-comment tokens.
8323 # NOTE: the phantom semicolon code may change the output stack
8324 # without updating these values. Phantom semicolons are considered
8325 # the same as blanks for now, but future needs might change that.
8326 # See the related note in sub 'add_phantom_semicolon'.
8327 $last_last_nonblank_code_type = $last_nonblank_code_type;
8328 $last_last_nonblank_code_token = $last_nonblank_code_token;
8330 $last_nonblank_code_type = $type;
8331 $last_nonblank_code_token = $token;
8332 $last_nonblank_block_type = $block_type;
8334 # count selected types
8335 if ( $is_counted_type{$type} ) {
8336 my $seqno = $seqno_stack{ $depth_next - 1 };
8337 if ( defined($seqno) ) {
8338 $rtype_count_by_seqno->{$seqno}->{$type}++;
8340 # Count line-ending commas for -bbx
8341 if ( $type eq ',' && $Ktoken_vars == $Klast_old_code ) {
8342 $rlec_count_by_seqno->{$seqno}++;
8345 # Remember index of first here doc target
8346 if ( $type eq 'h' && !$K_first_here_doc_by_seqno{$seqno} ) {
8347 my $KK_new = @{$rLL_new};
8348 $K_first_here_doc_by_seqno{$seqno} = $KK_new;
8354 # cumulative length is the length sum including this token
8355 $cumulative_length += $token_length;
8357 $item->[_CUMULATIVE_LENGTH_] = $cumulative_length;
8358 $item->[_TOKEN_LENGTH_] = $token_length;
8360 # For reference, here is how to get the parent sequence number.
8361 # This is not used because it is slower than finding it on the fly
8362 # in sub parent_seqno_by_K:
8364 # my $seqno_parent =
8365 # $type_sequence && $is_opening_token{$token}
8366 # ? $seqno_stack{ $depth_next - 2 }
8367 # : $seqno_stack{ $depth_next - 1 };
8368 # my $KK = @{$rLL_new};
8369 # $rseqno_of_parent_by_K->{$KK} = $seqno_parent;
8371 # and finally, add this item to the new array
8372 push @{$rLL_new}, $item;
8374 } ## end sub store_token
8379 # Store a blank space in the new array
8380 # - but never start the array with a space
8381 # - and never store two consecutive spaces
8383 && $rLL_new->[-1]->[_TYPE_] ne 'b' )
8386 $ritem->[_TYPE_] = 'b';
8387 $ritem->[_TOKEN_] = SPACE;
8388 $ritem->[_TYPE_SEQUENCE_] = EMPTY_STRING;
8390 $ritem->[_LINE_INDEX_] =
8391 $rLL_new->[-1]->[_LINE_INDEX_];
8393 # The level and ci_level of newly created spaces should be the same
8394 # as the previous token. Otherwise the coding for the -lp option
8395 # can create a blinking state in some rare cases (see b1109, b1110).
8397 $rLL_new->[-1]->[_LEVEL_];
8398 $ritem->[_CI_LEVEL_] =
8399 $rLL_new->[-1]->[_CI_LEVEL_];
8401 $self->store_token($ritem);
8405 } ## end sub store_space
8407 sub add_phantom_semicolon {
8409 my ( $self, $KK ) = @_;
8411 # The token at old index $KK is a closing block brace, and not preceded
8412 # by a semicolon. Before we push it onto the new token list, we may
8413 # want to add a phantom semicolon which can be activated if the the
8414 # block is broken on output.
8416 # We are only adding semicolons for certain block types
8417 my $type_sequence = $rLL->[$KK]->[_TYPE_SEQUENCE_];
8418 return unless ($type_sequence);
8419 my $block_type = $rblock_type_of_seqno->{$type_sequence};
8420 return unless ($block_type);
8422 unless ( $ok_to_add_semicolon_for_block_type{$block_type}
8423 || $block_type =~ /^(sub|package)/
8424 || $block_type =~ /^\w+\:$/ );
8426 # Find the most recent token in the new token list
8427 my $Kp = $self->K_previous_nonblank( undef, $rLL_new );
8428 return unless ( defined($Kp) ); # shouldn't happen except for bad input
8430 my $type_p = $rLL_new->[$Kp]->[_TYPE_];
8431 my $token_p = $rLL_new->[$Kp]->[_TOKEN_];
8432 my $type_sequence_p = $rLL_new->[$Kp]->[_TYPE_SEQUENCE_];
8434 # Do not add a semicolon if...
8438 # it would follow a comment (and be isolated)
8441 # it follows a code block ( because they are not always wanted
8442 # there and may add clutter)
8443 || $type_sequence_p && $rblock_type_of_seqno->{$type_sequence_p}
8445 # it would follow a label
8448 # it would be inside a 'format' statement (and cause syntax error)
8450 && $token_p =~ /format/ )
8454 # Do not add a semicolon if it would impede a weld with an immediately
8455 # following closing token...like this
8457 # ^--No semicolon can go here
8459 # look at the previous token... note use of the _NEW rLL array here,
8460 # but sequence numbers are invariant.
8461 my $seqno_inner = $rLL_new->[$Kp]->[_TYPE_SEQUENCE_];
8463 # If it is also a CLOSING token we have to look closer...
8466 && $is_closing_token{$token_p}
8468 # we only need to look if there is just one inner container..
8469 && defined( $rchildren_of_seqno->{$type_sequence} )
8470 && @{ $rchildren_of_seqno->{$type_sequence} } == 1
8474 # Go back and see if the corresponding two OPENING tokens are also
8475 # together. Note that we are using the OLD K indexing here:
8476 my $K_outer_opening = $K_old_opening_by_seqno{$type_sequence};
8477 if ( defined($K_outer_opening) ) {
8478 my $K_nxt = $self->K_next_nonblank($K_outer_opening);
8479 if ( defined($K_nxt) ) {
8480 my $seqno_nxt = $rLL->[$K_nxt]->[_TYPE_SEQUENCE_];
8482 # Is the next token after the outer opening the same as
8483 # our inner closing (i.e. same sequence number)?
8484 # If so, do not insert a semicolon here.
8485 return if ( $seqno_nxt && $seqno_nxt == $seqno_inner );
8490 # We will insert an empty semicolon here as a placeholder. Later, if
8491 # it becomes the last token on a line, we will bring it to life. The
8492 # advantage of doing this is that (1) we just have to check line
8493 # endings, and (2) the phantom semicolon has zero width and therefore
8494 # won't cause needless breaks of one-line blocks.
8496 if ( $rLL_new->[$Ktop]->[_TYPE_] eq 'b'
8497 && $want_left_space{';'} == WS_NO )
8500 # convert the blank into a semicolon..
8501 # be careful: we are working on the new stack top
8502 # on a token which has been stored.
8503 my $rcopy = copy_token_as_type( $rLL_new->[$Ktop], 'b', SPACE );
8505 # Convert the existing blank to:
8506 # a phantom semicolon for one_line_block option = 0 or 1
8507 # a real semicolon for one_line_block option = 2
8508 my $tok = EMPTY_STRING;
8510 if ( $rOpts_one_line_block_semicolons == 2 ) {
8515 $rLL_new->[$Ktop]->[_TOKEN_] = $tok;
8516 $rLL_new->[$Ktop]->[_TOKEN_LENGTH_] = $len_tok;
8517 $rLL_new->[$Ktop]->[_TYPE_] = ';';
8519 $self->[_rtype_count_by_seqno_]->{$type_sequence}->{';'}++;
8521 # NOTE: we are changing the output stack without updating variables
8522 # $last_nonblank_code_type, etc. Future needs might require that
8523 # those variables be updated here. For now, it seems ok to skip
8526 # Then store a new blank
8527 $self->store_token($rcopy);
8531 # Patch for issue c078: keep line indexes in order. If the top
8532 # token is a space that we are keeping (due to '-wls=';') then
8533 # we have to check that old line indexes stay in order.
8535 # instances in which side comments have been deleted and converted
8536 # into blanks, we may have filtered down multiple blanks into just
8537 # one. In that case the top blank may have a higher line number
8538 # than the previous nonblank token. Although the line indexes of
8539 # blanks are not really significant, we need to keep them in order
8540 # in order to pass error checks.
8541 if ( $rLL_new->[$Ktop]->[_TYPE_] eq 'b' ) {
8542 my $old_top_ix = $rLL_new->[$Ktop]->[_LINE_INDEX_];
8543 my $new_top_ix = $rLL_new->[$Kp]->[_LINE_INDEX_];
8544 if ( $new_top_ix < $old_top_ix ) {
8545 $rLL_new->[$Ktop]->[_LINE_INDEX_] = $new_top_ix;
8549 my $rcopy = copy_token_as_type( $rLL_new->[$Kp], ';', EMPTY_STRING );
8550 $self->store_token($rcopy);
8553 } ## end sub add_phantom_semicolon
8555 sub add_trailing_comma {
8557 # Implement the --add-trailing-commas flag to the line end before index $KK:
8559 my ( $self, $KK, $Kfirst, $trailing_comma_rule ) = @_;
8562 # $KK = index of closing token in old ($rLL) token list
8563 # which starts a new line and is not preceded by a comma
8564 # $Kfirst = index of first token on the current line of input tokens
8565 # $add_flags = user control flags
8567 # For example, we might want to add a comma here:
8572 # _rebate => $rebate <------ location of possible bare comma
8574 # ^-------------------closing token at index $KK on new line
8576 # Do not add a comma if it would follow a comment
8577 my $Kp = $self->K_previous_nonblank( undef, $rLL_new );
8578 return unless ( defined($Kp) );
8579 my $type_p = $rLL_new->[$Kp]->[_TYPE_];
8580 return if ( $type_p eq '#' );
8582 # see if the user wants a trailing comma here
8584 $self->match_trailing_comma_rule( $KK, $Kfirst, $Kp,
8585 $trailing_comma_rule, 1 );
8587 # if so, add a comma
8589 my $Knew = $self->store_new_token( ',', ',', $Kp );
8594 } ## end sub add_trailing_comma
8596 sub delete_trailing_comma {
8598 my ( $self, $KK, $Kfirst, $trailing_comma_rule ) = @_;
8600 # Apply the --delete-trailing-commas flag to the comma before index $KK
8603 # $KK = index of a closing token in OLD ($rLL) token list
8604 # which is preceded by a comma on the same line.
8605 # $Kfirst = index of first token on the current line of input tokens
8606 # $delete_option = user control flag
8608 # Returns true if the comma was deleted
8610 # For example, we might want to delete this comma:
8611 # my @asset = ("FASMX", "FASGX", "FASIX",);
8612 # | |^--------token at index $KK
8613 # | ^------comma of interest
8614 # ^-------------token at $Kfirst
8616 # Verify that the previous token is a comma. Note that we are working in
8617 # the new token list $rLL_new.
8618 my $Kp = $self->K_previous_nonblank( undef, $rLL_new );
8619 return unless ( defined($Kp) );
8620 if ( $rLL_new->[$Kp]->[_TYPE_] ne ',' ) {
8622 # there must be a '#' between the ',' and closing token; give up.
8626 # Do not delete commas when formatting under stress to avoid instability.
8627 # This fixes b1389, b1390, b1391, b1392. The $high_stress_level has
8628 # been found to work well for trailing commas.
8629 if ( $rLL_new->[$Kp]->[_LEVEL_] >= $high_stress_level ) {
8633 # See if the user wants this trailing comma
8635 $self->match_trailing_comma_rule( $KK, $Kfirst, $Kp,
8636 $trailing_comma_rule, 0 );
8638 # Patch: the --noadd-whitespace flag can cause instability in complex
8639 # structures. In this case do not delete the comma. Fixes b1409.
8640 if ( !$match && !$rOpts_add_whitespace ) {
8641 my $Kn = $self->K_next_nonblank($KK);
8642 if ( defined($Kn) ) {
8643 my $type_n = $rLL->[$Kn]->[_TYPE_];
8644 if ( $type_n ne ';' && $type_n ne '#' ) { return }
8648 # If no match, delete it
8651 return $self->unstore_last_nonblank_token(',');
8655 } ## end sub delete_trailing_comma
8657 sub delete_weld_interfering_comma {
8659 my ( $self, $KK ) = @_;
8661 # Apply the flag '--delete-weld-interfering-commas' to the comma
8665 # $KK = index of a closing token in OLD ($rLL) token list
8666 # which is preceded by a comma on the same line.
8668 # Returns true if the comma was deleted
8670 # For example, we might want to delete this comma:
8672 # my $tmpl = { foo => {no_override => 1, default => 42}, };
8678 # index $KK is in the old $rLL array, but
8679 # indexes $Kp and $Kpp are in the new $rLL_new array.
8681 my $type_sequence = $rLL->[$KK]->[_TYPE_SEQUENCE_];
8682 return unless ($type_sequence);
8684 # Find the previous token and verify that it is a comma.
8685 my $Kp = $self->K_previous_nonblank( undef, $rLL_new );
8686 return unless ( defined($Kp) );
8687 if ( $rLL_new->[$Kp]->[_TYPE_] ne ',' ) {
8689 # it is not a comma, so give up ( it is probably a '#' )
8693 # This must be the only comma in this list
8694 my $rtype_count = $self->[_rtype_count_by_seqno_]->{$type_sequence};
8696 unless ( defined($rtype_count)
8697 && $rtype_count->{','}
8698 && $rtype_count->{','} == 1 );
8700 # Back up to the previous closing token
8701 my $Kpp = $self->K_previous_nonblank( $Kp, $rLL_new );
8702 return unless ( defined($Kpp) );
8703 my $seqno_pp = $rLL_new->[$Kpp]->[_TYPE_SEQUENCE_];
8704 my $type_pp = $rLL_new->[$Kpp]->[_TYPE_];
8706 # The containers must be nesting (i.e., sequence numbers must differ by 1 )
8707 if ( $seqno_pp && $is_closing_type{$type_pp} ) {
8708 if ( $seqno_pp == $type_sequence + 1 ) {
8710 # remove the ',' from the top of the new token list
8711 return $self->unstore_last_nonblank_token(',');
8716 } ## end sub delete_weld_interfering_comma
8718 sub unstore_last_nonblank_token {
8720 my ( $self, $type ) = @_;
8722 # remove the most recent nonblank token from the new token list
8724 # $type = type to be removed (for safety check)
8726 # Returns true if success
8729 # This was written and is used for removing commas, but might
8730 # be useful for other tokens. If it is ever used for other tokens
8731 # then the issue of what to do about the other variables, such
8732 # as token counts and the '$last...' vars needs to be considered.
8734 # Safety check, shouldn't happen
8735 if ( @{$rLL_new} < 3 ) {
8736 DEVEL_MODE && Fault("not enough tokens on stack to remove '$type'\n");
8740 my ( $rcomma, $rblank );
8742 # case 1: pop comma from top of stack
8743 if ( $rLL_new->[-1]->[_TYPE_] eq $type ) {
8744 $rcomma = pop @{$rLL_new};
8747 # case 2: pop blank and then comma from top of stack
8748 elsif ($rLL_new->[-1]->[_TYPE_] eq 'b'
8749 && $rLL_new->[-2]->[_TYPE_] eq $type )
8751 $rblank = pop @{$rLL_new};
8752 $rcomma = pop @{$rLL_new};
8755 # case 3: error, shouldn't happen unless bad call
8757 DEVEL_MODE && Fault("Could not find token type '$type' to remove\n");
8761 # A note on updating vars set by sub store_token for this comma: If we
8762 # reduce the comma count by 1 then we also have to change the variable
8763 # $last_nonblank_code_type to be $last_last_nonblank_code_type because
8764 # otherwise sub store_token is going to ALSO reduce the comma count.
8765 # Alternatively, we can leave the count alone and the
8766 # $last_nonblank_code_type alone. Then sub store_token will produce
8767 # the correct result. This is simpler and is done here.
8769 # Now add a blank space after the comma if appropriate.
8770 # Some unusual spacing controls might need another iteration to
8771 # reach a final state.
8772 if ( $rLL_new->[-1]->[_TYPE_] ne 'b' ) {
8773 if ( defined($rblank) ) {
8774 $rblank->[_CUMULATIVE_LENGTH_] -= 1; # fix for deleted comma
8775 push @{$rLL_new}, $rblank;
8779 } ## end sub unstore_last_nonblank_token
8781 sub match_trailing_comma_rule {
8783 my ( $self, $KK, $Kfirst, $Kp, $trailing_comma_rule, $if_add ) = @_;
8785 # Decide if a trailing comma rule is matched.
8788 # $KK = index of closing token in old ($rLL) token list which follows
8789 # the location of a possible trailing comma. See diagram below.
8790 # $Kfirst = (old) index of first token on the current line of input tokens
8791 # $Kp = index of previous nonblank token in new ($rLL_new) array
8792 # $trailing_comma_rule = packed user control flags
8793 # $if_add = true if adding comma, false if deleteing comma
8799 # For example, we might be checking for addition of a comma here:
8804 # _rebate => $rebate <------ location of possible trailing comma
8806 # ^-------------------closing token at index $KK
8808 return unless ($trailing_comma_rule);
8809 my ( $trailing_comma_style, $paren_flag ) = @{$trailing_comma_rule};
8811 # List of $trailing_comma_style values:
8812 # undef stable: do not change
8813 # '0' : no list should have a trailing comma
8814 # '1' or '*' : every list should have a trailing comma
8815 # 'm' a multi-line list should have a trailing commas
8816 # 'b' trailing commas should be 'bare' (comma followed by newline)
8817 # 'h' lists of key=>value pairs with a bare trailing comma
8818 # 'i' same as s=h but also include any list with no more than about one
8820 # ' ' or -wtc not defined : leave trailing commas unchanged [DEFAULT].
8822 # Note: an interesting generalization would be to let an upper case
8823 # letter denote the negation of styles 'm', 'b', 'h', 'i'. This might
8824 # be useful for undoing operations. It would be implemented as a wrapper
8825 # around this routine.
8827 #-----------------------------------------
8828 # No style defined : do not add or delete
8829 #-----------------------------------------
8830 if ( !defined($trailing_comma_style) ) { return !$if_add }
8832 #----------------------------------------
8833 # Set some flags describing this location
8834 #----------------------------------------
8835 my $type_sequence = $rLL->[$KK]->[_TYPE_SEQUENCE_];
8836 return unless ($type_sequence);
8837 my $closing_token = $rLL->[$KK]->[_TOKEN_];
8838 my $rtype_count = $self->[_rtype_count_by_seqno_]->{$type_sequence};
8839 return unless ( defined($rtype_count) && $rtype_count->{','} );
8840 my $is_permanently_broken =
8841 $self->[_ris_permanently_broken_]->{$type_sequence};
8843 # Note that _ris_broken_container_ also stores the line diff
8844 # but it is not available at this early stage.
8845 my $K_opening = $self->[_K_opening_container_]->{$type_sequence};
8846 return if ( !defined($K_opening) );
8848 # multiline definition 1: opening and closing tokens on different lines
8849 my $iline_o = $rLL_new->[$K_opening]->[_LINE_INDEX_];
8850 my $iline_c = $rLL->[$KK]->[_LINE_INDEX_];
8851 my $line_diff_containers = $iline_c - $iline_o;
8852 my $has_multiline_containers = $line_diff_containers > 0;
8854 # multiline definition 2: first and last commas on different lines
8855 my $iline_first = $self->[_rfirst_comma_line_index_]->{$type_sequence};
8856 my $iline_last = $rLL_new->[$Kp]->[_LINE_INDEX_];
8857 my $has_multiline_commas;
8858 my $line_diff_commas = 0;
8859 if ( !defined($iline_first) ) {
8861 # shouldn't happen if caller checked comma count
8862 my $type_kp = $rLL_new->[$Kp]->[_TYPE_];
8864 "at line $iline_last but line of first comma not defined, at Kp=$Kp, type=$type_kp\n"
8868 $line_diff_commas = $iline_last - $iline_first;
8869 $has_multiline_commas = $line_diff_commas > 0;
8872 # To avoid instability in edge cases, when adding commas we uses the
8873 # multiline_commas definition, but when deleting we use multiline
8874 # containers. This fixes b1384, b1396, b1397, b1398, b1400.
8876 $if_add ? $has_multiline_commas : $has_multiline_containers;
8878 my $is_bare_multiline_comma = $is_multiline && $KK == $Kfirst;
8882 #----------------------------
8883 # 0 : does not match any list
8884 #----------------------------
8885 if ( $trailing_comma_style eq '0' ) {
8889 #------------------------------
8890 # '*' or '1' : matches any list
8891 #------------------------------
8892 elsif ( $trailing_comma_style eq '*' || $trailing_comma_style eq '1' ) {
8896 #-----------------------------
8897 # 'm' matches a Multiline list
8898 #-----------------------------
8899 elsif ( $trailing_comma_style eq 'm' ) {
8900 $match = $is_multiline;
8903 #----------------------------------
8904 # 'b' matches a Bare trailing comma
8905 #----------------------------------
8906 elsif ( $trailing_comma_style eq 'b' ) {
8907 $match = $is_bare_multiline_comma;
8910 #--------------------------------------------------------------------------
8911 # 'h' matches a bare hash list with about 1 comma and 1 fat comma per line.
8912 # 'i' matches a bare stable list with about 1 comma per line.
8913 #--------------------------------------------------------------------------
8914 elsif ( $trailing_comma_style eq 'h' || $trailing_comma_style eq 'i' ) {
8916 # We can treat these together because they are similar.
8917 # The set of 'i' matches includes the set of 'h' matches.
8919 # the trailing comma must be bare for both 'h' and 'i'
8920 return if ( !$is_bare_multiline_comma );
8922 # There must be no more than one comma per line for both 'h' and 'i'
8923 # The new_comma_count here will include the trailing comma.
8924 my $new_comma_count = $rtype_count->{','};
8925 $new_comma_count += 1 if ($if_add);
8926 my $excess_commas = $new_comma_count - $line_diff_commas - 1;
8927 if ( $excess_commas > 0 ) {
8929 # Exception for a special edge case for option 'i': if the trailing
8930 # comma is followed by a blank line or comment, then it cannot be
8931 # covered. Then we can safely accept a small list to avoid
8932 # instability (issue b1443).
8933 if ( $trailing_comma_style eq 'i'
8934 && $iline_c - $rLL_new->[$Kp]->[_LINE_INDEX_] > 1
8935 && $new_comma_count <= 2 )
8944 # a list of key=>value pairs with at least 2 fat commas is a match
8945 # for both 'h' and 'i'
8946 my $fat_comma_count = $rtype_count->{'=>'};
8947 if ( !$match && $fat_comma_count && $fat_comma_count >= 2 ) {
8949 # comma count (including trailer) and fat comma count must differ by
8950 # by no more than 1. This allows for some small variations.
8951 my $comma_diff = $new_comma_count - $fat_comma_count;
8952 $match = ( $comma_diff >= -1 && $comma_diff <= 1 );
8955 # For 'i' only, a list that can be shown to be stable is a match
8956 if ( !$match && $trailing_comma_style eq 'i' ) {
8958 $is_permanently_broken
8959 || ( $rOpts_break_at_old_comma_breakpoints
8960 && !$rOpts_ignore_old_breakpoints )
8965 #-------------------------------------------------------------------------
8966 # Unrecognized parameter. This should have been caught in the input check.
8967 #-------------------------------------------------------------------------
8970 DEVEL_MODE && Fault("Unrecognized parameter '$trailing_comma_style'\n");
8972 # do not add or delete
8976 # Now do any special paren check
8979 && $paren_flag ne '1'
8980 && $paren_flag ne '*'
8981 && $closing_token eq ')' )
8984 $self->match_paren_control_flag( $type_sequence, $paren_flag,
8988 # Fix for b1379, b1380, b1381, b1382, b1384 part 1. Mark trailing commas
8989 # for use by -vtc logic to avoid instability when -dtc and -atc are both
8992 if ( $if_add && $rOpts_delete_trailing_commas
8993 || !$if_add && $rOpts_add_trailing_commas )
8995 $self->[_ris_bare_trailing_comma_by_seqno_]->{$type_sequence} = 1;
8997 # The combination of -atc and -dtc and -cab=3 can be unstable
8998 # (b1394). So we deactivate -cab=3 in this case.
8999 # A value of '0' or '4' is required for stability of case b1451.
9000 if ( $rOpts_comma_arrow_breakpoints == 3 ) {
9001 $self->[_roverride_cab3_]->{$type_sequence} = 0;
9006 } ## end sub match_trailing_comma_rule
9008 sub store_new_token {
9010 my ( $self, $type, $token, $Kp ) = @_;
9012 # Create and insert a completely new token into the output stream
9015 # $type = the token type
9016 # $token = the token text
9017 # $Kp = index of the previous token in the new list, $rLL_new
9020 # $Knew = index in $rLL_new of the new token
9022 # This operation is a little tricky because we are creating a new token and
9023 # we have to take care to follow the requested whitespace rules.
9025 my $Ktop = @{$rLL_new} - 1;
9026 my $top_is_space = $Ktop >= 0 && $rLL_new->[$Ktop]->[_TYPE_] eq 'b';
9028 if ( $top_is_space && $want_left_space{$type} == WS_NO ) {
9030 #----------------------------------------------------
9031 # Method 1: Convert the top blank into the new token.
9032 #----------------------------------------------------
9034 # Be Careful: we are working on the top of the new stack, on a token
9035 # which has been stored.
9037 my $rcopy = copy_token_as_type( $rLL_new->[$Ktop], 'b', SPACE );
9040 $rLL_new->[$Knew]->[_TOKEN_] = $token;
9041 $rLL_new->[$Knew]->[_TOKEN_LENGTH_] = length($token);
9042 $rLL_new->[$Knew]->[_TYPE_] = $type;
9044 # NOTE: we are changing the output stack without updating variables
9045 # $last_nonblank_code_type, etc. Future needs might require that
9046 # those variables be updated here. For now, we just update the
9047 # type counts as necessary.
9049 if ( $is_counted_type{$type} ) {
9050 my $seqno = $seqno_stack{ $depth_next - 1 };
9052 $self->[_rtype_count_by_seqno_]->{$seqno}->{$type}++;
9056 # Then store a new blank
9057 $self->store_token($rcopy);
9061 #----------------------------------------
9062 # Method 2: Use the normal storage method
9063 #----------------------------------------
9065 # Patch for issue c078: keep line indexes in order. If the top
9066 # token is a space that we are keeping (due to '-wls=...) then
9067 # we have to check that old line indexes stay in order.
9069 # instances in which side comments have been deleted and converted
9070 # into blanks, we may have filtered down multiple blanks into just
9071 # one. In that case the top blank may have a higher line number
9072 # than the previous nonblank token. Although the line indexes of
9073 # blanks are not really significant, we need to keep them in order
9074 # in order to pass error checks.
9075 if ($top_is_space) {
9076 my $old_top_ix = $rLL_new->[$Ktop]->[_LINE_INDEX_];
9077 my $new_top_ix = $rLL_new->[$Kp]->[_LINE_INDEX_];
9078 if ( $new_top_ix < $old_top_ix ) {
9079 $rLL_new->[$Ktop]->[_LINE_INDEX_] = $new_top_ix;
9083 my $rcopy = copy_token_as_type( $rLL_new->[$Kp], $type, $token );
9084 $self->store_token($rcopy);
9085 $Knew = @{$rLL_new} - 1;
9088 } ## end sub store_new_token
9092 # Check that a quote looks okay, and report possible problems
9095 my ( $self, $KK, $Kfirst, $line_number ) = @_;
9096 my $token = $rLL->[$KK]->[_TOKEN_];
9097 if ( $token =~ /\t/ ) {
9098 $self->note_embedded_tab($line_number);
9101 # The remainder of this routine looks for something like
9102 # '$var = s/xxx/yyy/;'
9103 # in case it should have been '$var =~ s/xxx/yyy/;'
9105 # Start by looking for a token beginning with one of: s y m / tr
9107 unless ( $is_s_y_m_slash{ substr( $token, 0, 1 ) }
9108 || substr( $token, 0, 2 ) eq 'tr' );
9110 # ... and preceded by one of: = == !=
9111 my $Kp = $self->K_previous_nonblank( undef, $rLL_new );
9112 return unless ( defined($Kp) );
9113 my $previous_nonblank_type = $rLL_new->[$Kp]->[_TYPE_];
9114 return unless ( $is_unexpected_equals{$previous_nonblank_type} );
9115 my $previous_nonblank_token = $rLL_new->[$Kp]->[_TOKEN_];
9117 my $previous_nonblank_type_2 = 'b';
9118 my $previous_nonblank_token_2 = EMPTY_STRING;
9119 my $Kpp = $self->K_previous_nonblank( $Kp, $rLL_new );
9120 if ( defined($Kpp) ) {
9121 $previous_nonblank_type_2 = $rLL_new->[$Kpp]->[_TYPE_];
9122 $previous_nonblank_token_2 = $rLL_new->[$Kpp]->[_TOKEN_];
9125 my $next_nonblank_token = EMPTY_STRING;
9127 my $Kmax = @{$rLL} - 1;
9128 if ( $Kn <= $Kmax && $rLL->[$Kn]->[_TYPE_] eq 'b' ) { $Kn += 1 }
9129 if ( $Kn <= $Kmax ) {
9130 $next_nonblank_token = $rLL->[$Kn]->[_TOKEN_];
9133 my $token_0 = $rLL->[$Kfirst]->[_TOKEN_];
9134 my $type_0 = $rLL->[$Kfirst]->[_TYPE_];
9138 # preceded by simple scalar
9139 $previous_nonblank_type_2 eq 'i'
9140 && $previous_nonblank_token_2 =~ /^\$/
9142 # followed by some kind of termination
9143 # (but give complaint if we can not see far enough ahead)
9144 && $next_nonblank_token =~ /^[; \)\}]$/
9146 # scalar is not declared
9147 ## =~ /^(my|our|local)$/
9148 && !( $type_0 eq 'k' && $is_my_our_local{$token_0} )
9151 my $lno = 1 + $rLL_new->[$Kp]->[_LINE_INDEX_];
9152 my $guess = substr( $previous_nonblank_token, 0, 1 ) . '~';
9154 "Line $lno: Note: be sure you want '$previous_nonblank_token' instead of '$guess' here\n"
9158 } ## end sub check_Q
9160 } ## end closure respace_tokens
9162 sub copy_token_as_type {
9164 # This provides a quick way to create a new token by
9165 # slightly modifying an existing token.
9166 my ( $rold_token, $type, $token ) = @_;
9167 if ( !defined($token) ) {
9168 if ( $type eq 'b' ) {
9171 elsif ( $type eq 'q' ) {
9172 $token = EMPTY_STRING;
9174 elsif ( $type eq '->' ) {
9177 elsif ( $type eq ';' ) {
9180 elsif ( $type eq ',' ) {
9185 # Unexpected type ... this sub will work as long as both $token and
9186 # $type are defined, but we should catch any unexpected types during
9190 sub 'copy_token_as_type' received token type '$type' but expects just one of: 'b' 'q' '->' or ';'
9194 # Shouldn't get here
9199 my @rnew_token = @{$rold_token};
9200 $rnew_token[_TYPE_] = $type;
9201 $rnew_token[_TOKEN_] = $token;
9202 $rnew_token[_TYPE_SEQUENCE_] = EMPTY_STRING;
9203 return \@rnew_token;
9204 } ## end sub copy_token_as_type
9207 my ( $self, $KK, $rLL ) = @_;
9209 # return the index K of the next nonblank, non-comment token
9210 return unless ( defined($KK) && $KK >= 0 );
9212 # use the standard array unless given otherwise
9213 $rLL = $self->[_rLL_] unless ( defined($rLL) );
9216 while ( $Knnb < $Num ) {
9217 if ( !defined( $rLL->[$Knnb] ) ) {
9219 # We seem to have encountered a gap in our array.
9220 # This shouldn't happen because sub write_line() pushed
9221 # items into the $rLL array.
9222 Fault("Undefined entry for k=$Knnb") if (DEVEL_MODE);
9225 if ( $rLL->[$Knnb]->[_TYPE_] ne 'b'
9226 && $rLL->[$Knnb]->[_TYPE_] ne '#' )
9233 } ## end sub K_next_code
9235 sub K_next_nonblank {
9236 my ( $self, $KK, $rLL ) = @_;
9238 # return the index K of the next nonblank token, or
9239 # return undef if none
9240 return unless ( defined($KK) && $KK >= 0 );
9242 # The third arg allows this routine to be used on any array. This is
9243 # useful in sub respace_tokens when we are copying tokens from an old $rLL
9244 # to a new $rLL array. But usually the third arg will not be given and we
9245 # will just use the $rLL array in $self.
9246 $rLL = $self->[_rLL_] unless ( defined($rLL) );
9249 return unless ( $Knnb < $Num );
9250 return $Knnb if ( $rLL->[$Knnb]->[_TYPE_] ne 'b' );
9251 return unless ( ++$Knnb < $Num );
9252 return $Knnb if ( $rLL->[$Knnb]->[_TYPE_] ne 'b' );
9254 # Backup loop. Very unlikely to get here; it means we have neighboring
9255 # blanks in the token stream.
9257 while ( $Knnb < $Num ) {
9259 # Safety check, this fault shouldn't happen: The $rLL array is the
9260 # main array of tokens, so all entries should be used. It is
9261 # initialized in sub write_line, and then re-initialized by sub
9262 # store_token() within sub respace_tokens. Tokens are pushed on
9263 # so there shouldn't be any gaps.
9264 if ( !defined( $rLL->[$Knnb] ) ) {
9265 Fault("Undefined entry for k=$Knnb") if (DEVEL_MODE);
9268 if ( $rLL->[$Knnb]->[_TYPE_] ne 'b' ) { return $Knnb }
9272 } ## end sub K_next_nonblank
9274 sub K_previous_code {
9276 # return the index K of the previous nonblank, non-comment token
9277 # Call with $KK=undef to start search at the top of the array
9278 my ( $self, $KK, $rLL ) = @_;
9280 # use the standard array unless given otherwise
9281 $rLL = $self->[_rLL_] unless ( defined($rLL) );
9283 if ( !defined($KK) ) { $KK = $Num }
9284 elsif ( $KK > $Num ) {
9286 # This fault can be caused by a programming error in which a bad $KK is
9287 # given. The caller should make the first call with KK_new=undef to
9290 "Program Bug: K_previous_nonblank_new called with K=$KK which exceeds $Num"
9295 while ( $Kpnb >= 0 ) {
9296 if ( $rLL->[$Kpnb]->[_TYPE_] ne 'b'
9297 && $rLL->[$Kpnb]->[_TYPE_] ne '#' )
9304 } ## end sub K_previous_code
9306 sub K_previous_nonblank {
9308 # return index of previous nonblank token before item K;
9309 # Call with $KK=undef to start search at the top of the array
9310 my ( $self, $KK, $rLL ) = @_;
9312 # use the standard array unless given otherwise
9313 $rLL = $self->[_rLL_] unless ( defined($rLL) );
9315 if ( !defined($KK) ) { $KK = $Num }
9316 elsif ( $KK > $Num ) {
9318 # This fault can be caused by a programming error in which a bad $KK is
9319 # given. The caller should make the first call with KK_new=undef to
9322 "Program Bug: K_previous_nonblank_new called with K=$KK which exceeds $Num"
9327 return unless ( $Kpnb >= 0 );
9328 return $Kpnb if ( $rLL->[$Kpnb]->[_TYPE_] ne 'b' );
9329 return unless ( --$Kpnb >= 0 );
9330 return $Kpnb if ( $rLL->[$Kpnb]->[_TYPE_] ne 'b' );
9332 # Backup loop. We should not get here unless some routine
9333 # slipped repeated blanks into the token stream.
9334 return unless ( --$Kpnb >= 0 );
9335 while ( $Kpnb >= 0 ) {
9336 if ( $rLL->[$Kpnb]->[_TYPE_] ne 'b' ) { return $Kpnb }
9340 } ## end sub K_previous_nonblank
9342 sub parent_seqno_by_K {
9344 # Return the sequence number of the parent container of token K, if any.
9346 my ( $self, $KK ) = @_;
9347 my $rLL = $self->[_rLL_];
9349 # The task is to jump forward to the next container token
9350 # and use the sequence number of either it or its parent.
9352 # For example, consider the following with seqno=5 of the '[' and ']'
9353 # being called with index K of the first token of each line:
9358 # sub { 99 }, 'do {&{%s} for 1,2}', # 5
9359 # '(&{})(&{})', undef, # 5
9360 # [ 2, 2, 0 ], 0 # 5
9363 # NOTE: The ending parent will be SEQ_ROOT for a balanced file. For
9364 # unbalanced files, last sequence number will either be undefined or it may
9365 # be at a deeper level. In either case we will just return SEQ_ROOT to
9366 # have a defined value and allow formatting to proceed.
9367 my $parent_seqno = SEQ_ROOT;
9368 my $type_sequence = $rLL->[$KK]->[_TYPE_SEQUENCE_];
9369 if ($type_sequence) {
9370 $parent_seqno = $self->[_rparent_of_seqno_]->{$type_sequence};
9373 my $Kt = $rLL->[$KK]->[_KNEXT_SEQ_ITEM_];
9374 if ( defined($Kt) ) {
9375 $type_sequence = $rLL->[$Kt]->[_TYPE_SEQUENCE_];
9376 my $type = $rLL->[$Kt]->[_TYPE_];
9378 # if next container token is closing, it is the parent seqno
9379 if ( $is_closing_type{$type} ) {
9380 $parent_seqno = $type_sequence;
9383 # otherwise we want its parent container
9385 $parent_seqno = $self->[_rparent_of_seqno_]->{$type_sequence};
9389 $parent_seqno = SEQ_ROOT unless ( defined($parent_seqno) );
9390 return $parent_seqno;
9391 } ## end sub parent_seqno_by_K
9393 sub is_in_block_by_i {
9394 my ( $self, $i ) = @_;
9397 # token at i is contained in a BLOCK
9398 # or is at root level
9399 # or there is some kind of error (i.e. unbalanced file)
9400 # returns false otherwise
9403 DEVEL_MODE && Fault("Bad call, i='$i'\n");
9407 my $seqno = $parent_seqno_to_go[$i];
9408 return 1 if ( !$seqno || $seqno eq SEQ_ROOT );
9409 return 1 if ( $self->[_rblock_type_of_seqno_]->{$seqno} );
9411 } ## end sub is_in_block_by_i
9413 sub is_in_list_by_i {
9414 my ( $self, $i ) = @_;
9416 # returns true if token at i is contained in a LIST
9417 # returns false otherwise
9418 my $seqno = $parent_seqno_to_go[$i];
9419 return unless ( $seqno && $seqno ne SEQ_ROOT );
9420 if ( $self->[_ris_list_by_seqno_]->{$seqno} ) {
9424 } ## end sub is_in_list_by_i
9428 # Return true if token K is in a list
9429 my ( $self, $KK ) = @_;
9431 my $parent_seqno = $self->parent_seqno_by_K($KK);
9432 return unless defined($parent_seqno);
9433 return $self->[_ris_list_by_seqno_]->{$parent_seqno};
9434 } ## end sub is_list_by_K
9436 sub is_list_by_seqno {
9438 # Return true if the immediate contents of a container appears to be a
9440 my ( $self, $seqno ) = @_;
9441 return unless defined($seqno);
9442 return $self->[_ris_list_by_seqno_]->{$seqno};
9443 } ## end sub is_list_by_seqno
9445 sub resync_lines_and_tokens {
9449 # Re-construct the arrays of tokens associated with the original input
9450 # lines since they have probably changed due to inserting and deleting
9451 # blanks and a few other tokens.
9453 # Return paremeters:
9454 # set severe_error = true if processing needs to terminate
9458 my $rLL = $self->[_rLL_];
9459 my $Klimit = $self->[_Klimit_];
9460 my $rlines = $self->[_rlines_];
9461 my @Krange_code_without_comments;
9462 my @Klast_valign_code;
9464 # This is the next token and its line index:
9466 my $Kmax = defined($Klimit) ? $Klimit : -1;
9468 # Verify that old line indexes are in still order. If this error occurs,
9469 # check locations where sub 'respace_tokens' creates new tokens (like
9470 # blank spaces). It must have set a bad old line index.
9471 if ( DEVEL_MODE && defined($Klimit) ) {
9472 my $iline = $rLL->[0]->[_LINE_INDEX_];
9473 foreach my $KK ( 1 .. $Klimit ) {
9474 my $iline_last = $iline;
9475 $iline = $rLL->[$KK]->[_LINE_INDEX_];
9476 if ( $iline < $iline_last ) {
9478 my $token_m = $rLL->[$KK_m]->[_TOKEN_];
9479 my $token = $rLL->[$KK]->[_TOKEN_];
9480 my $type_m = $rLL->[$KK_m]->[_TYPE_];
9481 my $type = $rLL->[$KK]->[_TYPE_];
9483 Line indexes out of order at index K=$KK:
9484 at KK-1 =$KK_m: old line=$iline_last, type='$type_m', token='$token_m'
9485 at KK =$KK: old line=$iline, type='$type', token='$token',
9492 foreach my $line_of_tokens ( @{$rlines} ) {
9494 my $line_type = $line_of_tokens->{_line_type};
9495 if ( $line_type eq 'CODE' ) {
9497 # Get the old number of tokens on this line
9498 my $rK_range_old = $line_of_tokens->{_rK_range};
9499 my ( $Kfirst_old, $Klast_old ) = @{$rK_range_old};
9501 if ( defined($Kfirst_old) ) {
9502 $Kdiff_old = $Klast_old - $Kfirst_old;
9505 # Find the range of NEW K indexes for the line:
9506 # $Kfirst = index of first token on line
9507 # $Klast = index of last token on line
9508 my ( $Kfirst, $Klast );
9510 my $Knext_beg = $Knext; # this will be $Kfirst if we find tokens
9512 # Optimization: Although the actual K indexes may be completely
9513 # changed after respacing, the number of tokens on any given line
9514 # will often be nearly unchanged. So we will see if we can start
9515 # our search by guessing that the new line has the same number
9516 # of tokens as the old line.
9517 my $Knext_guess = $Knext + $Kdiff_old;
9518 if ( $Knext_guess > $Knext
9519 && $Knext_guess < $Kmax
9520 && $rLL->[$Knext_guess]->[_LINE_INDEX_] <= $iline )
9523 # the guess is good, so we can start our search here
9524 $Knext = $Knext_guess + 1;
9527 while ($Knext <= $Kmax
9528 && $rLL->[$Knext]->[_LINE_INDEX_] <= $iline )
9533 if ( $Knext > $Knext_beg ) {
9535 $Klast = $Knext - 1;
9537 # Delete any terminal blank token
9538 if ( $rLL->[$Klast]->[_TYPE_] eq 'b' ) { $Klast -= 1 }
9540 if ( $Klast < $Knext_beg ) {
9545 $Kfirst = $Knext_beg;
9547 # Save ranges of non-comment code. This will be used by
9548 # sub keep_old_line_breaks.
9549 if ( $rLL->[$Kfirst]->[_TYPE_] ne '#' ) {
9550 push @Krange_code_without_comments, [ $Kfirst, $Klast ];
9553 # Only save ending K indexes of code types which are blank
9554 # or 'VER'. These will be used for a convergence check.
9555 # See related code in sub 'convey_batch_to_vertical_aligner'
9556 my $CODE_type = $line_of_tokens->{_code_type};
9558 || $CODE_type eq 'VER' )
9560 push @Klast_valign_code, $Klast;
9565 # It is only safe to trim the actual line text if the input
9566 # line had a terminal blank token. Otherwise, we may be
9568 if ( $line_of_tokens->{_ended_in_blank_token} ) {
9569 $line_of_tokens->{_line_text} =~ s/\s+$//;
9571 $line_of_tokens->{_rK_range} = [ $Kfirst, $Klast ];
9573 # Deleting semicolons can create new empty code lines
9574 # which should be marked as blank
9575 if ( !defined($Kfirst) ) {
9576 my $CODE_type = $line_of_tokens->{_code_type};
9577 if ( !$CODE_type ) {
9578 $line_of_tokens->{_code_type} = 'BL';
9583 #---------------------------------------------------
9584 # save indexes of all lines with a 'q' at either end
9585 # for later use by sub find_multiline_qw
9586 #---------------------------------------------------
9587 if ( $rLL->[$Kfirst]->[_TYPE_] eq 'q'
9588 || $rLL->[$Klast]->[_TYPE_] eq 'q' )
9590 push @{$rqw_lines}, $iline;
9596 # There shouldn't be any nodes beyond the last one. This routine is
9597 # relinking lines and tokens after the tokens have been respaced. A fault
9598 # here indicates some kind of bug has been introduced into the above loops.
9599 # There is not good way to keep going; we better stop here.
9600 if ( $Knext <= $Kmax ) {
9602 "unexpected tokens at end of file when reconstructing lines");
9604 return ( $severe_error, $rqw_lines );
9606 $self->[_rKrange_code_without_comments_] = \@Krange_code_without_comments;
9608 # Setup the convergence test in the FileWriter based on line-ending indexes
9609 my $file_writer_object = $self->[_file_writer_object_];
9610 $file_writer_object->setup_convergence_test( \@Klast_valign_code );
9612 return ( $severe_error, $rqw_lines );
9614 } ## end sub resync_lines_and_tokens
9616 sub check_for_old_break {
9617 my ( $self, $KK, $rkeep_break_hash, $rbreak_hash ) = @_;
9619 # This sub is called to help implement flags:
9620 # --keep-old-breakpoints-before and --keep-old-breakpoints-after
9622 # $KK = index of a token,
9623 # $rkeep_break_hash = user control for --keep-old-...
9624 # $rbreak_hash = hash of tokens where breaks are requested
9625 # Set $rbreak_hash as follows if a user break is requested:
9626 # = 1 make a hard break (flush the current batch)
9627 # best for something like leading commas (-kbb=',')
9628 # = 2 make a soft break (keep building current batch)
9629 # best for something like leading ->
9631 my $rLL = $self->[_rLL_];
9633 my $seqno = $rLL->[$KK]->[_TYPE_SEQUENCE_];
9635 # non-container tokens use the type as the key
9637 my $type = $rLL->[$KK]->[_TYPE_];
9638 if ( $rkeep_break_hash->{$type} ) {
9639 $rbreak_hash->{$KK} = $is_soft_keep_break_type{$type} ? 2 : 1;
9643 # container tokens use the token as the key
9645 my $token = $rLL->[$KK]->[_TOKEN_];
9646 my $flag = $rkeep_break_hash->{$token};
9649 my $match = $flag eq '1' || $flag eq '*';
9651 # check for special matching codes
9653 if ( $token eq '(' || $token eq ')' ) {
9654 $match = $self->match_paren_control_flag( $seqno, $flag );
9656 elsif ( $token eq '{' || $token eq '}' ) {
9658 # These tentative codes 'b' and 'B' for brace types are
9659 # placeholders for possible future brace types. They
9660 # are not documented and may be changed.
9661 my $block_type = $self->[_rblock_type_of_seqno_]->{$seqno};
9662 if ( $flag eq 'b' ) { $match = $block_type }
9663 elsif ( $flag eq 'B' ) { $match = !$block_type }
9665 # unknown code - no match
9670 my $type = $rLL->[$KK]->[_TYPE_];
9671 $rbreak_hash->{$KK} = $is_soft_keep_break_type{$type} ? 2 : 1;
9676 } ## end sub check_for_old_break
9678 sub keep_old_line_breaks {
9680 # Called once per file to find and mark any old line breaks which
9681 # should be kept. We will be translating the input hashes into
9684 # A flag is set as follows:
9685 # = 1 make a hard break (flush the current batch)
9686 # best for something like leading commas (-kbb=',')
9687 # = 2 make a soft break (keep building current batch)
9688 # best for something like leading ->
9692 my $rLL = $self->[_rLL_];
9693 my $rKrange_code_without_comments =
9694 $self->[_rKrange_code_without_comments_];
9695 my $rbreak_before_Kfirst = $self->[_rbreak_before_Kfirst_];
9696 my $rbreak_after_Klast = $self->[_rbreak_after_Klast_];
9697 my $rbreak_container = $self->[_rbreak_container_];
9699 #----------------------------------------
9700 # Apply --break-at-old-method-breakpoints
9701 #----------------------------------------
9703 # This code moved here from sub break_lists to fix b1120
9704 if ( $rOpts->{'break-at-old-method-breakpoints'} ) {
9705 foreach my $item ( @{$rKrange_code_without_comments} ) {
9706 my ( $Kfirst, $Klast ) = @{$item};
9707 my $type = $rLL->[$Kfirst]->[_TYPE_];
9708 my $token = $rLL->[$Kfirst]->[_TOKEN_];
9710 # leading '->' use a value of 2 which causes a soft
9711 # break rather than a hard break
9712 if ( $type eq '->' ) {
9713 $rbreak_before_Kfirst->{$Kfirst} = 2;
9716 # leading ')->' use a special flag to insure that both
9717 # opening and closing parens get opened
9718 # Fix for b1120: only for parens, not braces
9719 elsif ( $token eq ')' ) {
9720 my $Kn = $self->K_next_nonblank($Kfirst);
9722 unless ( defined($Kn)
9724 && $rLL->[$Kn]->[_TYPE_] eq '->' );
9725 my $seqno = $rLL->[$Kfirst]->[_TYPE_SEQUENCE_];
9726 next unless ($seqno);
9728 # Note: in previous versions there was a fix here to avoid
9729 # instability between conflicting -bom and -pvt or -pvtc flags.
9730 # The fix skipped -bom for a small line difference. But this
9731 # was troublesome, and instead the fix has been moved to
9732 # sub set_vertical_tightness_flags where priority is given to
9733 # the -bom flag over -pvt and -pvtc flags. Both opening and
9734 # closing paren flags are involved because even though -bom only
9735 # requests breaking before the closing paren, automated logic
9736 # opens the opening paren when the closing paren opens.
9737 # Relevant cases are b977, b1215, b1270, b1303
9739 $rbreak_container->{$seqno} = 1;
9744 #---------------------------------------------------------------------
9745 # Apply --keep-old-breakpoints-before and --keep-old-breakpoints-after
9746 #---------------------------------------------------------------------
9748 return unless ( %keep_break_before_type || %keep_break_after_type );
9750 foreach my $item ( @{$rKrange_code_without_comments} ) {
9751 my ( $Kfirst, $Klast ) = @{$item};
9752 $self->check_for_old_break( $Kfirst, \%keep_break_before_type,
9753 $rbreak_before_Kfirst );
9754 $self->check_for_old_break( $Klast, \%keep_break_after_type,
9755 $rbreak_after_Klast );
9758 } ## end sub keep_old_line_breaks
9760 sub weld_containers {
9762 # Called once per file to do any welding operations requested by --weld*
9766 # This count is used to eliminate needless calls for weld checks elsewhere
9767 $total_weld_count = 0;
9769 return if ( $rOpts->{'indent-only'} );
9770 return unless ($rOpts_add_newlines);
9772 # Important: sub 'weld_cuddled_blocks' must be called before
9773 # sub 'weld_nested_containers'. This is because the cuddled option needs to
9774 # use the original _LEVEL_ values of containers, but the weld nested
9775 # containers changes _LEVEL_ of welded containers.
9777 # Here is a good test case to be sure that both cuddling and welding
9778 # are working and not interfering with each other: <<snippets/ce_wn1.in>>
9782 # if ($BOLD_MATH) { (
9783 # $labels, $comment,
9784 # join( '', '<B>', &make_math( $mode, '', '', $_ ), '</B>' )
9786 # &process_math_in_latex( $mode, $math_style, $slevel, "\\mbox{$text}" ),
9790 $self->weld_cuddled_blocks() if ( %{$rcuddled_block_types} );
9792 if ( $rOpts->{'weld-nested-containers'} ) {
9794 $self->weld_nested_containers();
9796 $self->weld_nested_quotes();
9799 #-------------------------------------------------------------
9800 # All welding is done. Finish setting up weld data structures.
9801 #-------------------------------------------------------------
9803 my $rLL = $self->[_rLL_];
9804 my $rK_weld_left = $self->[_rK_weld_left_];
9805 my $rK_weld_right = $self->[_rK_weld_right_];
9806 my $rweld_len_right_at_K = $self->[_rweld_len_right_at_K_];
9809 my @keys = keys %{$rK_weld_right};
9810 $total_weld_count = @keys;
9812 # First pass to process binary welds.
9813 # This loop is processed in unsorted order for efficiency.
9814 foreach my $Kstart (@keys) {
9815 my $Kend = $rK_weld_right->{$Kstart};
9817 # An error here would be due to an incorrect initialization introduced
9818 # in one of the above weld routines, like sub weld_nested.
9819 if ( $Kend <= $Kstart ) {
9820 Fault("Bad weld link: Kend=$Kend <= Kstart=$Kstart\n")
9825 # Set weld values for all tokens this welded pair
9826 foreach ( $Kstart + 1 .. $Kend ) {
9827 $rK_weld_left->{$_} = $Kstart;
9829 foreach my $Kx ( $Kstart .. $Kend - 1 ) {
9830 $rK_weld_right->{$Kx} = $Kend;
9831 $rweld_len_right_at_K->{$Kx} =
9832 $rLL->[$Kend]->[_CUMULATIVE_LENGTH_] -
9833 $rLL->[$Kx]->[_CUMULATIVE_LENGTH_];
9836 # Remember the leftmost index of welds which continue to the right
9837 if ( defined( $rK_weld_right->{$Kend} )
9838 && !defined( $rK_weld_left->{$Kstart} ) )
9840 push @K_multi_weld, $Kstart;
9844 # Second pass to process chains of welds (these are rare).
9845 # This has to be processed in sorted order.
9846 if (@K_multi_weld) {
9848 foreach my $Kstart ( sort { $a <=> $b } @K_multi_weld ) {
9850 # Skip any interior K which was originally missing a left link
9851 next if ( $Kstart <= $Kend );
9853 # Find the end of this chain
9854 $Kend = $rK_weld_right->{$Kstart};
9855 my $Knext = $rK_weld_right->{$Kend};
9856 while ( defined($Knext) ) {
9858 $Knext = $rK_weld_right->{$Kend};
9861 # Set weld values this chain
9862 foreach ( $Kstart + 1 .. $Kend ) {
9863 $rK_weld_left->{$_} = $Kstart;
9865 foreach my $Kx ( $Kstart .. $Kend - 1 ) {
9866 $rK_weld_right->{$Kx} = $Kend;
9867 $rweld_len_right_at_K->{$Kx} =
9868 $rLL->[$Kend]->[_CUMULATIVE_LENGTH_] -
9869 $rLL->[$Kx]->[_CUMULATIVE_LENGTH_];
9875 } ## end sub weld_containers
9877 sub cumulative_length_before_K {
9878 my ( $self, $KK ) = @_;
9879 my $rLL = $self->[_rLL_];
9880 return ( $KK <= 0 ) ? 0 : $rLL->[ $KK - 1 ]->[_CUMULATIVE_LENGTH_];
9883 sub weld_cuddled_blocks {
9886 # Called once per file to handle cuddled formatting
9888 my $rK_weld_left = $self->[_rK_weld_left_];
9889 my $rK_weld_right = $self->[_rK_weld_right_];
9890 my $rblock_type_of_seqno = $self->[_rblock_type_of_seqno_];
9892 # This routine implements the -cb flag by finding the appropriate
9893 # closing and opening block braces and welding them together.
9894 return unless ( %{$rcuddled_block_types} );
9896 my $rLL = $self->[_rLL_];
9897 return unless ( defined($rLL) && @{$rLL} );
9899 my $rbreak_container = $self->[_rbreak_container_];
9900 my $ris_broken_container = $self->[_ris_broken_container_];
9901 my $ris_cuddled_closing_brace = $self->[_ris_cuddled_closing_brace_];
9902 my $K_closing_container = $self->[_K_closing_container_];
9904 # A stack to remember open chains at all levels: This is a hash rather than
9905 # an array for safety because negative levels can occur in files with
9906 # errors. This allows us to keep processing with negative levels.
9907 # $in_chain{$level} = [$chain_type, $type_sequence];
9909 my $CBO = $rOpts->{'cuddled-break-option'};
9911 # loop over structure items to find cuddled pairs
9913 my $KNEXT = $self->[_K_first_seq_item_];
9914 while ( defined($KNEXT) ) {
9916 $KNEXT = $rLL->[$KNEXT]->[_KNEXT_SEQ_ITEM_];
9917 my $rtoken_vars = $rLL->[$KK];
9918 my $type_sequence = $rtoken_vars->[_TYPE_SEQUENCE_];
9919 if ( !$type_sequence ) {
9920 next if ( $KK == 0 ); # first token in file may not be container
9922 # A fault here implies that an error was made in the little loop at
9923 # the bottom of sub 'respace_tokens' which set the values of
9924 # _KNEXT_SEQ_ITEM_. Or an error has been introduced in the
9925 # loop control lines above.
9926 Fault("sequence = $type_sequence not defined at K=$KK")
9931 # NOTE: we must use the original levels here. They can get changed
9932 # by sub 'weld_nested_containers', so this routine must be called
9933 # before sub 'weld_nested_containers'.
9934 my $last_level = $level;
9935 $level = $rtoken_vars->[_LEVEL_];
9937 if ( $level < $last_level ) { $in_chain{$last_level} = undef }
9938 elsif ( $level > $last_level ) { $in_chain{$level} = undef }
9940 # We are only looking at code blocks
9941 my $token = $rtoken_vars->[_TOKEN_];
9942 my $type = $rtoken_vars->[_TYPE_];
9943 next unless ( $type eq $token );
9945 if ( $token eq '{' ) {
9947 my $block_type = $rblock_type_of_seqno->{$type_sequence};
9948 if ( !$block_type ) {
9950 # patch for unrecognized block types which may not be labeled
9951 my $Kp = $self->K_previous_nonblank($KK);
9952 while ( $Kp && $rLL->[$Kp]->[_TYPE_] eq '#' ) {
9953 $Kp = $self->K_previous_nonblank($Kp);
9956 $block_type = $rLL->[$Kp]->[_TOKEN_];
9958 if ( $in_chain{$level} ) {
9960 # we are in a chain and are at an opening block brace.
9961 # See if we are welding this opening brace with the previous
9962 # block brace. Get their identification numbers:
9963 my $closing_seqno = $in_chain{$level}->[1];
9964 my $opening_seqno = $type_sequence;
9966 # The preceding block must be on multiple lines so that its
9967 # closing brace will start a new line.
9968 if ( !$ris_broken_container->{$closing_seqno}
9969 && !$rbreak_container->{$closing_seqno} )
9971 next unless ( $CBO == 2 );
9972 $rbreak_container->{$closing_seqno} = 1;
9975 # We can weld the closing brace to its following word ..
9976 my $Ko = $K_closing_container->{$closing_seqno};
9978 if ( defined($Ko) ) {
9979 $Kon = $self->K_next_nonblank($Ko);
9982 # ..unless it is a comment
9983 if ( defined($Kon) && $rLL->[$Kon]->[_TYPE_] ne '#' ) {
9985 # OK to weld these two tokens...
9986 $rK_weld_right->{$Ko} = $Kon;
9987 $rK_weld_left->{$Kon} = $Ko;
9989 # Set flag that we want to break the next container
9990 # so that the cuddled line is balanced.
9991 $rbreak_container->{$opening_seqno} = 1
9994 # Remember which braces are cuddled.
9995 # The closing brace is used to set adjusted indentations.
9996 # The opening brace is not yet used but might eventually
9997 # be needed in setting adjusted indentation.
9998 $ris_cuddled_closing_brace->{$closing_seqno} = 1;
10005 # We are not in a chain. Start a new chain if we see the
10006 # starting block type.
10007 if ( $rcuddled_block_types->{$block_type} ) {
10008 $in_chain{$level} = [ $block_type, $type_sequence ];
10012 $in_chain{$level} = [ $block_type, $type_sequence ];
10016 elsif ( $token eq '}' ) {
10017 if ( $in_chain{$level} ) {
10019 # We are in a chain at a closing brace. See if this chain
10021 my $Knn = $self->K_next_code($KK);
10024 my $chain_type = $in_chain{$level}->[0];
10025 my $next_nonblank_token = $rLL->[$Knn]->[_TOKEN_];
10027 $rcuddled_block_types->{$chain_type}->{$next_nonblank_token}
10031 # Note that we do not weld yet because we must wait until
10032 # we we are sure that an opening brace for this follows.
10033 $in_chain{$level}->[1] = $type_sequence;
10035 else { $in_chain{$level} = undef }
10040 } ## end sub weld_cuddled_blocks
10042 sub find_nested_pairs {
10045 # This routine is called once per file to do preliminary work needed for
10046 # the --weld-nested option. This information is also needed for adding
10049 my $rLL = $self->[_rLL_];
10050 return unless ( defined($rLL) && @{$rLL} );
10053 my $K_opening_container = $self->[_K_opening_container_];
10054 my $K_closing_container = $self->[_K_closing_container_];
10055 my $rblock_type_of_seqno = $self->[_rblock_type_of_seqno_];
10057 # We define an array of pairs of nested containers
10060 # Names of calling routines can either be marked as 'i' or 'w',
10061 # and they may invoke a sub call with an '->'. We will consider
10062 # any consecutive string of such types as a single unit when making
10063 # weld decisions. We also allow a leading !
10064 my $is_name_type = {
10072 # Loop over all closing container tokens
10073 foreach my $inner_seqno ( keys %{$K_closing_container} ) {
10074 my $K_inner_closing = $K_closing_container->{$inner_seqno};
10076 # See if it is immediately followed by another, outer closing token
10077 my $K_outer_closing = $K_inner_closing + 1;
10078 $K_outer_closing += 1
10079 if ( $K_outer_closing < $Num
10080 && $rLL->[$K_outer_closing]->[_TYPE_] eq 'b' );
10082 next unless ( $K_outer_closing < $Num );
10083 my $outer_seqno = $rLL->[$K_outer_closing]->[_TYPE_SEQUENCE_];
10084 next unless ($outer_seqno);
10085 my $token_outer_closing = $rLL->[$K_outer_closing]->[_TOKEN_];
10086 next unless ( $is_closing_token{$token_outer_closing} );
10088 # Simple filter: No commas or semicolons in the outer container
10089 my $rtype_count = $self->[_rtype_count_by_seqno_]->{$outer_seqno};
10090 if ($rtype_count) {
10091 next if ( $rtype_count->{','} || $rtype_count->{';'} );
10094 # Now we have to check the opening tokens.
10095 my $K_outer_opening = $K_opening_container->{$outer_seqno};
10096 my $K_inner_opening = $K_opening_container->{$inner_seqno};
10097 next unless defined($K_outer_opening) && defined($K_inner_opening);
10099 my $inner_blocktype = $rblock_type_of_seqno->{$inner_seqno};
10100 my $outer_blocktype = $rblock_type_of_seqno->{$outer_seqno};
10102 # Verify that the inner opening token is the next container after the
10103 # outer opening token.
10104 my $K_io_check = $rLL->[$K_outer_opening]->[_KNEXT_SEQ_ITEM_];
10105 next unless defined($K_io_check);
10106 if ( $K_io_check != $K_inner_opening ) {
10108 # The inner opening container does not immediately follow the outer
10109 # opening container, but we may still allow a weld if they are
10110 # separated by a sub signature. For example, we may have something
10111 # like this, where $K_io_check may be at the first 'x' instead of
10112 # 'io'. So we need to hop over the signature and see if we arrive
10117 # $obj->then( sub ( $code ) {
10119 # return $c->render(text => '', status => $code);
10124 next if ( !$inner_blocktype || $inner_blocktype ne 'sub' );
10125 next if $rLL->[$K_io_check]->[_TOKEN_] ne '(';
10126 my $seqno_signature = $rLL->[$K_io_check]->[_TYPE_SEQUENCE_];
10127 next unless defined($seqno_signature);
10128 my $K_signature_closing = $K_closing_container->{$seqno_signature};
10129 next unless defined($K_signature_closing);
10130 my $K_test = $rLL->[$K_signature_closing]->[_KNEXT_SEQ_ITEM_];
10132 unless ( defined($K_test) && $K_test == $K_inner_opening );
10134 # OK, we have arrived at 'io' in the above diagram. We should put
10135 # a limit on the length or complexity of the signature here. There
10136 # is no perfect way to do this, one way is to put a limit on token
10137 # count. For consistency with older versions, we should allow a
10138 # signature with a single variable to weld, but not with
10139 # multiple variables. A single variable as in 'sub ($code) {' can
10140 # have a $Kdiff of 2 to 4, depending on spacing.
10142 # But two variables like 'sub ($v1,$v2) {' can have a diff of 4 to
10143 # 7, depending on spacing. So to keep formatting consistent with
10144 # previous versions, we will also avoid welding if there is a comma
10145 # in the signature.
10147 my $Kdiff = $K_signature_closing - $K_io_check;
10148 next if ( $Kdiff > 4 );
10150 # backup comma count test; but we cannot get here with Kdiff<=4
10151 my $rtc = $self->[_rtype_count_by_seqno_]->{$seqno_signature};
10152 next if ( $rtc && $rtc->{','} );
10155 # Yes .. this is a possible nesting pair.
10156 # They can be separated by a small amount.
10157 my $K_diff = $K_inner_opening - $K_outer_opening;
10159 # Count the number of nonblank characters separating them.
10160 # Note: the $nonblank_count includes the inner opening container
10161 # but not the outer opening container, so it will be >= 1.
10162 if ( $K_diff < 0 ) { next } # Shouldn't happen
10163 my $nonblank_count = 0;
10167 # Here is an example of a long identifier chain which counts as a
10168 # single nonblank here (this spans about 10 K indexes):
10169 # if ( !Boucherot::SetOfConnections->new->handler->execute(
10170 # ^--K_o_o ^--K_i_o
10172 my $Kn_first = $K_outer_opening;
10173 my $Kn_last_nonblank;
10176 foreach my $Kn ( $K_outer_opening + 1 .. $K_inner_opening ) {
10177 next if ( $rLL->[$Kn]->[_TYPE_] eq 'b' );
10178 if ( !$nonblank_count ) { $Kn_first = $Kn }
10179 if ( $Kn eq $K_inner_opening ) { $nonblank_count++; last; }
10180 $Kn_last_nonblank = $Kn;
10182 # skip chain of identifier tokens
10183 my $last_type = $type;
10184 my $last_is_name = $is_name;
10185 $type = $rLL->[$Kn]->[_TYPE_];
10186 if ( $type eq '#' ) { $saw_comment = 1; last }
10187 $is_name = $is_name_type->{$type};
10188 next if ( $is_name && $last_is_name );
10190 # do not count a possible leading - of bareword hash key
10191 next if ( $type eq 'm' && !$last_type );
10194 last if ( $nonblank_count > 2 );
10197 # Do not weld across a comment .. fix for c058.
10198 next if ($saw_comment);
10200 # Patch for b1104: do not weld to a paren preceded by sort/map/grep
10201 # because the special line break rules may cause a blinking state
10202 if ( defined($Kn_last_nonblank)
10203 && $rLL->[$K_inner_opening]->[_TOKEN_] eq '('
10204 && $rLL->[$Kn_last_nonblank]->[_TYPE_] eq 'k' )
10206 my $token = $rLL->[$Kn_last_nonblank]->[_TOKEN_];
10208 # Turn off welding at sort/map/grep (
10209 if ( $is_sort_map_grep{$token} ) { $nonblank_count = 10 }
10212 my $token_oo = $rLL->[$K_outer_opening]->[_TOKEN_];
10216 # 1: adjacent opening containers, like: do {{
10217 $nonblank_count == 1
10219 # 2. anonymous sub + prototype or sig: )->then( sub ($code) {
10220 # ... but it seems best not to stack two structural blocks, like
10222 # sub make_anon_with_my_sub { sub {
10223 # because it probably hides the structure a little too much.
10224 || ( $inner_blocktype
10225 && $inner_blocktype eq 'sub'
10226 && $rLL->[$Kn_first]->[_TOKEN_] eq 'sub'
10227 && !$outer_blocktype )
10229 # 3. short item following opening paren, like: fun( yyy (
10230 || $nonblank_count == 2 && $token_oo eq '('
10232 # 4. weld around fat commas, if requested (git #108), such as
10233 # elf->call_method( method_name_foo => {
10235 && $nonblank_count <= 3
10236 && %weld_fat_comma_rules
10237 && $weld_fat_comma_rules{$token_oo} )
10240 push @nested_pairs,
10241 [ $inner_seqno, $outer_seqno, $K_inner_closing ];
10246 # The weld routine expects the pairs in order in the form
10247 # [$seqno_inner, $seqno_outer]
10248 # And they must be in the same order as the inner closing tokens
10249 # (otherwise, welds of three or more adjacent tokens will not work). The K
10250 # value of this inner closing token has temporarily been stored for
10254 # Drop the K index after sorting (it would cause trouble downstream)
10255 map { [ $_->[0], $_->[1] ] }
10257 # Sort on the K values
10258 sort { $a->[2] <=> $b->[2] } @nested_pairs;
10260 return \@nested_pairs;
10261 } ## end sub find_nested_pairs
10263 sub match_paren_control_flag {
10265 # Decide if this paren is excluded by user request:
10266 # undef matches no parens
10267 # '*' matches all parens
10268 # 'k' matches only if the previous nonblank token is a perl builtin
10269 # keyword (such as 'if', 'while'),
10270 # 'K' matches if 'k' does not, meaning if the previous token is not a
10272 # 'f' matches if the previous token is a function other than a keyword.
10273 # 'F' matches if 'f' does not.
10274 # 'w' matches if either 'k' or 'f' match.
10275 # 'W' matches if 'w' does not.
10276 my ( $self, $seqno, $flag, $rLL ) = @_;
10278 # Input parameters:
10279 # $seqno = sequence number of the container (should be paren)
10280 # $flag = the flag which defines what matches
10281 # $rLL = an optional alternate token list needed for respace operations
10282 $rLL = $self->[_rLL_] unless ( defined($rLL) );
10284 return 0 unless ( defined($flag) );
10285 return 0 if $flag eq '0';
10286 return 1 if $flag eq '1';
10287 return 1 if $flag eq '*';
10288 return 0 unless ($seqno);
10289 my $K_opening = $self->[_K_opening_container_]->{$seqno};
10290 return unless ( defined($K_opening) );
10292 my ( $is_f, $is_k, $is_w );
10293 my $Kp = $self->K_previous_nonblank( $K_opening, $rLL );
10294 if ( defined($Kp) ) {
10295 my $type_p = $rLL->[$Kp]->[_TYPE_];
10298 $is_k = $type_p eq 'k';
10301 $is_f = $self->[_ris_function_call_paren_]->{$seqno};
10303 # either keyword or function call?
10304 $is_w = $is_k || $is_f;
10307 if ( $flag eq 'k' ) { $match = $is_k }
10308 elsif ( $flag eq 'K' ) { $match = !$is_k }
10309 elsif ( $flag eq 'f' ) { $match = $is_f }
10310 elsif ( $flag eq 'F' ) { $match = !$is_f }
10311 elsif ( $flag eq 'w' ) { $match = $is_w }
10312 elsif ( $flag eq 'W' ) { $match = !$is_w }
10314 } ## end sub match_paren_control_flag
10316 sub is_excluded_weld {
10318 # decide if this weld is excluded by user request
10319 my ( $self, $KK, $is_leading ) = @_;
10320 my $rLL = $self->[_rLL_];
10321 my $rtoken_vars = $rLL->[$KK];
10322 my $token = $rtoken_vars->[_TOKEN_];
10323 my $rflags = $weld_nested_exclusion_rules{$token};
10324 return 0 unless ( defined($rflags) );
10325 my $flag = $is_leading ? $rflags->[0] : $rflags->[1];
10326 return 0 unless ( defined($flag) );
10327 return 1 if $flag eq '*';
10328 my $seqno = $rtoken_vars->[_TYPE_SEQUENCE_];
10329 return $self->match_paren_control_flag( $seqno, $flag );
10330 } ## end sub is_excluded_weld
10332 # hashes to simplify welding logic
10333 my %type_ok_after_bareword;
10334 my %has_tight_paren;
10338 # types needed for welding RULE 6
10339 my @q = qw# => -> { ( [ #;
10340 @type_ok_after_bareword{@q} = (1) x scalar(@q);
10342 # these types do not 'like' to be separated from a following paren
10343 @q = qw(w i q Q G C Z U);
10344 @{has_tight_paren}{@q} = (1) x scalar(@q);
10347 use constant DEBUG_WELD => 0;
10349 sub setup_new_weld_measurements {
10351 # Define quantities to check for excess line lengths when welded.
10352 # Called by sub 'weld_nested_containers' and sub 'weld_nested_quotes'
10354 my ( $self, $Kouter_opening, $Kinner_opening ) = @_;
10356 # Given indexes of outer and inner opening containers to be welded:
10357 # $Kouter_opening, $Kinner_opening
10359 # Returns these variables:
10360 # $new_weld_ok = true (new weld ok) or false (do not start new weld)
10361 # $starting_indent = starting indentation
10362 # $starting_lentot = starting cumulative length
10363 # $msg = diagnostic message for debugging
10365 my $rLL = $self->[_rLL_];
10366 my $rlines = $self->[_rlines_];
10368 my $starting_level;
10370 my $starting_lentot;
10371 my $maximum_text_length;
10372 my $msg = EMPTY_STRING;
10374 my $iline_oo = $rLL->[$Kouter_opening]->[_LINE_INDEX_];
10375 my $rK_range = $rlines->[$iline_oo]->{_rK_range};
10376 my ( $Kfirst, $Klast ) = @{$rK_range};
10378 #-------------------------------------------------------------------------
10379 # We now define a reference index, '$Kref', from which to start measuring
10380 # This choice turns out to be critical for keeping welds stable during
10381 # iterations, so we go through a number of STEPS...
10382 #-------------------------------------------------------------------------
10384 # STEP 1: Our starting guess is to use measure from the first token of the
10385 # current line. This is usually a good guess.
10386 my $Kref = $Kfirst;
10388 # STEP 2: See if we should go back a little farther
10389 my $Kprev = $self->K_previous_nonblank($Kfirst);
10390 if ( defined($Kprev) ) {
10392 # Avoid measuring from between an opening paren and a previous token
10393 # which should stay close to it ... fixes b1185
10394 my $token_oo = $rLL->[$Kouter_opening]->[_TOKEN_];
10395 my $type_prev = $rLL->[$Kprev]->[_TYPE_];
10396 if ( $Kouter_opening == $Kfirst
10397 && $token_oo eq '('
10398 && $has_tight_paren{$type_prev} )
10403 # Back up and count length from a token like '=' or '=>' if -lp
10404 # is used (this fixes b520)
10405 # ...or if a break is wanted before there
10406 elsif ($rOpts_line_up_parentheses
10407 || $want_break_before{$type_prev} )
10410 # If there are other sequence items between the start of this line
10411 # and the opening token in question, then do not include tokens on
10412 # the previous line in length calculations. This check added to
10413 # fix case b1174 which had a '?' on the line
10414 my $no_previous_seq_item = $Kref == $Kouter_opening
10415 || $rLL->[$Kref]->[_KNEXT_SEQ_ITEM_] == $Kouter_opening;
10417 if ( $no_previous_seq_item
10418 && substr( $type_prev, 0, 1 ) eq '=' )
10422 # Fix for b1144 and b1112: backup to the first nonblank
10423 # character before the =>, or to the start of its line.
10424 if ( $type_prev eq '=>' ) {
10425 my $iline_prev = $rLL->[$Kprev]->[_LINE_INDEX_];
10426 my $rK_range_prev = $rlines->[$iline_prev]->{_rK_range};
10427 my ( $Kfirst_prev, $Klast_prev ) = @{$rK_range_prev};
10428 foreach my $KK ( reverse( $Kfirst_prev .. $Kref - 1 ) ) {
10429 next if ( $rLL->[$KK]->[_TYPE_] eq 'b' );
10438 # STEP 3: Now look ahead for a ternary and, if found, use it.
10439 # This fixes case b1182.
10440 # Also look for a ')' at the same level and, if found, use it.
10441 # This fixes case b1224.
10442 if ( $Kref < $Kouter_opening ) {
10443 my $Knext = $rLL->[$Kref]->[_KNEXT_SEQ_ITEM_];
10444 my $level_oo = $rLL->[$Kouter_opening]->[_LEVEL_];
10445 while ( $Knext < $Kouter_opening ) {
10446 if ( $rLL->[$Knext]->[_LEVEL_] == $level_oo ) {
10447 if ( $is_ternary{ $rLL->[$Knext]->[_TYPE_] }
10448 || $rLL->[$Knext]->[_TOKEN_] eq ')' )
10454 $Knext = $rLL->[$Knext]->[_KNEXT_SEQ_ITEM_];
10458 # Define the starting measurements we will need
10460 $Kref <= 0 ? 0 : $rLL->[ $Kref - 1 ]->[_CUMULATIVE_LENGTH_];
10461 $starting_level = $rLL->[$Kref]->[_LEVEL_];
10462 $starting_ci = $rLL->[$Kref]->[_CI_LEVEL_];
10464 $maximum_text_length = $maximum_text_length_at_level[$starting_level] -
10465 $starting_ci * $rOpts_continuation_indentation;
10467 # STEP 4: Switch to using the outer opening token as the reference
10468 # point if a line break before it would make a longer line.
10469 # Fixes case b1055 and is also an alternate fix for b1065.
10470 my $starting_level_oo = $rLL->[$Kouter_opening]->[_LEVEL_];
10471 if ( $Kref < $Kouter_opening ) {
10472 my $starting_ci_oo = $rLL->[$Kouter_opening]->[_CI_LEVEL_];
10473 my $lentot_oo = $rLL->[ $Kouter_opening - 1 ]->[_CUMULATIVE_LENGTH_];
10474 my $maximum_text_length_oo =
10475 $maximum_text_length_at_level[$starting_level_oo] -
10476 $starting_ci_oo * $rOpts_continuation_indentation;
10478 # The excess length to any cumulative length K = lenK is either
10479 # $excess = $lenk - ($lentot + $maximum_text_length), or
10480 # $excess = $lenk - ($lentot_oo + $maximum_text_length_oo),
10481 # so the worst case (maximum excess) corresponds to the configuration
10482 # with minimum value of the sum: $lentot + $maximum_text_length
10483 if ( $lentot_oo + $maximum_text_length_oo <
10484 $starting_lentot + $maximum_text_length )
10486 $Kref = $Kouter_opening;
10487 $starting_level = $starting_level_oo;
10488 $starting_ci = $starting_ci_oo;
10489 $starting_lentot = $lentot_oo;
10490 $maximum_text_length = $maximum_text_length_oo;
10494 my $new_weld_ok = 1;
10496 # STEP 5, fix b1020: Avoid problem areas with the -wn -lp combination. The
10497 # combination -wn -lp -dws -naws does not work well and can cause blinkers.
10498 # It will probably only occur in stress testing. For this situation we
10499 # will only start a new weld if we start at a 'good' location.
10500 # - Added 'if' to fix case b1032.
10501 # - Require blank before certain previous characters to fix b1111.
10502 # - Add ';' to fix case b1139
10503 # - Convert from '$ok_to_weld' to '$new_weld_ok' to fix b1162.
10504 # - relaxed constraints for b1227
10505 # - added skip if type is 'q' for b1349 and b1350 b1351 b1352 b1353
10506 # - added skip if type is 'Q' for b1447
10508 && $rOpts_line_up_parentheses
10509 && $rOpts_delete_old_whitespace
10510 && !$rOpts_add_whitespace
10511 && $rLL->[$Kinner_opening]->[_TYPE_] ne 'q'
10512 && $rLL->[$Kinner_opening]->[_TYPE_] ne 'Q'
10513 && defined($Kprev) )
10515 my $type_first = $rLL->[$Kfirst]->[_TYPE_];
10516 my $token_first = $rLL->[$Kfirst]->[_TOKEN_];
10517 my $type_prev = $rLL->[$Kprev]->[_TYPE_];
10519 if ( $Kprev >= 0 ) { $type_pp = $rLL->[ $Kprev - 1 ]->[_TYPE_] }
10521 $type_prev =~ /^[\,\.\;]/
10522 || $type_prev =~ /^[=\{\[\(\L]/
10523 && ( $type_pp eq 'b' || $type_pp eq '}' || $type_first eq 'k' )
10524 || $type_first =~ /^[=\,\.\;\{\[\(\L]/
10525 || $type_first eq '||'
10528 && ( $token_first eq 'if'
10529 || $token_first eq 'or' )
10534 "Skipping weld: poor break with -lp and ci at type_first='$type_first' type_prev='$type_prev' type_pp=$type_pp\n";
10538 return ( $new_weld_ok, $maximum_text_length, $starting_lentot, $msg );
10539 } ## end sub setup_new_weld_measurements
10541 sub excess_line_length_for_Krange {
10542 my ( $self, $Kfirst, $Klast ) = @_;
10544 # returns $excess_length =
10545 # by how many characters a line composed of tokens $Kfirst .. $Klast will
10546 # exceed the allowed line length
10548 my $rLL = $self->[_rLL_];
10549 my $length_before_Kfirst =
10552 : $rLL->[ $Kfirst - 1 ]->[_CUMULATIVE_LENGTH_];
10554 # backup before a side comment if necessary
10556 if ( $rOpts_ignore_side_comment_lengths
10557 && $rLL->[$Klast]->[_TYPE_] eq '#' )
10559 my $Kprev = $self->K_previous_nonblank($Klast);
10560 if ( defined($Kprev) && $Kprev >= $Kfirst ) { $Kend = $Kprev }
10563 # get the length of the text
10564 my $length = $rLL->[$Kend]->[_CUMULATIVE_LENGTH_] - $length_before_Kfirst;
10566 # get the size of the text window
10567 my $level = $rLL->[$Kfirst]->[_LEVEL_];
10568 my $ci_level = $rLL->[$Kfirst]->[_CI_LEVEL_];
10569 my $max_text_length = $maximum_text_length_at_level[$level] -
10570 $ci_level * $rOpts_continuation_indentation;
10572 my $excess_length = $length - $max_text_length;
10576 "Kfirst=$Kfirst, Klast=$Klast, Kend=$Kend, level=$level, ci=$ci_level, max_text_length=$max_text_length, length=$length\n";
10577 return ($excess_length);
10578 } ## end sub excess_line_length_for_Krange
10580 sub weld_nested_containers {
10583 # Called once per file for option '--weld-nested-containers'
10585 my $rK_weld_left = $self->[_rK_weld_left_];
10586 my $rK_weld_right = $self->[_rK_weld_right_];
10588 # This routine implements the -wn flag by "welding together"
10589 # the nested closing and opening tokens which were previously
10590 # identified by sub 'find_nested_pairs'. "welding" simply
10591 # involves setting certain hash values which will be checked
10592 # later during formatting.
10594 my $rLL = $self->[_rLL_];
10595 my $rlines = $self->[_rlines_];
10596 my $K_opening_container = $self->[_K_opening_container_];
10597 my $K_closing_container = $self->[_K_closing_container_];
10598 my $rblock_type_of_seqno = $self->[_rblock_type_of_seqno_];
10599 my $ris_asub_block = $self->[_ris_asub_block_];
10600 my $rmax_vertical_tightness = $self->[_rmax_vertical_tightness_];
10602 my $rOpts_asbl = $rOpts->{'opening-anonymous-sub-brace-on-new-line'};
10604 # Find nested pairs of container tokens for any welding.
10605 my $rnested_pairs = $self->find_nested_pairs();
10607 # Return unless there are nested pairs to weld
10608 return unless defined($rnested_pairs) && @{$rnested_pairs};
10610 # NOTE: It would be nice to apply RULE 5 right here by deleting unwanted
10611 # pairs. But it isn't clear if this is possible because we don't know
10612 # which sequences might actually start a weld.
10614 my $rOpts_break_at_old_method_breakpoints =
10615 $rOpts->{'break-at-old-method-breakpoints'};
10617 # This array will hold the sequence numbers of the tokens to be welded.
10620 # Variables needed for estimating line lengths
10621 my $maximum_text_length; # maximum spaces available for text
10622 my $starting_lentot; # cumulative text to start of current line
10624 my $iline_outer_opening = -1;
10625 my $weld_count_this_start = 0;
10627 # OLD: $single_line_tol added to fix cases b1180 b1181
10628 # = $rOpts_continuation_indentation > $rOpts_indent_columns ? 1 : 0;
10629 # NEW: $single_line_tol=0; fixes b1212 and b1180-1181 work now
10630 my $single_line_tol = 0;
10632 my $multiline_tol = $single_line_tol + 1 +
10633 max( $rOpts_indent_columns, $rOpts_continuation_indentation );
10635 # Define a welding cutoff level: do not start a weld if the inside
10636 # container level equals or exceeds this level.
10638 # We use the minimum of two criteria, either of which may be more
10639 # restrictive. The 'alpha' value is more restrictive in (b1206, b1252) and
10640 # the 'beta' value is more restrictive in other cases (b1243).
10641 # Reduced beta term from beta+3 to beta+2 to fix b1401. Previously:
10642 # my $weld_cutoff_level = min($stress_level_alpha, $stress_level_beta + 2);
10643 # This is now '$high_stress_level'.
10645 # The vertical tightness flags can throw off line length calculations.
10646 # This patch was added to fix instability issue b1284.
10647 # It works to always use a tol of 1 for 1 line block length tests, but
10648 # this restricted value keeps test case wn6.wn working as before.
10649 # It may be necessary to include '[' and '{' here in the future.
10650 my $one_line_tol = $opening_vertical_tightness{'('} ? 1 : 0;
10653 # _oo=outer opening, i.e. first of { {
10654 # _io=inner opening, i.e. second of { {
10655 # _oc=outer closing, i.e. second of } {
10656 # _ic=inner closing, i.e. first of } }
10660 # Main loop over nested pairs...
10661 # We are working from outermost to innermost pairs so that
10662 # level changes will be complete when we arrive at the inner pairs.
10663 while ( my $item = pop( @{$rnested_pairs} ) ) {
10664 my ( $inner_seqno, $outer_seqno ) = @{$item};
10666 my $Kouter_opening = $K_opening_container->{$outer_seqno};
10667 my $Kinner_opening = $K_opening_container->{$inner_seqno};
10668 my $Kouter_closing = $K_closing_container->{$outer_seqno};
10669 my $Kinner_closing = $K_closing_container->{$inner_seqno};
10671 # RULE: do not weld if inner container has <= 3 tokens unless the next
10672 # token is a heredoc (so we know there will be multiple lines)
10673 if ( $Kinner_closing - $Kinner_opening <= 4 ) {
10674 my $Knext_nonblank = $self->K_next_nonblank($Kinner_opening);
10675 next unless defined($Knext_nonblank);
10676 my $type = $rLL->[$Knext_nonblank]->[_TYPE_];
10677 next unless ( $type eq 'h' );
10680 my $outer_opening = $rLL->[$Kouter_opening];
10681 my $inner_opening = $rLL->[$Kinner_opening];
10682 my $outer_closing = $rLL->[$Kouter_closing];
10683 my $inner_closing = $rLL->[$Kinner_closing];
10685 # RULE: do not weld to a hash brace. The reason is that it has a very
10686 # strong bond strength to the next token, so a line break after it
10687 # may not work. Previously we allowed welding to something like @{
10688 # but that caused blinking states (cases b751, b779).
10689 if ( $inner_opening->[_TYPE_] eq 'L' ) {
10693 # RULE: do not weld to a square bracket which does not contain commas
10694 if ( $inner_opening->[_TYPE_] eq '[' ) {
10695 my $rtype_count = $self->[_rtype_count_by_seqno_]->{$inner_seqno};
10696 next unless ( $rtype_count && $rtype_count->{','} );
10698 # Do not weld if there is text before a '[' such as here:
10699 # curr_opt ( @beg [2,5] )
10700 # It will not break into the desired sandwich structure.
10701 # This fixes case b109, 110.
10702 my $Kdiff = $Kinner_opening - $Kouter_opening;
10703 next if ( $Kdiff > 2 );
10706 && $rLL->[ $Kouter_opening + 1 ]->[_TYPE_] ne 'b' );
10710 # RULE: Avoid welding under stress. The idea is that we need to have a
10711 # little space* within a welded container to avoid instability. Note
10712 # that after each weld the level values are reduced, so long multiple
10713 # welds can still be made. This rule will seldom be a limiting factor
10714 # in actual working code. Fixes b1206, b1243.
10715 my $inner_level = $inner_opening->[_LEVEL_];
10716 if ( $inner_level >= $high_stress_level ) { next }
10718 # Set flag saying if this pair starts a new weld
10719 my $starting_new_weld = !( @welds && $outer_seqno == $welds[-1]->[0] );
10721 # Set flag saying if this pair is adjacent to the previous nesting pair
10722 # (even if previous pair was rejected as a weld)
10723 my $touch_previous_pair =
10724 defined($previous_pair) && $outer_seqno == $previous_pair->[0];
10725 $previous_pair = $item;
10727 my $do_not_weld_rule = 0;
10728 my $Msg = EMPTY_STRING;
10729 my $is_one_line_weld;
10731 my $iline_oo = $outer_opening->[_LINE_INDEX_];
10732 my $iline_io = $inner_opening->[_LINE_INDEX_];
10733 my $iline_ic = $inner_closing->[_LINE_INDEX_];
10734 my $iline_oc = $outer_closing->[_LINE_INDEX_];
10735 my $token_oo = $outer_opening->[_TOKEN_];
10736 my $token_io = $inner_opening->[_TOKEN_];
10738 # DO-NOT-WELD RULE 7: Do not weld if this conflicts with -bom
10739 # Added for case b973. Moved here from below to fix b1423.
10740 if ( !$do_not_weld_rule
10741 && $rOpts_break_at_old_method_breakpoints
10742 && $iline_io > $iline_oo )
10745 foreach my $iline ( $iline_oo + 1 .. $iline_io ) {
10746 my $rK_range = $rlines->[$iline]->{_rK_range};
10747 next unless defined($rK_range);
10748 my ( $Kfirst, $Klast ) = @{$rK_range};
10749 next unless defined($Kfirst);
10750 if ( $rLL->[$Kfirst]->[_TYPE_] eq '->' ) {
10751 $do_not_weld_rule = 7;
10756 next if ($do_not_weld_rule);
10758 # Turn off vertical tightness at possible one-line welds. Fixes b1402,
10759 # b1419, b1421, b1424, b1425. This also fixes issues b1338, b1339,
10760 # b1340, b1341, b1342, b1343, which previously used a separate fix.
10761 # Issue c161 is the latest and simplest check, using
10762 # $iline_ic==$iline_io as the test.
10763 if ( %opening_vertical_tightness
10764 && $iline_ic == $iline_io
10765 && $opening_vertical_tightness{$token_oo} )
10767 $rmax_vertical_tightness->{$outer_seqno} = 0;
10770 my $is_multiline_weld =
10771 $iline_oo == $iline_io
10772 && $iline_ic == $iline_oc
10773 && $iline_io != $iline_ic;
10776 my $len_oo = $rLL->[$Kouter_opening]->[_CUMULATIVE_LENGTH_];
10777 my $len_io = $rLL->[$Kinner_opening]->[_CUMULATIVE_LENGTH_];
10779 Pair seqo=$outer_seqno seqi=$inner_seqno lines: loo=$iline_oo lio=$iline_io lic=$iline_ic loc=$iline_oc
10780 Koo=$Kouter_opening Kio=$Kinner_opening Kic=$Kinner_closing Koc=$Kouter_closing lenoo=$len_oo lenio=$len_io
10781 tokens '$token_oo' .. '$token_io'
10785 # DO-NOT-WELD RULE 0:
10786 # Avoid a new paren-paren weld if inner parens are 'sheared' (separated
10787 # by one line). This can produce instabilities (fixes b1250 b1251
10789 if ( !$is_multiline_weld
10790 && $iline_ic == $iline_io + 1
10791 && $token_oo eq '('
10792 && $token_io eq '(' )
10795 $Msg .= "RULE 0: Not welding due to sheared inner parens\n";
10801 # If this pair is not adjacent to the previous pair (skipped or not),
10802 # then measure lengths from the start of line of oo.
10804 !$touch_previous_pair
10806 # Also do this if restarting at a new line; fixes case b965, s001
10807 || ( !$weld_count_this_start && $iline_oo > $iline_outer_opening )
10811 # Remember the line we are using as a reference
10812 $iline_outer_opening = $iline_oo;
10813 $weld_count_this_start = 0;
10815 ( my $new_weld_ok, $maximum_text_length, $starting_lentot, my $msg )
10816 = $self->setup_new_weld_measurements( $Kouter_opening,
10821 && ( $iline_oo != $iline_io
10822 || $iline_ic != $iline_oc )
10825 if (DEBUG_WELD) { print $msg}
10829 my $rK_range = $rlines->[$iline_oo]->{_rK_range};
10830 my ( $Kfirst, $Klast ) = @{$rK_range};
10832 # An existing one-line weld is a line in which
10833 # (1) the containers are all on one line, and
10834 # (2) the line does not exceed the allowable length
10835 if ( $iline_oo == $iline_oc ) {
10837 # All the tokens are on one line, now check their length.
10838 # Start with the full line index range. We will reduce this
10839 # in the coding below in some cases.
10840 my $Kstart = $Kfirst;
10841 my $Kstop = $Klast;
10843 # Note that the following minimal choice for measuring will
10844 # work and will not cause any instabilities because it is
10847 ## my $Kstart = $Kouter_opening;
10848 ## my $Kstop = $Kouter_closing;
10850 # But that can lead to some undesirable welds. So a little
10851 # more complicated method has been developed.
10853 # We are trying to avoid creating bad two-line welds when we are
10854 # working on long, previously un-welded input text, such as
10856 # INPUT (example of a long input line weld candidate):
10857 ## $mutation->transpos( $self->RNA->position($mutation->label, $atg_label));
10859 # GOOD two-line break: (not welded; result marked too long):
10860 ## $mutation->transpos(
10861 ## $self->RNA->position($mutation->label, $atg_label));
10863 # BAD two-line break: (welded; result if we weld):
10864 ## $mutation->transpos($self->RNA->position(
10865 ## $mutation->label, $atg_label));
10867 # We can only get an approximate estimate of the final length,
10868 # since the line breaks may change, and for -lp mode because
10869 # even the indentation is not yet known.
10871 my $level_first = $rLL->[$Kfirst]->[_LEVEL_];
10872 my $level_last = $rLL->[$Klast]->[_LEVEL_];
10873 my $level_oo = $rLL->[$Kouter_opening]->[_LEVEL_];
10874 my $level_oc = $rLL->[$Kouter_closing]->[_LEVEL_];
10876 # - measure to the end of the original line if balanced
10877 # - measure to the closing container if unbalanced (fixes b1230)
10878 #if ( $level_first != $level_last ) { $Kstop = $Kouter_closing }
10879 if ( $level_oc > $level_last ) { $Kstop = $Kouter_closing }
10881 # - measure from the start of the original line if balanced
10882 # - measure from the most previous token with same level
10883 # if unbalanced (b1232)
10884 if ( $Kouter_opening > $Kfirst && $level_oo > $level_first ) {
10885 $Kstart = $Kouter_opening;
10888 my $KK ( reverse( $Kfirst + 1 .. $Kouter_opening - 1 ) )
10890 next if ( $rLL->[$KK]->[_TYPE_] eq 'b' );
10891 last if ( $rLL->[$KK]->[_LEVEL_] < $level_oo );
10897 $self->excess_line_length_for_Krange( $Kstart, $Kstop );
10899 # Coding simplified here for case b1219.
10900 # Increased tol from 0 to 1 when pvt>0 to fix b1284.
10901 $is_one_line_weld = $excess <= $one_line_tol;
10904 # DO-NOT-WELD RULE 1:
10905 # Do not weld something that looks like the start of a two-line
10906 # function call, like this: <<snippets/wn6.in>>
10907 # $trans->add_transformation(
10908 # PDL::Graphics::TriD::Scale->new( $sx, $sy, $sz ) );
10909 # We will look for a semicolon after the closing paren.
10911 # We want to weld something complex, like this though
10912 # my $compass = uc( opposite_direction( line_to_canvas_direction(
10913 # @{ $coords[0] }, @{ $coords[1] } ) ) );
10914 # Otherwise we will get a 'blinker'. For example, the following
10915 # would become a blinker without this rule:
10916 # $Self->_Add( $SortOrderDisplay{ $Field
10917 # ->GenerateFieldForSelectSQL() } );
10918 # But it is okay to weld a two-line statement if it looks like
10919 # it was already welded, meaning that the two opening containers are
10920 # on a different line that the two closing containers. This is
10921 # necessary to prevent blinking of something like this with
10922 # perltidy -wn -pbp (starting indentation two levels deep):
10924 # $top_label->set_text( gettext(
10925 # "Unable to create personal directory - check permissions.") );
10926 if ( $iline_oc == $iline_oo + 1
10927 && $iline_io == $iline_ic
10928 && $token_oo eq '(' )
10931 # Look for following semicolon...
10932 my $Knext_nonblank = $self->K_next_nonblank($Kouter_closing);
10933 my $next_nonblank_type =
10934 defined($Knext_nonblank)
10935 ? $rLL->[$Knext_nonblank]->[_TYPE_]
10937 if ( $next_nonblank_type eq ';' ) {
10939 # Then do not weld if no other containers between inner
10940 # opening and closing.
10941 my $Knext_seq_item = $inner_opening->[_KNEXT_SEQ_ITEM_];
10942 if ( $Knext_seq_item == $Kinner_closing ) {
10943 $do_not_weld_rule = 1;
10947 } ## end starting new weld sequence
10951 # set the 1-line flag if continuing a weld sequence; fixes b1239
10952 $is_one_line_weld = ( $iline_oo == $iline_oc );
10955 # DO-NOT-WELD RULE 2:
10956 # Do not weld an opening paren to an inner one line brace block
10957 # We will just use old line numbers for this test and require
10958 # iterations if necessary for convergence
10960 # For example, otherwise we could cause the opening paren
10961 # in the following example to separate from the caller name
10964 # $_[0]->code_handler
10965 # ( sub { $more .= $_[1] . ":" . $_[0] . "\n" } );
10967 # Here is another example where we do not want to weld:
10968 # $wrapped->add_around_modifier(
10969 # sub { push @tracelog => 'around 1'; $_[0]->(); } );
10971 # If the one line sub block gets broken due to length or by the
10972 # user, then we can weld. The result will then be:
10973 # $wrapped->add_around_modifier( sub {
10974 # push @tracelog => 'around 1';
10978 # Updated to fix cases b1082 b1102 b1106 b1115:
10979 # Also, do not weld to an intact inner block if the outer opening token
10980 # is on a different line. For example, this prevents oscillation
10981 # between these two states in case b1106:
10984 # ($_,[$self->$_(@_[1..$#_])])
10988 # $_, [ $self->$_( @_[ 1 .. $#_ ] ) ]
10991 # The effect of this change on typical code is very minimal. Sometimes
10992 # it may take a second iteration to converge, but this gives protection
10993 # against blinking.
10994 if ( !$do_not_weld_rule
10995 && !$is_one_line_weld
10996 && $iline_ic == $iline_io )
10998 $do_not_weld_rule = 2
10999 if ( $token_oo eq '(' || $iline_oo != $iline_io );
11002 # DO-NOT-WELD RULE 2A:
11003 # Do not weld an opening asub brace in -lp mode if -asbl is set. This
11004 # helps avoid instabilities in one-line block formation, and fixes
11005 # b1241. Previously, the '$is_one_line_weld' flag was tested here
11006 # instead of -asbl, and this fixed most cases. But it turns out that
11007 # the real problem was the -asbl flag, and switching to this was
11008 # necessary to fixe b1268. This also fixes b1269, b1277, b1278.
11009 if ( !$do_not_weld_rule
11010 && $rOpts_line_up_parentheses
11012 && $ris_asub_block->{$outer_seqno} )
11014 $do_not_weld_rule = '2A';
11017 # DO-NOT-WELD RULE 3:
11018 # Do not weld if this makes our line too long.
11019 # Use a tolerance which depends on if the old tokens were welded
11020 # (fixes cases b746 b748 b749 b750 b752 b753 b754 b755 b756 b758 b759)
11021 if ( !$do_not_weld_rule ) {
11023 # Measure to a little beyond the inner opening token if it is
11024 # followed by a bare word, which may have unusual line break rules.
11026 # NOTE: Originally this was OLD RULE 6: do not weld to a container
11027 # which is followed on the same line by an unknown bareword token.
11028 # This can cause blinkers (cases b626, b611). But OK to weld one
11029 # line welds to fix cases b1057 b1064. For generality, OLD RULE 6
11030 # has been merged into RULE 3 here to also fix cases b1078 b1091.
11032 my $K_for_length = $Kinner_opening;
11033 my $Knext_io = $self->K_next_nonblank($Kinner_opening);
11034 next unless ( defined($Knext_io) ); # shouldn't happen
11035 my $type_io_next = $rLL->[$Knext_io]->[_TYPE_];
11037 # Note: may need to eventually also include other types here,
11038 # such as 'Z' and 'Y': if ($type_io_next =~ /^[ZYw]$/) {
11039 if ( $type_io_next eq 'w' ) {
11040 my $Knext_io2 = $self->K_next_nonblank($Knext_io);
11041 next unless ( defined($Knext_io2) );
11042 my $type_io_next2 = $rLL->[$Knext_io2]->[_TYPE_];
11043 if ( !$type_ok_after_bareword{$type_io_next2} ) {
11044 $K_for_length = $Knext_io2;
11048 # Use a tolerance for welds over multiple lines to avoid blinkers.
11049 # We can use zero tolerance if it looks like we are working on an
11052 $is_one_line_weld || $is_multiline_weld
11056 # By how many characters does this exceed the text window?
11058 $self->cumulative_length_before_K($K_for_length) -
11059 $starting_lentot + 1 + $tol -
11060 $maximum_text_length;
11062 # Old patch: Use '>=0' instead of '> 0' here to fix cases b995 b998
11063 # b1000 b1001 b1007 b1008 b1009 b1010 b1011 b1012 b1016 b1017 b1018
11064 # Revised patch: New tolerance definition allows going back to '> 0'
11065 # here. This fixes case b1124. See also cases b1087 and b1087a.
11066 if ( $excess > 0 ) { $do_not_weld_rule = 3 }
11070 "RULE 3 test: excess length to K=$Kinner_opening is $excess > 0 with tol= $tol ?) \n";
11074 # DO-NOT-WELD RULE 4; implemented for git#10:
11075 # Do not weld an opening -ce brace if the next container is on a single
11076 # line, different from the opening brace. (This is very rare). For
11077 # example, given the following with -ce, we will avoid joining the {
11081 # [ $_, length($_) ]
11084 # because this would produce a terminal one-line block:
11086 # } else { [ $_, length($_) ] }
11088 # which may not be what is desired. But given this input:
11090 # } else { [ $_, length($_) ] }
11092 # then we will do the weld and retain the one-line block
11093 if ( !$do_not_weld_rule && $rOpts->{'cuddled-else'} ) {
11094 my $block_type = $rblock_type_of_seqno->{$outer_seqno};
11095 if ( $block_type && $rcuddled_block_types->{'*'}->{$block_type} ) {
11096 my $io_line = $inner_opening->[_LINE_INDEX_];
11097 my $ic_line = $inner_closing->[_LINE_INDEX_];
11098 my $oo_line = $outer_opening->[_LINE_INDEX_];
11099 if ( $oo_line < $io_line && $ic_line == $io_line ) {
11100 $do_not_weld_rule = 4;
11105 # DO-NOT-WELD RULE 5: do not include welds excluded by user
11108 && %weld_nested_exclusion_rules
11109 && ( $self->is_excluded_weld( $Kouter_opening, $starting_new_weld )
11110 || $self->is_excluded_weld( $Kinner_opening, 0 ) )
11113 $do_not_weld_rule = 5;
11116 # DO-NOT-WELD RULE 6: This has been merged into RULE 3 above.
11118 if ($do_not_weld_rule) {
11120 # After neglecting a pair, we start measuring from start of point
11121 # io ... but not if previous type does not like to be separated
11122 # from its container (fixes case b1184)
11123 my $Kprev = $self->K_previous_nonblank($Kinner_opening);
11124 my $type_prev = defined($Kprev) ? $rLL->[$Kprev]->[_TYPE_] : 'w';
11125 if ( !$has_tight_paren{$type_prev} ) {
11126 my $starting_level = $inner_opening->[_LEVEL_];
11127 my $starting_ci_level = $inner_opening->[_CI_LEVEL_];
11129 $self->cumulative_length_before_K($Kinner_opening);
11130 $maximum_text_length =
11131 $maximum_text_length_at_level[$starting_level] -
11132 $starting_ci_level * $rOpts_continuation_indentation;
11136 $Msg .= "Not welding due to RULE $do_not_weld_rule\n";
11140 # Normally, a broken pair should not decrease indentation of
11141 # intermediate tokens:
11142 ## if ( $last_pair_broken ) { next }
11143 # However, for long strings of welded tokens, such as '{{{{{{...'
11144 # we will allow broken pairs to also remove indentation.
11145 # This will keep very long strings of opening and closing
11146 # braces from marching off to the right. We will do this if the
11147 # number of tokens in a weld before the broken weld is 4 or more.
11148 # This rule will mainly be needed for test scripts, since typical
11149 # welds have fewer than about 4 welded tokens.
11150 if ( !@welds || @{ $welds[-1] } < 4 ) { next }
11153 # otherwise start new weld ...
11154 elsif ($starting_new_weld) {
11155 $weld_count_this_start++;
11157 $Msg .= "Starting new weld\n";
11160 push @welds, $item;
11162 $rK_weld_right->{$Kouter_opening} = $Kinner_opening;
11163 $rK_weld_left->{$Kinner_opening} = $Kouter_opening;
11165 $rK_weld_right->{$Kinner_closing} = $Kouter_closing;
11166 $rK_weld_left->{$Kouter_closing} = $Kinner_closing;
11169 # ... or extend current weld
11171 $weld_count_this_start++;
11173 $Msg .= "Extending current weld\n";
11176 unshift @{ $welds[-1] }, $inner_seqno;
11177 $rK_weld_right->{$Kouter_opening} = $Kinner_opening;
11178 $rK_weld_left->{$Kinner_opening} = $Kouter_opening;
11180 $rK_weld_right->{$Kinner_closing} = $Kouter_closing;
11181 $rK_weld_left->{$Kouter_closing} = $Kinner_closing;
11183 # Keep a broken container broken at multiple welds. This might
11184 # also be useful for simple welds, but for now it is restricted
11185 # to multiple welds to minimize changes to existing coding. This
11186 # fixes b1429, b1430. Updated for issue c198: but allow a
11187 # line differences of 1 (simple shear) so that a simple shear
11188 # can remain or become a single line.
11189 if ( $iline_ic - $iline_io > 1 ) {
11191 # Only set this break if it is the last possible weld in this
11192 # chain. This will keep some extreme test cases unchanged.
11193 my $is_chain_end = !@{$rnested_pairs}
11194 || $rnested_pairs->[-1]->[1] != $inner_seqno;
11195 if ($is_chain_end) {
11196 $self->[_rbreak_container_]->{$inner_seqno} = 1;
11201 # After welding, reduce the indentation level if all intermediate tokens
11202 my $dlevel = $outer_opening->[_LEVEL_] - $inner_opening->[_LEVEL_];
11203 if ( $dlevel != 0 ) {
11204 my $Kstart = $Kinner_opening;
11205 my $Kstop = $Kinner_closing;
11206 foreach my $KK ( $Kstart .. $Kstop ) {
11207 $rLL->[$KK]->[_LEVEL_] += $dlevel;
11210 # Copy opening ci level to help break at = for -lp mode (case b1124)
11211 $rLL->[$Kinner_opening]->[_CI_LEVEL_] =
11212 $rLL->[$Kouter_opening]->[_CI_LEVEL_];
11214 # But do not copy the closing ci level ... it can give poor results
11215 ## $rLL->[$Kinner_closing]->[_CI_LEVEL_] =
11216 ## $rLL->[$Kouter_closing]->[_CI_LEVEL_];
11221 } ## end sub weld_nested_containers
11223 sub weld_nested_quotes {
11225 # Called once per file for option '--weld-nested-containers'. This
11226 # does welding on qw quotes.
11230 # See if quotes are excluded from welding
11231 my $rflags = $weld_nested_exclusion_rules{'q'};
11232 return if ( defined($rflags) && defined( $rflags->[1] ) );
11234 my $rK_weld_left = $self->[_rK_weld_left_];
11235 my $rK_weld_right = $self->[_rK_weld_right_];
11237 my $rLL = $self->[_rLL_];
11238 return unless ( defined($rLL) && @{$rLL} );
11241 my $K_opening_container = $self->[_K_opening_container_];
11242 my $K_closing_container = $self->[_K_closing_container_];
11243 my $rlines = $self->[_rlines_];
11245 my $starting_lentot;
11246 my $maximum_text_length;
11248 my $is_single_quote = sub {
11249 my ( $Kbeg, $Kend, $quote_type ) = @_;
11250 foreach my $K ( $Kbeg .. $Kend ) {
11251 my $test_type = $rLL->[$K]->[_TYPE_];
11252 next if ( $test_type eq 'b' );
11253 return if ( $test_type ne $quote_type );
11258 # Length tolerance - same as previously used for sub weld_nested
11259 my $multiline_tol =
11260 1 + max( $rOpts_indent_columns, $rOpts_continuation_indentation );
11262 # look for single qw quotes nested in containers
11263 my $KNEXT = $self->[_K_first_seq_item_];
11264 while ( defined($KNEXT) ) {
11266 $KNEXT = $rLL->[$KNEXT]->[_KNEXT_SEQ_ITEM_];
11267 my $rtoken_vars = $rLL->[$KK];
11268 my $outer_seqno = $rtoken_vars->[_TYPE_SEQUENCE_];
11269 if ( !$outer_seqno ) {
11270 next if ( $KK == 0 ); # first token in file may not be container
11272 # A fault here implies that an error was made in the little loop at
11273 # the bottom of sub 'respace_tokens' which set the values of
11274 # _KNEXT_SEQ_ITEM_. Or an error has been introduced in the
11275 # loop control lines above.
11276 Fault("sequence = $outer_seqno not defined at K=$KK")
11281 my $token = $rtoken_vars->[_TOKEN_];
11282 if ( $is_opening_token{$token} ) {
11284 # see if the next token is a quote of some type
11287 if ( $Kn < $Num && $rLL->[$Kn]->[_TYPE_] eq 'b' );
11288 next unless ( $Kn < $Num );
11290 my $next_token = $rLL->[$Kn]->[_TOKEN_];
11291 my $next_type = $rLL->[$Kn]->[_TYPE_];
11293 unless ( ( $next_type eq 'q' || $next_type eq 'Q' )
11294 && substr( $next_token, 0, 1 ) eq 'q' );
11296 # The token before the closing container must also be a quote
11297 my $Kouter_closing = $K_closing_container->{$outer_seqno};
11298 my $Kinner_closing = $self->K_previous_nonblank($Kouter_closing);
11299 next unless $rLL->[$Kinner_closing]->[_TYPE_] eq $next_type;
11301 # This is an inner opening container
11302 my $Kinner_opening = $Kn;
11304 # Do not weld to single-line quotes. Nothing is gained, and it may
11306 next if ( $Kinner_closing == $Kinner_opening );
11308 # Only weld to quotes delimited with container tokens. This is
11309 # because welding to arbitrary quote delimiters can produce code
11310 # which is less readable than without welding.
11311 my $closing_delimiter =
11312 substr( $rLL->[$Kinner_closing]->[_TOKEN_], -1, 1 );
11314 unless ( $is_closing_token{$closing_delimiter}
11315 || $closing_delimiter eq '>' );
11317 # Now make sure that there is just a single quote in the container
11320 $is_single_quote->(
11321 $Kinner_opening + 1,
11322 $Kinner_closing - 1,
11327 # OK: This is a candidate for welding
11328 my $Msg = EMPTY_STRING;
11331 my $Kouter_opening = $K_opening_container->{$outer_seqno};
11332 my $iline_oo = $rLL->[$Kouter_opening]->[_LINE_INDEX_];
11333 my $iline_io = $rLL->[$Kinner_opening]->[_LINE_INDEX_];
11334 my $iline_oc = $rLL->[$Kouter_closing]->[_LINE_INDEX_];
11335 my $iline_ic = $rLL->[$Kinner_closing]->[_LINE_INDEX_];
11337 ( $iline_oo == $iline_io && $iline_ic == $iline_oc );
11339 # Fix for case b1189. If quote is marked as type 'Q' then only weld
11340 # if the two closing tokens are on the same input line. Otherwise,
11341 # the closing line will be output earlier in the pipeline than
11342 # other CODE lines and welding will not actually occur. This will
11343 # leave a half-welded structure with potential formatting
11344 # instability. This might be fixed by adding a check for a weld on
11345 # a closing Q token and sending it down the normal channel, but it
11346 # would complicate the code and is potentially risky.
11349 && $next_type eq 'Q'
11350 && $iline_ic != $iline_oc );
11352 # If welded, the line must not exceed allowed line length
11353 ( my $ok_to_weld, $maximum_text_length, $starting_lentot, my $msg )
11354 = $self->setup_new_weld_measurements( $Kouter_opening,
11356 if ( !$ok_to_weld ) {
11357 if (DEBUG_WELD) { print $msg}
11362 $rLL->[$Kinner_opening]->[_CUMULATIVE_LENGTH_] - $starting_lentot;
11363 my $excess = $length + $multiline_tol - $maximum_text_length;
11365 my $excess_max = ( $is_old_weld ? $multiline_tol : 0 );
11366 if ( $excess >= $excess_max ) {
11371 if ( !$is_old_weld ) { $is_old_weld = EMPTY_STRING }
11373 "excess=$excess>=$excess_max, multiline_tol=$multiline_tol, is_old_weld='$is_old_weld'\n";
11376 # Check weld exclusion rules for outer container
11377 if ( !$do_not_weld ) {
11378 my $is_leading = !defined( $rK_weld_left->{$Kouter_opening} );
11379 if ( $self->is_excluded_weld( $KK, $is_leading ) ) {
11382 "No qw weld due to weld exclusion rules for outer container\n";
11388 # Check the length of the last line (fixes case b1039)
11389 if ( !$do_not_weld ) {
11390 my $rK_range_ic = $rlines->[$iline_ic]->{_rK_range};
11391 my ( $Kfirst_ic, $Klast_ic ) = @{$rK_range_ic};
11393 $self->excess_line_length_for_Krange( $Kfirst_ic,
11396 # Allow extra space for additional welded closing container(s)
11397 # and a space and comma or semicolon.
11398 # NOTE: weld len has not been computed yet. Use 2 spaces
11399 # for now, correct for a single weld. This estimate could
11400 # be made more accurate if necessary.
11402 defined( $rK_weld_right->{$Kouter_closing} ) ? 2 : 0;
11403 if ( $excess_ic + $weld_len + 2 > 0 ) {
11406 "No qw weld due to excess ending line length=$excess_ic + $weld_len + 2 > 0\n";
11412 if ($do_not_weld) {
11414 $Msg .= "Not Welding QW\n";
11422 $Msg .= "Welding QW\n";
11426 $rK_weld_right->{$Kouter_opening} = $Kinner_opening;
11427 $rK_weld_left->{$Kinner_opening} = $Kouter_opening;
11429 $rK_weld_right->{$Kinner_closing} = $Kouter_closing;
11430 $rK_weld_left->{$Kouter_closing} = $Kinner_closing;
11432 # Undo one indentation level if an extra level was added to this
11435 $self->[_rstarting_multiline_qw_seqno_by_K_]->{$Kinner_opening};
11437 && $self->[_rmultiline_qw_has_extra_level_]->{$qw_seqno} )
11439 foreach my $K ( $Kinner_opening + 1 .. $Kinner_closing - 1 ) {
11440 $rLL->[$K]->[_LEVEL_] -= 1;
11442 $rLL->[$Kinner_opening]->[_CI_LEVEL_] = 0;
11443 $rLL->[$Kinner_closing]->[_CI_LEVEL_] = 0;
11446 # undo CI for other welded quotes
11449 foreach my $K ( $Kinner_opening .. $Kinner_closing ) {
11450 $rLL->[$K]->[_CI_LEVEL_] = 0;
11454 # Change the level of a closing qw token to be that of the outer
11455 # containing token. This will allow -lp indentation to function
11456 # correctly in the vertical aligner.
11457 # Patch to fix c002: but not if it contains text
11458 if ( length( $rLL->[$Kinner_closing]->[_TOKEN_] ) == 1 ) {
11459 $rLL->[$Kinner_closing]->[_LEVEL_] =
11460 $rLL->[$Kouter_closing]->[_LEVEL_];
11465 } ## end sub weld_nested_quotes
11467 sub is_welded_at_seqno {
11469 my ( $self, $seqno ) = @_;
11471 # given a sequence number:
11472 # return true if it is welded either left or right
11473 # return false otherwise
11474 return unless ( $total_weld_count && defined($seqno) );
11475 my $KK_o = $self->[_K_opening_container_]->{$seqno};
11476 return unless defined($KK_o);
11477 return defined( $self->[_rK_weld_left_]->{$KK_o} )
11478 || defined( $self->[_rK_weld_right_]->{$KK_o} );
11479 } ## end sub is_welded_at_seqno
11481 sub mark_short_nested_blocks {
11483 # This routine looks at the entire file and marks any short nested blocks
11484 # which should not be broken. The results are stored in the hash
11485 # $rshort_nested->{$type_sequence}
11486 # which will be true if the container should remain intact.
11488 # For example, consider the following line:
11490 # sub cxt_two { sort { $a <=> $b } test_if_list() }
11492 # The 'sort' block is short and nested within an outer sub block.
11493 # Normally, the existence of the 'sort' block will force the sub block to
11494 # break open, but this is not always desirable. Here we will set a flag for
11495 # the sort block to prevent this. To give the user control, we will
11496 # follow the input file formatting. If either of the blocks is broken in
11497 # the input file then we will allow it to remain broken. Otherwise we will
11498 # set a flag to keep it together in later formatting steps.
11500 # The flag which is set here will be checked in two places:
11501 # 'sub process_line_of_CODE' and 'sub starting_one_line_block'
11504 return if $rOpts->{'indent-only'};
11506 my $rLL = $self->[_rLL_];
11507 return unless ( defined($rLL) && @{$rLL} );
11509 return unless ( $rOpts->{'one-line-block-nesting'} );
11511 my $K_opening_container = $self->[_K_opening_container_];
11512 my $K_closing_container = $self->[_K_closing_container_];
11513 my $rbreak_container = $self->[_rbreak_container_];
11514 my $ris_broken_container = $self->[_ris_broken_container_];
11515 my $rshort_nested = $self->[_rshort_nested_];
11516 my $rblock_type_of_seqno = $self->[_rblock_type_of_seqno_];
11518 # Variables needed for estimating line lengths
11519 my $maximum_text_length;
11520 my $starting_lentot;
11521 my $length_tol = 1;
11523 my $excess_length_to_K = sub {
11526 # Estimate the length from the line start to a given token
11527 my $length = $self->cumulative_length_before_K($K) - $starting_lentot;
11528 my $excess_length = $length + $length_tol - $maximum_text_length;
11529 return ($excess_length);
11532 # loop over all containers
11533 my @open_block_stack;
11535 my $KNEXT = $self->[_K_first_seq_item_];
11536 while ( defined($KNEXT) ) {
11538 $KNEXT = $rLL->[$KNEXT]->[_KNEXT_SEQ_ITEM_];
11539 my $rtoken_vars = $rLL->[$KK];
11540 my $type_sequence = $rtoken_vars->[_TYPE_SEQUENCE_];
11541 if ( !$type_sequence ) {
11542 next if ( $KK == 0 ); # first token in file may not be container
11544 # A fault here implies that an error was made in the little loop at
11545 # the bottom of sub 'respace_tokens' which set the values of
11546 # _KNEXT_SEQ_ITEM_. Or an error has been introduced in the
11547 # loop control lines above.
11548 Fault("sequence = $type_sequence not defined at K=$KK")
11553 # Patch: do not mark short blocks with welds.
11554 # In some cases blinkers can form (case b690).
11555 if ( $total_weld_count && $self->is_welded_at_seqno($type_sequence) ) {
11559 # We are just looking at code blocks
11560 my $token = $rtoken_vars->[_TOKEN_];
11561 my $type = $rtoken_vars->[_TYPE_];
11562 next unless ( $type eq $token );
11563 next unless ( $rblock_type_of_seqno->{$type_sequence} );
11565 # Keep a stack of all acceptable block braces seen.
11566 # Only consider blocks entirely on one line so dump the stack when line
11568 my $iline_last = $iline;
11569 $iline = $rLL->[$KK]->[_LINE_INDEX_];
11570 if ( $iline != $iline_last ) { @open_block_stack = () }
11572 if ( $token eq '}' ) {
11573 if (@open_block_stack) { pop @open_block_stack }
11575 next unless ( $token eq '{' );
11577 # block must be balanced (bad scripts may be unbalanced)
11578 my $K_opening = $K_opening_container->{$type_sequence};
11579 my $K_closing = $K_closing_container->{$type_sequence};
11580 next unless ( defined($K_opening) && defined($K_closing) );
11582 # require that this block be entirely on one line
11584 if ( $ris_broken_container->{$type_sequence}
11585 || $rbreak_container->{$type_sequence} );
11587 # See if this block fits on one line of allowed length (which may
11588 # be different from the input script)
11590 $KK <= 0 ? 0 : $rLL->[ $KK - 1 ]->[_CUMULATIVE_LENGTH_];
11591 my $level = $rLL->[$KK]->[_LEVEL_];
11592 my $ci_level = $rLL->[$KK]->[_CI_LEVEL_];
11593 $maximum_text_length =
11594 $maximum_text_length_at_level[$level] -
11595 $ci_level * $rOpts_continuation_indentation;
11597 # Dump the stack if block is too long and skip this block
11598 if ( $excess_length_to_K->($K_closing) > 0 ) {
11599 @open_block_stack = ();
11603 # OK, Block passes tests, remember it
11604 push @open_block_stack, $type_sequence;
11606 # We are only marking nested code blocks,
11607 # so check for a previous block on the stack
11608 next unless ( @open_block_stack > 1 );
11610 # Looks OK, mark this as a short nested block
11611 $rshort_nested->{$type_sequence} = 1;
11615 } ## end sub mark_short_nested_blocks
11617 sub special_indentation_adjustments {
11621 # Called once per file to do special indentation adjustments.
11622 # These routines adjust levels either by changing _CI_LEVEL_ directly or
11623 # by setting modified levels in the array $self->[_radjusted_levels_].
11625 # Initialize the adjusted levels. These will be the levels actually used
11626 # for computing indentation.
11628 # NOTE: This routine is called after the weld routines, which may have
11629 # already adjusted _LEVEL_, so we are making adjustments on top of those
11630 # levels. It would be much nicer to have the weld routines also use this
11631 # adjustment, but that gets complicated when we combine -gnu -wn and have
11632 # some welded quotes.
11633 my $Klimit = $self->[_Klimit_];
11634 my $rLL = $self->[_rLL_];
11635 my $radjusted_levels = $self->[_radjusted_levels_];
11637 return unless ( defined($Klimit) );
11639 foreach my $KK ( 0 .. $Klimit ) {
11640 $radjusted_levels->[$KK] = $rLL->[$KK]->[_LEVEL_];
11643 # First set adjusted levels for any non-indenting braces.
11644 $self->do_non_indenting_braces();
11646 # Adjust breaks and indentation list containers
11647 $self->break_before_list_opening_containers();
11649 # Set adjusted levels for the whitespace cycle option.
11650 $self->whitespace_cycle_adjustment();
11652 $self->braces_left_setup();
11654 # Adjust continuation indentation if -bli is set
11655 $self->bli_adjustment();
11657 $self->extended_ci()
11658 if ($rOpts_extended_continuation_indentation);
11660 # Now clip any adjusted levels to be non-negative
11661 $self->clip_adjusted_levels();
11664 } ## end sub special_indentation_adjustments
11666 sub clip_adjusted_levels {
11668 # Replace any negative adjusted levels with zero.
11669 # Negative levels can occur in files with brace errors.
11671 my $radjusted_levels = $self->[_radjusted_levels_];
11672 return unless defined($radjusted_levels) && @{$radjusted_levels};
11673 my $min = min( @{$radjusted_levels} ); # fast check for min
11676 # slow loop, but rarely needed
11677 foreach ( @{$radjusted_levels} ) { $_ = 0 if ( $_ < 0 ) }
11680 } ## end sub clip_adjusted_levels
11682 sub do_non_indenting_braces {
11684 # Called once per file to handle the --non-indenting-braces parameter.
11685 # Remove indentation within marked braces if requested
11688 # Any non-indenting braces have been found by sub find_non_indenting_braces
11689 # and are defined by the following hash:
11690 my $rseqno_non_indenting_brace_by_ix =
11691 $self->[_rseqno_non_indenting_brace_by_ix_];
11692 return unless ( %{$rseqno_non_indenting_brace_by_ix} );
11694 my $rlines = $self->[_rlines_];
11695 my $K_opening_container = $self->[_K_opening_container_];
11696 my $K_closing_container = $self->[_K_closing_container_];
11697 my $rspecial_side_comment_type = $self->[_rspecial_side_comment_type_];
11698 my $radjusted_levels = $self->[_radjusted_levels_];
11700 # First locate all of the marked blocks
11702 foreach my $ix ( keys %{$rseqno_non_indenting_brace_by_ix} ) {
11703 my $seqno = $rseqno_non_indenting_brace_by_ix->{$ix};
11704 my $KK = $K_opening_container->{$seqno};
11705 my $line_of_tokens = $rlines->[$ix];
11706 my $rK_range = $line_of_tokens->{_rK_range};
11707 my ( $Kfirst, $Klast ) = @{$rK_range};
11708 $rspecial_side_comment_type->{$Klast} = 'NIB';
11709 push @K_stack, [ $KK, 1 ];
11710 my $Kc = $K_closing_container->{$seqno};
11711 push @K_stack, [ $Kc, -1 ] if ( defined($Kc) );
11713 return unless (@K_stack);
11714 @K_stack = sort { $a->[0] <=> $b->[0] } @K_stack;
11716 # Then loop to remove indentation within marked blocks
11719 foreach my $item (@K_stack) {
11720 my ( $KK, $inc ) = @{$item};
11721 if ( $ndeep > 0 ) {
11723 foreach ( $KK_last + 1 .. $KK ) {
11724 $radjusted_levels->[$_] -= $ndeep;
11727 # We just subtracted the old $ndeep value, which only applies to a
11728 # '{'. The new $ndeep applies to a '}', so we undo the error.
11729 if ( $inc < 0 ) { $radjusted_levels->[$KK] += 1 }
11736 } ## end sub do_non_indenting_braces
11738 sub whitespace_cycle_adjustment {
11742 # Called once per file to implement the --whitespace-cycle option
11743 my $rLL = $self->[_rLL_];
11744 return unless ( defined($rLL) && @{$rLL} );
11745 my $radjusted_levels = $self->[_radjusted_levels_];
11746 my $maximum_level = $self->[_maximum_level_];
11748 if ( $rOpts_whitespace_cycle
11749 && $rOpts_whitespace_cycle > 0
11750 && $rOpts_whitespace_cycle < $maximum_level )
11753 my $Kmax = @{$rLL} - 1;
11755 my $whitespace_last_level = -1;
11756 my @whitespace_level_stack = ();
11757 my $last_nonblank_type = 'b';
11758 my $last_nonblank_token = EMPTY_STRING;
11759 foreach my $KK ( 0 .. $Kmax ) {
11760 my $level_abs = $radjusted_levels->[$KK];
11761 my $level = $level_abs;
11762 if ( $level_abs < $whitespace_last_level ) {
11763 pop(@whitespace_level_stack);
11765 if ( !@whitespace_level_stack ) {
11766 push @whitespace_level_stack, $level_abs;
11768 elsif ( $level_abs > $whitespace_last_level ) {
11769 $level = $whitespace_level_stack[-1] +
11770 ( $level_abs - $whitespace_last_level );
11773 # 1 Try to break at a block brace
11775 $level > $rOpts_whitespace_cycle
11776 && $last_nonblank_type eq '{'
11777 && $last_nonblank_token eq '{'
11780 # 2 Then either a brace or bracket
11781 || ( $level > $rOpts_whitespace_cycle + 1
11782 && $last_nonblank_token =~ /^[\{\[]$/ )
11784 # 3 Then a paren too
11785 || $level > $rOpts_whitespace_cycle + 2
11790 push @whitespace_level_stack, $level;
11792 $level = $whitespace_level_stack[-1];
11793 $radjusted_levels->[$KK] = $level;
11795 $whitespace_last_level = $level_abs;
11796 my $type = $rLL->[$KK]->[_TYPE_];
11797 my $token = $rLL->[$KK]->[_TOKEN_];
11798 if ( $type ne 'b' ) {
11799 $last_nonblank_type = $type;
11800 $last_nonblank_token = $token;
11805 } ## end sub whitespace_cycle_adjustment
11807 use constant DEBUG_BBX => 0;
11809 sub break_before_list_opening_containers {
11813 # This routine is called once per batch to implement parameters
11814 # --break-before-hash-brace=n and similar -bbx=n flags
11815 # and their associated indentation flags:
11816 # --break-before-hash-brace-and-indent and similar -bbxi=n
11818 # Nothing to do if none of the -bbx=n parameters has been set
11819 return unless %break_before_container_types;
11821 my $rLL = $self->[_rLL_];
11822 return unless ( defined($rLL) && @{$rLL} );
11824 # Loop over all opening container tokens
11825 my $K_opening_container = $self->[_K_opening_container_];
11826 my $K_closing_container = $self->[_K_closing_container_];
11827 my $ris_broken_container = $self->[_ris_broken_container_];
11828 my $ris_permanently_broken = $self->[_ris_permanently_broken_];
11829 my $rhas_list = $self->[_rhas_list_];
11830 my $rhas_broken_list_with_lec = $self->[_rhas_broken_list_with_lec_];
11831 my $radjusted_levels = $self->[_radjusted_levels_];
11832 my $rparent_of_seqno = $self->[_rparent_of_seqno_];
11833 my $rlines = $self->[_rlines_];
11834 my $rtype_count_by_seqno = $self->[_rtype_count_by_seqno_];
11835 my $rlec_count_by_seqno = $self->[_rlec_count_by_seqno_];
11836 my $rno_xci_by_seqno = $self->[_rno_xci_by_seqno_];
11837 my $rK_weld_right = $self->[_rK_weld_right_];
11838 my $rblock_type_of_seqno = $self->[_rblock_type_of_seqno_];
11841 max( 1, $rOpts_continuation_indentation, $rOpts_indent_columns );
11842 if ($rOpts_ignore_old_breakpoints) {
11844 # Patch suggested by b1231; the old tol was excessive.
11845 ## $length_tol += $rOpts_maximum_line_length;
11849 my $rbreak_before_container_by_seqno = {};
11850 my $rwant_reduced_ci = {};
11851 foreach my $seqno ( keys %{$K_opening_container} ) {
11853 #----------------------------------------------------------------
11854 # Part 1: Examine any -bbx=n flags
11855 #----------------------------------------------------------------
11857 next if ( $rblock_type_of_seqno->{$seqno} );
11858 my $KK = $K_opening_container->{$seqno};
11860 # This must be a list or contain a list.
11861 # Note1: switched from 'has_broken_list' to 'has_list' to fix b1024.
11862 # Note2: 'has_list' holds the depth to the sub-list. We will require
11863 # a depth of just 1
11864 my $is_list = $self->is_list_by_seqno($seqno);
11865 my $has_list = $rhas_list->{$seqno};
11867 # Fix for b1173: if welded opening container, use flag of innermost
11868 # seqno. Otherwise, the restriction $has_list==1 prevents triple and
11869 # higher welds from following the -BBX parameters.
11870 if ($total_weld_count) {
11871 my $KK_test = $rK_weld_right->{$KK};
11872 if ( defined($KK_test) ) {
11873 my $seqno_inner = $rLL->[$KK_test]->[_TYPE_SEQUENCE_];
11874 $is_list ||= $self->is_list_by_seqno($seqno_inner);
11875 $has_list = $rhas_list->{$seqno_inner};
11879 next unless ( $is_list || $has_list && $has_list == 1 );
11881 my $has_list_with_lec = $rhas_broken_list_with_lec->{$seqno};
11883 # Only for types of container tokens with a non-default break option
11884 my $token = $rLL->[$KK]->[_TOKEN_];
11885 my $break_option = $break_before_container_types{$token};
11886 next unless ($break_option);
11888 # Do not use -bbx under stress for stability ... fixes b1300
11889 # TODO: review this; do we also need to look at stress_level_lalpha?
11890 my $level = $rLL->[$KK]->[_LEVEL_];
11891 if ( $level >= $stress_level_beta ) {
11894 "BBX: Switching off at $seqno: level=$level exceeds beta stress level=$stress_level_beta\n";
11898 # Require previous nonblank to be '=' or '=>'
11899 my $Kprev = $KK - 1;
11900 next if ( $Kprev < 0 );
11901 my $prev_type = $rLL->[$Kprev]->[_TYPE_];
11902 if ( $prev_type eq 'b' ) {
11904 next if ( $Kprev < 0 );
11905 $prev_type = $rLL->[$Kprev]->[_TYPE_];
11907 next unless ( $is_equal_or_fat_comma{$prev_type} );
11909 my $ci = $rLL->[$KK]->[_CI_LEVEL_];
11911 #--------------------------------------------
11912 # New coding for option 2 (break if complex).
11913 #--------------------------------------------
11914 # This new coding uses clues which are invariant under formatting to
11915 # decide if a list is complex. For now it is only applied when -lp
11916 # and -vmll are used, but eventually it may become the standard method.
11917 # Fixes b1274, b1275, and others, including b1099.
11918 if ( $break_option == 2 ) {
11920 if ( $rOpts_line_up_parentheses
11921 || $rOpts_variable_maximum_line_length )
11924 # Start with the basic definition of a complex list...
11925 my $is_complex = $is_list && $has_list;
11927 # and it is also complex if the parent is a list
11928 if ( !$is_complex ) {
11929 my $parent = $rparent_of_seqno->{$seqno};
11930 if ( $self->is_list_by_seqno($parent) ) {
11935 # finally, we will call it complex if there are inner opening
11936 # and closing container tokens, not parens, within the outer
11937 # container tokens.
11938 if ( !$is_complex ) {
11939 my $Kp = $self->K_next_nonblank($KK);
11940 my $token_p = defined($Kp) ? $rLL->[$Kp]->[_TOKEN_] : 'b';
11941 if ( $is_opening_token{$token_p} && $token_p ne '(' ) {
11943 my $Kc = $K_closing_container->{$seqno};
11944 my $Km = $self->K_previous_nonblank($Kc);
11946 defined($Km) ? $rLL->[$Km]->[_TOKEN_] : 'b';
11948 # ignore any optional ending comma
11949 if ( $token_m eq ',' ) {
11950 $Km = $self->K_previous_nonblank($Km);
11952 defined($Km) ? $rLL->[$Km]->[_TOKEN_] : 'b';
11956 $is_closing_token{$token_m} && $token_m ne ')';
11960 # Convert to option 3 (always break) if complex
11961 next unless ($is_complex);
11966 # Fix for b1231: the has_list_with_lec does not cover all cases.
11967 # A broken container containing a list and with line-ending commas
11968 # will stay broken, so can be treated as if it had a list with lec.
11969 $has_list_with_lec ||=
11971 && $ris_broken_container->{$seqno}
11972 && $rlec_count_by_seqno->{$seqno};
11976 "BBX: Looking at seqno=$seqno, token = $token with option=$break_option\n";
11978 # -bbx=1 = stable, try to follow input
11979 if ( $break_option == 1 ) {
11981 my $iline = $rLL->[$KK]->[_LINE_INDEX_];
11982 my $rK_range = $rlines->[$iline]->{_rK_range};
11983 my ( $Kfirst, $Klast ) = @{$rK_range};
11984 next unless ( $KK == $Kfirst );
11987 # -bbx=2 => apply this style only for a 'complex' list
11988 elsif ( $break_option == 2 ) {
11990 # break if this list contains a broken list with line-ending comma
11992 my $Msg = EMPTY_STRING;
11993 if ($has_list_with_lec) {
11995 DEBUG_BBX && do { $Msg = "has list with lec;" };
11998 if ( !$ok_to_break ) {
12000 # Turn off -xci if -bbx=2 and this container has a sublist but
12001 # not a broken sublist. This avoids creating blinkers. The
12002 # problem is that -xci can cause one-line lists to break open,
12003 # and thereby creating formatting instability.
12004 # This fixes cases b1033 b1036 b1037 b1038 b1042 b1043 b1044
12005 # b1045 b1046 b1047 b1051 b1052 b1061.
12006 if ($has_list) { $rno_xci_by_seqno->{$seqno} = 1 }
12008 my $parent = $rparent_of_seqno->{$seqno};
12009 if ( $self->is_list_by_seqno($parent) ) {
12010 DEBUG_BBX && do { $Msg = "parent is list" };
12015 if ( !$ok_to_break ) {
12017 && print STDOUT "Not breaking at seqno=$seqno: $Msg\n";
12022 && print STDOUT "OK to break at seqno=$seqno: $Msg\n";
12024 # Patch: turn off -xci if -bbx=2 and -lp
12025 # This fixes cases b1090 b1095 b1101 b1116 b1118 b1121 b1122
12026 $rno_xci_by_seqno->{$seqno} = 1 if ($rOpts_line_up_parentheses);
12029 # -bbx=3 = always break
12030 elsif ( $break_option == 3 ) {
12035 # Shouldn't happen! Bad flag, but make behavior same as 3
12040 # Set a flag for actual implementation later in
12041 # sub insert_breaks_before_list_opening_containers
12042 $rbreak_before_container_by_seqno->{$seqno} = 1;
12044 && print STDOUT "BBX: ok to break at seqno=$seqno\n";
12046 # -bbxi=0: Nothing more to do if the ci value remains unchanged
12047 my $ci_flag = $container_indentation_options{$token};
12048 next unless ($ci_flag);
12050 # -bbxi=1: This option removes ci and is handled in
12051 # later sub get_final_indentation
12052 if ( $ci_flag == 1 ) {
12053 $rwant_reduced_ci->{$seqno} = 1;
12057 # -bbxi=2: This option changes the level ...
12058 # This option can conflict with -xci in some cases. We can turn off
12059 # -xci for this container to avoid blinking. For now, only do this if
12060 # -vmll is set. ( fixes b1335, b1336 )
12061 if ($rOpts_variable_maximum_line_length) {
12062 $rno_xci_by_seqno->{$seqno} = 1;
12065 #----------------------------------------------------------------
12066 # Part 2: Perform tests before committing to changing ci and level
12067 #----------------------------------------------------------------
12069 # Before changing the ci level of the opening container, we need
12070 # to be sure that the container will be broken in the later stages of
12071 # formatting. We have to do this because we are working early in the
12072 # formatting pipeline. A problem can occur if we change the ci or
12073 # level of the opening token but do not actually break the container
12074 # open as expected. In most cases it wouldn't make any difference if
12075 # we changed ci or not, but there are some edge cases where this
12076 # can cause blinking states, so we need to try to only change ci if
12077 # the container will really be broken.
12079 # Only consider containers already broken
12080 next if ( !$ris_broken_container->{$seqno} );
12082 # Patch to fix issue b1305: the combination of -naws and ci>i appears
12083 # to cause an instability. It should almost never occur in practice.
12085 if (!$rOpts_add_whitespace
12086 && $rOpts_continuation_indentation > $rOpts_indent_columns );
12088 # Always ok to change ci for permanently broken containers
12089 if ( $ris_permanently_broken->{$seqno} ) { }
12091 # Always OK if this list contains a broken sub-container with
12092 # a non-terminal line-ending comma
12093 elsif ($has_list_with_lec) { }
12095 # Otherwise, we are considering a single container...
12098 # A single container must have at least 1 line-ending comma:
12099 next unless ( $rlec_count_by_seqno->{$seqno} );
12103 # Since it has a line-ending comma, it will stay broken if the
12105 if ($rOpts_break_at_old_comma_breakpoints) { $OK = 1 }
12107 # OK if the container contains multiple fat commas
12108 # Better: multiple lines with fat commas
12109 if ( !$OK && !$rOpts_ignore_old_breakpoints ) {
12110 my $rtype_count = $rtype_count_by_seqno->{$seqno};
12111 next unless ($rtype_count);
12112 my $fat_comma_count = $rtype_count->{'=>'};
12114 && print STDOUT "BBX: fat comma count=$fat_comma_count\n";
12115 if ( $fat_comma_count && $fat_comma_count >= 2 ) { $OK = 1 }
12118 # The last check we can make is to see if this container could
12119 # fit on a single line. Use the least possible indentation
12120 # estimate, ci=0, so we are not subtracting $ci *
12121 # $rOpts_continuation_indentation from tabulated
12122 # $maximum_text_length value.
12124 my $maximum_text_length = $maximum_text_length_at_level[$level];
12125 my $K_closing = $K_closing_container->{$seqno};
12126 my $length = $self->cumulative_length_before_K($K_closing) -
12127 $self->cumulative_length_before_K($KK);
12128 my $excess_length = $length - $maximum_text_length;
12131 "BBX: excess=$excess_length: maximum_text_length=$maximum_text_length, length=$length, ci=$ci\n";
12133 # OK if the net container definitely breaks on length
12134 if ( $excess_length > $length_tol ) {
12137 && print STDOUT "BBX: excess_length=$excess_length\n";
12140 # Otherwise skip it
12145 #------------------------------------------------------------
12146 # Part 3: Looks OK: apply -bbx=n and any related -bbxi=n flag
12147 #------------------------------------------------------------
12149 DEBUG_BBX && print STDOUT "BBX: OK to break\n";
12157 # n=0 default indentation (usually one ci)
12158 # n=1 outdent one ci
12159 # n=2 indent one level (minus one ci)
12160 # n=3 indent one extra ci [This may be dropped]
12162 # NOTE: We are adjusting indentation of the opening container. The
12163 # closing container will normally follow the indentation of the opening
12164 # container automatically, so this is not currently done.
12167 # option 1: outdent
12168 if ( $ci_flag == 1 ) {
12172 # option 2: indent one level
12173 elsif ( $ci_flag == 2 ) {
12175 $radjusted_levels->[$KK] += 1;
12180 # Shouldn't happen - leave ci unchanged
12183 $rLL->[$KK]->[_CI_LEVEL_] = $ci if ( $ci >= 0 );
12186 $self->[_rbreak_before_container_by_seqno_] =
12187 $rbreak_before_container_by_seqno;
12188 $self->[_rwant_reduced_ci_] = $rwant_reduced_ci;
12190 } ## end sub break_before_list_opening_containers
12192 use constant DEBUG_XCI => 0;
12196 # This routine implements the -xci (--extended-continuation-indentation)
12197 # flag. We add CI to interior tokens of a container which itself has CI but
12198 # only if a token does not already have CI.
12200 # To do this, we will locate opening tokens which themselves have
12201 # continuation indentation (CI). We track them with their sequence
12202 # numbers. These sequence numbers are called 'controlling sequence
12203 # numbers'. They apply continuation indentation to the tokens that they
12204 # contain. These inner tokens remember their controlling sequence numbers.
12205 # Later, when these inner tokens are output, they have to see if the output
12206 # lines with their controlling tokens were output with CI or not. If not,
12207 # then they must remove their CI too.
12209 # The controlling CI concept works hierarchically. But CI itself is not
12210 # hierarchical; it is either on or off. There are some rare instances where
12211 # it would be best to have hierarchical CI too, but not enough to be worth
12212 # the programming effort.
12214 # The operations to remove unwanted CI are done in sub 'undo_ci'.
12218 my $rLL = $self->[_rLL_];
12219 return unless ( defined($rLL) && @{$rLL} );
12221 my $ris_list_by_seqno = $self->[_ris_list_by_seqno_];
12222 my $ris_seqno_controlling_ci = $self->[_ris_seqno_controlling_ci_];
12223 my $rseqno_controlling_my_ci = $self->[_rseqno_controlling_my_ci_];
12224 my $rno_xci_by_seqno = $self->[_rno_xci_by_seqno_];
12225 my $ris_bli_container = $self->[_ris_bli_container_];
12226 my $rblock_type_of_seqno = $self->[_rblock_type_of_seqno_];
12228 my %available_space;
12230 # Loop over all opening container tokens
12231 my $K_opening_container = $self->[_K_opening_container_];
12232 my $K_closing_container = $self->[_K_closing_container_];
12236 my $KNEXT = $self->[_K_first_seq_item_];
12238 # The following variable can be used to allow a little extra space to
12239 # avoid blinkers. A value $len_tol = 20 fixed the following
12240 # fixes cases: b1025 b1026 b1027 b1028 b1029 b1030 but NOT b1031.
12241 # It turned out that the real problem was mis-parsing a list brace as
12242 # a code block in a 'use' statement when the line length was extremely
12243 # small. A value of 0 works now, but a slightly larger value can
12244 # be used to minimize the chance of a blinker.
12247 while ( defined($KNEXT) ) {
12249 # Fix all tokens up to the next sequence item if we are changing CI
12252 my $is_list = $ris_list_by_seqno->{$seqno_top};
12253 my $space = $available_space{$seqno_top};
12255 foreach my $Kt ( $KLAST + 1 .. $KNEXT - 1 ) {
12257 next if ( $rLL->[$Kt]->[_CI_LEVEL_] );
12259 # But do not include tokens which might exceed the line length
12260 # and are not in a list.
12261 # ... This fixes case b1031
12263 || $rLL->[$Kt]->[_TOKEN_LENGTH_] < $space
12264 || $rLL->[$Kt]->[_TYPE_] eq '#' )
12266 $rLL->[$Kt]->[_CI_LEVEL_] = 1;
12267 $rseqno_controlling_my_ci->{$Kt} = $seqno_top;
12271 $ris_seqno_controlling_ci->{$seqno_top} += $count;
12276 $KNEXT = $rLL->[$KNEXT]->[_KNEXT_SEQ_ITEM_];
12278 my $seqno = $rLL->[$KK]->[_TYPE_SEQUENCE_];
12280 # see if we have reached the end of the current controlling container
12281 if ( $seqno_top && $seqno == $seqno_top ) {
12282 $seqno_top = pop @seqno_stack;
12285 # Patch to fix some block types...
12286 # Certain block types arrive from the tokenizer without CI but should
12287 # have it for this option. These include anonymous subs and
12288 # do sort map grep eval
12289 my $block_type = $rblock_type_of_seqno->{$seqno};
12290 if ( $block_type && $is_block_with_ci{$block_type} ) {
12291 $rLL->[$KK]->[_CI_LEVEL_] = 1;
12293 $rseqno_controlling_my_ci->{$KK} = $seqno_top;
12294 $ris_seqno_controlling_ci->{$seqno_top}++;
12298 # If this does not have ci, update ci if necessary and continue looking
12299 elsif ( !$rLL->[$KK]->[_CI_LEVEL_] ) {
12301 $rLL->[$KK]->[_CI_LEVEL_] = 1;
12302 $rseqno_controlling_my_ci->{$KK} = $seqno_top;
12303 $ris_seqno_controlling_ci->{$seqno_top}++;
12308 # We are looking for opening container tokens with ci
12309 my $K_opening = $K_opening_container->{$seqno};
12310 next unless ( defined($K_opening) && $KK == $K_opening );
12312 # Make sure there is a corresponding closing container
12313 # (could be missing if the script has a brace error)
12314 my $K_closing = $K_closing_container->{$seqno};
12315 next unless defined($K_closing);
12317 # Skip if requested by -bbx to avoid blinkers
12318 next if ( $rno_xci_by_seqno->{$seqno} );
12320 # Skip if this is a -bli container (this fixes case b1065) Note: case
12321 # b1065 is also fixed by the update for b1055, so this update is not
12322 # essential now. But there does not seem to be a good reason to add
12323 # xci and bli together, so the update is retained.
12324 next if ( $ris_bli_container->{$seqno} );
12326 # Require different input lines. This will filter out a large number
12327 # of small hash braces and array brackets. If we accidentally filter
12328 # out an important container, it will get fixed on the next pass.
12330 $rLL->[$K_opening]->[_LINE_INDEX_] ==
12331 $rLL->[$K_closing]->[_LINE_INDEX_]
12332 && ( $rLL->[$K_closing]->[_CUMULATIVE_LENGTH_] -
12333 $rLL->[$K_opening]->[_CUMULATIVE_LENGTH_] >
12334 $rOpts_maximum_line_length )
12338 && print "XCI: Skipping seqno=$seqno, require different lines\n";
12342 # Do not apply -xci if adding extra ci will put the container contents
12343 # beyond the line length limit (fixes cases b899 b935)
12344 my $level = $rLL->[$K_opening]->[_LEVEL_];
12345 my $ci_level = $rLL->[$K_opening]->[_CI_LEVEL_];
12346 my $maximum_text_length =
12347 $maximum_text_length_at_level[$level] -
12348 $ci_level * $rOpts_continuation_indentation;
12350 # Fix for b1197 b1198 b1199 b1200 b1201 b1202
12351 # Do not apply -xci if we are running out of space
12352 # TODO: review this; do we also need to look at stress_level_alpha?
12353 if ( $level >= $stress_level_beta ) {
12356 "XCI: Skipping seqno=$seqno, level=$level exceeds stress level=$stress_level_beta\n";
12360 # remember how much space is available for patch b1031 above
12362 $maximum_text_length - $len_tol - $rOpts_continuation_indentation;
12364 if ( $space < 0 ) {
12365 DEBUG_XCI && print "XCI: Skipping seqno=$seqno, space=$space\n";
12368 DEBUG_XCI && print "XCI: OK seqno=$seqno, space=$space\n";
12370 $available_space{$seqno} = $space;
12372 # This becomes the next controlling container
12373 push @seqno_stack, $seqno_top if ($seqno_top);
12374 $seqno_top = $seqno;
12377 } ## end sub extended_ci
12379 sub braces_left_setup {
12381 # Called once per file to mark all -bl, -sbl, and -asbl containers
12384 my $rOpts_bl = $rOpts->{'opening-brace-on-new-line'};
12385 my $rOpts_sbl = $rOpts->{'opening-sub-brace-on-new-line'};
12386 my $rOpts_asbl = $rOpts->{'opening-anonymous-sub-brace-on-new-line'};
12387 return unless ( $rOpts_bl || $rOpts_sbl || $rOpts_asbl );
12389 my $rLL = $self->[_rLL_];
12390 return unless ( defined($rLL) && @{$rLL} );
12392 # We will turn on this hash for braces controlled by these flags:
12393 my $rbrace_left = $self->[_rbrace_left_];
12395 my $rblock_type_of_seqno = $self->[_rblock_type_of_seqno_];
12396 my $ris_asub_block = $self->[_ris_asub_block_];
12397 my $ris_sub_block = $self->[_ris_sub_block_];
12398 foreach my $seqno ( keys %{$rblock_type_of_seqno} ) {
12400 my $block_type = $rblock_type_of_seqno->{$seqno};
12402 # use -asbl flag for an anonymous sub block
12403 if ( $ris_asub_block->{$seqno} ) {
12405 $rbrace_left->{$seqno} = 1;
12409 # use -sbl flag for a named sub
12410 elsif ( $ris_sub_block->{$seqno} ) {
12412 $rbrace_left->{$seqno} = 1;
12416 # use -bl flag if not a sub block of any type
12419 && $block_type =~ /$bl_pattern/
12420 && $block_type !~ /$bl_exclusion_pattern/ )
12422 $rbrace_left->{$seqno} = 1;
12427 } ## end sub braces_left_setup
12429 sub bli_adjustment {
12431 # Called once per file to implement the --brace-left-and-indent option.
12432 # If -bli is set, adds one continuation indentation for certain braces
12434 return unless ( $rOpts->{'brace-left-and-indent'} );
12435 my $rLL = $self->[_rLL_];
12436 return unless ( defined($rLL) && @{$rLL} );
12438 my $rblock_type_of_seqno = $self->[_rblock_type_of_seqno_];
12439 my $ris_bli_container = $self->[_ris_bli_container_];
12440 my $rbrace_left = $self->[_rbrace_left_];
12441 my $K_opening_container = $self->[_K_opening_container_];
12442 my $K_closing_container = $self->[_K_closing_container_];
12444 foreach my $seqno ( keys %{$rblock_type_of_seqno} ) {
12445 my $block_type = $rblock_type_of_seqno->{$seqno};
12447 && $block_type =~ /$bli_pattern/
12448 && $block_type !~ /$bli_exclusion_pattern/ )
12450 $ris_bli_container->{$seqno} = 1;
12451 $rbrace_left->{$seqno} = 1;
12452 my $Ko = $K_opening_container->{$seqno};
12453 my $Kc = $K_closing_container->{$seqno};
12454 if ( defined($Ko) && defined($Kc) ) {
12455 $rLL->[$Kc]->[_CI_LEVEL_] = ++$rLL->[$Ko]->[_CI_LEVEL_];
12460 } ## end sub bli_adjustment
12462 sub find_multiline_qw {
12464 my ( $self, $rqw_lines ) = @_;
12466 # Multiline qw quotes are not sequenced items like containers { [ (
12467 # but behave in some respects in a similar way. So this routine finds them
12468 # and creates a separate sequence number system for later use.
12470 # This is straightforward because they always begin at the end of one line
12471 # and end at the beginning of a later line. This is true no matter how we
12472 # finally make our line breaks, so we can find them before deciding on new
12476 # if $rqw_lines is defined it is a ref to array of all line index numbers
12477 # for which there is a type 'q' qw quote at either end of the line. This
12478 # was defined by sub resync_lines_and_tokens for efficiency.
12481 my $rlines = $self->[_rlines_];
12483 # if $rqw_lines is not defined (this will occur with -io option) then we
12484 # will have to scan all lines.
12485 if ( !defined($rqw_lines) ) {
12486 $rqw_lines = [ 0 .. @{$rlines} - 1 ];
12489 # if $rqw_lines is defined but empty, just return because there are no
12492 if ( !@{$rqw_lines} ) { return }
12495 my $rstarting_multiline_qw_seqno_by_K = {};
12496 my $rending_multiline_qw_seqno_by_K = {};
12497 my $rKrange_multiline_qw_by_seqno = {};
12498 my $rmultiline_qw_has_extra_level = {};
12500 my $ris_excluded_lp_container = $self->[_ris_excluded_lp_container_];
12502 my $rLL = $self->[_rLL_];
12504 my $num_qw_seqno = 0;
12505 my $K_start_multiline_qw;
12507 # For reference, here is the old loop, before $rqw_lines became available:
12508 ## foreach my $line_of_tokens ( @{$rlines} ) {
12509 foreach my $iline ( @{$rqw_lines} ) {
12510 my $line_of_tokens = $rlines->[$iline];
12512 # Note that these first checks are required in case we have to scan
12513 # all lines, not just lines with type 'q' at the ends.
12514 my $line_type = $line_of_tokens->{_line_type};
12515 next unless ( $line_type eq 'CODE' );
12516 my $rK_range = $line_of_tokens->{_rK_range};
12517 my ( $Kfirst, $Klast ) = @{$rK_range};
12518 next unless ( defined($Kfirst) && defined($Klast) ); # skip blank line
12520 # Continuing a sequence of qw lines ...
12521 if ( defined($K_start_multiline_qw) ) {
12522 my $type = $rLL->[$Kfirst]->[_TYPE_];
12525 if ( $type ne 'q' ) {
12526 DEVEL_MODE && print STDERR <<EOM;
12527 STRANGE: started multiline qw at K=$K_start_multiline_qw but didn't see q qw at K=$Kfirst\n";
12529 $K_start_multiline_qw = undef;
12532 my $Kprev = $self->K_previous_nonblank($Kfirst);
12533 my $Knext = $self->K_next_nonblank($Kfirst);
12534 my $type_m = defined($Kprev) ? $rLL->[$Kprev]->[_TYPE_] : 'b';
12535 my $type_p = defined($Knext) ? $rLL->[$Knext]->[_TYPE_] : 'b';
12536 if ( $type_m eq 'q' && $type_p ne 'q' ) {
12537 $rending_multiline_qw_seqno_by_K->{$Kfirst} = $qw_seqno;
12538 $rKrange_multiline_qw_by_seqno->{$qw_seqno} =
12539 [ $K_start_multiline_qw, $Kfirst ];
12540 $K_start_multiline_qw = undef;
12545 # Starting a new a sequence of qw lines ?
12546 if ( !defined($K_start_multiline_qw)
12547 && $rLL->[$Klast]->[_TYPE_] eq 'q' )
12549 my $Kprev = $self->K_previous_nonblank($Klast);
12550 my $Knext = $self->K_next_nonblank($Klast);
12551 my $type_m = defined($Kprev) ? $rLL->[$Kprev]->[_TYPE_] : 'b';
12552 my $type_p = defined($Knext) ? $rLL->[$Knext]->[_TYPE_] : 'b';
12553 if ( $type_m ne 'q' && $type_p eq 'q' ) {
12555 $qw_seqno = 'q' . $num_qw_seqno;
12556 $K_start_multiline_qw = $Klast;
12557 $rstarting_multiline_qw_seqno_by_K->{$Klast} = $qw_seqno;
12562 # Give multiline qw lists extra indentation instead of CI. This option
12563 # works well but is currently only activated when the -xci flag is set.
12564 # The reason is to avoid unexpected changes in formatting.
12565 if ($rOpts_extended_continuation_indentation) {
12566 while ( my ( $qw_seqno_x, $rKrange ) =
12567 each %{$rKrange_multiline_qw_by_seqno} )
12569 my ( $Kbeg, $Kend ) = @{$rKrange};
12571 # require isolated closing token
12572 my $token_end = $rLL->[$Kend]->[_TOKEN_];
12574 unless ( length($token_end) == 1
12575 && ( $is_closing_token{$token_end} || $token_end eq '>' ) );
12577 # require isolated opening token
12578 my $token_beg = $rLL->[$Kbeg]->[_TOKEN_];
12580 # allow space(s) after the qw
12581 if ( length($token_beg) > 3 && substr( $token_beg, 2, 1 ) =~ m/\s/ )
12583 $token_beg =~ s/\s+//;
12586 next unless ( length($token_beg) == 3 );
12588 foreach my $KK ( $Kbeg + 1 .. $Kend - 1 ) {
12589 $rLL->[$KK]->[_LEVEL_]++;
12590 $rLL->[$KK]->[_CI_LEVEL_] = 0;
12593 # set flag for -wn option, which will remove the level
12594 $rmultiline_qw_has_extra_level->{$qw_seqno_x} = 1;
12598 # For the -lp option we need to mark all parent containers of
12600 if ( $rOpts_line_up_parentheses && !$rOpts_extended_line_up_parentheses ) {
12602 while ( my ( $qw_seqno_x, $rKrange ) =
12603 each %{$rKrange_multiline_qw_by_seqno} )
12605 my ( $Kbeg, $Kend ) = @{$rKrange};
12606 my $parent_seqno = $self->parent_seqno_by_K($Kend);
12607 next unless ($parent_seqno);
12609 # If the parent container exactly surrounds this qw, then -lp
12610 # formatting seems to work so we will not mark it.
12611 my $is_tightly_contained;
12612 my $Kn = $self->K_next_nonblank($Kend);
12613 my $seqno_n = defined($Kn) ? $rLL->[$Kn]->[_TYPE_SEQUENCE_] : undef;
12614 if ( defined($seqno_n) && $seqno_n eq $parent_seqno ) {
12616 my $Kp = $self->K_previous_nonblank($Kbeg);
12618 defined($Kp) ? $rLL->[$Kp]->[_TYPE_SEQUENCE_] : undef;
12619 if ( defined($seqno_p) && $seqno_p eq $parent_seqno ) {
12620 $is_tightly_contained = 1;
12624 $ris_excluded_lp_container->{$parent_seqno} = 1
12625 unless ($is_tightly_contained);
12627 # continue up the tree marking parent containers
12629 $parent_seqno = $self->[_rparent_of_seqno_]->{$parent_seqno};
12631 unless ( defined($parent_seqno)
12632 && $parent_seqno ne SEQ_ROOT );
12633 $ris_excluded_lp_container->{$parent_seqno} = 1;
12638 $self->[_rstarting_multiline_qw_seqno_by_K_] =
12639 $rstarting_multiline_qw_seqno_by_K;
12640 $self->[_rending_multiline_qw_seqno_by_K_] =
12641 $rending_multiline_qw_seqno_by_K;
12642 $self->[_rKrange_multiline_qw_by_seqno_] = $rKrange_multiline_qw_by_seqno;
12643 $self->[_rmultiline_qw_has_extra_level_] = $rmultiline_qw_has_extra_level;
12646 } ## end sub find_multiline_qw
12648 use constant DEBUG_COLLAPSED_LENGTHS => 0;
12650 # Minimum space reserved for contents of a code block. A value of 40 has given
12651 # reasonable results. With a large line length, say -l=120, this will not
12652 # normally be noticeable but it will prevent making a mess in some edge cases.
12653 use constant MIN_BLOCK_LEN => 40;
12655 my %is_handle_type;
12658 my @q = qw( w C U G i k => );
12659 @is_handle_type{@q} = (1) x scalar(@q);
12663 _max_prong_len_ => $i++,
12664 _handle_len_ => $i++,
12669 _interrupted_list_rule_ => $i++,
12673 sub is_fragile_block_type {
12674 my ( $self, $block_type, $seqno ) = @_;
12677 # $block_type = the block type of a token, and
12678 # $seqno = its sequence number
12681 # true if this block type stays broken after being broken,
12684 # This sub has been added to isolate a tricky decision needed
12685 # to fix issue b1428.
12687 # The coding here needs to agree with:
12688 # - sub process_line where variable '$rbrace_follower' is set
12689 # - sub process_line_inner_loop where variable '$is_opening_BLOCK' is set,
12691 if ( $is_sort_map_grep_eval{$block_type}
12692 || $block_type eq 't'
12693 || $self->[_rshort_nested_]->{$seqno} )
12700 } ## end sub is_fragile_block_type
12702 { ## closure xlp_collapsed_lengths
12706 my $last_nonblank_type;
12709 sub xlp_collapsed_lengths_initialize {
12711 $max_prong_len = 0;
12713 $last_nonblank_type = 'b';
12717 0, # $max_prong_len,
12719 SEQ_ROOT, # $seqno,
12723 undef, # $interrupted_list_rule
12727 } ## end sub xlp_collapsed_lengths_initialize
12729 sub cumulative_length_to_comma {
12730 my ( $self, $KK, $K_comma, $K_closing ) = @_;
12733 # $KK = index of starting token, or blank before start
12734 # $K_comma = index of line-ending comma
12735 # $K_closing = index of the container closing token
12738 # $length = cumulative length of the term
12740 my $rLL = $self->[_rLL_];
12741 if ( $rLL->[$KK]->[_TYPE_] eq 'b' ) { $KK++ }
12745 && $rLL->[$K_comma]->[_TYPE_] eq ',' # should be true
12747 # Ignore if terminal comma, causes instability (b1297,
12750 $K_closing - $K_comma > 2
12751 || ( $K_closing - $K_comma == 2
12752 && $rLL->[ $K_comma + 1 ]->[_TYPE_] ne 'b' )
12755 # The comma should be in this container
12756 && ( $rLL->[$K_comma]->[_LEVEL_] - 1 ==
12757 $rLL->[$K_closing]->[_LEVEL_] )
12761 # An additional check: if line ends in ), and the ) has vtc then
12762 # skip this estimate. Otherwise, vtc can give oscillating results.
12763 # Fixes b1448. For example, this could be unstable:
12765 # ( $os ne 'win' ? ( -selectcolor => "red" ) : () ),
12770 # An alternative, possibly better strategy would be to try to turn
12771 # off -vtc locally, but it turns out to be difficult to locate the
12772 # appropriate closing token when it is not on the same line as its
12775 my $K_prev = $self->K_previous_nonblank($K_comma);
12776 if ( defined($K_prev)
12778 && $rLL->[$K_prev]->[_TYPE_SEQUENCE_] )
12780 my $token = $rLL->[$K_prev]->[_TOKEN_];
12781 my $type = $rLL->[$K_prev]->[_TYPE_];
12782 if ( $closing_vertical_tightness{$token} && $type ne 'R' ) {
12783 ## type 'R' does not normally get broken, so ignore
12784 ## skip length calculation
12789 $KK >= 0 ? $rLL->[ $KK - 1 ]->[_CUMULATIVE_LENGTH_] : 0;
12790 $length = $rLL->[$K_comma]->[_CUMULATIVE_LENGTH_] - $starting_len;
12793 } ## end sub cumulative_length_to_comma
12795 sub xlp_collapsed_lengths {
12799 #----------------------------------------------------------------
12800 # Define the collapsed lengths of containers for -xlp indentation
12801 #----------------------------------------------------------------
12803 # We need an estimate of the minimum required line length starting at
12804 # any opening container for the -xlp style. This is needed to avoid
12805 # using too much indentation space for lower level containers and
12806 # thereby running out of space for outer container tokens due to the
12807 # maximum line length limit.
12809 # The basic idea is that at each node in the tree we imagine that we
12810 # have a fork with a handle and collapsible prongs:
12814 # ------------|-------
12815 # handle |------------
12819 # Each prong has a minimum collapsed length. The collapsed length at a
12820 # node is the maximum of these minimum lengths, plus the handle length.
12821 # Each of the prongs may itself be a tree node.
12823 # This is just a rough calculation to get an approximate starting point
12824 # for indentation. Later routines will be more precise. It is
12825 # important that these estimates be independent of the line breaks of
12826 # the input stream in order to avoid instabilities.
12828 my $rLL = $self->[_rLL_];
12829 my $rlines = $self->[_rlines_];
12830 my $rcollapsed_length_by_seqno = $self->[_rcollapsed_length_by_seqno_];
12831 my $rtype_count_by_seqno = $self->[_rtype_count_by_seqno_];
12833 my $K_start_multiline_qw;
12834 my $level_start_multiline_qw = 0;
12836 xlp_collapsed_lengths_initialize();
12838 #--------------------------------
12839 # Loop over all lines in the file
12840 #--------------------------------
12842 my $skip_next_line;
12843 foreach my $line_of_tokens ( @{$rlines} ) {
12845 if ($skip_next_line) {
12846 $skip_next_line = 0;
12849 my $line_type = $line_of_tokens->{_line_type};
12850 next if ( $line_type ne 'CODE' );
12851 my $CODE_type = $line_of_tokens->{_code_type};
12853 # Always skip blank lines
12854 next if ( $CODE_type eq 'BL' );
12856 # Note on other line types:
12857 # 'FS' (Format Skipping) lines may contain opening/closing tokens so
12858 # we have to process them to keep the stack correctly sequenced
12859 # 'VB' (Verbatim) lines could be skipped, but testing shows that
12860 # results look better if we include their lengths.
12862 # Also note that we could exclude -xlp formatting of containers with
12863 # 'FS' and 'VB' lines, but in testing that was not really beneficial
12865 # So we process tokens in 'FS' and 'VB' lines like all the rest...
12867 my $rK_range = $line_of_tokens->{_rK_range};
12868 my ( $K_first, $K_last ) = @{$rK_range};
12869 next unless ( defined($K_first) && defined($K_last) );
12871 my $has_comment = $rLL->[$K_last]->[_TYPE_] eq '#';
12873 # Always ignore block comments
12874 next if ( $has_comment && $K_first == $K_last );
12876 # Handle an intermediate line of a multiline qw quote. These may
12877 # require including some -ci or -i spaces. See cases c098/x063.
12878 # Updated to check all lines (not just $K_first==$K_last) to fix
12880 my $K_begin_loop = $K_first;
12881 if ( $rLL->[$K_first]->[_TYPE_] eq 'q' ) {
12884 my $level = $rLL->[$KK]->[_LEVEL_];
12885 my $ci_level = $rLL->[$KK]->[_CI_LEVEL_];
12887 # remember the level of the start
12888 if ( !defined($K_start_multiline_qw) ) {
12889 $K_start_multiline_qw = $K_first;
12890 $level_start_multiline_qw = $level;
12892 $self->[_rstarting_multiline_qw_seqno_by_K_]
12893 ->{$K_start_multiline_qw};
12894 if ( !$seqno_qw ) {
12895 my $Kp = $self->K_previous_nonblank($K_first);
12896 if ( defined($Kp) && $rLL->[$Kp]->[_TYPE_] eq 'q' ) {
12898 $K_start_multiline_qw = $Kp;
12899 $level_start_multiline_qw =
12900 $rLL->[$K_start_multiline_qw]->[_LEVEL_];
12904 # Fix for b1319, b1320
12905 $K_start_multiline_qw = undef;
12910 if ( defined($K_start_multiline_qw) ) {
12911 $len = $rLL->[$KK]->[_CUMULATIVE_LENGTH_] -
12912 $rLL->[ $KK - 1 ]->[_CUMULATIVE_LENGTH_];
12914 # We may have to add the spaces of one level or ci level
12915 # ... it depends depends on the -xci flag, the -wn flag,
12916 # and if the qw uses a container token as the quote
12919 # First rule: add ci if there is a $ci_level
12921 $len += $rOpts_continuation_indentation;
12924 # Second rule: otherwise, look for an extra indentation
12925 # level from the start and add one indentation level if
12927 elsif ( $level > $level_start_multiline_qw ) {
12928 $len += $rOpts_indent_columns;
12931 if ( $len > $max_prong_len ) { $max_prong_len = $len }
12933 $last_nonblank_type = 'q';
12935 $K_begin_loop = $K_first + 1;
12937 # We can skip to the next line if more tokens
12938 next if ( $K_begin_loop > $K_last );
12942 $K_start_multiline_qw = undef;
12944 # Find the terminal token, before any side comment
12945 my $K_terminal = $K_last;
12946 if ($has_comment) {
12949 if ( $rLL->[$K_terminal]->[_TYPE_] eq 'b'
12950 && $K_terminal > $K_first );
12953 # Use length to terminal comma if interrupted list rule applies
12954 if ( @stack && $stack[-1]->[_interrupted_list_rule_] ) {
12955 my $K_c = $stack[-1]->[_K_c_];
12956 if ( defined($K_c) ) {
12958 #----------------------------------------------------------
12959 # BEGIN patch for issue b1408: If this line ends in an
12960 # opening token, look for the closing token and comma at
12961 # the end of the next line. If so, combine the two lines to
12962 # get the correct sums. This problem seems to require -xlp
12963 # -vtc=2 and blank lines to occur. Use %is_opening_type to
12965 #----------------------------------------------------------
12966 if ( $is_opening_type{ $rLL->[$K_terminal]->[_TYPE_] }
12969 my $seqno_end = $rLL->[$K_terminal]->[_TYPE_SEQUENCE_];
12970 my $Kc_test = $rLL->[$K_terminal]->[_KNEXT_SEQ_ITEM_];
12972 # We are looking for a short broken remnant on the next
12973 # line; something like the third line here (b1408):
12976 # Moose::Util::TypeConstraints::find_type_constraint(
12980 # Help::WorkSubmitter->_filter_chores_and_maybe_warn_user(
12981 # $story_set_all_chores),
12984 # 'borrowernumber'}, # borrowernumber
12985 if ( defined($Kc_test)
12986 && $seqno_end == $rLL->[$Kc_test]->[_TYPE_SEQUENCE_]
12987 && $rLL->[$Kc_test]->[_LINE_INDEX_] == $iline + 1 )
12989 my $line_of_tokens_next = $rlines->[ $iline + 1 ];
12991 $rtype_count_by_seqno->{$seqno_end};
12992 my ( $K_first_next, $K_terminal_next ) =
12993 @{ $line_of_tokens_next->{_rK_range} };
12995 # backup at a side comment
12996 if ( defined($K_terminal_next)
12997 && $rLL->[$K_terminal_next]->[_TYPE_] eq '#' )
13000 $self->K_previous_nonblank($K_terminal_next);
13001 if ( defined($Kprev)
13002 && $Kprev >= $K_first_next )
13004 $K_terminal_next = $Kprev;
13009 defined($K_terminal_next)
13011 # next line ends with a comma
13012 && $rLL->[$K_terminal_next]->[_TYPE_] eq ','
13014 # which follows the closing container token
13016 $K_terminal_next - $Kc_test == 1
13017 || ( $K_terminal_next - $Kc_test == 2
13018 && $rLL->[ $K_terminal_next - 1 ]
13019 ->[_TYPE_] eq 'b' )
13022 # no commas in the container
13023 && ( !defined($rtype_count)
13024 || !$rtype_count->{','} )
13026 # for now, restrict this to a container with
13027 # just 1 or two tokens
13028 && $K_terminal_next - $K_terminal <= 5
13033 # combine the next line with the current line
13034 $K_terminal = $K_terminal_next;
13035 $skip_next_line = 1;
13036 if (DEBUG_COLLAPSED_LENGTHS) {
13037 print "Combining lines at line $iline\n";
13043 #--------------------------
13044 # END patch for issue b1408
13045 #--------------------------
13046 if ( $rLL->[$K_terminal]->[_TYPE_] eq ',' ) {
13049 $self->cumulative_length_to_comma( $K_first,
13050 $K_terminal, $K_c );
13052 # Fix for b1331: at a broken => item, include the
13053 # length of the previous half of the item plus one for
13054 # the missing space
13055 if ( $last_nonblank_type eq '=>' ) {
13056 $length += $len + 1;
13058 if ( $length > $max_prong_len ) {
13059 $max_prong_len = $length;
13065 #----------------------------------
13066 # Loop over all tokens on this line
13067 #----------------------------------
13068 $self->xlp_collapse_lengths_inner_loop( $iline, $K_begin_loop,
13069 $K_terminal, $K_last );
13071 # Now take care of any side comment;
13072 if ($has_comment) {
13073 if ($rOpts_ignore_side_comment_lengths) {
13078 # For a side comment when -iscl is not set, measure length from
13079 # the start of the previous nonblank token
13082 ? $rLL->[ $K_terminal - 1 ]->[_CUMULATIVE_LENGTH_]
13084 $len = $rLL->[$K_last]->[_CUMULATIVE_LENGTH_] - $len0;
13085 if ( $len > $max_prong_len ) { $max_prong_len = $len }
13089 } ## end loop over lines
13091 if (DEBUG_COLLAPSED_LENGTHS) {
13092 print "\nCollapsed lengths--\n";
13094 my $key ( sort { $a <=> $b } keys %{$rcollapsed_length_by_seqno} )
13096 my $clen = $rcollapsed_length_by_seqno->{$key};
13097 print "$key -> $clen\n";
13102 } ## end sub xlp_collapsed_lengths
13104 sub xlp_collapse_lengths_inner_loop {
13106 my ( $self, $iline, $K_begin_loop, $K_terminal, $K_last ) = @_;
13108 my $rLL = $self->[_rLL_];
13109 my $K_closing_container = $self->[_K_closing_container_];
13111 my $rblock_type_of_seqno = $self->[_rblock_type_of_seqno_];
13112 my $rcollapsed_length_by_seqno = $self->[_rcollapsed_length_by_seqno_];
13113 my $ris_permanently_broken = $self->[_ris_permanently_broken_];
13114 my $ris_list_by_seqno = $self->[_ris_list_by_seqno_];
13115 my $rhas_broken_list = $self->[_rhas_broken_list_];
13116 my $rtype_count_by_seqno = $self->[_rtype_count_by_seqno_];
13118 #----------------------------------
13119 # Loop over tokens on this line ...
13120 #----------------------------------
13121 foreach my $KK ( $K_begin_loop .. $K_terminal ) {
13123 my $type = $rLL->[$KK]->[_TYPE_];
13124 next if ( $type eq 'b' );
13126 #------------------------
13127 # Handle sequenced tokens
13128 #------------------------
13129 my $seqno = $rLL->[$KK]->[_TYPE_SEQUENCE_];
13132 my $token = $rLL->[$KK]->[_TOKEN_];
13134 #----------------------------
13135 # Entering a new container...
13136 #----------------------------
13137 if ( $is_opening_token{$token}
13138 && defined( $K_closing_container->{$seqno} ) )
13141 # save current prong length
13142 $stack[-1]->[_max_prong_len_] = $max_prong_len;
13143 $max_prong_len = 0;
13145 # Start new prong one level deeper
13146 my $handle_len = 0;
13147 if ( $rblock_type_of_seqno->{$seqno} ) {
13149 # code blocks do not use -lp indentation, but behave as
13150 # if they had a handle of one indentation length
13151 $handle_len = $rOpts_indent_columns;
13154 elsif ( $is_handle_type{$last_nonblank_type} ) {
13155 $handle_len = $len;
13157 if ( $KK > 0 && $rLL->[ $KK - 1 ]->[_TYPE_] eq 'b' );
13160 # Set a flag if the 'Interrupted List Rule' will be applied
13161 # (see sub copy_old_breakpoints).
13162 # - Added check on has_broken_list to fix issue b1298
13164 my $interrupted_list_rule =
13165 $ris_permanently_broken->{$seqno}
13166 && $ris_list_by_seqno->{$seqno}
13167 && !$rhas_broken_list->{$seqno}
13168 && !$rOpts_ignore_old_breakpoints;
13170 # NOTES: Since we are looking at old line numbers we have
13171 # to be very careful not to introduce an instability.
13173 # This following causes instability (b1288-b1296):
13174 # $interrupted_list_rule ||=
13175 # $rOpts_break_at_old_comma_breakpoints;
13177 # - We could turn off the interrupted list rule if there is
13178 # a broken sublist, to follow 'Compound List Rule 1'.
13179 # - We could use the _rhas_broken_list_ flag for this.
13180 # - But it seems safer not to do this, to avoid
13181 # instability, since the broken sublist could be
13182 # temporary. It seems better to let the formatting
13183 # stabilize by itself after one or two iterations.
13184 # - So, not doing this for now
13186 # Turn off the interrupted list rule if -vmll is set and a
13187 # list has '=>' characters. This avoids instabilities due
13188 # to dependence on old line breaks; issue b1325.
13189 if ( $interrupted_list_rule
13190 && $rOpts_variable_maximum_line_length )
13192 my $rtype_count = $rtype_count_by_seqno->{$seqno};
13193 if ( $rtype_count && $rtype_count->{'=>'} ) {
13194 $interrupted_list_rule = 0;
13198 my $K_c = $K_closing_container->{$seqno};
13200 # Add length of any terminal list item if interrupted
13201 # so that the result is the same as if the term is
13202 # in the next line (b1446).
13205 $interrupted_list_rule
13206 && $KK < $K_terminal
13208 # The line should end in a comma
13209 # NOTE: this currently assumes break after comma.
13210 # As long as the other call to cumulative_length..
13211 # makes the same assumption we should remain stable.
13212 && $rLL->[$K_terminal]->[_TYPE_] eq ','
13217 $self->cumulative_length_to_comma( $KK + 1,
13218 $K_terminal, $K_c );
13229 $interrupted_list_rule
13234 #--------------------
13235 # Exiting a container
13236 #--------------------
13237 elsif ( $is_closing_token{$token} && @stack ) {
13239 # The current prong ends - get its handle
13240 my $item = pop @stack;
13241 my $handle_len = $item->[_handle_len_];
13242 my $seqno_o = $item->[_seqno_o_];
13243 my $iline_o = $item->[_iline_o_];
13244 my $K_o = $item->[_K_o_];
13245 my $K_c_expect = $item->[_K_c_];
13246 my $collapsed_len = $max_prong_len;
13248 if ( $seqno_o ne $seqno ) {
13250 # This can happen if input file has brace errors.
13251 # Otherwise it shouldn't happen. Not fatal but -lp
13252 # formatting could get messed up.
13253 if ( DEVEL_MODE && !get_saw_brace_error() ) {
13255 sequence numbers differ; at CLOSING line $iline, seq=$seqno, Kc=$KK .. at OPENING line $iline_o, seq=$seqno_o, Ko=$K_o, expecting Kc=$K_c_expect
13260 #------------------------------------------
13261 # Rules to avoid scrunching code blocks ...
13262 #------------------------------------------
13264 # c098/x107 x108 x110 x112 x114 x115 x117 x118 x119
13265 my $block_type = $rblock_type_of_seqno->{$seqno};
13269 my $block_length = MIN_BLOCK_LEN;
13270 my $is_one_line_block;
13271 my $level = $rLL->[$K_o]->[_LEVEL_];
13272 if ( defined($K_o) && defined($K_c) ) {
13274 # note: fixed 3 May 2022 (removed 'my')
13276 $rLL->[ $K_c - 1 ]->[_CUMULATIVE_LENGTH_] -
13277 $rLL->[$K_o]->[_CUMULATIVE_LENGTH_];
13278 $is_one_line_block = $iline == $iline_o;
13281 # Code block rule 1: Use the total block length if
13282 # it is less than the minimum.
13283 if ( $block_length < MIN_BLOCK_LEN ) {
13284 $collapsed_len = $block_length;
13287 # Code block rule 2: Use the full length of a
13288 # one-line block to avoid breaking it, unless
13289 # extremely long. We do not need to do a precise
13290 # check here, because if it breaks then it will
13291 # stay broken on later iterations.
13295 $maximum_line_length_at_level[$level]
13297 # But skip this for blocks types which can reform,
13298 # like sort/map/grep/eval blocks, to avoid
13299 # instability (b1345, b1428)
13300 && $self->is_fragile_block_type( $block_type,
13304 $collapsed_len = $block_length;
13307 # Code block rule 3: Otherwise the length should be
13308 # at least MIN_BLOCK_LEN to avoid scrunching code
13310 elsif ( $collapsed_len < MIN_BLOCK_LEN ) {
13311 $collapsed_len = MIN_BLOCK_LEN;
13315 # Store the result. Some extra space, '2', allows for
13316 # length of an opening token, inside space, comma, ...
13317 # This constant has been tuned to give good overall
13319 $collapsed_len += 2;
13320 $rcollapsed_length_by_seqno->{$seqno} = $collapsed_len;
13322 # Restart scanning the lower level prong
13324 $max_prong_len = $stack[-1]->[_max_prong_len_];
13325 $collapsed_len += $handle_len;
13326 if ( $collapsed_len > $max_prong_len ) {
13327 $max_prong_len = $collapsed_len;
13332 # it is a ternary - no special processing for these yet
13338 $last_nonblank_type = $type;
13342 #----------------------------
13343 # Handle non-container tokens
13344 #----------------------------
13345 my $token_length = $rLL->[$KK]->[_TOKEN_LENGTH_];
13347 # Count lengths of things like 'xx => yy' as a single item
13348 if ( $type eq '=>' ) {
13349 $len += $token_length + 1;
13350 if ( $len > $max_prong_len ) { $max_prong_len = $len }
13352 elsif ( $last_nonblank_type eq '=>' ) {
13353 $len += $token_length;
13354 if ( $len > $max_prong_len ) { $max_prong_len = $len }
13356 # but only include one => per item
13357 $len = $token_length;
13360 # include everything to end of line after a here target
13361 elsif ( $type eq 'h' ) {
13362 $len = $rLL->[$K_last]->[_CUMULATIVE_LENGTH_] -
13363 $rLL->[ $KK - 1 ]->[_CUMULATIVE_LENGTH_];
13364 if ( $len > $max_prong_len ) { $max_prong_len = $len }
13367 # for everything else just use the token length
13369 $len = $token_length;
13370 if ( $len > $max_prong_len ) { $max_prong_len = $len }
13372 $last_nonblank_type = $type;
13374 } ## end loop over tokens on this line
13378 } ## end sub xlp_collapse_lengths_inner_loop
13380 } ## end closure xlp_collapsed_lengths
13382 sub is_excluded_lp {
13384 # Decide if this container is excluded by user request:
13385 # returns true if this token is excluded (i.e., may not use -lp)
13386 # returns false otherwise
13388 # The control hash can either describe:
13389 # what to exclude: $line_up_parentheses_control_is_lxpl = 1, or
13390 # what to include: $line_up_parentheses_control_is_lxpl = 0
13393 # $KK = index of the container opening token
13395 my ( $self, $KK ) = @_;
13396 my $rLL = $self->[_rLL_];
13397 my $rtoken_vars = $rLL->[$KK];
13398 my $token = $rtoken_vars->[_TOKEN_];
13399 my $rflags = $line_up_parentheses_control_hash{$token};
13401 #-----------------------------------------------
13402 # TEST #1: check match to listed container types
13403 #-----------------------------------------------
13404 if ( !defined($rflags) ) {
13406 # There is no entry for this container, so we are done
13407 return !$line_up_parentheses_control_is_lxpl;
13410 my ( $flag1, $flag2 ) = @{$rflags};
13412 #-----------------------------------------------------------
13413 # TEST #2: check match to flag1, the preceding nonblank word
13414 #-----------------------------------------------------------
13415 my $match_flag1 = !defined($flag1) || $flag1 eq '*';
13416 if ( !$match_flag1 ) {
13418 # Find the previous token
13419 my ( $is_f, $is_k, $is_w );
13420 my $Kp = $self->K_previous_nonblank($KK);
13421 if ( defined($Kp) ) {
13422 my $type_p = $rLL->[$Kp]->[_TYPE_];
13423 my $seqno = $rtoken_vars->[_TYPE_SEQUENCE_];
13426 $is_k = $type_p eq 'k';
13429 $is_f = $self->[_ris_function_call_paren_]->{$seqno};
13431 # either keyword or function call?
13432 $is_w = $is_k || $is_f;
13435 # Check for match based on flag1 and the previous token:
13436 if ( $flag1 eq 'k' ) { $match_flag1 = $is_k }
13437 elsif ( $flag1 eq 'K' ) { $match_flag1 = !$is_k }
13438 elsif ( $flag1 eq 'f' ) { $match_flag1 = $is_f }
13439 elsif ( $flag1 eq 'F' ) { $match_flag1 = !$is_f }
13440 elsif ( $flag1 eq 'w' ) { $match_flag1 = $is_w }
13441 elsif ( $flag1 eq 'W' ) { $match_flag1 = !$is_w }
13442 ## else { no match found }
13445 # See if we can exclude this based on the flag1 test...
13446 if ($line_up_parentheses_control_is_lxpl) {
13447 return 1 if ($match_flag1);
13450 return 1 if ( !$match_flag1 );
13453 #-------------------------------------------------------------
13454 # TEST #3: exclusion based on flag2 and the container contents
13455 #-------------------------------------------------------------
13457 # Note that this is an exclusion test for both -lpxl or -lpil input methods
13459 # 0 or blank: ignore container contents
13460 # 1 exclude non-lists or lists with sublists
13461 # 2 same as 1 but also exclude lists with code blocks
13466 my $seqno = $rtoken_vars->[_TYPE_SEQUENCE_];
13468 my $is_list = $self->[_ris_list_by_seqno_]->{$seqno};
13469 my $has_list = $self->[_rhas_list_]->{$seqno};
13470 my $has_code_block = $self->[_rhas_code_block_]->{$seqno};
13471 my $has_ternary = $self->[_rhas_ternary_]->{$seqno};
13475 || $flag2 eq '2' && ( $has_code_block || $has_ternary ) )
13480 return $match_flag2;
13481 } ## end sub is_excluded_lp
13483 sub set_excluded_lp_containers {
13486 return unless ($rOpts_line_up_parentheses);
13487 my $rLL = $self->[_rLL_];
13488 return unless ( defined($rLL) && @{$rLL} );
13490 my $K_opening_container = $self->[_K_opening_container_];
13491 my $ris_excluded_lp_container = $self->[_ris_excluded_lp_container_];
13492 my $rblock_type_of_seqno = $self->[_rblock_type_of_seqno_];
13494 foreach my $seqno ( keys %{$K_opening_container} ) {
13496 # code blocks are always excluded by the -lp coding so we can skip them
13497 next if ( $rblock_type_of_seqno->{$seqno} );
13499 my $KK = $K_opening_container->{$seqno};
13500 next unless defined($KK);
13502 # see if a user exclusion rule turns off -lp for this container
13503 if ( $self->is_excluded_lp($KK) ) {
13504 $ris_excluded_lp_container->{$seqno} = 1;
13508 } ## end sub set_excluded_lp_containers
13510 ######################################
13511 # CODE SECTION 6: Process line-by-line
13512 ######################################
13514 sub process_all_lines {
13516 #----------------------------------------------------------
13517 # Main loop to format all lines of a file according to type
13518 #----------------------------------------------------------
13521 my $rlines = $self->[_rlines_];
13522 my $rOpts_keep_old_blank_lines = $rOpts->{'keep-old-blank-lines'};
13523 my $file_writer_object = $self->[_file_writer_object_];
13524 my $logger_object = $self->[_logger_object_];
13525 my $vertical_aligner_object = $self->[_vertical_aligner_object_];
13526 my $save_logfile = $self->[_save_logfile_];
13528 # Flag to prevent blank lines when POD occurs in a format skipping sect.
13529 my $in_format_skipping_section;
13531 # set locations for blanks around long runs of keywords
13532 my $rwant_blank_line_after = $self->keyword_group_scan();
13534 my $line_type = EMPTY_STRING;
13535 my $i_last_POD_END = -10;
13537 foreach my $line_of_tokens ( @{$rlines} ) {
13539 # insert blank lines requested for keyword sequences
13540 if ( defined( $rwant_blank_line_after->{$i} )
13541 && $rwant_blank_line_after->{$i} == 1 )
13543 $self->want_blank_line();
13548 my $last_line_type = $line_type;
13549 $line_type = $line_of_tokens->{_line_type};
13550 my $input_line = $line_of_tokens->{_line_text};
13552 # _line_type codes are:
13553 # SYSTEM - system-specific code before hash-bang line
13554 # CODE - line of perl code (including comments)
13555 # POD_START - line starting pod, such as '=head'
13556 # POD - pod documentation text
13557 # POD_END - last line of pod section, '=cut'
13558 # HERE - text of here-document
13559 # HERE_END - last line of here-doc (target word)
13560 # FORMAT - format section
13561 # FORMAT_END - last line of format section, '.'
13562 # SKIP - code skipping section
13563 # SKIP_END - last line of code skipping section, '#>>V'
13564 # DATA_START - __DATA__ line
13565 # DATA - unidentified text following __DATA__
13566 # END_START - __END__ line
13567 # END - unidentified text following __END__
13568 # ERROR - we are in big trouble, probably not a perl script
13570 # put a blank line after an =cut which comes before __END__ and __DATA__
13571 # (required by podchecker)
13572 if ( $last_line_type eq 'POD_END' && !$self->[_saw_END_or_DATA_] ) {
13573 $i_last_POD_END = $i;
13574 $file_writer_object->reset_consecutive_blank_lines();
13575 if ( !$in_format_skipping_section && $input_line !~ /^\s*$/ ) {
13576 $self->want_blank_line();
13580 # handle line of code..
13581 if ( $line_type eq 'CODE' ) {
13583 my $CODE_type = $line_of_tokens->{_code_type};
13584 $in_format_skipping_section = $CODE_type eq 'FS';
13586 # Handle blank lines
13587 if ( $CODE_type eq 'BL' ) {
13589 # Keep this blank? Start with the flag -kbl=n, where
13590 # n=0 ignore all old blank lines
13591 # n=1 stable: keep old blanks, but limited by -mbl=n
13592 # n=2 keep all old blank lines, regardless of -mbl=n
13593 # If n=0 we delete all old blank lines and let blank line
13594 # rules generate any needed blank lines.
13595 my $kgb_keep = $rOpts_keep_old_blank_lines;
13597 # Then delete lines requested by the keyword-group logic if
13599 if ( $kgb_keep == 1
13600 && defined( $rwant_blank_line_after->{$i} )
13601 && $rwant_blank_line_after->{$i} == 2 )
13606 # But always keep a blank line following an =cut
13607 if ( $i - $i_last_POD_END < 3 && !$kgb_keep ) {
13612 $self->flush($CODE_type);
13613 $file_writer_object->write_blank_code_line(
13614 $rOpts_keep_old_blank_lines == 2 );
13615 $self->[_last_line_leading_type_] = 'b';
13621 # Let logger see all non-blank lines of code. This is a slow
13622 # operation so we avoid it if it is not going to be saved.
13623 if ( $save_logfile && $logger_object ) {
13624 $logger_object->black_box( $line_of_tokens,
13625 $vertical_aligner_object->get_output_line_number );
13629 # Handle Format Skipping (FS) and Verbatim (VB) Lines
13630 if ( $CODE_type eq 'VB' || $CODE_type eq 'FS' ) {
13631 $self->write_unindented_line("$input_line");
13632 $file_writer_object->reset_consecutive_blank_lines();
13636 # Handle all other lines of code
13637 $self->process_line_of_CODE($line_of_tokens);
13640 # handle line of non-code..
13643 # set special flags
13645 if ( substr( $line_type, 0, 3 ) eq 'POD' ) {
13647 # Pod docs should have a preceding blank line. But stay
13648 # out of __END__ and __DATA__ sections, because
13649 # the user may be using this section for any purpose whatsoever
13650 if ( $rOpts->{'delete-pod'} ) { $skip_line = 1; }
13651 if ( $rOpts->{'trim-pod'} ) { $input_line =~ s/\s+$// }
13653 && !$in_format_skipping_section
13654 && $line_type eq 'POD_START'
13655 && !$self->[_saw_END_or_DATA_] )
13657 $self->want_blank_line();
13661 # leave the blank counters in a predictable state
13662 # after __END__ or __DATA__
13663 elsif ( $line_type eq 'END_START' || $line_type eq 'DATA_START' ) {
13664 $file_writer_object->reset_consecutive_blank_lines();
13665 $self->[_saw_END_or_DATA_] = 1;
13668 # Patch to avoid losing blank lines after a code-skipping block;
13670 elsif ( $line_type eq 'SKIP_END' ) {
13671 $file_writer_object->reset_consecutive_blank_lines();
13674 # write unindented non-code line
13675 if ( !$skip_line ) {
13676 $self->write_unindented_line($input_line);
13682 } ## end sub process_all_lines
13684 { ## closure keyword_group_scan
13686 # this is the return var
13687 my $rhash_of_desires;
13689 # user option variables for -kgb
13696 $rOpts_kgb_size_max,
13697 $rOpts_kgb_size_min,
13701 # group variables, initialized by kgb_initialize_group_vars
13702 my ( $ibeg, $iend, $count, $level_beg, $K_closing );
13703 my ( @iblanks, @group, @subgroup );
13705 # line variables, updated by sub keyword_group_scan
13706 my ( $line_type, $CODE_type, $K_first, $K_last );
13707 my $number_of_groups_seen;
13709 #------------------------
13710 # -kgb helper subroutines
13711 #------------------------
13713 sub kgb_initialize_options {
13715 # check and initialize user options for -kgb
13716 # return error flag:
13717 # true for some input error, do not continue
13720 # Local copies of the various control parameters
13721 $rOpts_kgb_after = $rOpts->{'keyword-group-blanks-after'}; # '-kgba'
13722 $rOpts_kgb_before = $rOpts->{'keyword-group-blanks-before'}; # '-kgbb'
13723 $rOpts_kgb_delete = $rOpts->{'keyword-group-blanks-delete'}; # '-kgbd'
13724 $rOpts_kgb_inside = $rOpts->{'keyword-group-blanks-inside'}; # '-kgbi'
13726 # A range of sizes can be input with decimal notation like 'min.max'
13727 # with any number of dots between the two numbers. Examples:
13728 # string => min max matches
13729 # 1.1 1 1 exactly 1
13730 # 1.3 1 3 1,2, or 3
13731 # 1..3 1 3 1,2, or 3
13736 my $rOpts_kgb_size = $rOpts->{'keyword-group-blanks-size'}; # '-kgbs'
13737 ( $rOpts_kgb_size_min, $rOpts_kgb_size_max ) = split /\.+/,
13739 if ( $rOpts_kgb_size_min && $rOpts_kgb_size_min !~ /^\d+$/
13740 || $rOpts_kgb_size_max && $rOpts_kgb_size_max !~ /^\d+$/ )
13743 Unexpected value for -kgbs: '$rOpts_kgb_size'; expecting 'min' or 'min.max';
13744 ignoring all -kgb flags
13747 # Turn this option off so that this message does not keep repeating
13748 # during iterations and other files.
13749 $rOpts->{'keyword-group-blanks-size'} = EMPTY_STRING;
13750 return $rhash_of_desires;
13752 $rOpts_kgb_size_min = 1 unless ($rOpts_kgb_size_min);
13754 if ( $rOpts_kgb_size_max && $rOpts_kgb_size_max < $rOpts_kgb_size_min )
13756 return $rhash_of_desires;
13759 # check codes for $rOpts_kgb_before and
13760 # $rOpts_kgb_after:
13761 # 0 = never (delete if exist)
13762 # 1 = stable (keep unchanged)
13763 # 2 = always (insert if missing)
13764 return $rhash_of_desires
13765 unless $rOpts_kgb_size_min > 0
13766 && ( $rOpts_kgb_before != 1
13767 || $rOpts_kgb_after != 1
13768 || $rOpts_kgb_inside
13769 || $rOpts_kgb_delete );
13772 } ## end sub kgb_initialize_options
13774 sub kgb_initialize_group_vars {
13777 # $ibeg = first line index of this entire group
13778 # $iend = last line index of this entire group
13779 # $count = total number of keywords seen in this entire group
13780 # $level_beg = indentation level of this group
13781 # @group = [ $i, $token, $count ] =list of all keywords & blanks
13782 # @subgroup = $j, index of group where token changes
13783 # @iblanks = line indexes of blank lines in input stream in this group
13784 # where i=starting line index
13785 # token (the keyword)
13786 # count = number of this token in this subgroup
13787 # j = index in group where token changes
13791 $K_closing = undef;
13797 } ## end sub kgb_initialize_group_vars
13799 sub kgb_initialize_line_vars {
13800 $CODE_type = EMPTY_STRING;
13803 $line_type = EMPTY_STRING;
13805 } ## end sub kgb_initialize_line_vars
13807 sub kgb_initialize {
13809 # initialize all closure variables for -kgb
13811 # true to cause immediate exit (something is wrong)
13812 # false to continue ... all is okay
13814 # This is the return variable:
13815 $rhash_of_desires = {};
13817 # initialize and check user options;
13818 my $quit = kgb_initialize_options();
13819 if ($quit) { return $quit }
13821 # initialize variables for the current group and subgroups:
13822 kgb_initialize_group_vars();
13824 # initialize variables for the most recently seen line:
13825 kgb_initialize_line_vars();
13827 $number_of_groups_seen = 0;
13831 } ## end sub kgb_initialize
13833 sub kgb_insert_blank_after {
13835 $rhash_of_desires->{$i} = 1;
13837 if ( defined( $rhash_of_desires->{$ip} )
13838 && $rhash_of_desires->{$ip} == 2 )
13840 $rhash_of_desires->{$ip} = 0;
13843 } ## end sub kgb_insert_blank_after
13845 sub kgb_split_into_sub_groups {
13847 # place blanks around long sub-groups of keywords
13849 return unless ($rOpts_kgb_inside);
13851 # loop over sub-groups, index k
13852 push @subgroup, scalar @group;
13854 my $kend = @subgroup - 1;
13855 foreach my $k ( $kbeg .. $kend ) {
13857 # index j runs through all keywords found
13858 my $j_b = $subgroup[ $k - 1 ];
13859 my $j_e = $subgroup[$k] - 1;
13861 # index i is the actual line number of a keyword
13862 my ( $i_b, $tok_b, $count_b ) = @{ $group[$j_b] };
13863 my ( $i_e, $tok_e, $count_e ) = @{ $group[$j_e] };
13864 my $num = $count_e - $count_b + 1;
13866 # This subgroup runs from line $ib to line $ie-1, but may contain
13868 if ( $num >= $rOpts_kgb_size_min ) {
13870 # if there are blank lines, we require that at least $num lines
13871 # be non-blank up to the boundary with the next subgroup.
13872 my $nog_b = my $nog_e = 1;
13873 if ( @iblanks && !$rOpts_kgb_delete ) {
13874 my $j_bb = $j_b + $num - 1;
13875 my ( $i_bb, $tok_bb, $count_bb ) = @{ $group[$j_bb] };
13876 $nog_b = $count_bb - $count_b + 1 == $num;
13878 my $j_ee = $j_e - ( $num - 1 );
13879 my ( $i_ee, $tok_ee, $count_ee ) = @{ $group[$j_ee] };
13880 $nog_e = $count_e - $count_ee + 1 == $num;
13882 if ( $nog_b && $k > $kbeg ) {
13883 kgb_insert_blank_after( $i_b - 1 );
13885 if ( $nog_e && $k < $kend ) {
13886 my ( $i_ep, $tok_ep, $count_ep ) =
13887 @{ $group[ $j_e + 1 ] };
13888 kgb_insert_blank_after( $i_ep - 1 );
13893 } ## end sub kgb_split_into_sub_groups
13895 sub kgb_delete_if_blank {
13896 my ( $self, $i ) = @_;
13898 # delete line $i if it is blank
13899 my $rlines = $self->[_rlines_];
13900 return unless ( $i >= 0 && $i < @{$rlines} );
13901 return if ( $rlines->[$i]->{_line_type} ne 'CODE' );
13902 my $code_type = $rlines->[$i]->{_code_type};
13903 if ( $code_type eq 'BL' ) { $rhash_of_desires->{$i} = 2; }
13905 } ## end sub kgb_delete_if_blank
13907 sub kgb_delete_inner_blank_lines {
13909 # always remove unwanted trailing blank lines from our list
13910 return unless (@iblanks);
13911 while ( my $ibl = pop(@iblanks) ) {
13912 if ( $ibl < $iend ) { push @iblanks, $ibl; last }
13916 # now mark mark interior blank lines for deletion if requested
13917 return unless ($rOpts_kgb_delete);
13919 while ( my $ibl = pop(@iblanks) ) { $rhash_of_desires->{$ibl} = 2 }
13922 } ## end sub kgb_delete_inner_blank_lines
13924 sub kgb_end_group {
13926 # end a group of keywords
13927 my ( $self, $bad_ending ) = @_;
13928 if ( defined($ibeg) && $ibeg >= 0 ) {
13930 # then handle sufficiently large groups
13931 if ( $count >= $rOpts_kgb_size_min ) {
13933 $number_of_groups_seen++;
13935 # do any blank deletions regardless of the count
13936 kgb_delete_inner_blank_lines();
13938 my $rlines = $self->[_rlines_];
13940 my $code_type = $rlines->[ $ibeg - 1 ]->{_code_type};
13942 # patch for hash bang line which is not currently marked as
13943 # a comment; mark it as a comment
13944 if ( $ibeg == 1 && !$code_type ) {
13945 my $line_text = $rlines->[ $ibeg - 1 ]->{_line_text};
13947 if ( $line_text && $line_text =~ /^#/ );
13950 # Do not insert a blank after a comment
13951 # (this could be subject to a flag in the future)
13952 if ( $code_type !~ /(BC|SBC|SBCX)/ ) {
13953 if ( $rOpts_kgb_before == INSERT ) {
13954 kgb_insert_blank_after( $ibeg - 1 );
13957 elsif ( $rOpts_kgb_before == DELETE ) {
13958 $self->kgb_delete_if_blank( $ibeg - 1 );
13963 # We will only put blanks before code lines. We could loosen
13964 # this rule a little, but we have to be very careful because
13965 # for example we certainly don't want to drop a blank line
13966 # after a line like this:
13968 if ( $line_type eq 'CODE' && defined($K_first) ) {
13970 # - Do not put a blank before a line of different level
13971 # - Do not put a blank line if we ended the search badly
13972 # - Do not put a blank at the end of the file
13973 # - Do not put a blank line before a hanging side comment
13974 my $rLL = $self->[_rLL_];
13975 my $level = $rLL->[$K_first]->[_LEVEL_];
13976 my $ci_level = $rLL->[$K_first]->[_CI_LEVEL_];
13978 if ( $level == $level_beg
13981 && $iend < @{$rlines}
13982 && $CODE_type ne 'HSC' )
13984 if ( $rOpts_kgb_after == INSERT ) {
13985 kgb_insert_blank_after($iend);
13987 elsif ( $rOpts_kgb_after == DELETE ) {
13988 $self->kgb_delete_if_blank( $iend + 1 );
13993 kgb_split_into_sub_groups();
13996 # reset for another group
13997 kgb_initialize_group_vars();
14000 } ## end sub kgb_end_group
14002 sub kgb_find_container_end {
14004 # If the keyword line is continued onto subsequent lines, find the
14005 # closing token '$K_closing' so that we can easily skip past the
14006 # contents of the container.
14008 # We only set this value if we find a simple list, meaning
14009 # -contents only one level deep
14014 # First check: skip if next line is not one deeper
14015 my $Knext_nonblank = $self->K_next_nonblank($K_last);
14016 return if ( !defined($Knext_nonblank) );
14017 my $rLL = $self->[_rLL_];
14018 my $level_next = $rLL->[$Knext_nonblank]->[_LEVEL_];
14019 return if ( $level_next != $level_beg + 1 );
14021 # Find the parent container of the first token on the next line
14022 my $parent_seqno = $self->parent_seqno_by_K($Knext_nonblank);
14023 return unless ( defined($parent_seqno) );
14025 # Must not be a weld (can be unstable)
14027 if ( $total_weld_count
14028 && $self->is_welded_at_seqno($parent_seqno) );
14030 # Opening container must exist and be on this line
14031 my $Ko = $self->[_K_opening_container_]->{$parent_seqno};
14032 return unless ( defined($Ko) && $Ko > $K_first && $Ko <= $K_last );
14034 # Verify that the closing container exists and is on a later line
14035 my $Kc = $self->[_K_closing_container_]->{$parent_seqno};
14036 return unless ( defined($Kc) && $Kc > $K_last );
14042 } ## end sub kgb_find_container_end
14044 sub kgb_add_to_group {
14045 my ( $self, $i, $token, $level ) = @_;
14047 # End the previous group if we have reached the maximum
14049 if ( $rOpts_kgb_size_max && @group >= $rOpts_kgb_size_max ) {
14050 $self->kgb_end_group();
14053 if ( @group == 0 ) {
14055 $level_beg = $level;
14063 if ( !@group || $token ne $group[-1]->[1] ) {
14064 push @subgroup, scalar(@group);
14066 push @group, [ $i, $token, $count ];
14068 # remember if this line ends in an open container
14069 $self->kgb_find_container_end();
14072 } ## end sub kgb_add_to_group
14074 #---------------------
14075 # -kgb main subroutine
14076 #---------------------
14078 sub keyword_group_scan {
14081 # Called once per file to process --keyword-group-blanks-* parameters.
14084 # Manipulate blank lines around keyword groups (kgb* flags)
14085 # Scan all lines looking for runs of consecutive lines beginning with
14086 # selected keywords. Example keywords are 'my', 'our', 'local', ... but
14087 # they may be anything. We will set flags requesting that blanks be
14088 # inserted around and within them according to input parameters. Note
14089 # that we are scanning the lines as they came in in the input stream, so
14090 # they are not necessarily well formatted.
14093 # The output of this sub is a return hash ref whose keys are the indexes
14094 # of lines after which we desire a blank line. For line index $i:
14095 # $rhash_of_desires->{$i} = 1 means we want a blank line AFTER line $i
14096 # $rhash_of_desires->{$i} = 2 means we want blank line $i removed
14098 # Nothing to do if no blanks can be output. This test added to fix
14100 if ( !$rOpts_maximum_consecutive_blank_lines ) {
14101 return $rhash_of_desires;
14107 my $quit = kgb_initialize();
14108 if ($quit) { return $rhash_of_desires }
14110 my $rLL = $self->[_rLL_];
14111 my $rlines = $self->[_rlines_];
14113 $self->kgb_end_group();
14115 my $Opt_repeat_count =
14116 $rOpts->{'keyword-group-blanks-repeat-count'}; # '-kgbr'
14118 #----------------------------------
14119 # loop over all lines of the source
14120 #----------------------------------
14121 foreach my $line_of_tokens ( @{$rlines} ) {
14125 if ( $Opt_repeat_count > 0
14126 && $number_of_groups_seen >= $Opt_repeat_count );
14128 kgb_initialize_line_vars();
14130 $line_type = $line_of_tokens->{_line_type};
14132 # always end a group at non-CODE
14133 if ( $line_type ne 'CODE' ) { $self->kgb_end_group(); next }
14135 $CODE_type = $line_of_tokens->{_code_type};
14137 # end any group at a format skipping line
14138 if ( $CODE_type && $CODE_type eq 'FS' ) {
14139 $self->kgb_end_group();
14143 # continue in a verbatim (VB) type; it may be quoted text
14144 if ( $CODE_type eq 'VB' ) {
14145 if ( $ibeg >= 0 ) { $iend = $i; }
14149 # and continue in blank (BL) types
14150 if ( $CODE_type eq 'BL' ) {
14151 if ( $ibeg >= 0 ) {
14153 push @{iblanks}, $i;
14155 # propagate current subgroup token
14156 my $tok = $group[-1]->[1];
14157 push @group, [ $i, $tok, $count ];
14162 # examine the first token of this line
14163 my $rK_range = $line_of_tokens->{_rK_range};
14164 ( $K_first, $K_last ) = @{$rK_range};
14165 if ( !defined($K_first) ) {
14167 # Somewhat unexpected blank line..
14168 # $rK_range is normally defined for line type CODE, but this can
14169 # happen for example if the input line was a single semicolon
14170 # which is being deleted. In that case there was code in the
14171 # input file but it is not being retained. So we can silently
14173 return $rhash_of_desires;
14176 my $level = $rLL->[$K_first]->[_LEVEL_];
14177 my $type = $rLL->[$K_first]->[_TYPE_];
14178 my $token = $rLL->[$K_first]->[_TOKEN_];
14179 my $ci_level = $rLL->[$K_first]->[_CI_LEVEL_];
14181 # End a group 'badly' at an unexpected level. This will prevent
14182 # blank lines being incorrectly placed after the end of the group.
14183 # We are looking for any deviation from two acceptable patterns:
14184 # PATTERN 1: a simple list; secondary lines are at level+1
14185 # PATTERN 2: a long statement; all secondary lines same level
14186 # This was added as a fix for case b1177, in which a complex
14187 # structure got incorrectly inserted blank lines.
14188 if ( $ibeg >= 0 ) {
14190 # Check for deviation from PATTERN 1, simple list:
14191 if ( defined($K_closing) && $K_first < $K_closing ) {
14192 $self->kgb_end_group(1) if ( $level != $level_beg + 1 );
14195 # Check for deviation from PATTERN 2, single statement:
14196 elsif ( $level != $level_beg ) { $self->kgb_end_group(1) }
14199 # Do not look for keywords in lists ( keyword 'my' can occur in
14200 # lists, see case b760); fixed for c048.
14201 if ( $self->is_list_by_K($K_first) ) {
14202 if ( $ibeg >= 0 ) { $iend = $i }
14206 # see if this is a code type we seek (i.e. comment)
14208 && $keyword_group_list_comment_pattern
14209 && $CODE_type =~ /$keyword_group_list_comment_pattern/ )
14212 my $tok = $CODE_type;
14214 # Continuing a group
14215 if ( $ibeg >= 0 && $level == $level_beg ) {
14216 $self->kgb_add_to_group( $i, $tok, $level );
14222 # first end old group if any; we might be starting new
14223 # keywords at different level
14224 if ( $ibeg >= 0 ) { $self->kgb_end_group(); }
14225 $self->kgb_add_to_group( $i, $tok, $level );
14230 # See if it is a keyword we seek, but never start a group in a
14231 # continuation line; the code may be badly formatted.
14232 if ( $ci_level == 0
14234 && $token =~ /$keyword_group_list_pattern/ )
14237 # Continuing a keyword group
14238 if ( $ibeg >= 0 && $level == $level_beg ) {
14239 $self->kgb_add_to_group( $i, $token, $level );
14242 # Start new keyword group
14245 # first end old group if any; we might be starting new
14246 # keywords at different level
14247 if ( $ibeg >= 0 ) { $self->kgb_end_group(); }
14248 $self->kgb_add_to_group( $i, $token, $level );
14253 # This is not one of our keywords, but we are in a keyword group
14254 # so see if we should continue or quit
14255 elsif ( $ibeg >= 0 ) {
14257 # - bail out on a large level change; we may have walked into a
14258 # data structure or anonymous sub code.
14259 if ( $level > $level_beg + 1 || $level < $level_beg ) {
14260 $self->kgb_end_group(1);
14264 # - keep going on a continuation line of the same level, since
14265 # it is probably a continuation of our previous keyword,
14266 # - and keep going past hanging side comments because we never
14267 # want to interrupt them.
14268 if ( ( ( $level == $level_beg ) && $ci_level > 0 )
14269 || $CODE_type eq 'HSC' )
14275 # - continue if if we are within in a container which started
14276 # with the line of the previous keyword.
14277 if ( defined($K_closing) && $K_first <= $K_closing ) {
14279 # continue if entire line is within container
14280 if ( $K_last <= $K_closing ) { $iend = $i; next }
14282 # continue at ); or }; or ];
14283 my $KK = $K_closing + 1;
14284 if ( $rLL->[$KK]->[_TYPE_] eq ';' ) {
14285 if ( $KK < $K_last ) {
14286 if ( $rLL->[ ++$KK ]->[_TYPE_] eq 'b' ) { ++$KK }
14287 if ( $KK > $K_last || $rLL->[$KK]->[_TYPE_] ne '#' )
14289 $self->kgb_end_group(1);
14297 $self->kgb_end_group(1);
14301 # - end the group if none of the above
14302 $self->kgb_end_group();
14306 # not in a keyword group; continue
14308 } ## end of loop over all lines
14310 $self->kgb_end_group();
14311 return $rhash_of_desires;
14313 } ## end sub keyword_group_scan
14314 } ## end closure keyword_group_scan
14316 #######################################
14317 # CODE SECTION 7: Process lines of code
14318 #######################################
14320 { ## begin closure process_line_of_CODE
14322 # The routines in this closure receive lines of code and combine them into
14323 # 'batches' and send them along. A 'batch' is the unit of code which can be
14324 # processed further as a unit. It has the property that it is the largest
14325 # amount of code into which which perltidy is free to place one or more
14326 # line breaks within it without violating any constraints.
14328 # When a new batch is formed it is sent to sub 'grind_batch_of_code'.
14330 # flags needed by the store routine
14331 my $line_of_tokens;
14332 my $no_internal_newlines;
14335 # range of K of tokens for the current line
14336 my ( $K_first, $K_last );
14338 my ( $rLL, $radjusted_levels, $rparent_of_seqno, $rdepth_of_opening_seqno,
14339 $rblock_type_of_seqno, $ri_starting_one_line_block );
14341 # past stored nonblank tokens and flags
14343 $K_last_nonblank_code, $looking_for_else,
14344 $is_static_block_comment, $last_CODE_type,
14345 $last_line_had_side_comment, $next_parent_seqno,
14349 # Called once at the start of a new file
14350 sub initialize_process_line_of_CODE {
14351 $K_last_nonblank_code = undef;
14352 $looking_for_else = 0;
14353 $is_static_block_comment = 0;
14354 $last_line_had_side_comment = 0;
14355 $next_parent_seqno = SEQ_ROOT;
14356 $next_slevel = undef;
14358 } ## end sub initialize_process_line_of_CODE
14360 # Batch variables: these describe the current batch of code being formed
14361 # and sent down the pipeline. They are initialized in the next
14364 $rbrace_follower, $index_start_one_line_block,
14365 $starting_in_quote, $ending_in_quote,
14368 # Called before the start of each new batch
14369 sub initialize_batch_variables {
14371 # Initialize array values for a new batch. Any changes here must be
14372 # carefully coordinated with sub store_token_to_go.
14374 $max_index_to_go = UNDEFINED_INDEX;
14375 $summed_lengths_to_go[0] = 0;
14376 $nesting_depth_to_go[0] = 0;
14377 $ri_starting_one_line_block = [];
14379 # Redefine some sparse arrays.
14380 # It is more efficient to redefine these sparse arrays and rely on
14381 # undef's instead of initializing to 0's. Testing showed that using
14382 # @array=() is more efficient than $#array=-1
14384 @old_breakpoint_to_go = ();
14385 @forced_breakpoint_to_go = ();
14386 @block_type_to_go = ();
14387 @mate_index_to_go = ();
14388 @type_sequence_to_go = ();
14390 # NOTE: @nobreak_to_go is sparse and could be treated this way, but
14391 # testing showed that there would be very little efficiency gain
14392 # because an 'if' test must be added in store_token_to_go.
14394 # The initialization code for the remaining batch arrays is as follows
14395 # and can be activated for testing. But profiling shows that it is
14396 # time-consuming to re-initialize the batch arrays and is not necessary
14397 # because the maximum valid token, $max_index_to_go, is carefully
14398 # controlled. This means however that it is not possible to do any
14399 # type of filter or map operation directly on these arrays. And it is
14400 # not possible to use negative indexes. As a precaution against program
14401 # changes which might do this, sub pad_array_to_go adds some undefs at
14402 # the end of the current batch of data.
14405 ## @nobreak_to_go = ();
14406 ## @token_lengths_to_go = ();
14407 ## @levels_to_go = ();
14408 ## @ci_levels_to_go = ();
14409 ## @tokens_to_go = ();
14411 ## @types_to_go = ();
14412 ## @leading_spaces_to_go = ();
14413 ## @reduced_spaces_to_go = ();
14414 ## @inext_to_go = ();
14415 ## @parent_seqno_to_go = ();
14418 $rbrace_follower = undef;
14419 $ending_in_quote = 0;
14421 $index_start_one_line_block = undef;
14423 # initialize forced breakpoint vars associated with each output batch
14424 $forced_breakpoint_count = 0;
14425 $index_max_forced_break = UNDEFINED_INDEX;
14426 $forced_breakpoint_undo_count = 0;
14429 } ## end sub initialize_batch_variables
14431 sub leading_spaces_to_go {
14433 # return the number of indentation spaces for a token in the output
14437 return 0 if ( $ii < 0 );
14438 my $indentation = $leading_spaces_to_go[$ii];
14439 return ref($indentation) ? $indentation->get_spaces() : $indentation;
14440 } ## end sub leading_spaces_to_go
14442 sub create_one_line_block {
14444 # set index starting next one-line block
14445 # call with no args to delete the current one-line block
14446 ($index_start_one_line_block) = @_;
14448 } ## end sub create_one_line_block
14450 # Routine to place the current token into the output stream.
14451 # Called once per output token.
14453 use constant DEBUG_STORE => 0;
14455 sub store_token_to_go {
14457 my ( $self, $Ktoken_vars, $rtoken_vars ) = @_;
14459 #-------------------------------------------------------
14460 # Token storage utility for sub process_line_of_CODE.
14461 # Add one token to the next batch of '_to_go' variables.
14462 #-------------------------------------------------------
14464 # Input parameters:
14465 # $Ktoken_vars = the index K in the global token array
14466 # $rtoken_vars = $rLL->[$Ktoken_vars] = the corresponding token values
14467 # unless they are temporarily being overridden
14469 #------------------------------------------------------------------
14470 # NOTE: called once per token so coding efficiency is critical here.
14471 # All changes need to be benchmarked with Devel::NYTProf.
14472 #------------------------------------------------------------------
14483 ) = @{$rtoken_vars}[
14494 # Check for emergency flush...
14495 # The K indexes in the batch must always be a continuous sequence of
14496 # the global token array. The batch process programming assumes this.
14497 # If storing this token would cause this relation to fail we must dump
14498 # the current batch before storing the new token. It is extremely rare
14499 # for this to happen. One known example is the following two-line
14500 # snippet when run with parameters
14501 # --noadd-newlines --space-terminal-semicolon:
14502 # if ( $_ =~ /PENCIL/ ) { $pencil_flag= 1 } ; ;
14504 if ( $max_index_to_go >= 0 ) {
14505 if ( $Ktoken_vars != $K_to_go[$max_index_to_go] + 1 ) {
14506 $self->flush_batch_of_CODE();
14509 # Do not output consecutive blank tokens ... this should not
14510 # happen, but it is worth checking. Later code can then make the
14511 # simplifying assumption that blank tokens are not consecutive.
14512 elsif ( $type eq 'b' && $types_to_go[$max_index_to_go] eq 'b' ) {
14516 # if this happens, it is may be that consecutive blanks
14517 # were inserted into the token stream in 'respace_tokens'
14518 my $lno = $rLL->[$Ktoken_vars]->[_LINE_INDEX_] + 1;
14519 Fault("consecutive blanks near line $lno; please fix");
14525 # Do not start a batch with a blank token.
14526 # Fixes cases b149 b888 b984 b985 b986 b987
14528 if ( $type eq 'b' ) { return }
14531 # Update counter and do initializations if first token of new batch
14532 if ( !++$max_index_to_go ) {
14534 # Reset flag '$starting_in_quote' for a new batch. It must be set
14535 # to the value of '$in_continued_quote', but here for efficiency we
14536 # set it to zero, which is its normal value. Then in coding below
14537 # we will change it if we find we are actually in a continued quote.
14538 $starting_in_quote = 0;
14540 # Update the next parent sequence number for each new batch.
14542 #----------------------------------------
14543 # Begin coding from sub parent_seqno_by_K
14544 #----------------------------------------
14546 # The following is equivalent to this call but much faster:
14547 # $next_parent_seqno = $self->parent_seqno_by_K($Ktoken_vars);
14549 $next_parent_seqno = SEQ_ROOT;
14551 $next_parent_seqno = $rparent_of_seqno->{$seqno};
14554 my $Kt = $rLL->[$Ktoken_vars]->[_KNEXT_SEQ_ITEM_];
14555 if ( defined($Kt) ) {
14556 my $type_sequence_t = $rLL->[$Kt]->[_TYPE_SEQUENCE_];
14557 my $type_t = $rLL->[$Kt]->[_TYPE_];
14559 # if next container token is closing, it is the parent seqno
14560 if ( $is_closing_type{$type_t} ) {
14561 $next_parent_seqno = $type_sequence_t;
14564 # otherwise we want its parent container
14566 $next_parent_seqno =
14567 $rparent_of_seqno->{$type_sequence_t};
14571 $next_parent_seqno = SEQ_ROOT
14572 unless ( defined($next_parent_seqno) );
14574 #--------------------------------------
14575 # End coding from sub parent_seqno_by_K
14576 #--------------------------------------
14578 $next_slevel = $rdepth_of_opening_seqno->[$next_parent_seqno] + 1;
14581 # Clip levels to zero if there are level errors in the file.
14582 # We had to wait until now for reasons explained in sub 'write_line'.
14583 if ( $level < 0 ) { $level = 0 }
14585 # Safety check that length is defined. This is slow and should not be
14586 # needed now, so just do it in DEVEL_MODE to check programming changes.
14587 # Formerly needed for --indent-only, in which the entire set of tokens
14588 # is normally turned into type 'q'. Lengths are now defined in sub
14589 # 'respace_tokens' so this check is no longer needed.
14590 if ( DEVEL_MODE && !defined($length) ) {
14591 my $lno = $rLL->[$Ktoken_vars]->[_LINE_INDEX_] + 1;
14592 $length = length($token);
14594 undefined length near line $lno; num chars=$length, token='$token'
14598 #----------------------------
14599 # add this token to the batch
14600 #----------------------------
14601 $K_to_go[$max_index_to_go] = $Ktoken_vars;
14602 $types_to_go[$max_index_to_go] = $type;
14603 $tokens_to_go[$max_index_to_go] = $token;
14604 $ci_levels_to_go[$max_index_to_go] = $ci_level;
14605 $levels_to_go[$max_index_to_go] = $level;
14606 $nobreak_to_go[$max_index_to_go] = $no_internal_newlines;
14607 $token_lengths_to_go[$max_index_to_go] = $length;
14609 # Skip point initialization for these sparse arrays - undef's okay;
14610 # See also related code in sub initialize_batch_variables.
14611 ## $old_breakpoint_to_go[$max_index_to_go] = 0;
14612 ## $forced_breakpoint_to_go[$max_index_to_go] = 0;
14613 ## $block_type_to_go[$max_index_to_go] = EMPTY_STRING;
14614 ## $type_sequence_to_go[$max_index_to_go] = $seqno;
14616 # NOTE1: nobreak_to_go can be treated as a sparse array, but testing
14617 # showed that there is almost no efficiency gain because an if test
14618 # would need to be added.
14620 # NOTE2: Eventually '$type_sequence_to_go' can be also handled as a
14621 # sparse array with undef's, but this will require extensive testing
14622 # because of its heavy use.
14624 # We keep a running sum of token lengths from the start of this batch:
14625 # summed_lengths_to_go[$i] = total length to just before token $i
14626 # summed_lengths_to_go[$i+1] = total length to just after token $i
14627 $summed_lengths_to_go[ $max_index_to_go + 1 ] =
14628 $summed_lengths_to_go[$max_index_to_go] + $length;
14630 # Initialize some sequence-dependent variables to their normal values
14631 $parent_seqno_to_go[$max_index_to_go] = $next_parent_seqno;
14632 $nesting_depth_to_go[$max_index_to_go] = $next_slevel;
14634 # Then fix them at container tokens:
14637 $type_sequence_to_go[$max_index_to_go] = $seqno;
14639 $block_type_to_go[$max_index_to_go] =
14640 $rblock_type_of_seqno->{$seqno};
14642 if ( $is_opening_token{$token} ) {
14644 my $slevel = $rdepth_of_opening_seqno->[$seqno];
14645 $nesting_depth_to_go[$max_index_to_go] = $slevel;
14646 $next_slevel = $slevel + 1;
14648 $next_parent_seqno = $seqno;
14651 elsif ( $is_closing_token{$token} ) {
14653 $next_slevel = $rdepth_of_opening_seqno->[$seqno];
14654 my $slevel = $next_slevel + 1;
14655 $nesting_depth_to_go[$max_index_to_go] = $slevel;
14657 my $parent_seqno = $rparent_of_seqno->{$seqno};
14658 $parent_seqno = SEQ_ROOT unless defined($parent_seqno);
14659 $parent_seqno_to_go[$max_index_to_go] = $parent_seqno;
14660 $next_parent_seqno = $parent_seqno;
14664 # ternary token: nothing to do
14668 # Define the indentation that this token will have in two cases:
14669 # Without CI = reduced_spaces_to_go
14670 # With CI = leading_spaces_to_go
14671 if ( ( $Ktoken_vars == $K_first )
14672 && $line_of_tokens->{_starting_in_quote} )
14674 # in a continued quote - correct value set above if first token
14675 if ( $max_index_to_go == 0 ) { $starting_in_quote = 1 }
14677 $leading_spaces_to_go[$max_index_to_go] = 0;
14678 $reduced_spaces_to_go[$max_index_to_go] = 0;
14681 $leading_spaces_to_go[$max_index_to_go] =
14682 $reduced_spaces_to_go[$max_index_to_go] =
14683 $rOpts_indent_columns * $radjusted_levels->[$Ktoken_vars];
14685 $leading_spaces_to_go[$max_index_to_go] +=
14686 $rOpts_continuation_indentation * $ci_level
14690 DEBUG_STORE && do {
14691 my ( $a, $b, $c ) = caller();
14693 "STORE: from $a $c: storing token $token type $type lev=$level at $max_index_to_go\n";
14696 } ## end sub store_token_to_go
14698 sub flush_batch_of_CODE {
14700 # Finish and process the current batch.
14701 # This must be the only call to grind_batch_of_CODE()
14704 # If a batch has been started ...
14705 if ( $max_index_to_go >= 0 ) {
14707 # Create an array to hold variables for this batch
14708 my $this_batch = [];
14710 $this_batch->[_starting_in_quote_] = 1 if ($starting_in_quote);
14711 $this_batch->[_ending_in_quote_] = 1 if ($ending_in_quote);
14713 if ( $CODE_type || $last_CODE_type ) {
14714 $this_batch->[_batch_CODE_type_] =
14715 $K_to_go[$max_index_to_go] >= $K_first
14720 $last_line_had_side_comment =
14721 ( $max_index_to_go > 0 && $types_to_go[$max_index_to_go] eq '#' );
14723 # The flag $is_static_block_comment applies to the line which just
14724 # arrived. So it only applies if we are outputting that line.
14725 if ( $is_static_block_comment && !$last_line_had_side_comment ) {
14726 $this_batch->[_is_static_block_comment_] =
14727 $K_to_go[0] == $K_first;
14730 $this_batch->[_ri_starting_one_line_block_] =
14731 $ri_starting_one_line_block;
14733 $self->[_this_batch_] = $this_batch;
14735 #-------------------
14736 # process this batch
14737 #-------------------
14738 $self->grind_batch_of_CODE();
14740 # Done .. this batch is history
14741 $self->[_this_batch_] = undef;
14743 initialize_batch_variables();
14747 } ## end sub flush_batch_of_CODE
14751 # End the current batch, EXCEPT for a few special cases
14754 if ( $max_index_to_go < 0 ) {
14756 # nothing to do .. this is harmless but wastes time.
14758 Fault("sub end_batch called with nothing to do; please fix\n");
14763 # Exceptions when a line does not end with a comment... (fixes c058)
14764 if ( $types_to_go[$max_index_to_go] ne '#' ) {
14766 # Exception 1: Do not end line in a weld
14768 if ( $total_weld_count
14769 && $self->[_rK_weld_right_]->{ $K_to_go[$max_index_to_go] } );
14771 # Exception 2: just set a tentative breakpoint if we might be in a
14773 if ( defined($index_start_one_line_block) ) {
14774 $self->set_forced_breakpoint($max_index_to_go);
14779 $self->flush_batch_of_CODE();
14781 } ## end sub end_batch
14783 sub flush_vertical_aligner {
14785 my $vao = $self->[_vertical_aligner_object_];
14788 } ## end sub flush_vertical_aligner
14790 # flush is called to output any tokens in the pipeline, so that
14791 # an alternate source of lines can be written in the correct order
14793 my ( $self, $CODE_type_flush ) = @_;
14795 # end the current batch with 1 exception
14797 $index_start_one_line_block = undef;
14799 # Exception: if we are flushing within the code stream only to insert
14800 # blank line(s), then we can keep the batch intact at a weld. This
14801 # improves formatting of -ce. See test 'ce1.ce'
14802 if ( $CODE_type_flush && $CODE_type_flush eq 'BL' ) {
14803 $self->end_batch() if ( $max_index_to_go >= 0 );
14806 # otherwise, we have to shut things down completely.
14807 else { $self->flush_batch_of_CODE() }
14809 $self->flush_vertical_aligner();
14813 my %is_assignment_or_fat_comma;
14816 %is_assignment_or_fat_comma = %is_assignment;
14817 $is_assignment_or_fat_comma{'=>'} = 1;
14820 sub process_line_of_CODE {
14822 my ( $self, $my_line_of_tokens ) = @_;
14824 #----------------------------------------------------------------
14825 # This routine is called once per INPUT line to format all of the
14826 # tokens on that line.
14827 #----------------------------------------------------------------
14829 # It outputs full-line comments and blank lines immediately.
14831 # For lines of code:
14832 # - Tokens are copied one-by-one from the global token
14833 # array $rLL to a set of '_to_go' arrays which collect batches of
14834 # tokens. This is done with calls to 'store_token_to_go'.
14835 # - A batch is closed and processed upon reaching a well defined
14836 # structural break point (i.e. code block boundary) or forced
14837 # breakpoint (i.e. side comment or special user controls).
14838 # - Subsequent stages of formatting make additional line breaks
14839 # appropriate for lists and logical structures, and as necessary to
14840 # keep line lengths below the requested maximum line length.
14842 #-----------------------------------
14843 # begin initialize closure variables
14844 #-----------------------------------
14845 $line_of_tokens = $my_line_of_tokens;
14846 my $rK_range = $line_of_tokens->{_rK_range};
14847 if ( !defined( $rK_range->[0] ) ) {
14849 # Empty line: This can happen if tokens are deleted, for example
14850 # with the -mangle parameter
14854 ( $K_first, $K_last ) = @{$rK_range};
14855 $last_CODE_type = $CODE_type;
14856 $CODE_type = $line_of_tokens->{_code_type};
14858 $rLL = $self->[_rLL_];
14859 $radjusted_levels = $self->[_radjusted_levels_];
14860 $rparent_of_seqno = $self->[_rparent_of_seqno_];
14861 $rdepth_of_opening_seqno = $self->[_rdepth_of_opening_seqno_];
14862 $rblock_type_of_seqno = $self->[_rblock_type_of_seqno_];
14864 #---------------------------------
14865 # end initialize closure variables
14866 #---------------------------------
14868 # This flag will become nobreak_to_go and should be set to 2 to prevent
14869 # a line break AFTER the current token.
14870 $no_internal_newlines = 0;
14871 if ( !$rOpts_add_newlines || $CODE_type eq 'NIN' ) {
14872 $no_internal_newlines = 2;
14875 my $input_line = $line_of_tokens->{_line_text};
14877 my ( $is_block_comment, $has_side_comment );
14878 if ( $rLL->[$K_last]->[_TYPE_] eq '#' ) {
14879 if ( $K_last == $K_first ) { $is_block_comment = 1 }
14880 else { $has_side_comment = 1 }
14883 my $is_static_block_comment_without_leading_space =
14884 $CODE_type eq 'SBCX';
14885 $is_static_block_comment =
14886 $CODE_type eq 'SBC' || $is_static_block_comment_without_leading_space;
14888 # check for a $VERSION statement
14889 if ( $CODE_type eq 'VER' ) {
14890 $self->[_saw_VERSION_in_this_file_] = 1;
14891 $no_internal_newlines = 2;
14894 # Add interline blank if any
14895 my $last_old_nonblank_type = "b";
14896 my $first_new_nonblank_token = EMPTY_STRING;
14897 my $K_first_true = $K_first;
14898 if ( $max_index_to_go >= 0 ) {
14899 $last_old_nonblank_type = $types_to_go[$max_index_to_go];
14900 $first_new_nonblank_token = $rLL->[$K_first]->[_TOKEN_];
14901 if ( !$is_block_comment
14902 && $types_to_go[$max_index_to_go] ne 'b'
14904 && $rLL->[ $K_first - 1 ]->[_TYPE_] eq 'b' )
14910 my $rtok_first = $rLL->[$K_first];
14912 my $in_quote = $line_of_tokens->{_ending_in_quote};
14913 $ending_in_quote = $in_quote;
14915 #------------------------------------
14916 # Handle a block (full-line) comment.
14917 #------------------------------------
14918 if ($is_block_comment) {
14920 if ( $rOpts->{'delete-block-comments'} ) {
14925 $index_start_one_line_block = undef;
14926 $self->end_batch() if ( $max_index_to_go >= 0 );
14928 # output a blank line before block comments
14930 # unless we follow a blank or comment line
14931 $self->[_last_line_leading_type_] ne '#'
14932 && $self->[_last_line_leading_type_] ne 'b'
14935 && $rOpts->{'blanks-before-comments'}
14937 # if this is NOT an empty comment, unless it follows a side
14938 # comment and could become a hanging side comment.
14940 $rtok_first->[_TOKEN_] ne '#'
14941 || ( $last_line_had_side_comment
14942 && $rLL->[$K_first]->[_LEVEL_] > 0 )
14945 # not after a short line ending in an opening token
14946 # because we already have space above this comment.
14947 # Note that the first comment in this if block, after
14948 # the 'if (', does not get a blank line because of this.
14949 && !$self->[_last_output_short_opening_token_]
14951 # never before static block comments
14952 && !$is_static_block_comment
14955 $self->flush(); # switching to new output stream
14956 my $file_writer_object = $self->[_file_writer_object_];
14957 $file_writer_object->write_blank_code_line();
14958 $self->[_last_line_leading_type_] = 'b';
14962 $rOpts->{'indent-block-comments'}
14963 && ( !$rOpts->{'indent-spaced-block-comments'}
14964 || $input_line =~ /^\s+/ )
14965 && !$is_static_block_comment_without_leading_space
14968 my $Ktoken_vars = $K_first;
14969 my $rtoken_vars = $rLL->[$Ktoken_vars];
14970 $self->store_token_to_go( $Ktoken_vars, $rtoken_vars );
14971 $self->end_batch();
14975 # switching to new output stream
14978 # Note that last arg in call here is 'undef' for comments
14979 my $file_writer_object = $self->[_file_writer_object_];
14980 $file_writer_object->write_code_line(
14981 $rtok_first->[_TOKEN_] . "\n", undef );
14982 $self->[_last_line_leading_type_] = '#';
14987 #--------------------------------------------
14988 # Compare input/output indentation in logfile
14989 #--------------------------------------------
14990 if ( $self->[_save_logfile_] ) {
14992 # Compare input/output indentation except for:
14993 # - hanging side comments
14994 # - continuation lines (have unknown leading blank space)
14995 # - and lines which are quotes (they may have been outdented)
14996 my $guessed_indentation_level =
14997 $line_of_tokens->{_guessed_indentation_level};
14999 unless ( $CODE_type eq 'HSC'
15000 || $rtok_first->[_CI_LEVEL_] > 0
15001 || $guessed_indentation_level == 0
15002 && $rtok_first->[_TYPE_] eq 'Q' )
15004 my $input_line_number = $line_of_tokens->{_line_number};
15005 $self->compare_indentation_levels( $K_first,
15006 $guessed_indentation_level, $input_line_number );
15010 #-----------------------------------------
15011 # Handle a line marked as indentation-only
15012 #-----------------------------------------
15014 if ( $CODE_type eq 'IO' ) {
15016 my $line = $input_line;
15018 # Fix for rt #125506 Unexpected string formating
15019 # in which leading space of a terminal quote was removed
15021 $line =~ s/^\s+// unless ( $line_of_tokens->{_starting_in_quote} );
15023 my $Ktoken_vars = $K_first;
15025 # We work with a copy of the token variables and change the
15026 # first token to be the entire line as a quote variable
15027 my $rtoken_vars = $rLL->[$Ktoken_vars];
15028 $rtoken_vars = copy_token_as_type( $rtoken_vars, 'q', $line );
15030 # Patch: length is not really important here but must be defined
15031 $rtoken_vars->[_TOKEN_LENGTH_] = length($line);
15033 $self->store_token_to_go( $Ktoken_vars, $rtoken_vars );
15034 $self->end_batch();
15038 #---------------------------
15039 # Handle all other lines ...
15040 #---------------------------
15042 # If we just saw the end of an elsif block, write nag message
15043 # if we do not see another elseif or an else.
15044 if ($looking_for_else) {
15046 ## /^(elsif|else)$/
15047 if ( !$is_elsif_else{ $rLL->[$K_first_true]->[_TOKEN_] } ) {
15048 write_logfile_entry("(No else block)\n");
15050 $looking_for_else = 0;
15053 # This is a good place to kill incomplete one-line blocks
15054 if ( $max_index_to_go >= 0 ) {
15056 # For -iob and -lp, mark essential old breakpoints.
15057 # Fixes b1021 b1023 b1034 b1048 b1049 b1050 b1056 b1058
15058 # See related code below.
15059 if ( $rOpts_ignore_old_breakpoints && $rOpts_line_up_parentheses ) {
15060 my $type_first = $rLL->[$K_first_true]->[_TYPE_];
15061 if ( $is_assignment_or_fat_comma{$type_first} ) {
15062 $old_breakpoint_to_go[$max_index_to_go] = 1;
15068 # this check needed -mangle (for example rt125012)
15070 ( !$index_start_one_line_block )
15071 && ( $last_old_nonblank_type eq ';' )
15072 && ( $first_new_nonblank_token ne '}' )
15075 # Patch for RT #98902. Honor request to break at old commas.
15076 || ( $rOpts_break_at_old_comma_breakpoints
15077 && $last_old_nonblank_type eq ',' )
15080 $forced_breakpoint_to_go[$max_index_to_go] = 1
15081 if ($rOpts_break_at_old_comma_breakpoints);
15082 $index_start_one_line_block = undef;
15083 $self->end_batch();
15086 # Keep any requested breaks before this line. Note that we have to
15087 # use the original K_first because it may have been reduced above
15088 # to add a blank. The value of the flag is as follows:
15089 # 1 => hard break, flush the batch
15090 # 2 => soft break, set breakpoint and continue building the batch
15091 # added check on max_index_to_go for c177
15092 if ( $max_index_to_go >= 0
15093 && $self->[_rbreak_before_Kfirst_]->{$K_first_true} )
15095 $index_start_one_line_block = undef;
15096 if ( $self->[_rbreak_before_Kfirst_]->{$K_first_true} == 2 ) {
15097 $self->set_forced_breakpoint($max_index_to_go);
15100 $self->end_batch();
15105 #--------------------------------------
15106 # loop to process the tokens one-by-one
15107 #--------------------------------------
15108 $self->process_line_inner_loop($has_side_comment);
15110 # if there is anything left in the output buffer ...
15111 if ( $max_index_to_go >= 0 ) {
15113 my $type = $rLL->[$K_last]->[_TYPE_];
15114 my $break_flag = $self->[_rbreak_after_Klast_]->{$K_last};
15116 # we have to flush ..
15119 # if there is a side comment...
15122 # if this line ends in a quote
15123 # NOTE: This is critically important for insuring that quoted
15124 # lines do not get processed by things like -sot and -sct
15127 # if this is a VERSION statement
15128 || $CODE_type eq 'VER'
15130 # to keep a label at the end of a line
15131 || ( $type eq 'J' && $rOpts_break_after_labels != 2 )
15133 # if we have a hard break request
15134 || $break_flag && $break_flag != 2
15136 # if we are instructed to keep all old line breaks
15137 || !$rOpts->{'delete-old-newlines'}
15139 # if this is a line of the form 'use overload'. A break here in
15140 # the input file is a good break because it will allow the
15141 # operators which follow to be formatted well. Without this
15142 # break the formatting with -ci=4 -xci is poor, for example.
15146 # print length $_[2], "\n";
15147 # my ( $x, $y ) = _order(@_);
15148 # Number::Roman->new( int $x + $y );
15151 # my ( $x, $y ) = _order(@_);
15152 # Number::Roman->new( int $x - $y );
15154 || ( $max_index_to_go == 2
15155 && $types_to_go[0] eq 'k'
15156 && $tokens_to_go[0] eq 'use'
15157 && $tokens_to_go[$max_index_to_go] eq 'overload' )
15160 $index_start_one_line_block = undef;
15161 $self->end_batch();
15166 # Check for a soft break request
15167 if ( $break_flag && $break_flag == 2 ) {
15168 $self->set_forced_breakpoint($max_index_to_go);
15171 # mark old line breakpoints in current output stream
15173 !$rOpts_ignore_old_breakpoints
15175 # Mark essential old breakpoints if combination -iob -lp is
15176 # used. These two options do not work well together, but
15177 # we can avoid turning -iob off by ignoring -iob at certain
15178 # essential line breaks. See also related code above.
15179 # Fixes b1021 b1023 b1034 b1048 b1049 b1050 b1056 b1058
15180 || ( $rOpts_line_up_parentheses
15181 && $is_assignment_or_fat_comma{$type} )
15184 $old_breakpoint_to_go[$max_index_to_go] = 1;
15190 } ## end sub process_line_of_CODE
15192 sub process_line_inner_loop {
15194 my ( $self, $has_side_comment ) = @_;
15196 #--------------------------------------------------------------------
15197 # Loop to move all tokens from one input line to a newly forming batch
15198 #--------------------------------------------------------------------
15200 # Do not start a new batch with a blank space
15201 if ( $max_index_to_go < 0 && $rLL->[$K_first]->[_TYPE_] eq 'b' ) {
15205 foreach my $Ktoken_vars ( $K_first .. $K_last ) {
15207 my $rtoken_vars = $rLL->[$Ktoken_vars];
15212 if ( $rtoken_vars->[_TYPE_] eq 'b' ) {
15213 $self->store_token_to_go( $Ktoken_vars, $rtoken_vars );
15217 #------------------
15218 # handle non-blanks
15219 #------------------
15220 my $type = $rtoken_vars->[_TYPE_];
15222 # If we are continuing after seeing a right curly brace, flush
15223 # buffer unless we see what we are looking for, as in
15225 if ($rbrace_follower) {
15226 my $token = $rtoken_vars->[_TOKEN_];
15227 unless ( $rbrace_follower->{$token} ) {
15228 $self->end_batch() if ( $max_index_to_go >= 0 );
15230 $rbrace_follower = undef;
15234 $block_type, $type_sequence,
15235 $is_opening_BLOCK, $is_closing_BLOCK,
15236 $nobreak_BEFORE_BLOCK
15239 if ( $rtoken_vars->[_TYPE_SEQUENCE_] ) {
15241 my $token = $rtoken_vars->[_TOKEN_];
15242 $type_sequence = $rtoken_vars->[_TYPE_SEQUENCE_];
15243 $block_type = $rblock_type_of_seqno->{$type_sequence};
15247 && $block_type ne 't'
15248 && !$self->[_rshort_nested_]->{$type_sequence} )
15251 if ( $type eq '{' ) {
15252 $is_opening_BLOCK = 1;
15253 $nobreak_BEFORE_BLOCK = $no_internal_newlines;
15255 elsif ( $type eq '}' ) {
15256 $is_closing_BLOCK = 1;
15257 $nobreak_BEFORE_BLOCK = $no_internal_newlines;
15262 #---------------------
15263 # handle side comments
15264 #---------------------
15265 if ($has_side_comment) {
15267 # if at last token ...
15268 if ( $Ktoken_vars == $K_last ) {
15269 $self->store_token_to_go( $Ktoken_vars, $rtoken_vars );
15273 # if before last token ... do not allow breaks which would
15274 # promote a side comment to a block comment
15275 elsif ($Ktoken_vars == $K_last - 1
15276 || $Ktoken_vars == $K_last - 2
15277 && $rLL->[ $K_last - 1 ]->[_TYPE_] eq 'b' )
15279 $no_internal_newlines = 2;
15283 # Process non-blank and non-comment tokens ...
15288 if ( $type eq ';' ) {
15290 my $next_nonblank_token_type = 'b';
15291 my $next_nonblank_token = EMPTY_STRING;
15292 if ( $Ktoken_vars < $K_last ) {
15293 my $Knnb = $Ktoken_vars + 1;
15294 $Knnb++ if ( $rLL->[$Knnb]->[_TYPE_] eq 'b' );
15295 $next_nonblank_token = $rLL->[$Knnb]->[_TOKEN_];
15296 $next_nonblank_token_type = $rLL->[$Knnb]->[_TYPE_];
15299 if ( $rOpts_break_at_old_semicolon_breakpoints
15300 && ( $Ktoken_vars == $K_first )
15301 && $max_index_to_go >= 0
15302 && !defined($index_start_one_line_block) )
15304 $self->end_batch();
15307 $self->store_token_to_go( $Ktoken_vars, $rtoken_vars );
15311 $no_internal_newlines
15312 || ( $rOpts_keep_interior_semicolons
15313 && $Ktoken_vars < $K_last )
15314 || ( $next_nonblank_token eq '}' )
15321 elsif ($is_opening_BLOCK) {
15323 # Tentatively output this token. This is required before
15324 # calling starting_one_line_block. We may have to unstore
15325 # it, though, if we have to break before it.
15326 $self->store_token_to_go( $Ktoken_vars, $rtoken_vars );
15328 # Look ahead to see if we might form a one-line block..
15330 $self->starting_one_line_block( $Ktoken_vars,
15331 $K_last_nonblank_code, $K_last );
15332 $self->clear_breakpoint_undo_stack();
15334 # to simplify the logic below, set a flag to indicate if
15335 # this opening brace is far from the keyword which introduces it
15336 my $keyword_on_same_line = 1;
15338 $max_index_to_go >= 0
15339 && defined($K_last_nonblank_code)
15340 && $rLL->[$K_last_nonblank_code]->[_TYPE_] eq ')'
15341 && ( ( $rtoken_vars->[_LEVEL_] < $levels_to_go[0] )
15345 $keyword_on_same_line = 0;
15348 # Break before '{' if requested with -bl or -bli flag
15349 my $want_break = $self->[_rbrace_left_]->{$type_sequence};
15351 # But do not break if this token is welded to the left
15352 if ( $total_weld_count
15353 && defined( $self->[_rK_weld_left_]->{$Ktoken_vars} ) )
15358 # Break BEFORE an opening '{' ...
15364 # and we were unable to start looking for a block,
15365 && !defined($index_start_one_line_block)
15367 # or if it will not be on same line as its keyword, so that
15368 # it will be outdented (eval.t, overload.t), and the user
15369 # has not insisted on keeping it on the right
15370 || ( !$keyword_on_same_line
15371 && !$rOpts_opening_brace_always_on_right )
15375 # but only if allowed
15376 unless ($nobreak_BEFORE_BLOCK) {
15378 # since we already stored this token, we must unstore it
15379 $self->unstore_token_to_go();
15381 # then output the line
15382 $self->end_batch() if ( $max_index_to_go >= 0 );
15384 # and now store this token at the start of a new line
15385 $self->store_token_to_go( $Ktoken_vars, $rtoken_vars );
15389 # now output this line
15391 if ( $max_index_to_go >= 0 && !$no_internal_newlines );
15397 elsif ($is_closing_BLOCK) {
15399 my $next_nonblank_token_type = 'b';
15400 my $next_nonblank_token = EMPTY_STRING;
15402 if ( $Ktoken_vars < $K_last ) {
15403 $Knnb = $Ktoken_vars + 1;
15404 $Knnb++ if ( $rLL->[$Knnb]->[_TYPE_] eq 'b' );
15405 $next_nonblank_token = $rLL->[$Knnb]->[_TOKEN_];
15406 $next_nonblank_token_type = $rLL->[$Knnb]->[_TYPE_];
15409 # If there is a pending one-line block ..
15410 if ( defined($index_start_one_line_block) ) {
15412 # Fix for b1208: if a side comment follows this closing
15413 # brace then we must include its length in the length test
15414 # ... unless the -issl flag is set (fixes b1307-1309).
15415 # Assume a minimum of 1 blank space to the comment.
15416 my $added_length = 0;
15417 if ( $has_side_comment
15418 && !$rOpts_ignore_side_comment_lengths
15419 && $next_nonblank_token_type eq '#' )
15421 $added_length = 1 + $rLL->[$K_last]->[_TOKEN_LENGTH_];
15424 # we have to terminate it if..
15427 # it is too long (final length may be different from
15428 # initial estimate). note: must allow 1 space for this
15430 $self->excess_line_length( $index_start_one_line_block,
15431 $max_index_to_go ) + $added_length >= 0
15434 $index_start_one_line_block = undef;
15438 # put a break before this closing curly brace if appropriate
15440 if ( $max_index_to_go >= 0
15441 && !$nobreak_BEFORE_BLOCK
15442 && !defined($index_start_one_line_block) );
15444 # store the closing curly brace
15445 $self->store_token_to_go( $Ktoken_vars, $rtoken_vars );
15447 # ok, we just stored a closing curly brace. Often, but
15448 # not always, we want to end the line immediately.
15449 # So now we have to check for special cases.
15451 # if this '}' successfully ends a one-line block..
15452 my $one_line_block_type = EMPTY_STRING;
15454 if ( defined($index_start_one_line_block) ) {
15456 # Remember the type of token just before the
15457 # opening brace. It would be more general to use
15458 # a stack, but this will work for one-line blocks.
15459 $one_line_block_type =
15460 $types_to_go[$index_start_one_line_block];
15462 # we have to actually make it by removing tentative
15463 # breaks that were set within it
15464 $self->undo_forced_breakpoint_stack(0);
15466 # For -lp, extend the nobreak to include a trailing
15467 # terminal ','. This is because the -lp indentation was
15468 # not known when making one-line blocks, so we may be able
15469 # to move the line back to fit. Otherwise we may create a
15470 # needlessly stranded comma on the next line.
15471 my $iend_nobreak = $max_index_to_go - 1;
15472 if ( $rOpts_line_up_parentheses
15473 && $next_nonblank_token_type eq ','
15474 && $Knnb eq $K_last )
15476 my $p_seqno = $parent_seqno_to_go[$max_index_to_go];
15478 $self->[_ris_excluded_lp_container_]->{$p_seqno};
15479 $iend_nobreak = $max_index_to_go if ( !$is_excluded );
15482 $self->set_nobreaks( $index_start_one_line_block,
15485 # save starting block indexes so that sub correct_lp can
15486 # check and adjust -lp indentation (c098)
15487 push @{$ri_starting_one_line_block},
15488 $index_start_one_line_block;
15490 # then re-initialize for the next one-line block
15491 $index_start_one_line_block = undef;
15493 # then decide if we want to break after the '}' ..
15494 # We will keep going to allow certain brace followers as in:
15495 # do { $ifclosed = 1; last } unless $losing;
15497 # But make a line break if the curly ends a
15498 # significant block:
15501 $is_block_without_semicolon{$block_type}
15503 # Follow users break point for
15504 # one line block types U & G, such as a 'try' block
15505 || $one_line_block_type =~ /^[UG]$/
15506 && $Ktoken_vars == $K_last
15509 # if needless semicolon follows we handle it later
15510 && $next_nonblank_token ne ';'
15514 unless ($no_internal_newlines);
15518 # set string indicating what we need to look for brace follower
15520 if ( $is_if_unless_elsif_else{$block_type} ) {
15521 $rbrace_follower = undef;
15523 elsif ( $block_type eq 'do' ) {
15524 $rbrace_follower = \%is_do_follower;
15526 $self->tight_paren_follows( $K_to_go[0], $Ktoken_vars )
15529 $rbrace_follower = { ')' => 1 };
15533 # added eval for borris.t
15534 elsif ($is_sort_map_grep_eval{$block_type}
15535 || $one_line_block_type eq 'G' )
15537 $rbrace_follower = undef;
15542 elsif ( $self->[_ris_asub_block_]->{$type_sequence} ) {
15543 if ($one_line_block_type) {
15545 $rbrace_follower = \%is_anon_sub_1_brace_follower;
15547 # Exceptions to help keep -lp intact, see git #74 ...
15548 # Exception 1: followed by '}' on this line
15549 if ( $Ktoken_vars < $K_last
15550 && $next_nonblank_token eq '}' )
15552 $rbrace_follower = undef;
15556 # Exception 2: followed by '}' on next line if -lp set.
15557 # The -lp requirement allows the formatting to follow
15558 # old breaks when -lp is not used, minimizing changes.
15559 # Fixes issue c087.
15560 elsif ($Ktoken_vars == $K_last
15561 && $rOpts_line_up_parentheses )
15563 my $K_closing_container =
15564 $self->[_K_closing_container_];
15565 my $p_seqno = $parent_seqno_to_go[$max_index_to_go];
15566 my $Kc = $K_closing_container->{$p_seqno};
15568 $self->[_ris_excluded_lp_container_]->{$p_seqno};
15571 && $rLL->[$Kc]->[_TOKEN_] eq '}'
15573 && $Kc - $Ktoken_vars <= 2 );
15574 $rbrace_follower = undef if ($keep_going);
15578 $rbrace_follower = \%is_anon_sub_brace_follower;
15582 # None of the above: specify what can follow a closing
15583 # brace of a block which is not an
15584 # if/elsif/else/do/sort/map/grep/eval
15586 # 'Toolbar.pm', 'Menubar.pm', bless.t, '3rules.pl', 'break1.t
15588 $rbrace_follower = \%is_other_brace_follower;
15591 # See if an elsif block is followed by another elsif or else;
15593 if ( $block_type eq 'elsif' ) {
15595 if ( $next_nonblank_token_type eq 'b' ) { # end of line?
15596 $looking_for_else = 1; # ok, check on next line
15599 ## /^(elsif|else)$/
15600 if ( !$is_elsif_else{$next_nonblank_token} ) {
15601 write_logfile_entry("No else block :(\n");
15606 # keep going after certain block types (map,sort,grep,eval)
15607 # added eval for borris.t
15611 $rbrace_follower = undef;
15615 # if no more tokens, postpone decision until re-entering
15616 elsif ( ( $next_nonblank_token_type eq 'b' )
15617 && $rOpts_add_newlines )
15619 unless ($rbrace_follower) {
15621 unless ( $no_internal_newlines
15622 || $max_index_to_go < 0 );
15625 elsif ($rbrace_follower) {
15627 if ( $rbrace_follower->{$next_nonblank_token} ) {
15629 # Fix for b1385: keep break after a comma following a
15630 # 'do' block. This could also be used for other block
15631 # types, but that would cause a significant change in
15632 # existing formatting without much benefit.
15633 if ( $next_nonblank_token eq ','
15634 && $Knnb eq $K_last
15635 && $block_type eq 'do'
15636 && $rOpts_add_newlines
15637 && $self->is_trailing_comma($Knnb) )
15639 $self->[_rbreak_after_Klast_]->{$K_last} = 1;
15644 unless ( $no_internal_newlines
15645 || $max_index_to_go < 0 );
15648 $rbrace_follower = undef;
15653 unless ( $no_internal_newlines
15654 || $max_index_to_go < 0 );
15657 } ## end treatment of closing block token
15659 #------------------------------
15660 # handle here_doc target string
15661 #------------------------------
15662 elsif ( $type eq 'h' ) {
15664 # no newlines after seeing here-target
15665 $no_internal_newlines = 2;
15666 $self->store_token_to_go( $Ktoken_vars, $rtoken_vars );
15669 #-----------------------------
15670 # handle all other token types
15671 #-----------------------------
15674 $self->store_token_to_go( $Ktoken_vars, $rtoken_vars );
15676 # break after a label if requested
15677 if ( $rOpts_break_after_labels
15679 && $rOpts_break_after_labels == 1 )
15682 unless ($no_internal_newlines);
15686 # remember previous nonblank, non-comment OUTPUT token
15687 $K_last_nonblank_code = $Ktoken_vars;
15689 } ## end of loop over all tokens in this line
15691 } ## end sub process_line_inner_loop
15693 } ## end closure process_line_of_CODE
15695 sub is_trailing_comma {
15696 my ( $self, $KK ) = @_;
15699 # $KK - index of a comma in token list
15701 # true if the comma at index $KK is a trailing comma
15704 my $rLL = $self->[_rLL_];
15705 my $type_KK = $rLL->[$KK]->[_TYPE_];
15706 if ( $type_KK ne ',' ) {
15708 && Fault("Bad call: expected type ',' but received '$type_KK'\n");
15711 my $Knnb = $self->K_next_nonblank($KK);
15712 if ( defined($Knnb) ) {
15713 my $type_sequence = $rLL->[$Knnb]->[_TYPE_SEQUENCE_];
15714 my $type_Knnb = $rLL->[$Knnb]->[_TYPE_];
15715 if ( $type_sequence && $is_closing_type{$type_Knnb} ) {
15720 } ## end sub is_trailing_comma
15722 sub tight_paren_follows {
15724 my ( $self, $K_to_go_0, $K_ic ) = @_;
15726 # Input parameters:
15727 # $K_to_go_0 = first token index K of this output batch (=K_to_go[0])
15728 # $K_ic = index of the closing do brace (=K_to_go[$max_index_to_go])
15729 # Return parameter:
15730 # false if we want a break after the closing do brace
15731 # true if we do not want a break after the closing do brace
15733 # We are at the closing brace of a 'do' block. See if this brace is
15734 # followed by a closing paren, and if so, set a flag which indicates
15735 # that we do not want a line break between the '}' and ')'.
15737 # xxxxx ( ...... do { ... } ) {
15738 # ^-------looking at this brace, K_ic
15740 # Subscript notation:
15741 # _i = inner container (braces in this case)
15742 # _o = outer container (parens in this case)
15743 # _io = inner opening = '{'
15744 # _ic = inner closing = '}'
15745 # _oo = outer opening = '('
15746 # _oc = outer closing = ')'
15748 # |--K_oo |--K_oc = outer container
15749 # xxxxx ( ...... do { ...... } ) {
15750 # |--K_io |--K_ic = inner container
15752 # In general, the safe thing to do is return a 'false' value
15753 # if the statement appears to be complex. This will have
15754 # the downstream side-effect of opening up outer containers
15755 # to help make complex code readable. But for simpler
15756 # do blocks it can be preferable to keep the code compact
15757 # by returning a 'true' value.
15759 return unless defined($K_ic);
15760 my $rLL = $self->[_rLL_];
15762 # we should only be called at a closing block
15763 my $seqno_i = $rLL->[$K_ic]->[_TYPE_SEQUENCE_];
15764 return unless ($seqno_i); # shouldn't happen;
15766 # This only applies if the next nonblank is a ')'
15767 my $K_oc = $self->K_next_nonblank($K_ic);
15768 return unless defined($K_oc);
15769 my $token_next = $rLL->[$K_oc]->[_TOKEN_];
15770 return unless ( $token_next eq ')' );
15772 my $seqno_o = $rLL->[$K_oc]->[_TYPE_SEQUENCE_];
15773 my $K_io = $self->[_K_opening_container_]->{$seqno_i};
15774 my $K_oo = $self->[_K_opening_container_]->{$seqno_o};
15775 return unless ( defined($K_io) && defined($K_oo) );
15777 # RULE 1: Do not break before a closing signature paren
15778 # (regardless of complexity). This is a fix for issue git#22.
15779 # Looking for something like:
15780 # sub xxx ( ... do { ... } ) {
15781 # ^----- next block_type
15782 my $K_test = $self->K_next_nonblank($K_oc);
15783 if ( defined($K_test) && $rLL->[$K_test]->[_TYPE_] eq '{' ) {
15784 my $seqno_test = $rLL->[$K_test]->[_TYPE_SEQUENCE_];
15786 if ( $self->[_ris_asub_block_]->{$seqno_test}
15787 || $self->[_ris_sub_block_]->{$seqno_test} )
15794 # RULE 2: Break if the contents within braces appears to be 'complex'. We
15795 # base this decision on the number of tokens between braces.
15797 # xxxxx ( ... do { ... } ) {
15800 # Although very simple, it has the advantages of (1) being insensitive to
15801 # changes in lengths of identifier names, (2) easy to understand, implement
15802 # and test. A test case for this is 't/snippets/long_line.in'.
15804 # Example: $K_ic - $K_oo = 9 [Pass Rule 2]
15805 # if ( do { $2 !~ /&/ } ) { ... }
15807 # Example: $K_ic - $K_oo = 10 [Pass Rule 2]
15808 # for ( split /\s*={70,}\s*/, do { local $/; <DATA> }) { ... }
15810 # Example: $K_ic - $K_oo = 20 [Fail Rule 2]
15811 # test_zero_args( "do-returned list slice", do { ( 10, 11 )[ 2, 3 ]; });
15813 return if ( $K_ic - $K_io > 16 );
15815 # RULE 3: break if the code between the opening '(' and the '{' is 'complex'
15816 # As with the previous rule, we decide based on the token count
15818 # xxxxx ( ... do { ... } ) {
15821 # Example: $K_ic - $K_oo = 9 [Pass Rule 2]
15822 # $K_io - $K_oo = 4 [Pass Rule 3]
15823 # if ( do { $2 !~ /&/ } ) { ... }
15825 # Example: $K_ic - $K_oo = 10 [Pass rule 2]
15826 # $K_io - $K_oo = 9 [Pass rule 3]
15827 # for ( split /\s*={70,}\s*/, do { local $/; <DATA> }) { ... }
15829 return if ( $K_io - $K_oo > 9 );
15831 # RULE 4: Break if we have already broken this batch of output tokens
15832 return if ( $K_oo < $K_to_go_0 );
15834 # RULE 5: Break if input is not on one line
15835 # For example, we will set the flag for the following expression
15836 # written in one line:
15838 # This has: $K_ic - $K_oo = 10 [Pass rule 2]
15839 # $K_io - $K_oo = 8 [Pass rule 3]
15840 # $self->debug( 'Error: ' . do { local $/; <$err> } );
15842 # but we break after the brace if it is on multiple lines on input, since
15843 # the user may prefer it on multiple lines:
15847 # 'Error: ' . do { local $/; <$err> }
15850 if ( !$rOpts_ignore_old_breakpoints ) {
15851 my $iline_oo = $rLL->[$K_oo]->[_LINE_INDEX_];
15852 my $iline_oc = $rLL->[$K_oc]->[_LINE_INDEX_];
15853 return if ( $iline_oo != $iline_oc );
15856 # OK to keep the paren tight
15858 } ## end sub tight_paren_follows
15860 my %is_brace_semicolon_colon;
15863 my @q = qw( { } ; : );
15864 @is_brace_semicolon_colon{@q} = (1) x scalar(@q);
15867 sub starting_one_line_block {
15869 # After seeing an opening curly brace, look for the closing brace and see
15870 # if the entire block will fit on a line. This routine is not always right
15871 # so a check is made later (at the closing brace) to make sure we really
15872 # have a one-line block. We have to do this preliminary check, though,
15873 # because otherwise we would always break at a semicolon within a one-line
15874 # block if the block contains multiple statements.
15877 # $Kj = index of opening brace
15878 # $K_last_nonblank = index of previous nonblank code token
15879 # $K_last = index of last token of input line
15881 # Calls 'create_one_line_block' if one-line block might be formed.
15883 # Also returns a flag '$too_long':
15884 # true = distance from opening keyword to OPENING brace exceeds
15885 # the maximum line length.
15886 # false (simple return) => not too long
15887 # Note that this flag is for distance from the statement start to the
15888 # OPENING brace, not the closing brace.
15890 my ( $self, $Kj, $K_last_nonblank, $K_last ) = @_;
15892 my $rbreak_container = $self->[_rbreak_container_];
15893 my $rshort_nested = $self->[_rshort_nested_];
15894 my $rLL = $self->[_rLL_];
15895 my $K_opening_container = $self->[_K_opening_container_];
15896 my $rblock_type_of_seqno = $self->[_rblock_type_of_seqno_];
15898 # kill any current block - we can only go 1 deep
15899 create_one_line_block();
15903 # This routine should not have been called if there are no tokens in the
15904 # 'to_go' arrays of previously stored tokens. A previous call to
15905 # 'store_token_to_go' should have stored an opening brace. An error here
15906 # indicates that a programming change may have caused a flush operation to
15907 # clean out the previously stored tokens.
15908 if ( !defined($max_index_to_go) || $max_index_to_go < 0 ) {
15909 Fault("program bug: store_token_to_go called incorrectly\n")
15914 # Return if block should be broken
15915 my $type_sequence_j = $rLL->[$Kj]->[_TYPE_SEQUENCE_];
15916 if ( $rbreak_container->{$type_sequence_j} ) {
15920 my $ris_bli_container = $self->[_ris_bli_container_];
15921 my $is_bli = $ris_bli_container->{$type_sequence_j};
15923 my $block_type = $rblock_type_of_seqno->{$type_sequence_j};
15924 $block_type = EMPTY_STRING unless ( defined($block_type) );
15926 my $previous_nonblank_token = EMPTY_STRING;
15927 my $i_last_nonblank = -1;
15928 if ( defined($K_last_nonblank) ) {
15929 $i_last_nonblank = $K_last_nonblank - $K_to_go[0];
15930 if ( $i_last_nonblank >= 0 ) {
15931 $previous_nonblank_token = $rLL->[$K_last_nonblank]->[_TOKEN_];
15935 #---------------------------------------------------------------------
15936 # find the starting keyword for this block (such as 'if', 'else', ...)
15937 #---------------------------------------------------------------------
15939 $max_index_to_go == 0
15940 ##|| $block_type =~ /^[\{\}\;\:]$/
15941 || $is_brace_semicolon_colon{$block_type}
15942 || substr( $block_type, 0, 7 ) eq 'package'
15945 $i_start = $max_index_to_go;
15948 # the previous nonblank token should start these block types
15950 $i_last_nonblank >= 0
15951 && ( $previous_nonblank_token eq $block_type
15952 || $self->[_ris_asub_block_]->{$type_sequence_j}
15953 || $self->[_ris_sub_block_]->{$type_sequence_j}
15954 || substr( $block_type, -2, 2 ) eq '()' )
15957 $i_start = $i_last_nonblank;
15959 # For signatures and extended syntax ...
15960 # If this brace follows a parenthesized list, we should look back to
15961 # find the keyword before the opening paren because otherwise we might
15962 # form a one line block which stays intact, and cause the parenthesized
15963 # expression to break open. That looks bad.
15964 if ( $tokens_to_go[$i_start] eq ')' ) {
15966 # Find the opening paren
15967 my $K_start = $K_to_go[$i_start];
15968 return unless defined($K_start);
15969 my $seqno = $type_sequence_to_go[$i_start];
15970 return unless ($seqno);
15971 my $K_opening = $K_opening_container->{$seqno};
15972 return unless defined($K_opening);
15973 my $i_opening = $i_start + ( $K_opening - $K_start );
15975 # give up if not on this line
15976 return unless ( $i_opening >= 0 );
15977 $i_start = $i_opening;
15979 # go back one token before the opening paren
15980 if ( $i_start > 0 ) { $i_start-- }
15981 if ( $types_to_go[$i_start] eq 'b' && $i_start > 0 ) { $i_start--; }
15982 my $lev = $levels_to_go[$i_start];
15983 if ( $lev > $rLL->[$Kj]->[_LEVEL_] ) { return }
15987 elsif ( $previous_nonblank_token eq ')' ) {
15989 # For something like "if (xxx) {", the keyword "if" will be
15990 # just after the most recent break. This will be 0 unless
15991 # we have just killed a one-line block and are starting another.
15993 # Note: cannot use inext_index_to_go[] here because that array
15994 # is still being constructed.
15995 $i_start = $index_max_forced_break + 1;
15996 if ( $types_to_go[$i_start] eq 'b' ) {
16000 # Patch to avoid breaking short blocks defined with extended_syntax:
16001 # Strip off any trailing () which was added in the parser to mark
16002 # the opening keyword. For example, in the following
16003 # create( TypeFoo $e) {$bubba}
16004 # the blocktype would be marked as create()
16005 my $stripped_block_type = $block_type;
16006 if ( substr( $block_type, -2, 2 ) eq '()' ) {
16007 $stripped_block_type = substr( $block_type, 0, -2 );
16009 unless ( $tokens_to_go[$i_start] eq $stripped_block_type ) {
16014 # patch for SWITCH/CASE to retain one-line case/when blocks
16015 elsif ( $block_type eq 'case' || $block_type eq 'when' ) {
16017 # Note: cannot use inext_index_to_go[] here because that array
16018 # is still being constructed.
16019 $i_start = $index_max_forced_break + 1;
16020 if ( $types_to_go[$i_start] eq 'b' ) {
16023 unless ( $tokens_to_go[$i_start] eq $block_type ) {
16029 #-------------------------------------------
16030 # Couldn't find start - return too_long flag
16031 #-------------------------------------------
16035 my $pos = total_line_length( $i_start, $max_index_to_go ) - 1;
16037 my $maximum_line_length =
16038 $maximum_line_length_at_level[ $levels_to_go[$i_start] ];
16040 # see if distance to the opening container is too great to even start
16041 if ( $pos > $maximum_line_length ) {
16043 #------------------------------
16044 # too long to the opening token
16045 #------------------------------
16049 #-----------------------------------------------------------------------
16050 # OK so far: the statement is not to long just to the OPENING token. Now
16051 # see if everything to the closing token will fit on one line
16052 #-----------------------------------------------------------------------
16054 # This is part of an update to fix cases b562 .. b983
16055 my $K_closing = $self->[_K_closing_container_]->{$type_sequence_j};
16056 return unless ( defined($K_closing) );
16057 my $container_length = $rLL->[$K_closing]->[_CUMULATIVE_LENGTH_] -
16058 $rLL->[$Kj]->[_CUMULATIVE_LENGTH_];
16060 my $excess = $pos + 1 + $container_length - $maximum_line_length;
16062 # Add a small tolerance for welded tokens (case b901)
16063 if ( $total_weld_count && $self->is_welded_at_seqno($type_sequence_j) ) {
16067 if ( $excess > 0 ) {
16069 # line is too long... there is no chance of forming a one line block
16070 # if the excess is more than 1 char
16071 return if ( $excess > 1 );
16073 # ... and give up if it is not a one-line block on input.
16074 # note: for a one-line block on input, it may be possible to keep
16075 # it as a one-line block (by removing a needless semicolon ).
16076 my $K_start = $K_to_go[$i_start];
16078 $rLL->[$K_closing]->[_LINE_INDEX_] - $rLL->[$K_start]->[_LINE_INDEX_];
16079 return if ($ldiff);
16082 #------------------------------------------------------------------
16083 # Loop to check contents and length of the potential one-line block
16084 #------------------------------------------------------------------
16085 foreach my $Ki ( $Kj + 1 .. $K_last ) {
16087 # old whitespace could be arbitrarily large, so don't use it
16088 if ( $rLL->[$Ki]->[_TYPE_] eq 'b' ) { $pos += 1 }
16089 else { $pos += $rLL->[$Ki]->[_TOKEN_LENGTH_] }
16091 # ignore some small blocks
16092 my $type_sequence_i = $rLL->[$Ki]->[_TYPE_SEQUENCE_];
16093 my $nobreak = $rshort_nested->{$type_sequence_i};
16095 # Return false result if we exceed the maximum line length,
16096 if ( $pos > $maximum_line_length ) {
16100 # keep going for non-containers
16101 elsif ( !$type_sequence_i ) {
16105 # return if we encounter another opening brace before finding the
16107 elsif ($rLL->[$Ki]->[_TOKEN_] eq '{'
16108 && $rLL->[$Ki]->[_TYPE_] eq '{'
16109 && $rblock_type_of_seqno->{$type_sequence_i}
16115 # if we find our closing brace..
16116 elsif ($rLL->[$Ki]->[_TOKEN_] eq '}'
16117 && $rLL->[$Ki]->[_TYPE_] eq '}'
16118 && $rblock_type_of_seqno->{$type_sequence_i}
16122 # be sure any trailing comment also fits on the line
16123 my $Ki_nonblank = $Ki;
16124 if ( $Ki_nonblank < $K_last ) {
16126 if ( $rLL->[$Ki_nonblank]->[_TYPE_] eq 'b'
16127 && $Ki_nonblank < $K_last )
16133 # Patch for one-line sort/map/grep/eval blocks with side comments:
16134 # We will ignore the side comment length for sort/map/grep/eval
16135 # because this can lead to statements which change every time
16136 # perltidy is run. Here is an example from Denis Moskowitz which
16137 # oscillates between these two states without this patch:
16140 ## grep { $_->foo ne 'bar' } # asdfa asdf asdf asdf asdf asdf asdf asdf asdf asdf asdf
16144 ## $_->foo ne 'bar'
16145 ## } # asdfa asdf asdf asdf asdf asdf asdf asdf asdf asdf asdf
16149 # When the first line is input it gets broken apart by the main
16150 # line break logic in sub process_line_of_CODE.
16151 # When the second line is input it gets recombined by
16152 # process_line_of_CODE and passed to the output routines. The
16153 # output routines (break_long_lines) do not break it apart
16154 # because the bond strengths are set to the highest possible value
16155 # for grep/map/eval/sort blocks, so the first version gets output.
16156 # It would be possible to fix this by changing bond strengths,
16157 # but they are high to prevent errors in older versions of perl.
16158 # See c100 for eval test.
16160 && $rLL->[$K_last]->[_TYPE_] eq '#'
16161 && $rLL->[$K_last]->[_LEVEL_] == $rLL->[$Ki]->[_LEVEL_]
16162 && !$rOpts_ignore_side_comment_lengths
16163 && !$is_sort_map_grep_eval{$block_type}
16164 && $K_last - $Ki_nonblank <= 2 )
16166 # Only include the side comment for if/else/elsif/unless if it
16167 # immediately follows (because the current '$rbrace_follower'
16168 # logic for these will give an immediate brake after these
16169 # closing braces). So for example a line like this
16170 # if (...) { ... } ; # very long comment......
16171 # will already break like this:
16173 # ; # very long comment......
16174 # so we do not need to include the length of the comment, which
16175 # would break the block. Project 'bioperl' has coding like this.
16176 ## !~ /^(if|else|elsif|unless)$/
16177 if ( !$is_if_unless_elsif_else{$block_type}
16178 || $K_last == $Ki_nonblank )
16180 $Ki_nonblank = $K_last;
16181 $pos += $rLL->[$Ki_nonblank]->[_TOKEN_LENGTH_];
16183 if ( $Ki_nonblank > $Ki + 1 ) {
16185 # source whitespace could be anything, assume
16186 # at least one space before the hash on output
16187 if ( $rLL->[ $Ki + 1 ]->[_TYPE_] eq 'b' ) {
16190 else { $pos += $rLL->[ $Ki + 1 ]->[_TOKEN_LENGTH_] }
16193 if ( $pos >= $maximum_line_length ) {
16199 #--------------------------
16200 # ok, it's a one-line block
16201 #--------------------------
16202 create_one_line_block($i_start);
16206 # just keep going for other characters
16211 #--------------------------------------------------
16212 # End Loop to examine tokens in potential one-block
16213 #--------------------------------------------------
16215 # We haven't hit the closing brace, but there is still space. So the
16216 # question here is, should we keep going to look at more lines in hopes of
16217 # forming a new one-line block, or should we stop right now. The problem
16218 # with continuing is that we will not be able to honor breaks before the
16219 # opening brace if we continue.
16221 # Typically we will want to keep trying to make one-line blocks for things
16222 # like sort/map/grep/eval. But it is not always a good idea to make as
16223 # many one-line blocks as possible, so other types are not done. The user
16224 # can always use -mangle.
16226 # If we want to keep going, we will create a new one-line block.
16227 # The blocks which we can keep going are in a hash, but we never want
16228 # to continue if we are at a '-bli' block.
16229 if ( $want_one_line_block{$block_type} && !$is_bli ) {
16230 my $rtype_count = $self->[_rtype_count_by_seqno_]->{$type_sequence_j};
16231 my $semicolon_count = $rtype_count
16232 && $rtype_count->{';'} ? $rtype_count->{';'} : 0;
16234 # Ignore a terminal semicolon in the count
16235 if ( $semicolon_count <= 2 ) {
16236 my $K_closing_container = $self->[_K_closing_container_];
16237 my $K_closing_j = $K_closing_container->{$type_sequence_j};
16238 my $Kp = $self->K_previous_nonblank($K_closing_j);
16240 && $rLL->[$Kp]->[_TYPE_] eq ';' )
16242 $semicolon_count -= 1;
16245 if ( $semicolon_count <= 0 ) {
16246 create_one_line_block($i_start);
16248 elsif ( $semicolon_count == 1 && $block_type eq 'eval' ) {
16250 # Mark short broken eval blocks for possible later use in
16251 # avoiding adding spaces before a 'package' line. This is not
16252 # essential but helps keep newer and older formatting the same.
16253 $self->[_ris_short_broken_eval_block_]->{$type_sequence_j} = 1;
16257 } ## end sub starting_one_line_block
16259 sub unstore_token_to_go {
16261 # remove most recent token from output stream
16263 if ( $max_index_to_go > 0 ) {
16264 $max_index_to_go--;
16267 $max_index_to_go = UNDEFINED_INDEX;
16270 } ## end sub unstore_token_to_go
16272 sub compare_indentation_levels {
16274 # Check to see if output line tabbing agrees with input line
16275 # this can be very useful for debugging a script which has an extra
16276 # or missing brace.
16278 my ( $self, $K_first, $guessed_indentation_level, $line_number ) = @_;
16279 return unless ( defined($K_first) );
16281 my $rLL = $self->[_rLL_];
16283 # ignore a line with a leading blank token - issue c195
16284 my $type = $rLL->[$K_first]->[_TYPE_];
16285 return if ( $type eq 'b' );
16287 my $structural_indentation_level = $self->[_radjusted_levels_]->[$K_first];
16289 # record max structural depth for log file
16290 if ( $structural_indentation_level > $self->[_maximum_BLOCK_level_] ) {
16291 $self->[_maximum_BLOCK_level_] = $structural_indentation_level;
16292 $self->[_maximum_BLOCK_level_at_line_] = $line_number;
16295 my $type_sequence = $rLL->[$K_first]->[_TYPE_SEQUENCE_];
16296 my $is_closing_block =
16298 && $self->[_rblock_type_of_seqno_]->{$type_sequence}
16301 if ( $guessed_indentation_level ne $structural_indentation_level ) {
16302 $self->[_last_tabbing_disagreement_] = $line_number;
16304 if ($is_closing_block) {
16306 if ( !$self->[_in_brace_tabbing_disagreement_] ) {
16307 $self->[_in_brace_tabbing_disagreement_] = $line_number;
16309 if ( !$self->[_first_brace_tabbing_disagreement_] ) {
16310 $self->[_first_brace_tabbing_disagreement_] = $line_number;
16314 if ( !$self->[_in_tabbing_disagreement_] ) {
16315 $self->[_tabbing_disagreement_count_]++;
16317 if ( $self->[_tabbing_disagreement_count_] <= MAX_NAG_MESSAGES ) {
16318 write_logfile_entry(
16319 "Start indentation disagreement: input=$guessed_indentation_level; output=$structural_indentation_level\n"
16322 $self->[_in_tabbing_disagreement_] = $line_number;
16323 $self->[_first_tabbing_disagreement_] = $line_number
16324 unless ( $self->[_first_tabbing_disagreement_] );
16329 $self->[_in_brace_tabbing_disagreement_] = 0 if ($is_closing_block);
16331 my $in_tabbing_disagreement = $self->[_in_tabbing_disagreement_];
16332 if ($in_tabbing_disagreement) {
16334 if ( $self->[_tabbing_disagreement_count_] <= MAX_NAG_MESSAGES ) {
16335 write_logfile_entry(
16336 "End indentation disagreement from input line $in_tabbing_disagreement\n"
16339 if ( $self->[_tabbing_disagreement_count_] == MAX_NAG_MESSAGES )
16341 write_logfile_entry(
16342 "No further tabbing disagreements will be noted\n");
16345 $self->[_in_tabbing_disagreement_] = 0;
16350 } ## end sub compare_indentation_levels
16352 ###################################################
16353 # CODE SECTION 8: Utilities for setting breakpoints
16354 ###################################################
16356 { ## begin closure set_forced_breakpoint
16358 my @forced_breakpoint_undo_stack;
16360 # These are global vars for efficiency:
16361 # my $forced_breakpoint_count;
16362 # my $forced_breakpoint_undo_count;
16363 # my $index_max_forced_break;
16365 # Break before or after certain tokens based on user settings
16366 my %break_before_or_after_token;
16370 # Updated to use all operators. This fixes case b1054
16371 # Here is the previous simplified version:
16372 ## my @q = qw( . : ? and or xor && || );
16373 my @q = @all_operators;
16376 @break_before_or_after_token{@q} = (1) x scalar(@q);
16379 sub set_fake_breakpoint {
16381 # Just bump up the breakpoint count as a signal that there are breaks.
16382 # This is useful if we have breaks but may want to postpone deciding
16383 # where to make them.
16384 $forced_breakpoint_count++;
16386 } ## end sub set_fake_breakpoint
16388 use constant DEBUG_FORCE => 0;
16390 sub set_forced_breakpoint {
16391 my ( $self, $i ) = @_;
16393 # Set a breakpoint AFTER the token at index $i in the _to_go arrays.
16396 # - If the token at index $i is a blank, backup to $i-1 to
16397 # get to the previous nonblank token.
16398 # - For certain tokens, the break may be placed BEFORE the token
16399 # at index $i, depending on user break preference settings.
16400 # - If a break is made after an opening token, then a break will
16401 # also be made before the corresponding closing token.
16403 # Returns '$i_nonblank':
16404 # = index of the token after which the breakpoint was actually placed
16405 # = undef if breakpoint was not set.
16408 if ( !defined($i) || $i < 0 ) {
16410 # Calls with bad index $i are harmless but waste time and should
16411 # be caught and eliminated during code development.
16413 my ( $a, $b, $c ) = caller();
16415 "Bad call to forced breakpoint from $a $b $c ; called with i=$i; please fix\n"
16421 # Break after token $i
16422 $i_nonblank = $self->set_forced_breakpoint_AFTER($i);
16424 # If we break at an opening container..break at the closing
16426 if ( defined($i_nonblank)
16427 && $is_opening_sequence_token{ $tokens_to_go[$i_nonblank] } )
16430 $self->set_closing_breakpoint($i_nonblank);
16433 DEBUG_FORCE && do {
16434 my ( $a, $b, $c ) = caller();
16436 "FORCE $forced_breakpoint_count after call from $a $c with i=$i max=$max_index_to_go";
16437 if ( !defined($i_nonblank) ) {
16438 $i = EMPTY_STRING unless defined($i);
16439 $msg .= " but could not set break after i='$i'\n";
16442 my $nobr = $nobreak_to_go[$i_nonblank];
16443 $nobr = 0 if ( !defined($nobr) );
16445 set break after $i_nonblank: tok=$tokens_to_go[$i_nonblank] type=$types_to_go[$i_nonblank] nobr=$nobr
16447 if ( defined($set_closing) ) {
16449 " Also set closing breakpoint corresponding to this token\n";
16455 return $i_nonblank;
16456 } ## end sub set_forced_breakpoint
16458 sub set_forced_breakpoint_AFTER {
16459 my ( $self, $i ) = @_;
16461 # This routine is only called by sub set_forced_breakpoint and
16462 # sub set_closing_breakpoint.
16464 # Set a breakpoint AFTER the token at index $i in the _to_go arrays.
16467 # - If the token at index $i is a blank, backup to $i-1 to
16468 # get to the previous nonblank token.
16469 # - For certain tokens, the break may be placed BEFORE the token
16470 # at index $i, depending on user break preference settings.
16473 # - the index of the token after which the break was set, or
16474 # - undef if no break was set
16476 return unless ( defined($i) && $i >= 0 );
16478 # Back up at a blank so we have a token to examine.
16479 # This was added to fix for cases like b932 involving an '=' break.
16480 if ( $i > 0 && $types_to_go[$i] eq 'b' ) { $i-- }
16482 # Never break between welded tokens
16484 if ( $total_weld_count
16485 && $self->[_rK_weld_right_]->{ $K_to_go[$i] } );
16487 my $token = $tokens_to_go[$i];
16488 my $type = $types_to_go[$i];
16490 # For certain tokens, use user settings to decide if we break before or
16492 if ( $break_before_or_after_token{$token}
16493 && ( $type eq $token || $type eq 'k' ) )
16495 if ( $want_break_before{$token} && $i >= 0 ) { $i-- }
16498 # breaks are forced before 'if' and 'unless'
16499 elsif ( $is_if_unless{$token} && $type eq 'k' ) { $i-- }
16501 if ( $i >= 0 && $i <= $max_index_to_go ) {
16502 my $i_nonblank = ( $types_to_go[$i] ne 'b' ) ? $i : $i - 1;
16504 if ( $i_nonblank >= 0
16505 && !$nobreak_to_go[$i_nonblank]
16506 && !$forced_breakpoint_to_go[$i_nonblank] )
16508 $forced_breakpoint_to_go[$i_nonblank] = 1;
16510 if ( $i_nonblank > $index_max_forced_break ) {
16511 $index_max_forced_break = $i_nonblank;
16513 $forced_breakpoint_count++;
16514 $forced_breakpoint_undo_stack[ $forced_breakpoint_undo_count++ ]
16518 return $i_nonblank;
16522 } ## end sub set_forced_breakpoint_AFTER
16524 sub clear_breakpoint_undo_stack {
16526 $forced_breakpoint_undo_count = 0;
16530 use constant DEBUG_UNDOBP => 0;
16532 sub undo_forced_breakpoint_stack {
16534 my ( $self, $i_start ) = @_;
16536 # Given $i_start, a non-negative index the 'undo stack' of breakpoints,
16537 # remove all breakpoints from the top of the 'undo stack' down to and
16538 # including index $i_start.
16540 # The 'undo stack' is a stack of all breakpoints made for a batch of
16543 if ( $i_start < 0 ) {
16545 my ( $a, $b, $c ) = caller();
16547 # Bad call, can only be due to a recent programming change.
16549 "Program Bug: undo_forced_breakpoint_stack from $a $c has bad i=$i_start "
16554 while ( $forced_breakpoint_undo_count > $i_start ) {
16556 $forced_breakpoint_undo_stack[ --$forced_breakpoint_undo_count ];
16557 if ( $i >= 0 && $i <= $max_index_to_go ) {
16558 $forced_breakpoint_to_go[$i] = 0;
16559 $forced_breakpoint_count--;
16561 DEBUG_UNDOBP && do {
16562 my ( $a, $b, $c ) = caller();
16564 "UNDOBP: undo forced_breakpoint i=$i $forced_breakpoint_undo_count from $a $c max=$max_index_to_go\n";
16568 # shouldn't happen, but not a critical error
16571 my ( $a, $b, $c ) = caller();
16573 Program Bug: undo_forced_breakpoint from $a $c has i=$i but max=$max_index_to_go
16579 } ## end sub undo_forced_breakpoint_stack
16580 } ## end closure set_forced_breakpoint
16582 { ## begin closure set_closing_breakpoint
16584 my %postponed_breakpoint;
16586 sub initialize_postponed_breakpoint {
16587 %postponed_breakpoint = ();
16591 sub has_postponed_breakpoint {
16593 return $postponed_breakpoint{$seqno};
16596 sub set_closing_breakpoint {
16598 # set a breakpoint at a matching closing token
16599 my ( $self, $i_break ) = @_;
16601 if ( defined( $mate_index_to_go[$i_break] ) ) {
16603 # Don't reduce the '2' in the statement below.
16604 # Test files: attrib.t, BasicLyx.pm.html
16605 if ( $mate_index_to_go[$i_break] > $i_break + 2 ) {
16607 # break before } ] and ), but sub set_forced_breakpoint will decide
16608 # to break before or after a ? and :
16609 my $inc = ( $tokens_to_go[$i_break] eq '?' ) ? 0 : 1;
16610 $self->set_forced_breakpoint_AFTER(
16611 $mate_index_to_go[$i_break] - $inc );
16615 my $type_sequence = $type_sequence_to_go[$i_break];
16616 if ($type_sequence) {
16617 $postponed_breakpoint{$type_sequence} = 1;
16621 } ## end sub set_closing_breakpoint
16622 } ## end closure set_closing_breakpoint
16624 #########################################
16625 # CODE SECTION 9: Process batches of code
16626 #########################################
16628 { ## begin closure grind_batch_of_CODE
16630 # The routines in this closure begin the processing of a 'batch' of code.
16632 # A variable to keep track of consecutive nonblank lines so that we can
16633 # insert occasional blanks
16634 my @nonblank_lines_at_depth;
16636 # A variable to remember maximum size of previous batches; this is needed
16637 # by the logical padding routine
16638 my $peak_batch_size;
16641 # variables to keep track of indentation of unmatched containers.
16642 my %saved_opening_indentation;
16644 sub initialize_grind_batch_of_CODE {
16645 @nonblank_lines_at_depth = ();
16646 $peak_batch_size = 0;
16648 %saved_opening_indentation = ();
16650 } ## end sub initialize_grind_batch_of_CODE
16652 # sub grind_batch_of_CODE receives sections of code which are the longest
16653 # possible lines without a break. In other words, it receives what is left
16654 # after applying all breaks forced by blank lines, block comments, side
16655 # comments, pod text, and structural braces. Its job is to break this code
16656 # down into smaller pieces, if necessary, which fit within the maximum
16657 # allowed line length. Then it sends the resulting lines of code on down
16658 # the pipeline to the VerticalAligner package, breaking the code into
16659 # continuation lines as necessary. The batch of tokens are in the "to_go"
16660 # arrays. The name 'grind' is slightly suggestive of a machine continually
16661 # breaking down long lines of code, but mainly it is unique and easy to
16662 # remember and find with an editor search.
16664 # The two routines 'process_line_of_CODE' and 'grind_batch_of_CODE' work
16665 # together in the following way:
16667 # - 'process_line_of_CODE' receives the original INPUT lines one-by-one and
16668 # combines them into the largest sequences of tokens which might form a new
16670 # - 'grind_batch_of_CODE' determines which tokens will form the OUTPUT
16673 # So sub 'process_line_of_CODE' builds up the longest possible continuous
16674 # sequences of tokens, regardless of line length, and then
16675 # grind_batch_of_CODE breaks these sequences back down into the new output
16678 # Sub 'grind_batch_of_CODE' ships its output lines to the vertical aligner.
16680 use constant DEBUG_GRIND => 0;
16682 sub check_grind_input {
16684 # Check for valid input to sub grind_batch_of_CODE. An error here
16685 # would most likely be due to an error in 'sub store_token_to_go'.
16688 # Be sure there are tokens in the batch
16689 if ( $max_index_to_go < 0 ) {
16691 sub grind incorrectly called with max_index_to_go=$max_index_to_go
16694 my $Klimit = $self->[_Klimit_];
16696 # The local batch tokens must be a continuous part of the global token
16699 foreach my $ii ( 0 .. $max_index_to_go ) {
16703 $KK = $K_to_go[$ii];
16704 if ( !defined($KK) || $KK < 0 || $KK > $Klimit ) {
16705 $KK = '(undef)' unless defined($KK);
16707 at batch index at i=$ii, the value of K_to_go[$ii] = '$KK' is out of the valid range (0 - $Klimit)
16711 if ( $ii > 0 && $KK != $Km + 1 ) {
16714 Non-sequential K indexes: i=$im has Km=$Km; but i=$ii has K=$KK; expecting K = Km+1
16719 } ## end sub check_grind_input
16721 # This filter speeds up a critical if-test
16725 my @q = qw# L { ( [ R ] ) } ? : f => #;
16727 @quick_filter{@q} = (1) x scalar(@q);
16730 sub grind_batch_of_CODE {
16734 #-----------------------------------------------------------------
16735 # This sub directs the formatting of one complete batch of tokens.
16736 # The tokens of the batch are in the '_to_go' arrays.
16737 #-----------------------------------------------------------------
16739 my $this_batch = $self->[_this_batch_];
16740 $this_batch->[_peak_batch_size_] = $peak_batch_size;
16741 $this_batch->[_batch_count_] = ++$batch_count;
16743 $self->check_grind_input() if (DEVEL_MODE);
16745 # This routine is only called from sub flush_batch_of_code, so that
16746 # routine is a better spot for debugging.
16747 DEBUG_GRIND && do {
16748 my $token = my $type = EMPTY_STRING;
16749 if ( $max_index_to_go >= 0 ) {
16750 $token = $tokens_to_go[$max_index_to_go];
16751 $type = $types_to_go[$max_index_to_go];
16753 my $output_str = EMPTY_STRING;
16754 if ( $max_index_to_go > 20 ) {
16755 my $mm = $max_index_to_go - 10;
16757 join( EMPTY_STRING, @tokens_to_go[ 0 .. 10 ] ) . " ... "
16758 . join( EMPTY_STRING,
16759 @tokens_to_go[ $mm .. $max_index_to_go ] );
16762 $output_str = join EMPTY_STRING,
16763 @tokens_to_go[ 0 .. $max_index_to_go ];
16765 print STDERR <<EOM;
16766 grind got batch number $batch_count with $max_index_to_go tokens, last type '$type' tok='$token', text:
16771 # Remove any trailing blank, which is possible (c192 has example)
16772 if ( $max_index_to_go >= 0 && $types_to_go[$max_index_to_go] eq 'b' ) {
16773 $max_index_to_go -= 1;
16776 return if ( $max_index_to_go < 0 );
16778 if ($rOpts_line_up_parentheses) {
16779 $self->set_lp_indentation();
16782 #--------------------------------------------------
16783 # Shortcut for block comments
16784 # Note that this shortcut does not work for -lp yet
16785 #--------------------------------------------------
16786 elsif ( !$max_index_to_go && $types_to_go[0] eq '#' ) {
16788 $this_batch->[_ri_first_] = [$ibeg];
16789 $this_batch->[_ri_last_] = [$ibeg];
16791 $self->convey_batch_to_vertical_aligner();
16793 my $level = $levels_to_go[$ibeg];
16794 $self->[_last_line_leading_type_] = $types_to_go[$ibeg];
16795 $self->[_last_line_leading_level_] = $level;
16796 $nonblank_lines_at_depth[$level] = 1;
16804 my $rLL = $self->[_rLL_];
16806 #-------------------------------------------------------
16807 # Loop over the batch to initialize some batch variables
16808 #-------------------------------------------------------
16809 my $comma_count_in_batch = 0;
16811 my @ix_seqno_controlling_ci;
16812 my %comma_arrow_count;
16813 my $comma_arrow_count_contained = 0;
16814 my @unmatched_closing_indexes_in_this_batch;
16815 my @unmatched_opening_indexes_in_this_batch;
16817 my @i_for_semicolon;
16818 foreach my $i ( 0 .. $max_index_to_go ) {
16820 if ( $types_to_go[$i] eq 'b' ) {
16821 $inext_to_go[$i] = $inext_to_go[ $i - 1 ] = $i + 1;
16825 $inext_to_go[$i] = $i + 1;
16827 # This is an optional shortcut to save a bit of time by skipping
16828 # most tokens. Note: the filter may need to be updated if the
16829 # next 'if' tests are ever changed to include more token types.
16830 next if ( !$quick_filter{ $types_to_go[$i] } );
16832 my $type = $types_to_go[$i];
16834 # gather info needed by sub break_long_lines
16835 if ( $type_sequence_to_go[$i] ) {
16836 my $seqno = $type_sequence_to_go[$i];
16837 my $token = $tokens_to_go[$i];
16839 # remember indexes of any tokens controlling xci
16840 # in this batch. This list is needed by sub undo_ci.
16841 if ( $self->[_ris_seqno_controlling_ci_]->{$seqno} ) {
16842 push @ix_seqno_controlling_ci, $i;
16845 if ( $is_opening_sequence_token{$token} ) {
16846 if ( $self->[_rbreak_container_]->{$seqno} ) {
16847 $self->set_forced_breakpoint($i);
16849 push @unmatched_opening_indexes_in_this_batch, $i;
16850 if ( $type eq '?' ) {
16851 push @colon_list, $type;
16854 elsif ( $is_closing_sequence_token{$token} ) {
16856 if ( $i > 0 && $self->[_rbreak_container_]->{$seqno} ) {
16857 $self->set_forced_breakpoint( $i - 1 );
16860 my $i_mate = pop @unmatched_opening_indexes_in_this_batch;
16861 if ( defined($i_mate) && $i_mate >= 0 ) {
16862 if ( $type_sequence_to_go[$i_mate] ==
16863 $type_sequence_to_go[$i] )
16865 $mate_index_to_go[$i] = $i_mate;
16866 $mate_index_to_go[$i_mate] = $i;
16867 my $cac = $comma_arrow_count{$seqno};
16868 $comma_arrow_count_contained += $cac if ($cac);
16871 push @unmatched_opening_indexes_in_this_batch,
16873 push @unmatched_closing_indexes_in_this_batch, $i;
16877 push @unmatched_closing_indexes_in_this_batch, $i;
16879 if ( $type eq ':' ) {
16880 push @colon_list, $type;
16882 } ## end elsif ( $is_closing_sequence_token...)
16884 } ## end if ($seqno)
16886 elsif ( $type eq ',' ) { $comma_count_in_batch++; }
16887 elsif ( $type eq '=>' ) {
16888 if (@unmatched_opening_indexes_in_this_batch) {
16889 my $j = $unmatched_opening_indexes_in_this_batch[-1];
16890 my $seqno = $type_sequence_to_go[$j];
16891 $comma_arrow_count{$seqno}++;
16894 elsif ( $type eq 'f' ) {
16895 push @i_for_semicolon, $i;
16898 } ## end for ( my $i = 0 ; $i <=...)
16900 # Break at a single interior C-style for semicolon in this batch (c154)
16901 if ( @i_for_semicolon && @i_for_semicolon == 1 ) {
16902 my $i = $i_for_semicolon[0];
16903 my $inext = $inext_to_go[$i];
16904 if ( $inext <= $max_index_to_go && $types_to_go[$inext] ne '#' ) {
16905 $self->set_forced_breakpoint($i);
16909 my $is_unbalanced_batch = @unmatched_opening_indexes_in_this_batch +
16910 @unmatched_closing_indexes_in_this_batch;
16912 if (@unmatched_opening_indexes_in_this_batch) {
16913 $this_batch->[_runmatched_opening_indexes_] =
16914 \@unmatched_opening_indexes_in_this_batch;
16917 if (@ix_seqno_controlling_ci) {
16918 $this_batch->[_rix_seqno_controlling_ci_] =
16919 \@ix_seqno_controlling_ci;
16922 #------------------------
16923 # Set special breakpoints
16924 #------------------------
16925 # If this line ends in a code block brace, set breaks at any
16926 # previous closing code block braces to breakup a chain of code
16927 # blocks on one line. This is very rare but can happen for
16928 # user-defined subs. For example we might be looking at this:
16929 # BOOL { $server_data{uptime} > 0; } NUM { $server_data{load}; } STR {
16930 my $saw_good_break; # flag to force breaks even if short line
16933 # looking for opening or closing block brace
16934 $block_type_to_go[$max_index_to_go]
16936 # never any good breaks if just one token
16937 && $max_index_to_go > 0
16939 # but not one of these which are never duplicated on a line:
16940 # until|while|for|if|elsif|else
16941 && !$is_block_without_semicolon{ $block_type_to_go[$max_index_to_go]
16945 my $lev = $nesting_depth_to_go[$max_index_to_go];
16947 # Walk backwards from the end and
16948 # set break at any closing block braces at the same level.
16949 # But quit if we are not in a chain of blocks.
16950 foreach my $i ( reverse( 0 .. $max_index_to_go - 1 ) ) {
16951 last if ( $levels_to_go[$i] < $lev ); # stop at a lower level
16952 next if ( $levels_to_go[$i] > $lev ); # skip past higher level
16954 if ( $block_type_to_go[$i] ) {
16955 if ( $tokens_to_go[$i] eq '}' ) {
16956 $self->set_forced_breakpoint($i);
16957 $saw_good_break = 1;
16961 # quit if we see anything besides words, function, blanks
16963 elsif ( $types_to_go[$i] !~ /^[\(\)Gwib]$/ ) { last }
16967 #-----------------------------------------------
16968 # insertion of any blank lines before this batch
16969 #-----------------------------------------------
16972 my $imax = $max_index_to_go;
16974 # trim any blank tokens - for safety, but should not be necessary
16975 if ( $types_to_go[$imin] eq 'b' ) { $imin++ }
16976 if ( $types_to_go[$imax] eq 'b' ) { $imax-- }
16978 if ( $imin > $imax ) {
16980 my $K0 = $K_to_go[0];
16981 my $lno = EMPTY_STRING;
16982 if ( defined($K0) ) { $lno = $rLL->[$K0]->[_LINE_INDEX_] + 1 }
16984 Strange: received batch containing only blanks near input line $lno: after trimming imin=$imin, imax=$imax
16990 my $last_line_leading_type = $self->[_last_line_leading_type_];
16991 my $last_line_leading_level = $self->[_last_line_leading_level_];
16993 my $leading_type = $types_to_go[0];
16994 my $leading_level = $levels_to_go[0];
16996 # add blank line(s) before certain key types but not after a comment
16997 if ( $last_line_leading_type ne '#' ) {
16998 my $blank_count = 0;
16999 my $leading_token = $tokens_to_go[0];
17001 # break before certain key blocks except one-liners
17002 if ( $leading_type eq 'k' ) {
17003 if ( $leading_token eq 'BEGIN' || $leading_token eq 'END' ) {
17004 $blank_count = $rOpts->{'blank-lines-before-subs'}
17005 if ( terminal_type_i( 0, $max_index_to_go ) ne '}' );
17008 # Break before certain block types if we haven't had a
17009 # break at this level for a while. This is the
17010 # difficult decision..
17011 elsif ($last_line_leading_type ne 'b'
17012 && $is_if_unless_while_until_for_foreach{$leading_token} )
17014 my $lc = $nonblank_lines_at_depth[$last_line_leading_level];
17015 if ( !defined($lc) ) { $lc = 0 }
17017 # patch for RT #128216: no blank line inserted at a level
17019 if ( $levels_to_go[0] != $last_line_leading_level ) {
17023 if ( $rOpts->{'blanks-before-blocks'}
17024 && $lc >= $rOpts->{'long-block-line-count'}
17025 && $self->consecutive_nonblank_lines() >=
17026 $rOpts->{'long-block-line-count'}
17027 && terminal_type_i( 0, $max_index_to_go ) ne '}' )
17034 # blank lines before subs except declarations and one-liners
17035 elsif ( $leading_type eq 'i' ) {
17036 my $special_identifier =
17037 $self->[_ris_special_identifier_token_]->{$leading_token};
17038 if ($special_identifier) {
17039 ## $leading_token =~ /$SUB_PATTERN/
17040 if ( $special_identifier eq 'sub' ) {
17042 $blank_count = $rOpts->{'blank-lines-before-subs'}
17043 if ( terminal_type_i( 0, $max_index_to_go ) !~
17047 # break before all package declarations
17048 ## substr( $leading_token, 0, 8 ) eq 'package '
17049 elsif ( $special_identifier eq 'package' ) {
17051 # ... except in a very short eval block
17052 my $pseqno = $parent_seqno_to_go[0];
17053 $blank_count = $rOpts->{'blank-lines-before-packages'}
17055 !$self->[_ris_short_broken_eval_block_]->{$pseqno}
17061 # Check for blank lines wanted before a closing brace
17062 elsif ( $leading_token eq '}' ) {
17063 if ( $rOpts->{'blank-lines-before-closing-block'}
17064 && $block_type_to_go[0]
17065 && $block_type_to_go[0] =~
17066 /$blank_lines_before_closing_block_pattern/ )
17068 my $nblanks = $rOpts->{'blank-lines-before-closing-block'};
17069 if ( $nblanks > $blank_count ) {
17070 $blank_count = $nblanks;
17075 if ($blank_count) {
17077 # future: send blank line down normal path to VerticalAligner?
17078 $self->flush_vertical_aligner();
17079 my $file_writer_object = $self->[_file_writer_object_];
17080 $file_writer_object->require_blank_code_lines($blank_count);
17084 # update blank line variables and count number of consecutive
17085 # non-blank, non-comment lines at this level
17086 if ( $leading_level == $last_line_leading_level
17087 && $leading_type ne '#'
17088 && defined( $nonblank_lines_at_depth[$leading_level] ) )
17090 $nonblank_lines_at_depth[$leading_level]++;
17093 $nonblank_lines_at_depth[$leading_level] = 1;
17096 $self->[_last_line_leading_type_] = $leading_type;
17097 $self->[_last_line_leading_level_] = $leading_level;
17099 #--------------------------
17100 # scan lists and long lines
17101 #--------------------------
17103 # Flag to remember if we called sub 'pad_array_to_go'.
17104 # Some routines (break_lists(), break_long_lines() ) need some
17105 # extra tokens added at the end of the batch. Most batches do not
17106 # use these routines, so we will avoid calling 'pad_array_to_go'
17107 # unless it is needed.
17108 my $called_pad_array_to_go;
17110 # set all forced breakpoints for good list formatting
17112 my $multiple_old_lines_in_batch;
17113 if ( $max_index_to_go > 0 ) {
17115 $self->excess_line_length( $imin, $max_index_to_go ) > 0;
17117 my $Kbeg = $K_to_go[0];
17118 my $Kend = $K_to_go[$max_index_to_go];
17119 $multiple_old_lines_in_batch =
17120 $rLL->[$Kend]->[_LINE_INDEX_] - $rLL->[$Kbeg]->[_LINE_INDEX_];
17123 my $rbond_strength_bias = [];
17126 || $multiple_old_lines_in_batch
17128 # must always call break_lists() with unbalanced batches because
17129 # it is maintaining some stacks
17130 || $is_unbalanced_batch
17132 # call break_lists if we might want to break at commas
17134 $comma_count_in_batch
17135 && ( $rOpts_maximum_fields_per_table > 0
17136 && $rOpts_maximum_fields_per_table <= $comma_count_in_batch
17137 || $rOpts_comma_arrow_breakpoints == 0 )
17140 # call break_lists if user may want to break open some one-line
17142 || ( $comma_arrow_count_contained
17143 && $rOpts_comma_arrow_breakpoints != 3 )
17146 # add a couple of extra terminal blank tokens
17147 $self->pad_array_to_go();
17148 $called_pad_array_to_go = 1;
17150 my $sgb = $self->break_lists( $is_long_line, $rbond_strength_bias );
17151 $saw_good_break ||= $sgb;
17154 # let $ri_first and $ri_last be references to lists of
17155 # first and last tokens of line fragments to output..
17156 my ( $ri_first, $ri_last );
17158 #-----------------------------
17159 # a single token uses one line
17160 #-----------------------------
17161 if ( !$max_index_to_go ) {
17162 $ri_first = [$imin];
17163 $ri_last = [$imax];
17166 # for multiple tokens
17169 #-------------------------
17170 # write a single line if..
17171 #-------------------------
17175 # this line is 'short'
17178 # and we didn't see a good breakpoint
17179 && !$saw_good_break
17181 # and we don't already have an interior breakpoint
17182 && !$forced_breakpoint_count
17185 # or, we aren't allowed to add any newlines
17186 || !$rOpts_add_newlines
17190 $ri_first = [$imin];
17191 $ri_last = [$imax];
17194 #-----------------------------
17195 # otherwise use multiple lines
17196 #-----------------------------
17199 # add a couple of extra terminal blank tokens if we haven't
17201 $self->pad_array_to_go() unless ($called_pad_array_to_go);
17203 ( $ri_first, $ri_last, my $rbond_strength_to_go ) =
17204 $self->break_long_lines( $saw_good_break, \@colon_list,
17205 $rbond_strength_bias );
17207 $self->break_all_chain_tokens( $ri_first, $ri_last );
17209 $self->break_equals( $ri_first, $ri_last )
17210 if @{$ri_first} >= 3;
17212 # now we do a correction step to clean this up a bit
17213 # (The only time we would not do this is for debugging)
17214 $self->recombine_breakpoints( $ri_first, $ri_last,
17215 $rbond_strength_to_go )
17216 if ( $rOpts_recombine && @{$ri_first} > 1 );
17218 $self->insert_final_ternary_breaks( $ri_first, $ri_last )
17222 $self->insert_breaks_before_list_opening_containers( $ri_first,
17224 if ( %break_before_container_types && $max_index_to_go > 0 );
17226 # Check for a phantom semicolon at the end of the batch
17227 if ( !$token_lengths_to_go[$imax] && $types_to_go[$imax] eq ';' ) {
17228 $self->unmask_phantom_token($imax);
17231 if ( $rOpts_one_line_block_semicolons == 0 ) {
17232 $self->delete_one_line_semicolons( $ri_first, $ri_last );
17235 # Remember the largest batch size processed. This is needed by the
17236 # logical padding routine to avoid padding the first nonblank token
17237 if ( $max_index_to_go > $peak_batch_size ) {
17238 $peak_batch_size = $max_index_to_go;
17242 #-------------------
17243 # -lp corrector step
17244 #-------------------
17245 if ($rOpts_line_up_parentheses) {
17246 $self->correct_lp_indentation( $ri_first, $ri_last );
17249 #--------------------
17250 # ship this batch out
17251 #--------------------
17252 $this_batch->[_ri_first_] = $ri_first;
17253 $this_batch->[_ri_last_] = $ri_last;
17255 $self->convey_batch_to_vertical_aligner();
17257 #-------------------------------------------------------------------
17258 # Write requested number of blank lines after an opening block brace
17259 #-------------------------------------------------------------------
17260 if ($rOpts_blank_lines_after_opening_block) {
17262 if ( $types_to_go[$iterm] eq '#' && $iterm > $imin ) {
17264 if ( $types_to_go[$iterm] eq 'b' && $iterm > $imin ) {
17269 if ( $types_to_go[$iterm] eq '{'
17270 && $block_type_to_go[$iterm]
17271 && $block_type_to_go[$iterm] =~
17272 /$blank_lines_after_opening_block_pattern/ )
17274 my $nblanks = $rOpts_blank_lines_after_opening_block;
17275 $self->flush_vertical_aligner();
17276 my $file_writer_object = $self->[_file_writer_object_];
17277 $file_writer_object->require_blank_code_lines($nblanks);
17282 } ## end sub grind_batch_of_CODE
17287 && $types_to_go[ $i - 1 ] eq 'b' ? $i - 2 : $i - 1;
17290 sub unmask_phantom_token {
17291 my ( $self, $iend ) = @_;
17293 # Turn a phantom token into a real token.
17296 # $iend = the index in the output batch array of this token.
17298 # Phantom tokens are specially marked token types (such as ';') with
17299 # no token text which only become real tokens if they occur at the end
17300 # of an output line. At one time phantom ',' tokens were handled
17301 # here, but now they are processed elsewhere.
17303 my $rLL = $self->[_rLL_];
17304 my $KK = $K_to_go[$iend];
17305 my $line_number = 1 + $rLL->[$KK]->[_LINE_INDEX_];
17307 my $type = $types_to_go[$iend];
17308 return unless ( $type eq ';' );
17310 my $tok_len = length($tok);
17311 if ( $want_left_space{$type} != WS_NO ) {
17312 $tok = SPACE . $tok;
17316 $tokens_to_go[$iend] = $tok;
17317 $token_lengths_to_go[$iend] = $tok_len;
17319 $rLL->[$KK]->[_TOKEN_] = $tok;
17320 $rLL->[$KK]->[_TOKEN_LENGTH_] = $tok_len;
17322 $self->note_added_semicolon($line_number);
17324 # This changes the summed lengths of the rest of this batch
17325 foreach ( $iend .. $max_index_to_go ) {
17326 $summed_lengths_to_go[ $_ + 1 ] += $tok_len;
17329 } ## end sub unmask_phantom_token
17331 sub save_opening_indentation {
17333 # This should be called after each batch of tokens is output. It
17334 # saves indentations of lines of all unmatched opening tokens.
17335 # These will be used by sub get_opening_indentation.
17337 my ( $self, $ri_first, $ri_last, $rindentation_list,
17338 $runmatched_opening_indexes )
17341 $runmatched_opening_indexes = []
17342 if ( !defined($runmatched_opening_indexes) );
17344 # QW INDENTATION PATCH 1:
17345 # Also save indentation for multiline qw quotes
17347 my $seqno_qw_opening;
17348 if ( $types_to_go[$max_index_to_go] eq 'q' ) {
17349 my $KK = $K_to_go[$max_index_to_go];
17350 $seqno_qw_opening =
17351 $self->[_rstarting_multiline_qw_seqno_by_K_]->{$KK};
17352 if ($seqno_qw_opening) {
17353 push @i_qw, $max_index_to_go;
17357 # we need to save indentations of any unmatched opening tokens
17358 # in this batch because we may need them in a subsequent batch.
17359 foreach ( @{$runmatched_opening_indexes}, @i_qw ) {
17361 my $seqno = $type_sequence_to_go[$_];
17364 if ( $seqno_qw_opening && $_ == $max_index_to_go ) {
17365 $seqno = $seqno_qw_opening;
17370 $seqno = 'UNKNOWN';
17371 DEVEL_MODE && Fault("unable to find sequence number\n");
17375 $saved_opening_indentation{$seqno} = [
17376 lookup_opening_indentation(
17377 $_, $ri_first, $ri_last, $rindentation_list
17382 } ## end sub save_opening_indentation
17384 sub get_saved_opening_indentation {
17386 my ( $indent, $offset, $is_leading, $exists ) = ( 0, 0, 0, 0 );
17389 if ( $saved_opening_indentation{$seqno} ) {
17390 ( $indent, $offset, $is_leading ) =
17391 @{ $saved_opening_indentation{$seqno} };
17396 # some kind of serious error it doesn't exist
17397 # (example is badfile.t)
17399 return ( $indent, $offset, $is_leading, $exists );
17400 } ## end sub get_saved_opening_indentation
17401 } ## end closure grind_batch_of_CODE
17403 sub lookup_opening_indentation {
17405 # get the indentation of the line in the current output batch
17406 # which output a selected opening token
17409 # $i_opening - index of an opening token in the current output batch
17410 # whose line indentation we need
17411 # $ri_first - reference to list of the first index $i for each output
17412 # line in this batch
17413 # $ri_last - reference to list of the last index $i for each output line
17415 # $rindentation_list - reference to a list containing the indentation
17416 # used for each line. (NOTE: the first slot in
17417 # this list is the last returned line number, and this is
17418 # followed by the list of indentations).
17421 # -the indentation of the line which contained token $i_opening
17422 # -and its offset (number of columns) from the start of the line
17424 my ( $i_opening, $ri_start, $ri_last, $rindentation_list ) = @_;
17426 if ( !@{$ri_last} ) {
17428 # An error here implies a bug introduced by a recent program change.
17429 # Every batch of code has lines, so this should never happen.
17431 Fault("Error in opening_indentation: no lines");
17433 return ( 0, 0, 0 );
17436 my $nline = $rindentation_list->[0]; # line number of previous lookup
17438 # reset line location if necessary
17439 $nline = 0 if ( $i_opening < $ri_start->[$nline] );
17441 # find the correct line
17442 unless ( $i_opening > $ri_last->[-1] ) {
17443 while ( $i_opening > $ri_last->[$nline] ) { $nline++; }
17446 # Error - token index is out of bounds - shouldn't happen
17447 # A program bug has been introduced in one of the calling routines.
17448 # We better stop here.
17450 my $i_last_line = $ri_last->[-1];
17453 Program bug in call to lookup_opening_indentation - index out of range
17454 called with index i_opening=$i_opening > $i_last_line = max index of last line
17455 This batch has max index = $max_index_to_go,
17458 $nline = $#{$ri_last};
17461 $rindentation_list->[0] =
17462 $nline; # save line number to start looking next call
17463 my $ibeg = $ri_start->[$nline];
17464 my $offset = token_sequence_length( $ibeg, $i_opening ) - 1;
17465 my $is_leading = ( $ibeg == $i_opening );
17466 return ( $rindentation_list->[ $nline + 1 ], $offset, $is_leading );
17467 } ## end sub lookup_opening_indentation
17469 sub terminal_type_i {
17471 # returns type of last token on this line (terminal token), as follows:
17472 # returns # for a full-line comment
17473 # returns ' ' for a blank line
17474 # otherwise returns final token type
17476 my ( $ibeg, $iend ) = @_;
17478 # Start at the end and work backwards
17480 my $type_i = $types_to_go[$i];
17482 # Check for side comment
17483 if ( $type_i eq '#' ) {
17485 if ( $i < $ibeg ) {
17486 return wantarray ? ( $type_i, $ibeg ) : $type_i;
17488 $type_i = $types_to_go[$i];
17491 # Skip past a blank
17492 if ( $type_i eq 'b' ) {
17494 if ( $i < $ibeg ) {
17495 return wantarray ? ( $type_i, $ibeg ) : $type_i;
17497 $type_i = $types_to_go[$i];
17500 # Found it..make sure it is a BLOCK termination,
17501 # but hide a terminal } after sort/map/grep/eval/do because it is not
17502 # necessarily the end of the line. (terminal.t)
17503 my $block_type = $block_type_to_go[$i];
17507 || $is_sort_map_grep_eval_do{$block_type} )
17512 return wantarray ? ( $type_i, $i ) : $type_i;
17513 } ## end sub terminal_type_i
17515 sub pad_array_to_go {
17517 # To simplify coding in break_lists and set_bond_strengths, it helps to
17518 # create some extra blank tokens at the end of the arrays. We also add
17519 # some undef's to help guard against using invalid data.
17521 $K_to_go[ $max_index_to_go + 1 ] = undef;
17522 $tokens_to_go[ $max_index_to_go + 1 ] = EMPTY_STRING;
17523 $tokens_to_go[ $max_index_to_go + 2 ] = EMPTY_STRING;
17524 $tokens_to_go[ $max_index_to_go + 3 ] = undef;
17525 $types_to_go[ $max_index_to_go + 1 ] = 'b';
17526 $types_to_go[ $max_index_to_go + 2 ] = 'b';
17527 $types_to_go[ $max_index_to_go + 3 ] = undef;
17528 $nesting_depth_to_go[ $max_index_to_go + 2 ] = undef;
17529 $nesting_depth_to_go[ $max_index_to_go + 1 ] =
17530 $nesting_depth_to_go[$max_index_to_go];
17533 if ( $is_closing_type{ $types_to_go[$max_index_to_go] } ) {
17534 if ( $nesting_depth_to_go[$max_index_to_go] <= 0 ) {
17536 # Nesting depths are set to be >=0 in sub write_line, so it should
17537 # not be possible to get here unless the code has a bracing error
17538 # which leaves a closing brace with zero nesting depth.
17539 unless ( get_saw_brace_error() ) {
17542 Program bug in pad_array_to_go: hit nesting error which should have been caught
17548 $nesting_depth_to_go[ $max_index_to_go + 1 ] -= 1;
17553 elsif ( $is_opening_type{ $types_to_go[$max_index_to_go] } ) {
17554 $nesting_depth_to_go[ $max_index_to_go + 1 ] += 1;
17557 } ## end sub pad_array_to_go
17559 sub break_all_chain_tokens {
17561 # scan the current breakpoints looking for breaks at certain "chain
17562 # operators" (. : && || + etc) which often occur repeatedly in a long
17563 # statement. If we see a break at any one, break at all similar tokens
17564 # within the same container.
17566 my ( $self, $ri_left, $ri_right ) = @_;
17568 my %saw_chain_type;
17569 my %left_chain_type;
17570 my %right_chain_type;
17571 my %interior_chain_type;
17572 my $nmax = @{$ri_right} - 1;
17574 # scan the left and right end tokens of all lines
17576 for my $n ( 0 .. $nmax ) {
17577 my $il = $ri_left->[$n];
17578 my $ir = $ri_right->[$n];
17579 my $typel = $types_to_go[$il];
17580 my $typer = $types_to_go[$ir];
17581 $typel = '+' if ( $typel eq '-' ); # treat + and - the same
17582 $typer = '+' if ( $typer eq '-' );
17583 $typel = '*' if ( $typel eq '/' ); # treat * and / the same
17584 $typer = '*' if ( $typer eq '/' );
17586 my $keyl = $typel eq 'k' ? $tokens_to_go[$il] : $typel;
17587 my $keyr = $typer eq 'k' ? $tokens_to_go[$ir] : $typer;
17588 if ( $is_chain_operator{$keyl} && $want_break_before{$typel} ) {
17589 next if ( $typel eq '?' );
17590 push @{ $left_chain_type{$keyl} }, $il;
17591 $saw_chain_type{$keyl} = 1;
17594 if ( $is_chain_operator{$keyr} && !$want_break_before{$typer} ) {
17595 next if ( $typer eq '?' );
17596 push @{ $right_chain_type{$keyr} }, $ir;
17597 $saw_chain_type{$keyr} = 1;
17601 return unless $count;
17603 # now look for any interior tokens of the same types
17605 my $has_interior_dot_or_plus;
17606 for my $n ( 0 .. $nmax ) {
17607 my $il = $ri_left->[$n];
17608 my $ir = $ri_right->[$n];
17609 foreach my $i ( $il + 1 .. $ir - 1 ) {
17610 my $type = $types_to_go[$i];
17611 my $key = $type eq 'k' ? $tokens_to_go[$i] : $type;
17612 $key = '+' if ( $key eq '-' );
17613 $key = '*' if ( $key eq '/' );
17614 if ( $saw_chain_type{$key} ) {
17615 push @{ $interior_chain_type{$key} }, $i;
17617 $has_interior_dot_or_plus ||= ( $key eq '.' || $key eq '+' );
17621 return unless $count;
17623 my @keys = keys %saw_chain_type;
17625 # quit if just ONE continuation line with leading . For example--
17626 # print LATEXFILE '\framebox{\parbox[c][' . $h . '][t]{' . $w . '}{'
17629 if ( $has_interior_dot_or_plus && $nmax == 1 && @keys == 1 ) {
17633 # now make a list of all new break points
17636 # loop over all chain types
17637 foreach my $key (@keys) {
17639 # loop over all interior chain tokens
17640 foreach my $itest ( @{ $interior_chain_type{$key} } ) {
17642 # loop over all left end tokens of same type
17643 if ( $left_chain_type{$key} ) {
17644 next if $nobreak_to_go[ $itest - 1 ];
17645 foreach my $i ( @{ $left_chain_type{$key} } ) {
17646 next unless $self->in_same_container_i( $i, $itest );
17647 push @insert_list, $itest - 1;
17649 # Break at matching ? if this : is at a different level.
17650 # For example, the ? before $THRf_DEAD in the following
17651 # should get a break if its : gets a break.
17654 # ( $_ & 1 ) ? ( $_ & 4 ) ? $THRf_DEAD : $THRf_ZOMBIE
17655 # : ( $_ & 4 ) ? $THRf_R_DETACHED
17656 # : $THRf_R_JOINABLE;
17658 && $levels_to_go[$i] != $levels_to_go[$itest] )
17660 my $i_question = $mate_index_to_go[$itest];
17661 if ( defined($i_question) && $i_question > 0 ) {
17662 push @insert_list, $i_question - 1;
17669 # loop over all right end tokens of same type
17670 if ( $right_chain_type{$key} ) {
17671 next if $nobreak_to_go[$itest];
17672 foreach my $i ( @{ $right_chain_type{$key} } ) {
17673 next unless $self->in_same_container_i( $i, $itest );
17674 push @insert_list, $itest;
17676 # break at matching ? if this : is at a different level
17678 && $levels_to_go[$i] != $levels_to_go[$itest] )
17680 my $i_question = $mate_index_to_go[$itest];
17681 if ( defined($i_question) ) {
17682 push @insert_list, $i_question;
17691 # insert any new break points
17692 if (@insert_list) {
17693 $self->insert_additional_breaks( \@insert_list, $ri_left, $ri_right );
17696 } ## end sub break_all_chain_tokens
17698 sub insert_additional_breaks {
17700 # this routine will add line breaks at requested locations after
17701 # sub break_long_lines has made preliminary breaks.
17703 my ( $self, $ri_break_list, $ri_first, $ri_last ) = @_;
17706 my $line_number = 0;
17707 foreach my $i_break_left ( sort { $a <=> $b } @{$ri_break_list} ) {
17709 next if ( $nobreak_to_go[$i_break_left] );
17711 $i_f = $ri_first->[$line_number];
17712 $i_l = $ri_last->[$line_number];
17713 while ( $i_break_left >= $i_l ) {
17716 # shouldn't happen unless caller passes bad indexes
17717 if ( $line_number >= @{$ri_last} ) {
17720 Non-fatal program bug: couldn't set break at $i_break_left
17725 $i_f = $ri_first->[$line_number];
17726 $i_l = $ri_last->[$line_number];
17729 # Do not leave a blank at the end of a line; back up if necessary
17730 if ( $types_to_go[$i_break_left] eq 'b' ) { $i_break_left-- }
17732 my $i_break_right = $inext_to_go[$i_break_left];
17733 if ( $i_break_left >= $i_f
17734 && $i_break_left < $i_l
17735 && $i_break_right > $i_f
17736 && $i_break_right <= $i_l )
17738 splice( @{$ri_first}, $line_number, 1, ( $i_f, $i_break_right ) );
17739 splice( @{$ri_last}, $line_number, 1, ( $i_break_left, $i_l ) );
17743 } ## end sub insert_additional_breaks
17745 { ## begin closure in_same_container_i
17746 my $ris_break_token;
17747 my $ris_comma_token;
17751 # all cases break on seeing commas at same level
17754 @{$ris_comma_token}{@q} = (1) x scalar(@q);
17756 # Non-ternary text also breaks on seeing any of qw(? : || or )
17757 # Example: we would not want to break at any of these .'s
17758 # : "<A HREF=\"#item_" . htmlify( 0, $s2 ) . "\">$str</A>"
17759 push @q, qw( or || ? : );
17760 @{$ris_break_token}{@q} = (1) x scalar(@q);
17763 sub in_same_container_i {
17765 # Check to see if tokens at i1 and i2 are in the same container, and
17766 # not separated by certain characters: => , ? : || or
17767 # This is an interface between the _to_go arrays to the rLL array
17768 my ( $self, $i1, $i2 ) = @_;
17771 my $parent_seqno_1 = $parent_seqno_to_go[$i1];
17772 return if ( $parent_seqno_to_go[$i2] ne $parent_seqno_1 );
17774 if ( $i2 < $i1 ) { ( $i1, $i2 ) = ( $i2, $i1 ) }
17775 my $K1 = $K_to_go[$i1];
17776 my $K2 = $K_to_go[$i2];
17777 my $rLL = $self->[_rLL_];
17779 my $depth_1 = $nesting_depth_to_go[$i1];
17780 return if ( $depth_1 < 0 );
17782 # Shouldn't happen since i1 and i2 have same parent:
17783 return unless ( $nesting_depth_to_go[$i2] == $depth_1 );
17785 # Select character set to scan for
17786 my $type_1 = $types_to_go[$i1];
17787 my $rbreak = ( $type_1 ne ':' ) ? $ris_break_token : $ris_comma_token;
17789 # Fast preliminary loop to verify that tokens are in the same container
17792 $KK = $rLL->[$KK]->[_KNEXT_SEQ_ITEM_];
17793 last if !defined($KK);
17794 last if ( $KK >= $K2 );
17795 my $ii = $i1 + $KK - $K1;
17796 my $depth_i = $nesting_depth_to_go[$ii];
17797 return if ( $depth_i < $depth_1 );
17798 next if ( $depth_i > $depth_1 );
17799 if ( $type_1 ne ':' ) {
17800 my $tok_i = $tokens_to_go[$ii];
17801 return if ( $tok_i eq '?' || $tok_i eq ':' );
17805 # Slow loop checking for certain characters
17807 #-----------------------------------------------------
17808 # This is potentially a slow routine and not critical.
17809 # For safety just give up for large differences.
17810 # See test file 'infinite_loop.txt'
17811 #-----------------------------------------------------
17812 return if ( $i2 - $i1 > 200 );
17814 foreach my $ii ( $i1 + 1 .. $i2 - 1 ) {
17816 my $depth_i = $nesting_depth_to_go[$ii];
17817 next if ( $depth_i > $depth_1 );
17818 return if ( $depth_i < $depth_1 );
17819 my $tok_i = $tokens_to_go[$ii];
17820 return if ( $rbreak->{$tok_i} );
17823 } ## end sub in_same_container_i
17824 } ## end closure in_same_container_i
17828 # Look for assignment operators that could use a breakpoint.
17829 # For example, in the following snippet
17831 # $HOME = $ENV{HOME}
17834 # || die "no home directory for user $<";
17836 # we could break at the = to get this, which is a little nicer:
17841 # || die "no home directory for user $<";
17843 # The logic here follows the logic in set_logical_padding, which
17844 # will add the padding in the second line to improve alignment.
17846 my ( $self, $ri_left, $ri_right ) = @_;
17847 my $nmax = @{$ri_right} - 1;
17848 return unless ( $nmax >= 2 );
17850 # scan the left ends of first two lines
17851 my $tokbeg = EMPTY_STRING;
17853 for my $n ( 1 .. 2 ) {
17854 my $il = $ri_left->[$n];
17855 my $typel = $types_to_go[$il];
17856 my $tokenl = $tokens_to_go[$il];
17857 my $keyl = $typel eq 'k' ? $tokenl : $typel;
17859 my $has_leading_op = $is_chain_operator{$keyl};
17860 return unless ($has_leading_op);
17863 unless ( $tokenl eq $tokbeg
17864 && $nesting_depth_to_go[$il] eq $depth_beg );
17867 $depth_beg = $nesting_depth_to_go[$il];
17870 # now look for any interior tokens of the same types
17871 my $il = $ri_left->[0];
17872 my $ir = $ri_right->[0];
17874 # now make a list of all new break points
17876 foreach my $i ( reverse( $il + 1 .. $ir - 1 ) ) {
17877 my $type = $types_to_go[$i];
17878 if ( $is_assignment{$type}
17879 && $nesting_depth_to_go[$i] eq $depth_beg )
17881 if ( $want_break_before{$type} ) {
17882 push @insert_list, $i - 1;
17885 push @insert_list, $i;
17890 # Break after a 'return' followed by a chain of operators
17891 # return ( $^O !~ /win32|dos/i )
17892 # && ( $^O ne 'VMS' )
17893 # && ( $^O ne 'OS2' )
17894 # && ( $^O ne 'MacOS' );
17897 # ( $^O !~ /win32|dos/i )
17898 # && ( $^O ne 'VMS' )
17899 # && ( $^O ne 'OS2' )
17900 # && ( $^O ne 'MacOS' );
17902 if ( $types_to_go[$i] eq 'k'
17903 && $tokens_to_go[$i] eq 'return'
17905 && $nesting_depth_to_go[$i] eq $depth_beg )
17907 push @insert_list, $i;
17910 return unless (@insert_list);
17912 # One final check...
17913 # scan second and third lines and be sure there are no assignments
17914 # we want to avoid breaking at an = to make something like this:
17916 # $html_icons{"$type-$state"}
17917 # or $icon = $html_icons{$type}
17918 # or $icon = $html_icons{$state} )
17919 for my $n ( 1 .. 2 ) {
17920 my $il_n = $ri_left->[$n];
17921 my $ir_n = $ri_right->[$n];
17922 foreach my $i ( $il_n + 1 .. $ir_n ) {
17923 my $type = $types_to_go[$i];
17925 if ( $is_assignment{$type}
17926 && $nesting_depth_to_go[$i] eq $depth_beg );
17930 # ok, insert any new break point
17931 if (@insert_list) {
17932 $self->insert_additional_breaks( \@insert_list, $ri_left, $ri_right );
17935 } ## end sub break_equals
17937 { ## begin closure recombine_breakpoints
17939 # This routine is called once per batch to see if it would be better
17940 # to combine some of the lines into which the batch has been broken.
17951 @is_amp_amp{@q} = (1) x scalar(@q);
17953 @q = qw( + - * / );
17954 @is_math_op{@q} = (1) x scalar(@q);
17957 @is_plus_minus{@q} = (1) x scalar(@q);
17960 @is_mult_div{@q} = (1) x scalar(@q);
17963 sub Debug_dump_breakpoints {
17965 # Debug routine to dump current breakpoints...not normally called
17966 # We are given indexes to the current lines:
17967 # $ri_beg = ref to array of BEGinning indexes of each line
17968 # $ri_end = ref to array of ENDing indexes of each line
17969 my ( $self, $ri_beg, $ri_end, $msg ) = @_;
17970 print STDERR "----Dumping breakpoints from: $msg----\n";
17971 for my $n ( 0 .. @{$ri_end} - 1 ) {
17972 my $ibeg = $ri_beg->[$n];
17973 my $iend = $ri_end->[$n];
17974 my $text = EMPTY_STRING;
17975 foreach my $i ( $ibeg .. $iend ) {
17976 $text .= $tokens_to_go[$i];
17978 print STDERR "$n ($ibeg:$iend) $text\n";
17980 print STDERR "----\n";
17982 } ## end sub Debug_dump_breakpoints
17984 sub delete_one_line_semicolons {
17986 my ( $self, $ri_beg, $ri_end ) = @_;
17987 my $rLL = $self->[_rLL_];
17988 my $K_opening_container = $self->[_K_opening_container_];
17990 # Walk down the lines of this batch and delete any semicolons
17991 # terminating one-line blocks;
17992 my $nmax = @{$ri_end} - 1;
17994 foreach my $n ( 0 .. $nmax ) {
17995 my $i_beg = $ri_beg->[$n];
17996 my $i_e = $ri_end->[$n];
17997 my $K_beg = $K_to_go[$i_beg];
17998 my $K_e = $K_to_go[$i_e];
18000 my $type_end = $rLL->[$K_end]->[_TYPE_];
18001 if ( $type_end eq '#' ) {
18002 $K_end = $self->K_previous_nonblank($K_end);
18003 if ( defined($K_end) ) { $type_end = $rLL->[$K_end]->[_TYPE_]; }
18006 # we are looking for a line ending in closing brace
18008 unless ( $type_end eq '}' && $rLL->[$K_end]->[_TOKEN_] eq '}' );
18010 # ...and preceded by a semicolon on the same line
18011 my $K_semicolon = $self->K_previous_nonblank($K_end);
18012 next unless defined($K_semicolon);
18013 my $i_semicolon = $i_beg + ( $K_semicolon - $K_beg );
18014 next if ( $i_semicolon <= $i_beg );
18015 next unless ( $rLL->[$K_semicolon]->[_TYPE_] eq ';' );
18017 # Safety check - shouldn't happen - not critical
18018 # This is not worth throwing a Fault, except in DEVEL_MODE
18019 if ( $types_to_go[$i_semicolon] ne ';' ) {
18021 && Fault("unexpected type looking for semicolon");
18025 # ... with the corresponding opening brace on the same line
18026 my $type_sequence = $rLL->[$K_end]->[_TYPE_SEQUENCE_];
18027 my $K_opening = $K_opening_container->{$type_sequence};
18028 next unless ( defined($K_opening) );
18029 my $i_opening = $i_beg + ( $K_opening - $K_beg );
18030 next if ( $i_opening < $i_beg );
18032 # ... and only one semicolon between these braces
18033 my $semicolon_count = 0;
18034 foreach my $K ( $K_opening + 1 .. $K_semicolon - 1 ) {
18035 if ( $rLL->[$K]->[_TYPE_] eq ';' ) {
18036 $semicolon_count++;
18040 next if ($semicolon_count);
18042 # ...ok, then make the semicolon invisible
18043 my $len = $token_lengths_to_go[$i_semicolon];
18044 $tokens_to_go[$i_semicolon] = EMPTY_STRING;
18045 $token_lengths_to_go[$i_semicolon] = 0;
18046 $rLL->[$K_semicolon]->[_TOKEN_] = EMPTY_STRING;
18047 $rLL->[$K_semicolon]->[_TOKEN_LENGTH_] = 0;
18048 foreach ( $i_semicolon .. $max_index_to_go ) {
18049 $summed_lengths_to_go[ $_ + 1 ] -= $len;
18053 } ## end sub delete_one_line_semicolons
18055 use constant DEBUG_RECOMBINE => 0;
18057 sub recombine_breakpoints {
18059 my ( $self, $ri_beg, $ri_end, $rbond_strength_to_go ) = @_;
18061 # This sub implements the 'recombine' operation on a batch.
18062 # Its task is to combine some of these lines back together to
18063 # improve formatting. The need for this arises because
18064 # sub 'break_long_lines' is very liberal in setting line breaks
18065 # for long lines, always setting breaks at good breakpoints, even
18066 # when that creates small lines. Sometimes small line fragments
18067 # are produced which would look better if they were combined.
18069 # Input parameters:
18070 # $ri_beg = ref to array of BEGinning indexes of each line
18071 # $ri_end = ref to array of ENDing indexes of each line
18072 # $rbond_strength_to_go = array of bond strengths pulling
18073 # tokens together, used to decide where best to recombine lines.
18075 #-------------------------------------------------------------------
18076 # Do nothing under extreme stress; use <= 2 for c171.
18077 # (NOTE: New optimizations make this unnecessary. But removing this
18078 # check is not really useful because this condition only occurs in
18079 # test runs, and another formatting pass will fix things anyway.)
18080 # This routine has a long history of improvements. Some past
18081 # relevant issues are : c118, c167, c171, c186, c187, c193, c200.
18082 #-------------------------------------------------------------------
18083 return if ( $high_stress_level <= 2 );
18085 my $nmax_start = @{$ri_end} - 1;
18086 return if ( $nmax_start <= 0 );
18088 my $iend_max = $ri_end->[$nmax_start];
18089 if ( $types_to_go[$iend_max] eq '#' ) {
18090 $iend_max = iprev_to_go($iend_max);
18092 my $has_terminal_semicolon =
18093 $iend_max >= 0 && $types_to_go[$iend_max] eq ';';
18095 #--------------------------------------------------------------------
18096 # Break into the smallest possible sub-sections to improve efficiency
18097 #--------------------------------------------------------------------
18099 # Also make a list of all good joining tokens between the lines
18103 my $rsections = [];
18106 my $nmax_section = 0;
18107 foreach my $nn ( 1 .. $nmax_start ) {
18108 my $ibeg_1 = $ri_beg->[ $nn - 1 ];
18109 my $iend_1 = $ri_end->[ $nn - 1 ];
18110 my $iend_2 = $ri_end->[$nn];
18111 my $ibeg_2 = $ri_beg->[$nn];
18113 # Define certain good joint tokens
18114 my ( $itok, $itokp, $itokm );
18115 foreach my $itest ( $iend_1, $ibeg_2 ) {
18116 my $type = $types_to_go[$itest];
18117 if ( $is_math_op{$type}
18118 || $is_amp_amp{$type}
18119 || $is_assignment{$type}
18126 # joint[$nn] = index of joint character
18127 $joint[$nn] = $itok;
18129 # Update the section list
18130 my $excess = $self->excess_line_length( $ibeg_1, $iend_2, 1 );
18134 # The number 5 here is an arbitrary small number intended
18135 # to keep most small matches in one sub-section.
18136 || ( defined($nend_sec)
18137 && ( $nn < 5 || $nmax_start - $nn < 5 ) )
18143 if ( defined($nend_sec) ) {
18144 push @{$rsections}, [ $nbeg_sec, $nend_sec ];
18145 my $num = $nend_sec - $nbeg_sec;
18146 if ( $num > $nmax_section ) { $nmax_section = $num }
18154 if ( defined($nend_sec) ) {
18155 push @{$rsections}, [ $nbeg_sec, $nend_sec ];
18156 my $num = $nend_sec - $nbeg_sec;
18157 if ( $num > $nmax_section ) { $nmax_section = $num }
18160 my $num_sections = @{$rsections};
18162 if ( DEBUG_RECOMBINE > 1 ) {
18163 print STDERR <<EOM;
18164 sections=$num_sections; nmax_sec=$nmax_section
18168 if ( DEBUG_RECOMBINE > 0 ) {
18171 "-----\n$num_sections sections found for nmax=$nmax_start\n";
18172 foreach my $sect ( @{$rsections} ) {
18173 my ( $nbeg, $nend ) = @{$sect};
18174 my $num = $nend - $nbeg;
18175 if ( $num > $max ) { $max = $num }
18176 print STDERR "$nbeg $nend\n";
18178 print STDERR "max size=$max of $nmax_start lines\n";
18181 # Loop over all sub-sections. Note that we have to work backwards
18182 # from the end of the batch since the sections use original line
18183 # numbers, and the line numbers change as we go.
18184 while ( my $section = pop @{$rsections} ) {
18185 my ( $nbeg, $nend ) = @{$section};
18186 $self->recombine_section_loop(
18188 _ri_beg => $ri_beg,
18189 _ri_end => $ri_end,
18192 _rjoint => \@joint,
18193 _rbond_strength_to_go => $rbond_strength_to_go,
18194 _has_terminal_semicolon => $has_terminal_semicolon,
18200 } ## end sub recombine_breakpoints
18202 sub recombine_section_loop {
18203 my ( $self, $rhash ) = @_;
18205 # Recombine breakpoints for one section of lines in the current batch
18208 # $ri_beg, $ri_end = ref to arrays with token indexes of the first
18210 # $nbeg, $nend = line numbers bounding this section
18211 # $rjoint = ref to array of good joining tokens per line
18213 # Update: $ri_beg, $ri_end, $rjoint if lines are joined
18223 # _ri_beg = ref to array with starting token index by line
18224 # _ri_end = ref to array with ending token index by line
18225 # _nbeg = first line number of this section
18226 # _nend = last line number of this section
18227 # _rjoint = ref to array of good joining tokens for each line
18228 # _rbond_strength_to_go = array of bond strengths
18229 # _has_terminal_semicolon = true if last line of batch has ';'
18231 # _num_freeze = fixed number of lines at end of this batch
18232 # _optimization_on = true during final optimization loop
18233 # _num_compares = total number of line compares made so far
18234 # _pair_list = list of line pairs in optimal search order
18238 my $ri_beg = $rhash->{_ri_beg};
18239 my $ri_end = $rhash->{_ri_end};
18241 # Line index range of this section:
18242 my $nbeg = $rhash->{_nbeg}; # stays constant
18243 my $nend = $rhash->{_nend}; # will decrease
18245 # $nmax_batch = starting number of lines in the full batch
18246 # $num_freeze = number of lines following this section to leave alone
18247 my $nmax_batch = @{$ri_end} - 1;
18248 $rhash->{_num_freeze} = $nmax_batch - $nend;
18250 # Setup the list of line pairs to test. This stores the following
18251 # values for each line pair:
18252 # [ $n=index of the second line of the pair, $bs=bond strength]
18254 my $rbond_strength_to_go = $rhash->{_rbond_strength_to_go};
18255 foreach my $n ( $nbeg + 1 .. $nend ) {
18256 my $iend_1 = $ri_end->[ $n - 1 ];
18257 my $ibeg_2 = $ri_beg->[$n];
18259 if ( $is_amp_amp{ $types_to_go[$ibeg_2] } ) { $bs_tweak = 0.25 }
18260 my $bs = $rbond_strength_to_go->[$iend_1] + $bs_tweak;
18261 push @pair_list, [ $n, $bs ];
18264 # Any order for testing is possible, but optimization is only possible
18265 # if we sort the line pairs on decreasing joint strength.
18267 sort { $b->[1] <=> $a->[1] || $a->[0] <=> $b->[0] } @pair_list;
18268 $rhash->{_rpair_list} = \@pair_list;
18274 # This was originally an O(n-squared) loop which required a check on
18275 # the maximum number of iterations for safety. It is now a very fast
18276 # loop which runs in O(n) time, but a check on total number of
18277 # iterations is retained to guard against future programming errors.
18279 # Most cases require roughly 1 comparison per line pair (1 full pass).
18280 # The upper bound is estimated to be about 3 comparisons per line pair
18281 # unless optimization is deactivated. The approximate breakdown is:
18282 # 1 pass with 1 compare per joint to do any special cases, plus
18283 # 1 pass with up to 2 compares per joint in optimization mode
18284 # The most extreme cases in my collection are:
18285 # camel1.t - needs 2.7 compares per line (12 without optimization)
18286 # ternary.t - needs 2.8 compares per line (12 without optimization)
18287 # So a value of MAX_COMPARE_RATIO = 3 looks like an upper bound as
18288 # long as optimization is used. A value of 20 should allow all code to
18289 # pass even if optimization is turned off for testing.
18291 # The OPTIMIZE_OK flag should be true except for testing.
18292 use constant MAX_COMPARE_RATIO => 20;
18293 use constant OPTIMIZE_OK => 1;
18295 my $num_pairs = $nend - $nbeg + 1;
18296 my $max_compares = MAX_COMPARE_RATIO * $num_pairs;
18298 # Always start with optimization off
18299 $rhash->{_num_compares} = 0;
18300 $rhash->{_optimization_on} = 0;
18301 $rhash->{_ix_best_last} = 0;
18303 #--------------------------------------------
18304 # loop until there are no more recombinations
18305 #--------------------------------------------
18306 my $nmax_last = $nmax_batch + 1;
18309 # Stop when the number of lines in the batch does not decrease
18310 $nmax_batch = @{$ri_end} - 1;
18311 if ( $nmax_batch >= $nmax_last ) {
18314 $nmax_last = $nmax_batch;
18316 #-----------------------------------------
18317 # inner loop to find next best combination
18318 #-----------------------------------------
18319 $self->recombine_inner_loop($rhash);
18321 # Iteration limit check:
18322 if ( $rhash->{_num_compares} > $max_compares ) {
18324 # See note above; should only get here on a programming error
18326 my $ibeg = $ri_beg->[$nbeg];
18327 my $Kbeg = $K_to_go[$ibeg];
18328 my $lno = $self->[_rLL_]->[$Kbeg]->[_LINE_INDEX_];
18330 inner loop passes =$rhash->{_num_compares} exceeds max=$max_compares, near line $lno
18336 } ## end iteration loop
18338 if (DEBUG_RECOMBINE) {
18339 my $ratio = sprintf "%0.3f", $rhash->{_num_compares} / $num_pairs;
18341 "exiting recombine_inner_loop with $nmax_last lines, opt=$rhash->{_optimization_on}, starting pairs=$num_pairs, num_compares=$rhash->{_num_compares}, ratio=$ratio\n";
18345 } ## end sub recombine_section_loop
18347 sub recombine_inner_loop {
18348 my ( $self, $rhash ) = @_;
18350 # This is the inner loop of the recombine operation. We look at all of
18351 # the remaining joints in this section and select the best joint to be
18352 # recombined. If a recombination is made, the number of lines
18353 # in this section will be reduced by one.
18357 my $rK_weld_right = $self->[_rK_weld_right_];
18358 my $rK_weld_left = $self->[_rK_weld_left_];
18360 my $ri_beg = $rhash->{_ri_beg};
18361 my $ri_end = $rhash->{_ri_end};
18362 my $nbeg = $rhash->{_nbeg};
18363 my $rjoint = $rhash->{_rjoint};
18364 my $rbond_strength_to_go = $rhash->{_rbond_strength_to_go};
18365 my $rpair_list = $rhash->{_rpair_list};
18367 # This will remember the best joint:
18373 # The range of lines in this group is $nbeg to $nstop
18374 my $nmax = @{$ri_end} - 1;
18375 my $nstop = $nmax - $rhash->{_num_freeze};
18376 my $num_joints = $nstop - $nbeg;
18378 # Turn off optimization if just two joints remain to allow
18379 # special two-line logic to be checked (c193)
18380 if ( $rhash->{_optimization_on} && $num_joints <= 2 ) {
18381 $rhash->{_optimization_on} = 0;
18384 # Start where we ended the last search
18385 my $ix_start = $rhash->{_ix_best_last};
18387 # Keep the starting index in bounds
18388 $ix_start = max( 0, $ix_start );
18390 # Make a search order list which cycles around to visit
18392 my $ix_max = @{$rpair_list} - 1;
18393 my @ix_list = ( $ix_start .. $ix_max, 0 .. $ix_start - 1 );
18394 my $ix_last = $ix_list[-1];
18396 #-------------------------
18397 # loop over all line pairs
18398 #-------------------------
18399 my $incomplete_loop;
18400 foreach my $ix (@ix_list) {
18401 my $item = $rpair_list->[$ix];
18402 my ( $n, $bs ) = @{$item};
18404 # This flag will be true if we 'last' out of this loop early.
18405 # We cannot turn on optimization if this is true.
18406 $incomplete_loop = $ix != $ix_last;
18408 # Update the count of the number of times through this inner loop
18409 $rhash->{_num_compares}++;
18411 #----------------------------------------------------------
18412 # If we join the current pair of lines,
18413 # line $n-1 will become the left part of the joined line
18414 # line $n will become the right part of the joined line
18416 # Here are Indexes of the endpoint tokens of the two lines:
18418 # -----line $n-1--- | -----line $n-----
18419 # $ibeg_1 $iend_1 | $ibeg_2 $iend_2
18422 # We want to decide if we should remove the line break
18423 # between the tokens at $iend_1 and $ibeg_2
18425 # We will apply a number of ad-hoc tests to see if joining
18426 # here will look ok. The code will just move to the next
18427 # pair if the join doesn't look good. If we get through
18428 # the gauntlet of tests, the lines will be recombined.
18429 #----------------------------------------------------------
18431 # beginning and ending tokens of the lines we are working on
18432 my $ibeg_1 = $ri_beg->[ $n - 1 ];
18433 my $iend_1 = $ri_end->[ $n - 1 ];
18434 my $iend_2 = $ri_end->[$n];
18435 my $ibeg_2 = $ri_beg->[$n];
18437 # The combined line cannot be too long
18438 my $excess = $self->excess_line_length( $ibeg_1, $iend_2, 1 );
18439 next if ( $excess > 0 );
18441 my $type_iend_1 = $types_to_go[$iend_1];
18442 my $type_iend_2 = $types_to_go[$iend_2];
18443 my $type_ibeg_1 = $types_to_go[$ibeg_1];
18444 my $type_ibeg_2 = $types_to_go[$ibeg_2];
18446 DEBUG_RECOMBINE > 1 && do {
18448 "RECOMBINE: ix=$ix iend1=$iend_1 iend2=$iend_2 n=$n nmax=$nmax if=$ibeg_1 type=$type_ibeg_1 =$tokens_to_go[$ibeg_1] next_type=$type_ibeg_2 next_tok=$tokens_to_go[$ibeg_2]\n";
18451 # If line $n is the last line, we set some flags and
18452 # do any special checks for it
18453 my $this_line_is_semicolon_terminated;
18454 if ( $n == $nmax ) {
18456 if ( $type_ibeg_2 eq '{' ) {
18458 # join isolated ')' and '{' if requested (git #110)
18459 if ( $rOpts_cuddled_paren_brace
18460 && $type_iend_1 eq '}'
18461 && $iend_1 == $ibeg_1
18462 && $ibeg_2 == $iend_2 )
18464 if ( $tokens_to_go[$iend_1] eq ')'
18465 && $tokens_to_go[$ibeg_2] eq '{' )
18473 # otherwise, a terminal '{' should stay where it is
18474 # unless preceded by a fat comma
18475 next if ( $type_iend_1 ne '=>' );
18478 $this_line_is_semicolon_terminated =
18479 $rhash->{_has_terminal_semicolon};
18483 #----------------------------------------------------------
18484 # Recombine Section 0:
18485 # Examine the special token joining this line pair, if any.
18486 # Put as many tests in this section to avoid duplicate code
18487 # and to make formatting independent of whether breaks are
18488 # to the left or right of an operator.
18489 #----------------------------------------------------------
18491 my $itok = $rjoint->[$n];
18493 my $ok_0 = recombine_section_0( $itok, $ri_beg, $ri_end, $n );
18494 next if ( !$ok_0 );
18497 #----------------------------------------------------------
18498 # Recombine Section 1:
18499 # Join welded nested containers immediately
18500 #----------------------------------------------------------
18504 && ( $type_sequence_to_go[$iend_1]
18505 && defined( $rK_weld_right->{ $K_to_go[$iend_1] } )
18506 || $type_sequence_to_go[$ibeg_2]
18507 && defined( $rK_weld_left->{ $K_to_go[$ibeg_2] } ) )
18515 #----------------------------------------------------------
18516 # Recombine Section 2:
18517 # Examine token at $iend_1 (right end of first line of pair)
18518 #----------------------------------------------------------
18520 my ( $ok_2, $skip_Section_3 ) =
18521 recombine_section_2( $ri_beg, $ri_end, $n,
18522 $this_line_is_semicolon_terminated );
18523 next if ( !$ok_2 );
18525 #----------------------------------------------------------
18526 # Recombine Section 3:
18527 # Examine token at $ibeg_2 (left end of second line of pair)
18528 #----------------------------------------------------------
18530 # Join lines identified above as capable of
18531 # causing an outdented line with leading closing paren.
18532 # Note that we are skipping the rest of this section
18533 # and the rest of the loop to do the join.
18534 if ($skip_Section_3) {
18535 $forced_breakpoint_to_go[$iend_1] = 0;
18538 $incomplete_loop = 1;
18542 my ( $ok_3, $bs_tweak ) =
18543 recombine_section_3( $ri_beg, $ri_end, $n,
18544 $this_line_is_semicolon_terminated );
18545 next if ( !$ok_3 );
18547 #----------------------------------------------------------
18548 # Recombine Section 4:
18549 # Combine the lines if we arrive here and it is possible
18550 #----------------------------------------------------------
18552 # honor hard breakpoints
18553 next if ( $forced_breakpoint_to_go[$iend_1] );
18557 # This fault can only occur if an array index error has been
18558 # introduced by a recent programming change.
18559 my $bs_check = $rbond_strength_to_go->[$iend_1] + $bs_tweak;
18560 if ( $bs_check != $bs ) {
18562 bs=$bs != $bs_check for break after type $type_iend_1 ix=$ix n=$n
18567 # Require a few extra spaces before recombining lines if we
18568 # are at an old breakpoint unless this is a simple list or
18569 # terminal line. The goal is to avoid oscillating between
18570 # two quasi-stable end states. For example this snippet
18575 ## TText => "[" . ( join ',', map { "\"$_\"" } split "\n", $_ ) . "]"
18579 if ( $old_breakpoint_to_go[$iend_1]
18580 && !$this_line_is_semicolon_terminated
18583 && $type_iend_2 ne ',' );
18585 # do not recombine if we would skip in indentation levels
18586 if ( $n < $nmax ) {
18587 my $if_next = $ri_beg->[ $n + 1 ];
18590 $levels_to_go[$ibeg_1] < $levels_to_go[$ibeg_2]
18591 && $levels_to_go[$ibeg_2] < $levels_to_go[$if_next]
18593 # but an isolated 'if (' is undesirable
18596 && $iend_1 - $ibeg_1 <= 2
18597 && $type_ibeg_1 eq 'k'
18598 && $tokens_to_go[$ibeg_1] eq 'if'
18599 && $tokens_to_go[$iend_1] ne '('
18604 ## OLD: honor no-break's
18605 ## next if ( $bs >= NO_BREAK - 1 ); # removed for b1257
18607 # remember the pair with the greatest bond strength
18610 # First good joint ...
18616 # In optimization mode: stop on the first acceptable joint
18617 # because we already know it has the highest strength
18618 if ( $rhash->{_optimization_on} == 1 ) {
18624 # Second and later joints ..
18627 # save maximum strength; in case of a tie select min $n
18628 if ( $bs > $bs_best || $bs == $bs_best && $n < $n_best ) {
18635 } ## end loop over all line pairs
18637 #---------------------------------------------------
18638 # recombine the pair with the greatest bond strength
18639 #---------------------------------------------------
18641 DEBUG_RECOMBINE > 1
18642 && print "BEST: nb=$n_best nbeg=$nbeg stop=$nstop bs=$bs_best\n";
18643 splice @{$ri_beg}, $n_best, 1;
18644 splice @{$ri_end}, $n_best - 1, 1;
18645 splice @{$rjoint}, $n_best, 1;
18647 splice @{$rpair_list}, $ix_best, 1;
18649 # Update the line indexes in the pair list:
18650 # Old $n values greater than the best $n decrease by 1
18651 # because of the splice we just did.
18652 foreach my $item ( @{$rpair_list} ) {
18653 my $n_old = $item->[0];
18654 if ( $n_old > $n_best ) { $item->[0] -= 1 }
18657 # Store the index of this location for starting the next search.
18658 # We must subtract 1 to get an updated index because the splice
18659 # above just removed the best pair.
18660 # BUT CAUTION: if this is the first pair in the pair list, then
18661 # this produces an invalid index. So this index must be tested
18662 # before use in the next pass through the outer loop.
18663 $rhash->{_ix_best_last} = $ix_best - 1;
18665 # Turn on optimization if ...
18668 # it is not already on, and
18669 !$rhash->{_optimization_on}
18671 # we have not taken a shortcut to get here, and
18672 && !$incomplete_loop
18674 # we have seen a good break on strength, and
18677 # we are allowed to optimize
18682 $rhash->{_optimization_on} = 1;
18683 if (DEBUG_RECOMBINE) {
18684 my $num_compares = $rhash->{_num_compares};
18685 my $pair_count = @ix_list;
18687 "Entering optimization phase at $num_compares compares, pair count = $pair_count\n";
18692 } ## end sub recombine_inner_loop
18694 sub recombine_section_0 {
18695 my ( $itok, $ri_beg, $ri_end, $n ) = @_;
18697 # Recombine Section 0:
18698 # Examine special candidate joining token $itok
18701 # $itok = index of token at a possible join of lines $n-1 and $n
18704 # true => ok to combine
18705 # false => do not combine lines
18707 # Here are Indexes of the endpoint tokens of the two lines:
18709 # -----line $n-1--- | -----line $n-----
18710 # $ibeg_1 $iend_1 | $ibeg_2 $iend_2
18713 # ------------$itok is one of these tokens
18715 # Put as many tests in this section to avoid duplicate code
18716 # and to make formatting independent of whether breaks are
18717 # to the left or right of an operator.
18719 my $nmax = @{$ri_end} - 1;
18720 my $ibeg_1 = $ri_beg->[ $n - 1 ];
18721 my $iend_1 = $ri_end->[ $n - 1 ];
18722 my $ibeg_2 = $ri_beg->[$n];
18723 my $iend_2 = $ri_end->[$n];
18727 my $type = $types_to_go[$itok];
18729 if ( $type eq ':' ) {
18731 # do not join at a colon unless it disobeys the
18733 if ( $itok eq $iend_1 ) {
18734 return unless $want_break_before{$type};
18737 return if $want_break_before{$type};
18741 # handle math operators + - * /
18742 elsif ( $is_math_op{$type} ) {
18744 # Combine these lines if this line is a single
18745 # number, or if it is a short term with same
18746 # operator as the previous line. For example, in
18747 # the following code we will combine all of the
18748 # short terms $A, $B, $C, $D, $E, $F, together
18749 # instead of leaving them one per line:
18751 # $A * $B * $C * $D * $E * $F *
18752 # ( 2. * $eps * $sigma * $area ) *
18753 # ( 1. / $tcold**3 - 1. / $thot**3 );
18755 # This can be important in math-intensive code.
18759 my $itokp = min( $inext_to_go[$itok], $iend_2 );
18760 my $itokpp = min( $inext_to_go[$itokp], $iend_2 );
18761 my $itokm = max( iprev_to_go($itok), $ibeg_1 );
18762 my $itokmm = max( iprev_to_go($itokm), $ibeg_1 );
18764 # check for a number on the right
18765 if ( $types_to_go[$itokp] eq 'n' ) {
18767 # ok if nothing else on right
18768 if ( $itokp == $iend_2 ) {
18773 # look one more token to right..
18774 # okay if math operator or some termination
18776 ( ( $itokpp == $iend_2 )
18777 && $is_math_op{ $types_to_go[$itokpp] } )
18778 || $types_to_go[$itokpp] =~ /^[#,;]$/;
18782 # check for a number on the left
18783 if ( !$good_combo && $types_to_go[$itokm] eq 'n' ) {
18785 # okay if nothing else to left
18786 if ( $itokm == $ibeg_1 ) {
18790 # otherwise look one more token to left
18793 # okay if math operator, comma, or assignment
18794 $good_combo = ( $itokmm == $ibeg_1 )
18795 && ( $is_math_op{ $types_to_go[$itokmm] }
18796 || $types_to_go[$itokmm] =~ /^[,]$/
18797 || $is_assignment{ $types_to_go[$itokmm] } );
18801 # look for a single short token either side of the
18803 if ( !$good_combo ) {
18805 # Slight adjustment factor to make results
18806 # independent of break before or after operator
18807 # in long summed lists. (An operator and a
18808 # space make two spaces).
18809 my $two = ( $itok eq $iend_1 ) ? 2 : 0;
18813 # numbers or id's on both sides of this joint
18814 $types_to_go[$itokp] =~ /^[in]$/
18815 && $types_to_go[$itokm] =~ /^[in]$/
18817 # one of the two lines must be short:
18820 # no more than 2 nonblank tokens right
18825 && token_sequence_length( $itokp, $iend_2 ) <
18826 $two + $rOpts_short_concatenation_item_length
18829 # no more than 2 nonblank tokens left of
18834 && token_sequence_length( $ibeg_1, $itokm ) <
18835 2 - $two + $rOpts_short_concatenation_item_length
18840 # keep pure terms; don't mix +- with */
18842 $is_plus_minus{$type}
18843 && ( $is_mult_div{ $types_to_go[$itokmm] }
18844 || $is_mult_div{ $types_to_go[$itokpp] } )
18847 $is_mult_div{$type}
18848 && ( $is_plus_minus{ $types_to_go[$itokmm] }
18849 || $is_plus_minus{ $types_to_go[$itokpp] } )
18855 # it is also good to combine if we can reduce to 2
18857 if ( !$good_combo ) {
18859 # index on other line where same token would be
18861 my $iother = ( $itok == $iend_1 ) ? $iend_2 : $ibeg_1;
18866 && $types_to_go[$iother] ne $type;
18869 return unless ($good_combo);
18873 elsif ( $is_amp_amp{$type} ) {
18877 elsif ( $is_assignment{$type} ) {
18879 } ## end assignment
18882 # ok to combine lines
18884 } ## end sub recombine_section_0
18886 sub recombine_section_2 {
18888 my ( $ri_beg, $ri_end, $n, $this_line_is_semicolon_terminated ) = @_;
18890 # Recombine Section 2:
18891 # Examine token at $iend_1 (right end of first line of pair)
18893 # Here are Indexes of the endpoint tokens of the two lines:
18895 # -----line $n-1--- | -----line $n-----
18896 # $ibeg_1 $iend_1 | $ibeg_2 $iend_2
18899 # -----Section 2 looks at this token
18902 # (nothing) => do not join lines
18903 # 1, skip_Section_3 => ok to join lines
18905 # $skip_Section_3 is a flag for skipping the next section
18906 my $skip_Section_3 = 0;
18908 my $nmax = @{$ri_end} - 1;
18909 my $ibeg_1 = $ri_beg->[ $n - 1 ];
18910 my $iend_1 = $ri_end->[ $n - 1 ];
18911 my $iend_2 = $ri_end->[$n];
18912 my $ibeg_2 = $ri_beg->[$n];
18913 my $ibeg_3 = $n < $nmax ? $ri_beg->[ $n + 1 ] : -1;
18914 my $ibeg_nmax = $ri_beg->[$nmax];
18916 my $type_iend_1 = $types_to_go[$iend_1];
18917 my $type_iend_2 = $types_to_go[$iend_2];
18918 my $type_ibeg_1 = $types_to_go[$ibeg_1];
18919 my $type_ibeg_2 = $types_to_go[$ibeg_2];
18921 # an isolated '}' may join with a ';' terminated segment
18922 if ( $type_iend_1 eq '}' ) {
18924 # Check for cases where combining a semicolon terminated
18925 # statement with a previous isolated closing paren will
18926 # allow the combined line to be outdented. This is
18927 # generally a good move. For example, we can join up
18928 # the last two lines here:
18930 # $dev, $ino, $mode, $nlink, $uid, $gid, $rdev,
18931 # $size, $atime, $mtime, $ctime, $blksize, $blocks
18937 # $dev, $ino, $mode, $nlink, $uid, $gid, $rdev,
18938 # $size, $atime, $mtime, $ctime, $blksize, $blocks
18941 # which makes the parens line up.
18943 # Another example, from Joe Matarazzo, probably looks best
18944 # with the 'or' clause appended to the trailing paren:
18945 # $self->some_method(
18948 # ) or die "Some_method didn't work";
18950 # But we do not want to do this for something like the -lp
18951 # option where the paren is not outdentable because the
18952 # trailing clause will be far to the right.
18954 # The logic here is synchronized with the logic in sub
18955 # sub get_final_indentation, which actually does
18958 my $combine_ok = $this_line_is_semicolon_terminated
18960 # only one token on last line
18961 && $ibeg_1 == $iend_1
18963 # must be structural paren
18964 && $tokens_to_go[$iend_1] eq ')'
18966 # style must allow outdenting,
18967 && !$closing_token_indentation{')'}
18969 # but leading colons probably line up with a
18970 # previous colon or question (count could be wrong).
18971 && $type_ibeg_2 ne ':'
18973 # only one step in depth allowed. this line must not
18974 # begin with a ')' itself.
18975 && ( $nesting_depth_to_go[$iend_1] ==
18976 $nesting_depth_to_go[$iend_2] + 1 );
18978 # But only combine leading '&&', '||', if no previous && || :
18979 # seen. This count includes these tokens at all levels. The
18980 # idea is that seeing these at any level can make it hard to read
18981 # formatting if we recombine.
18982 if ( $is_amp_amp{$type_ibeg_2} ) {
18983 foreach my $n_t ( reverse( 0 .. $n - 2 ) ) {
18984 my $ibeg_t = $ri_beg->[$n_t];
18985 my $type_t = $types_to_go[$ibeg_t];
18986 if ( $is_amp_amp{$type_t} || $type_t eq ':' ) {
18993 $skip_Section_3 ||= $combine_ok;
18995 # YVES patch 2 of 2:
18996 # Allow cuddled eval chains, like this:
19003 # This patch works together with a patch in
19004 # setting adjusted indentation (where the closing eval
19005 # brace is outdented if possible).
19006 # The problem is that an 'eval' block has continuation
19007 # indentation and it looks better to undo it in some
19008 # cases. If we do not use this patch we would get:
19016 # The alternative, for uncuddled style, is to create
19017 # a patch in get_final_indentation which undoes
19018 # the indentation of a leading line like 'or do {'.
19019 # This doesn't work well with -icb through
19021 $block_type_to_go[$iend_1]
19022 && $rOpts_brace_follower_vertical_tightness > 0
19025 # -bfvt=1, allow cuddled eval chains [default]
19027 $tokens_to_go[$iend_2] eq '{'
19028 && $block_type_to_go[$iend_1] eq 'eval'
19029 && !ref( $leading_spaces_to_go[$iend_1] )
19030 && !$rOpts_indent_closing_brace
19033 # -bfvt=2, allow most brace followers [part of git #110]
19034 || ( $rOpts_brace_follower_vertical_tightness > 1
19035 && $ibeg_1 == $iend_1 )
19040 ( $type_ibeg_2 =~ /^(\&\&|\|\|)$/ )
19041 || ( $type_ibeg_2 eq 'k'
19042 && $is_and_or{ $tokens_to_go[$ibeg_2] } )
19043 || $is_if_unless{ $tokens_to_go[$ibeg_2] }
19047 $skip_Section_3 ||= 1;
19054 # handle '.' and '?' specially below
19055 || ( $type_ibeg_2 =~ /^[\.\?]$/ )
19057 # fix for c054 (unusual -pbp case)
19058 || $type_ibeg_2 eq '=='
19063 elsif ( $type_iend_1 eq '{' ) {
19066 # honor breaks at opening brace
19067 # Added to prevent recombining something like this:
19068 # } || eval { package main;
19069 return if ( $forced_breakpoint_to_go[$iend_1] );
19072 # do not recombine lines with ending &&, ||,
19073 elsif ( $is_amp_amp{$type_iend_1} ) {
19074 return unless ( $want_break_before{$type_iend_1} );
19077 # Identify and recombine a broken ?/: chain
19078 elsif ( $type_iend_1 eq '?' ) {
19080 # Do not recombine different levels
19082 if ( $levels_to_go[$ibeg_1] ne $levels_to_go[$ibeg_2] );
19084 # do not recombine unless next line ends in :
19085 return unless $type_iend_2 eq ':';
19088 # for lines ending in a comma...
19089 elsif ( $type_iend_1 eq ',' ) {
19091 # Do not recombine at comma which is following the
19093 # NOTE: this could be controlled by a special flag,
19094 # but it seems to work okay.
19095 return if ( $old_breakpoint_to_go[$iend_1] );
19097 # An isolated '},' may join with an identifier + ';'
19098 # This is useful for the class of a 'bless' statement
19100 if ( $type_ibeg_1 eq '}'
19101 && $type_ibeg_2 eq 'i' )
19104 unless ( ( $ibeg_1 == ( $iend_1 - 1 ) )
19105 && ( $iend_2 == ( $ibeg_2 + 1 ) )
19106 && $this_line_is_semicolon_terminated );
19108 # override breakpoint
19109 $forced_breakpoint_to_go[$iend_1] = 0;
19115 # do not recombine after a comma unless this will
19116 # leave just 1 more line
19117 return unless ( $n + 1 >= $nmax );
19119 # do not recombine if there is a change in
19120 # indentation depth
19122 if ( $levels_to_go[$iend_1] != $levels_to_go[$iend_2] );
19124 # do not recombine a "complex expression" after a
19125 # comma. "complex" means no parens.
19127 foreach my $ii ( $ibeg_2 .. $iend_2 ) {
19128 if ( $tokens_to_go[$ii] eq '(' ) {
19133 return if $saw_paren;
19138 elsif ( $type_iend_1 eq '(' ) {
19140 # No longer doing this
19143 elsif ( $type_iend_1 eq ')' ) {
19145 # No longer doing this
19148 # keep a terminal for-semicolon
19149 elsif ( $type_iend_1 eq 'f' ) {
19153 # if '=' at end of line ...
19154 elsif ( $is_assignment{$type_iend_1} ) {
19156 # keep break after = if it was in input stream
19157 # this helps prevent 'blinkers'
19160 $old_breakpoint_to_go[$iend_1]
19162 # don't strand an isolated '='
19163 && $iend_1 != $ibeg_1
19166 my $is_short_quote =
19167 ( $type_ibeg_2 eq 'Q'
19168 && $ibeg_2 == $iend_2
19169 && token_sequence_length( $ibeg_2, $ibeg_2 ) <
19170 $rOpts_short_concatenation_item_length );
19172 $type_ibeg_1 eq '?' && ( $ibeg_3 >= 0
19173 && $types_to_go[$ibeg_3] eq ':' )
19176 # always join an isolated '=', a short quote, or if this
19177 # will put ?/: at start of adjacent lines
19178 if ( $ibeg_1 != $iend_1
19179 && !$is_short_quote
19186 # unless we can reduce this to two lines
19189 # or three lines, the last with a leading
19191 || ( $nmax == $n + 2
19192 && $types_to_go[$ibeg_nmax] eq ';' )
19194 # or the next line ends with a here doc
19195 || $type_iend_2 eq 'h'
19197 # or the next line ends in an open paren or
19198 # brace and the break hasn't been forced
19200 || ( !$forced_breakpoint_to_go[$iend_1]
19201 && $type_iend_2 eq '{' )
19204 # do not recombine if the two lines might align
19205 # well this is a very approximate test for this
19208 # RT#127633 - the leading tokens are not
19210 ( $type_ibeg_2 ne $tokens_to_go[$ibeg_2] )
19212 # or they are different
19214 && $type_ibeg_2 ne $types_to_go[$ibeg_3] )
19220 # Recombine if we can make two lines
19223 # -lp users often prefer this:
19224 # my $title = function($env, $env, $sysarea,
19225 # "bubba Borrower Entry");
19226 # so we will recombine if -lp is used we have
19230 && ref( $leading_spaces_to_go[$ibeg_3] )
19231 && $type_iend_2 eq ','
19236 # otherwise, scan the rhs line up to last token for
19237 # complexity. Note that we are not counting the last token
19238 # in case it is an opening paren.
19239 my $ok = simple_rhs( $ri_end, $n, $nmax, $ibeg_2, $iend_2 );
19240 return if ( !$ok );
19245 unless ( $tokens_to_go[$ibeg_2] =~ /^[\{\(\[]$/ ) {
19246 $forced_breakpoint_to_go[$iend_1] = 0;
19251 elsif ( $type_iend_1 eq 'k' ) {
19253 # make major control keywords stand out
19258 #/^(last|next|redo|return)$/
19259 $is_last_next_redo_return{ $tokens_to_go[$iend_1] }
19261 # but only if followed by multiple lines
19265 if ( $is_and_or{ $tokens_to_go[$iend_1] } ) {
19267 unless $want_break_before{ $tokens_to_go[$iend_1] };
19270 elsif ( $type_iend_1 eq '.' ) {
19272 # NOTE: the logic here should match that of section 3 so that
19273 # line breaks are independent of choice of break before or after.
19274 # It would be nice to combine them in section 0, but the
19275 # special junction case ') .' makes that difficult.
19276 # This section added to fix issues c172, c174.
19277 my $i_next_nonblank = $ibeg_2;
19278 my $summed_len_1 = $summed_lengths_to_go[ $iend_1 + 1 ] -
19279 $summed_lengths_to_go[$ibeg_1];
19280 my $summed_len_2 = $summed_lengths_to_go[ $iend_2 + 1 ] -
19281 $summed_lengths_to_go[$ibeg_2];
19282 my $iend_1_minus = max( $ibeg_1, iprev_to_go($iend_1) );
19287 # ... unless there is just one and we can reduce
19288 # this to two lines if we do. For example, this
19292 # '($dummy, $pat) = &get_next_tex_cmd;' . '$args .= $pat;'
19294 # looks better than this:
19295 # $bodyA .= '($dummy, $pat) = &get_next_tex_cmd;' .
19298 # check for 2 lines, not in a long broken '.' chain
19299 ( $n == 2 && $n == $nmax && $type_iend_1 ne $type_iend_2 )
19301 # ... or this would strand a short quote , like this
19302 # "some long quote" .
19305 $types_to_go[$i_next_nonblank] eq 'Q'
19306 && $i_next_nonblank >= $iend_2 - 2
19307 && $token_lengths_to_go[$i_next_nonblank] <
19308 $rOpts_short_concatenation_item_length
19310 # additional constraints to fix c167
19311 && ( $types_to_go[$iend_1_minus] ne 'Q'
19312 || $summed_len_2 < $summed_len_1 )
19316 return ( 1, $skip_Section_3 );
19317 } ## end sub recombine_section_2
19321 my ( $ri_end, $n, $nmax, $ibeg_2, $iend_2 ) = @_;
19323 # Scan line ibeg_2 to $iend_2 up to last token for complexity.
19324 # We are not counting the last token in case it is an opening paren.
19326 # true if rhs is simple, ok to recombine
19330 my $depth = $nesting_depth_to_go[$ibeg_2];
19331 foreach my $i ( $ibeg_2 + 1 .. $iend_2 - 1 ) {
19332 if ( $nesting_depth_to_go[$i] != $depth ) {
19334 last if ( $tv > 1 );
19336 $depth = $nesting_depth_to_go[$i];
19339 # ok to recombine if no level changes before
19343 # otherwise, do not recombine if more than
19344 # two level changes.
19345 return if ( $tv > 1 );
19347 # check total complexity of the two
19348 # adjacent lines that will occur if we do
19352 ? $ri_end->[ $n + 1 ]
19354 foreach my $i ( $iend_2 .. $istop ) {
19355 if ( $nesting_depth_to_go[$i] != $depth ) {
19357 last if ( $tv > 2 );
19359 $depth = $nesting_depth_to_go[$i];
19362 # do not recombine if total is more than 2
19364 return if ( $tv > 2 );
19367 } ## end sub simple_rhs
19369 sub recombine_section_3 {
19371 my ( $ri_beg, $ri_end, $n, $this_line_is_semicolon_terminated ) = @_;
19373 # Recombine Section 3:
19374 # Examine token at $ibeg_2 (right end of first line of pair)
19376 # Here are Indexes of the endpoint tokens of the two lines:
19378 # -----line $n-1--- | -----line $n-----
19379 # $ibeg_1 $iend_1 | $ibeg_2 $iend_2
19382 # -----Section 3 looks at this token
19385 # (nothing) => do not join lines
19386 # 1, bs_tweak => ok to join lines
19388 # $bstweak is a small tolerance to add to bond strengths
19391 my $nmax = @{$ri_end} - 1;
19392 my $ibeg_1 = $ri_beg->[ $n - 1 ];
19393 my $iend_1 = $ri_end->[ $n - 1 ];
19394 my $iend_2 = $ri_end->[$n];
19395 my $ibeg_2 = $ri_beg->[$n];
19397 my $ibeg_0 = $n > 1 ? $ri_beg->[ $n - 2 ] : -1;
19398 my $ibeg_3 = $n < $nmax ? $ri_beg->[ $n + 1 ] : -1;
19399 my $ibeg_4 = $n + 2 <= $nmax ? $ri_beg->[ $n + 2 ] : -1;
19400 my $ibeg_nmax = $ri_beg->[$nmax];
19402 my $type_iend_1 = $types_to_go[$iend_1];
19403 my $type_iend_2 = $types_to_go[$iend_2];
19404 my $type_ibeg_1 = $types_to_go[$ibeg_1];
19405 my $type_ibeg_2 = $types_to_go[$ibeg_2];
19407 # handle lines with leading &&, ||
19408 if ( $is_amp_amp{$type_ibeg_2} ) {
19410 # ok to recombine if it follows a ? or :
19411 # and is followed by an open paren..
19413 ( $is_ternary{$type_ibeg_1} && $tokens_to_go[$iend_2] eq '(' )
19415 # or is followed by a ? or : at same depth
19417 # We are looking for something like this. We can
19418 # recombine the && line with the line above to make the
19419 # structure more clear:
19421 # exists $G->{Attr}->{V}
19422 # && exists $G->{Attr}->{V}->{$u}
19423 # ? %{ $G->{Attr}->{V}->{$u} }
19426 # We should probably leave something like this alone:
19428 # exists $G->{Attr}->{E}
19429 # && exists $G->{Attr}->{E}->{$u}
19430 # && exists $G->{Attr}->{E}->{$u}->{$v}
19431 # ? %{ $G->{Attr}->{E}->{$u}->{$v} }
19433 # so that we either have all of the &&'s (or ||'s)
19434 # on one line, as in the first example, or break at
19435 # each one as in the second example. However, it
19436 # sometimes makes things worse to check for this because
19437 # it prevents multiple recombinations. So this is not done.
19439 && $is_ternary{ $types_to_go[$ibeg_3] }
19440 && $nesting_depth_to_go[$ibeg_3] ==
19441 $nesting_depth_to_go[$ibeg_2] );
19443 # Combine a trailing && term with an || term: fix for
19444 # c060 This is rare but can happen.
19447 && $type_ibeg_2 eq '&&'
19448 && $type_ibeg_1 eq '||'
19449 && $nesting_depth_to_go[$ibeg_2] ==
19450 $nesting_depth_to_go[$ibeg_1] );
19452 return if !$ok && $want_break_before{$type_ibeg_2};
19453 $forced_breakpoint_to_go[$iend_1] = 0;
19455 # tweak the bond strength to give this joint priority
19460 # Identify and recombine a broken ?/: chain
19461 elsif ( $type_ibeg_2 eq '?' ) {
19463 # Do not recombine different levels
19464 my $lev = $levels_to_go[$ibeg_2];
19465 return if ( $lev ne $levels_to_go[$ibeg_1] );
19467 # Do not recombine a '?' if either next line or
19468 # previous line does not start with a ':'. The reasons
19469 # are that (1) no alignment of the ? will be possible
19470 # and (2) the expression is somewhat complex, so the
19471 # '?' is harder to see in the interior of the line.
19472 my $follows_colon = $ibeg_1 >= 0 && $type_ibeg_1 eq ':';
19473 my $precedes_colon = $ibeg_3 >= 0 && $types_to_go[$ibeg_3] eq ':';
19474 return unless ( $follows_colon || $precedes_colon );
19476 # we will always combining a ? line following a : line
19477 if ( !$follows_colon ) {
19479 # ...otherwise recombine only if it looks like a
19480 # chain. we will just look at a few nearby lines
19481 # to see if this looks like a chain.
19482 my $local_count = 0;
19483 foreach my $ii ( $ibeg_0, $ibeg_1, $ibeg_3, $ibeg_4 ) {
19486 && $types_to_go[$ii] eq ':'
19487 && $levels_to_go[$ii] == $lev;
19489 return unless ( $local_count > 1 );
19491 $forced_breakpoint_to_go[$iend_1] = 0;
19494 # do not recombine lines with leading '.'
19495 elsif ( $type_ibeg_2 eq '.' ) {
19496 my $i_next_nonblank = min( $inext_to_go[$ibeg_2], $iend_2 );
19497 my $summed_len_1 = $summed_lengths_to_go[ $iend_1 + 1 ] -
19498 $summed_lengths_to_go[$ibeg_1];
19499 my $summed_len_2 = $summed_lengths_to_go[ $iend_2 + 1 ] -
19500 $summed_lengths_to_go[$ibeg_2];
19505 # ... unless there is just one and we can reduce
19506 # this to two lines if we do. For example, this
19510 # '($dummy, $pat) = &get_next_tex_cmd;' . '$args .= $pat;'
19512 # looks better than this:
19513 # $bodyA .= '($dummy, $pat) = &get_next_tex_cmd;'
19514 # . '$args .= $pat;'
19516 ( $n == 2 && $n == $nmax && $type_ibeg_1 ne $type_ibeg_2 )
19518 # ... or this would strand a short quote , like this
19519 # . "some long quote"
19522 $types_to_go[$i_next_nonblank] eq 'Q'
19523 && $i_next_nonblank >= $iend_2 - 1
19524 && $token_lengths_to_go[$i_next_nonblank] <
19525 $rOpts_short_concatenation_item_length
19527 # additional constraints to fix c167
19529 $types_to_go[$iend_1] ne 'Q'
19531 # allow a term shorter than the previous term
19532 || $summed_len_2 < $summed_len_1
19534 # or allow a short semicolon-terminated term if this
19535 # makes two lines (see c169)
19538 && $this_line_is_semicolon_terminated )
19544 # handle leading keyword..
19545 elsif ( $type_ibeg_2 eq 'k' ) {
19547 # handle leading "or"
19548 if ( $tokens_to_go[$ibeg_2] eq 'or' ) {
19551 $this_line_is_semicolon_terminated
19553 $type_ibeg_1 eq '}'
19556 # following 'if' or 'unless' or 'or'
19557 $type_ibeg_1 eq 'k'
19558 && $is_if_unless{ $tokens_to_go[$ibeg_1] }
19560 # important: only combine a very simple
19561 # or statement because the step below
19562 # may have combined a trailing 'and'
19563 # with this or, and we do not want to
19564 # then combine everything together
19565 && ( $iend_2 - $ibeg_2 <= 7 )
19571 $forced_breakpoint_to_go[$iend_1] = 0
19572 unless ( $old_breakpoint_to_go[$iend_1] );
19575 # handle leading 'and' and 'xor'
19576 elsif ($tokens_to_go[$ibeg_2] eq 'and'
19577 || $tokens_to_go[$ibeg_2] eq 'xor' )
19580 # Decide if we will combine a single terminal 'and'
19581 # after an 'if' or 'unless'.
19583 # This looks best with the 'and' on the same
19584 # line as the 'if':
19587 # if $seconds and $nu < 2;
19589 # But this looks better as shown:
19592 # if !$this->{Parents}{$_}
19593 # or $this->{Parents}{$_} eq $_;
19597 $this_line_is_semicolon_terminated
19600 # following 'if' or 'unless' or 'or'
19601 $type_ibeg_1 eq 'k'
19602 && ( $is_if_unless{ $tokens_to_go[$ibeg_1] }
19603 || $tokens_to_go[$ibeg_1] eq 'or' )
19608 # handle leading "if" and "unless"
19609 elsif ( $is_if_unless{ $tokens_to_go[$ibeg_2] } ) {
19611 # Combine something like:
19613 # if ( $lang !~ /${l}$/i );
19615 # next if ( $lang !~ /${l}$/i );
19618 $this_line_is_semicolon_terminated
19620 # previous line begins with 'and' or 'or'
19621 && $type_ibeg_1 eq 'k'
19622 && $is_and_or{ $tokens_to_go[$ibeg_1] }
19627 # handle all other leading keywords
19630 # keywords look best at start of lines,
19631 # but combine things like "1 while"
19632 unless ( $is_assignment{$type_iend_1} ) {
19634 if ( ( $type_iend_1 ne 'k' )
19635 && ( $tokens_to_go[$ibeg_2] ne 'while' ) );
19640 # similar treatment of && and || as above for 'and' and
19641 # 'or': NOTE: This block of code is currently bypassed
19642 # because of a previous block but is retained for possible
19644 elsif ( $is_amp_amp{$type_ibeg_2} ) {
19646 # maybe looking at something like:
19647 # unless $TEXTONLY || $item =~ m%</?(hr>|p>|a|img)%i;
19651 $this_line_is_semicolon_terminated
19653 # previous line begins with an 'if' or 'unless'
19655 && $type_ibeg_1 eq 'k'
19656 && $is_if_unless{ $tokens_to_go[$ibeg_1] }
19661 # handle line with leading = or similar
19662 elsif ( $is_assignment{$type_ibeg_2} ) {
19663 return unless ( $n == 1 || $n == $nmax );
19664 return if ( $old_breakpoint_to_go[$iend_1] );
19668 # unless we can reduce this to two lines
19671 # or three lines, the last with a leading semicolon
19672 || ( $nmax == 3 && $types_to_go[$ibeg_nmax] eq ';' )
19674 # or the next line ends with a here doc
19675 || $type_iend_2 eq 'h'
19677 # or this is a short line ending in ;
19679 && $this_line_is_semicolon_terminated )
19681 $forced_breakpoint_to_go[$iend_1] = 0;
19683 return ( 1, $bs_tweak );
19684 } ## end sub recombine_section_3
19686 } ## end closure recombine_breakpoints
19688 sub insert_final_ternary_breaks {
19690 my ( $self, $ri_left, $ri_right ) = @_;
19692 # Called once per batch to look for and do any final line breaks for
19693 # long ternary chains
19695 my $nmax = @{$ri_right} - 1;
19697 # scan the left and right end tokens of all lines
19698 my $i_first_colon = -1;
19699 for my $n ( 0 .. $nmax ) {
19700 my $il = $ri_left->[$n];
19701 my $ir = $ri_right->[$n];
19702 my $typel = $types_to_go[$il];
19703 my $typer = $types_to_go[$ir];
19704 return if ( $typel eq '?' );
19705 return if ( $typer eq '?' );
19706 if ( $typel eq ':' ) { $i_first_colon = $il; last; }
19707 elsif ( $typer eq ':' ) { $i_first_colon = $ir; last; }
19710 # For long ternary chains,
19711 # if the first : we see has its ? is in the interior
19712 # of a preceding line, then see if there are any good
19713 # breakpoints before the ?.
19714 if ( $i_first_colon > 0 ) {
19715 my $i_question = $mate_index_to_go[$i_first_colon];
19716 if ( defined($i_question) && $i_question > 0 ) {
19718 foreach my $ii ( reverse( 0 .. $i_question - 1 ) ) {
19719 my $token = $tokens_to_go[$ii];
19720 my $type = $types_to_go[$ii];
19722 # For now, a good break is either a comma or,
19723 # in a long chain, a 'return'.
19724 # Patch for RT #126633: added the $nmax>1 check to avoid
19725 # breaking after a return for a simple ternary. For longer
19726 # chains the break after return allows vertical alignment, so
19727 # it is still done. So perltidy -wba='?' will not break
19728 # immediately after the return in the following statement:
19730 # return 0 ? 'aaaaaaaaaaaaaaaaaaaaa' :
19731 # 'bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb';
19736 || $type eq 'k' && ( $nmax > 1 && $token eq 'return' )
19738 && $self->in_same_container_i( $ii, $i_question )
19741 push @insert_list, $ii;
19746 # insert any new break points
19747 if (@insert_list) {
19748 $self->insert_additional_breaks( \@insert_list, $ri_left,
19754 } ## end sub insert_final_ternary_breaks
19756 sub insert_breaks_before_list_opening_containers {
19758 my ( $self, $ri_left, $ri_right ) = @_;
19760 # This routine is called once per batch to implement the parameters
19761 # --break-before-hash-brace, etc.
19763 # Nothing to do if none of these parameters has been set
19764 return unless %break_before_container_types;
19766 my $nmax = @{$ri_right} - 1;
19767 return unless ( $nmax >= 0 );
19769 my $rLL = $self->[_rLL_];
19771 my $rbreak_before_container_by_seqno =
19772 $self->[_rbreak_before_container_by_seqno_];
19773 my $rK_weld_left = $self->[_rK_weld_left_];
19775 # scan the ends of all lines
19777 for my $n ( 0 .. $nmax ) {
19778 my $il = $ri_left->[$n];
19779 my $ir = $ri_right->[$n];
19780 next unless ( $ir > $il );
19781 my $Kl = $K_to_go[$il];
19782 my $Kr = $K_to_go[$ir];
19784 my $type_end = $rLL->[$Kr]->[_TYPE_];
19786 # Backup before any side comment
19787 if ( $type_end eq '#' ) {
19788 $Kend = $self->K_previous_nonblank($Kr);
19789 next unless defined($Kend);
19790 $type_end = $rLL->[$Kend]->[_TYPE_];
19793 # Backup to the start of any weld; fix for b1173.
19794 if ($total_weld_count) {
19795 my $Kend_test = $rK_weld_left->{$Kend};
19796 if ( defined($Kend_test) && $Kend_test > $Kl ) {
19797 $Kend = $Kend_test;
19798 $Kend_test = $rK_weld_left->{$Kend};
19801 # Do not break if we did not back up to the start of a weld
19802 # (shouldn't happen)
19803 next if ( defined($Kend_test) );
19806 my $token = $rLL->[$Kend]->[_TOKEN_];
19807 next unless ( $is_opening_token{$token} );
19808 next unless ( $Kl < $Kend - 1 );
19810 my $seqno = $rLL->[$Kend]->[_TYPE_SEQUENCE_];
19811 next unless ( defined($seqno) );
19813 # Use the flag which was previously set
19814 next unless ( $rbreak_before_container_by_seqno->{$seqno} );
19816 # Install a break before this opening token.
19817 my $Kbreak = $self->K_previous_nonblank($Kend);
19818 my $ibreak = $Kbreak - $Kl + $il;
19819 next if ( $ibreak < $il );
19820 next if ( $nobreak_to_go[$ibreak] );
19821 push @insert_list, $ibreak;
19824 # insert any new break points
19825 if (@insert_list) {
19826 $self->insert_additional_breaks( \@insert_list, $ri_left, $ri_right );
19829 } ## end sub insert_breaks_before_list_opening_containers
19831 sub note_added_semicolon {
19832 my ( $self, $line_number ) = @_;
19833 $self->[_last_added_semicolon_at_] = $line_number;
19834 if ( $self->[_added_semicolon_count_] == 0 ) {
19835 $self->[_first_added_semicolon_at_] = $line_number;
19837 $self->[_added_semicolon_count_]++;
19838 write_logfile_entry("Added ';' here\n");
19840 } ## end sub note_added_semicolon
19842 sub note_deleted_semicolon {
19843 my ( $self, $line_number ) = @_;
19844 $self->[_last_deleted_semicolon_at_] = $line_number;
19845 if ( $self->[_deleted_semicolon_count_] == 0 ) {
19846 $self->[_first_deleted_semicolon_at_] = $line_number;
19848 $self->[_deleted_semicolon_count_]++;
19849 write_logfile_entry("Deleted unnecessary ';' at line $line_number\n");
19851 } ## end sub note_deleted_semicolon
19853 sub note_embedded_tab {
19854 my ( $self, $line_number ) = @_;
19855 $self->[_embedded_tab_count_]++;
19856 $self->[_last_embedded_tab_at_] = $line_number;
19857 if ( !$self->[_first_embedded_tab_at_] ) {
19858 $self->[_first_embedded_tab_at_] = $line_number;
19861 if ( $self->[_embedded_tab_count_] <= MAX_NAG_MESSAGES ) {
19862 write_logfile_entry("Embedded tabs in quote or pattern\n");
19865 } ## end sub note_embedded_tab
19867 use constant DEBUG_CORRECT_LP => 0;
19869 sub correct_lp_indentation {
19871 # When the -lp option is used, we need to make a last pass through
19872 # each line to correct the indentation positions in case they differ
19873 # from the predictions. This is necessary because perltidy uses a
19874 # predictor/corrector method for aligning with opening parens. The
19875 # predictor is usually good, but sometimes stumbles. The corrector
19876 # tries to patch things up once the actual opening paren locations
19878 my ( $self, $ri_first, $ri_last ) = @_;
19880 # first remove continuation indentation if appropriate
19881 my $max_line = @{$ri_first} - 1;
19883 #---------------------------------------------------------------------------
19884 # PASS 1: reduce indentation if necessary at any long one-line blocks (c098)
19885 #---------------------------------------------------------------------------
19887 # The point is that sub 'starting_one_line_block' made one-line blocks based
19888 # on default indentation, not -lp indentation. So some of the one-line
19889 # blocks may be too long when given -lp indentation. We will fix that now
19890 # if possible, using the list of these closing block indexes.
19891 my $ri_starting_one_line_block =
19892 $self->[_this_batch_]->[_ri_starting_one_line_block_];
19893 if ( @{$ri_starting_one_line_block} ) {
19894 $self->correct_lp_indentation_pass_1( $ri_first, $ri_last,
19895 $ri_starting_one_line_block );
19898 #-------------------------------------------------------------------
19899 # PASS 2: look for and fix other problems in each line of this batch
19900 #-------------------------------------------------------------------
19902 # look at each output line ...
19903 foreach my $line ( 0 .. $max_line ) {
19904 my $ibeg = $ri_first->[$line];
19905 my $iend = $ri_last->[$line];
19907 # looking at each token in this output line ...
19908 foreach my $i ( $ibeg .. $iend ) {
19910 # How many space characters to place before this token
19911 # for special alignment. Actual padding is done in the
19914 # looking for next unvisited indentation item ...
19915 my $indentation = $leading_spaces_to_go[$i];
19917 # This is just for indentation objects (c098)
19918 next unless ( ref($indentation) );
19920 # Visit each indentation object just once
19921 next if ( $indentation->get_marked() );
19924 $indentation->set_marked(1);
19926 # Skip indentation objects which do not align with container tokens
19927 my $align_seqno = $indentation->get_align_seqno();
19928 next unless ($align_seqno);
19930 # Skip a container which is entirely on this line
19931 my $Ko = $self->[_K_opening_container_]->{$align_seqno};
19932 my $Kc = $self->[_K_closing_container_]->{$align_seqno};
19933 if ( defined($Ko) && defined($Kc) ) {
19934 next if ( $Ko >= $K_to_go[$ibeg] && $Kc <= $K_to_go[$iend] );
19937 # Note on flag '$do_not_pad':
19938 # We want to avoid a situation like this, where the aligner
19939 # inserts whitespace before the '=' to align it with a previous
19940 # '=', because otherwise the parens might become mis-aligned in a
19941 # situation like this, where the '=' has become aligned with the
19942 # previous line, pushing the opening '(' forward beyond where we
19945 # $mkFloor::currentRoom = '';
19946 # $mkFloor::c_entry = $c->Entry(
19948 # -relief => 'sunken',
19952 # We leave it to the aligner to decide how to do this.
19953 if ( $line == 1 && $i == $ibeg ) {
19954 $self->[_this_batch_]->[_do_not_pad_] = 1;
19957 #--------------------------------------------
19958 # Now see what the error is and try to fix it
19959 #--------------------------------------------
19960 my $closing_index = $indentation->get_closed();
19961 my $predicted_pos = $indentation->get_spaces();
19963 # Find actual position:
19966 if ( $i == $ibeg ) {
19968 # Case 1: token is first character of of batch - table lookup
19969 if ( $line == 0 ) {
19971 $actual_pos = $predicted_pos;
19973 my ( $indent, $offset, $is_leading, $exists ) =
19974 get_saved_opening_indentation($align_seqno);
19975 if ( defined($indent) ) {
19977 # NOTE: we could use '1' here if no space after
19978 # opening and '2' if want space; it is hardwired at 1
19979 # like -gnu-style. But it is probably best to leave
19980 # this alone because changing it would change
19981 # formatting of much existing code without any
19982 # significant benefit.
19983 $actual_pos = get_spaces($indent) + $offset + 1;
19987 # Case 2: token starts a new line - use length of previous line
19990 my $ibegm = $ri_first->[ $line - 1 ];
19991 my $iendm = $ri_last->[ $line - 1 ];
19992 $actual_pos = total_line_length( $ibegm, $iendm );
19996 if ( $types_to_go[ $iendm + 1 ] eq 'b' );
20001 # Case 3: $i>$ibeg: token is mid-line - use length to previous token
20004 $actual_pos = total_line_length( $ibeg, $i - 1 );
20006 # for mid-line token, we must check to see if all
20007 # additional lines have continuation indentation,
20008 # and remove it if so. Otherwise, we do not get
20010 if ( $closing_index > $iend ) {
20011 my $ibeg_next = $ri_first->[ $line + 1 ];
20012 if ( $ci_levels_to_go[$ibeg_next] > 0 ) {
20013 $self->undo_lp_ci( $line, $i, $closing_index,
20014 $ri_first, $ri_last );
20019 # By how many spaces (plus or minus) would we need to increase the
20020 # indentation to get alignment with the opening token?
20021 my $move_right = $actual_pos - $predicted_pos;
20023 if (DEBUG_CORRECT_LP) {
20024 my $tok = substr( $tokens_to_go[$i], 0, 8 );
20025 my $avail = $self->get_available_spaces_to_go($ibeg);
20027 "CORRECT_LP for seq=$align_seqno, predicted pos=$predicted_pos actual=$actual_pos => move right=$move_right available=$avail i=$i max=$max_index_to_go tok=$tok\n";
20030 # nothing more to do if no error to correct (gnu2.t)
20031 if ( $move_right == 0 ) {
20032 $indentation->set_recoverable_spaces($move_right);
20036 # Get any collapsed length defined for -xlp
20037 my $collapsed_length =
20038 $self->[_rcollapsed_length_by_seqno_]->{$align_seqno};
20039 $collapsed_length = 0 unless ( defined($collapsed_length) );
20041 if (DEBUG_CORRECT_LP) {
20043 "CORRECT_LP for seq=$align_seqno, collapsed length is $collapsed_length\n";
20046 # if we have not seen closure for this indentation in this batch,
20047 # and do not have a collapsed length estimate, we can only pass on
20048 # a request to the vertical aligner
20049 if ( $closing_index < 0 && !$collapsed_length ) {
20050 $indentation->set_recoverable_spaces($move_right);
20054 # If necessary, look ahead to see if there is really any leading
20055 # whitespace dependent on this whitespace, and also find the
20056 # longest line using this whitespace. Since it is always safe to
20057 # move left if there are no dependents, we only need to do this if
20058 # we may have dependent nodes or need to move right.
20060 my $have_child = $indentation->get_have_child();
20061 my %saw_indentation;
20062 my $line_count = 1;
20063 $saw_indentation{$indentation} = $indentation;
20065 # How far can we move right before we hit the limit?
20066 # let $right_margen = the number of spaces that we can increase
20067 # the current indentation before hitting the maximum line length.
20068 my $right_margin = 0;
20070 if ( $have_child || $move_right > 0 ) {
20073 # include estimated collapsed length for incomplete containers
20074 my $max_length = 0;
20075 if ( $Kc > $K_to_go[$max_index_to_go] ) {
20076 $max_length = $collapsed_length + $predicted_pos;
20079 if ( $i == $ibeg ) {
20080 my $length = total_line_length( $ibeg, $iend );
20081 if ( $length > $max_length ) { $max_length = $length }
20084 # look ahead at the rest of the lines of this batch..
20085 foreach my $line_t ( $line + 1 .. $max_line ) {
20086 my $ibeg_t = $ri_first->[$line_t];
20087 my $iend_t = $ri_last->[$line_t];
20088 last if ( $closing_index <= $ibeg_t );
20090 # remember all different indentation objects
20091 my $indentation_t = $leading_spaces_to_go[$ibeg_t];
20092 $saw_indentation{$indentation_t} = $indentation_t;
20095 # remember longest line in the group
20096 my $length_t = total_line_length( $ibeg_t, $iend_t );
20097 if ( $length_t > $max_length ) {
20098 $max_length = $length_t;
20103 $maximum_line_length_at_level[ $levels_to_go[$ibeg] ] -
20105 if ( $right_margin < 0 ) { $right_margin = 0 }
20108 my $first_line_comma_count =
20109 grep { $_ eq ',' } @types_to_go[ $ibeg .. $iend ];
20110 my $comma_count = $indentation->get_comma_count();
20111 my $arrow_count = $indentation->get_arrow_count();
20113 # This is a simple approximate test for vertical alignment:
20114 # if we broke just after an opening paren, brace, bracket,
20115 # and there are 2 or more commas in the first line,
20116 # and there are no '=>'s,
20117 # then we are probably vertically aligned. We could set
20118 # an exact flag in sub break_lists, but this is good
20120 my $indentation_count = keys %saw_indentation;
20121 my $is_vertically_aligned =
20123 && $first_line_comma_count > 1
20124 && $indentation_count == 1
20125 && ( $arrow_count == 0 || $arrow_count == $line_count ) );
20127 # Make the move if possible ..
20130 # we can always move left
20135 # incomplete container
20136 || ( $rOpts_extended_line_up_parentheses
20137 && $Kc > $K_to_go[$max_index_to_go] )
20138 || $closing_index < 0
20140 # but we should only move right if we are sure it will
20141 # not spoil vertical alignment
20142 || ( $comma_count == 0 )
20143 || ( $comma_count > 0 && !$is_vertically_aligned )
20147 ( $move_right <= $right_margin )
20151 if (DEBUG_CORRECT_LP) {
20153 "CORRECT_LP for seq=$align_seqno, moving $move spaces\n";
20156 foreach ( keys %saw_indentation ) {
20157 $saw_indentation{$_}
20158 ->permanently_decrease_available_spaces( -$move );
20162 # Otherwise, record what we want and the vertical aligner
20163 # will try to recover it.
20165 $indentation->set_recoverable_spaces($move_right);
20167 } ## end loop over tokens in a line
20168 } ## end loop over lines
20170 } ## end sub correct_lp_indentation
20172 sub correct_lp_indentation_pass_1 {
20173 my ( $self, $ri_first, $ri_last, $ri_starting_one_line_block ) = @_;
20175 # So some of the one-line blocks may be too long when given -lp
20176 # indentation. We will fix that now if possible, using the list of these
20177 # closing block indexes.
20179 my @ilist = @{$ri_starting_one_line_block};
20180 return unless (@ilist);
20182 my $max_line = @{$ri_first} - 1;
20183 my $inext = shift(@ilist);
20185 # loop over lines, checking length of each with a one-line block
20186 my ( $ibeg, $iend );
20187 foreach my $line ( 0 .. $max_line ) {
20188 $iend = $ri_last->[$line];
20189 next if ( $inext > $iend );
20190 $ibeg = $ri_first->[$line];
20192 # This is just for lines with indentation objects (c098)
20194 ref( $leading_spaces_to_go[$ibeg] )
20195 ? $self->excess_line_length( $ibeg, $iend )
20198 if ( $excess > 0 ) {
20199 my $available_spaces = $self->get_available_spaces_to_go($ibeg);
20201 if ( $available_spaces > 0 ) {
20202 my $delete_want = min( $available_spaces, $excess );
20203 my $deleted_spaces =
20204 $self->reduce_lp_indentation( $ibeg, $delete_want );
20205 $available_spaces = $self->get_available_spaces_to_go($ibeg);
20209 # skip forward to next one-line block to check
20211 $inext = shift @ilist;
20212 next if ( $inext <= $iend );
20213 last if ( $inext > $iend );
20215 last if ( $inext <= $iend );
20218 } ## end sub correct_lp_indentation_pass_1
20222 # If there is a single, long parameter within parens, like this:
20224 # $self->command( "/msg "
20225 # . $infoline->chan
20226 # . " You said $1, but did you know that it's square was "
20227 # . $1 * $1 . " ?" );
20229 # we can remove the continuation indentation of the 2nd and higher lines
20230 # to achieve this effect, which is more pleasing:
20232 # $self->command("/msg "
20233 # . $infoline->chan
20234 # . " You said $1, but did you know that it's square was "
20235 # . $1 * $1 . " ?");
20237 my ( $self, $line_open, $i_start, $closing_index, $ri_first, $ri_last ) =
20239 my $max_line = @{$ri_first} - 1;
20241 # must be multiple lines
20242 return unless $max_line > $line_open;
20244 my $lev_start = $levels_to_go[$i_start];
20245 my $ci_start_plus = 1 + $ci_levels_to_go[$i_start];
20247 # see if all additional lines in this container have continuation
20249 my $line_1 = 1 + $line_open;
20250 my $n = $line_open;
20252 while ( ++$n <= $max_line ) {
20253 my $ibeg = $ri_first->[$n];
20254 my $iend = $ri_last->[$n];
20255 if ( $ibeg eq $closing_index ) { $n--; last }
20256 return if ( $lev_start != $levels_to_go[$ibeg] );
20257 return if ( $ci_start_plus != $ci_levels_to_go[$ibeg] );
20258 last if ( $closing_index <= $iend );
20261 # we can reduce the indentation of all continuation lines
20262 my $continuation_line_count = $n - $line_open;
20263 @ci_levels_to_go[ @{$ri_first}[ $line_1 .. $n ] ] =
20264 (0) x ($continuation_line_count);
20265 @leading_spaces_to_go[ @{$ri_first}[ $line_1 .. $n ] ] =
20266 @reduced_spaces_to_go[ @{$ri_first}[ $line_1 .. $n ] ];
20268 } ## end sub undo_lp_ci
20270 ###############################################
20271 # CODE SECTION 10: Code to break long statments
20272 ###############################################
20274 use constant DEBUG_BREAK_LINES => 0;
20276 sub break_long_lines {
20278 #-----------------------------------------------------------
20279 # Break a batch of tokens into lines which do not exceed the
20280 # maximum line length.
20281 #-----------------------------------------------------------
20283 my ( $self, $saw_good_break, $rcolon_list, $rbond_strength_bias ) = @_;
20285 # Input parameters:
20286 # $saw_good_break - a flag set by break_lists
20287 # $rcolon_list - ref to a list of all the ? and : tokens in the batch,
20289 # $rbond_strength_bias - small bond strength bias values set by break_lists
20291 # Output: returns references to the arrays:
20294 # which contain the indexes $i of the first and last tokens on each
20297 # In addition, the array:
20298 # $forced_breakpoint_to_go[$i]
20299 # may be updated to be =1 for any index $i after which there must be
20300 # a break. This signals later routines not to undo the breakpoint.
20303 # This routine is called if a statement is longer than the maximum line
20304 # length, or if a preliminary scanning located desirable break points.
20305 # Sub break_lists has already looked at these tokens and set breakpoints
20306 # (in array $forced_breakpoint_to_go[$i]) where it wants breaks (for
20307 # example after commas, after opening parens, and before closing parens).
20308 # This routine will honor these breakpoints and also add additional
20309 # breakpoints as necessary to keep the line length below the maximum
20310 # requested. It bases its decision on where the 'bond strength' is
20313 my @i_first = (); # the first index to output
20314 my @i_last = (); # the last index to output
20315 my @i_colon_breaks = (); # needed to decide if we have to break at ?'s
20316 if ( $types_to_go[0] eq ':' ) { push @i_colon_breaks, 0 }
20318 # Get the 'bond strengths' between tokens
20319 my $rbond_strength_to_go = $self->set_bond_strengths();
20321 # Add any comma bias set by break_lists
20322 if ( @{$rbond_strength_bias} ) {
20323 foreach my $item ( @{$rbond_strength_bias} ) {
20324 my ( $ii, $bias ) = @{$item};
20325 if ( $ii >= 0 && $ii <= $max_index_to_go ) {
20326 $rbond_strength_to_go->[$ii] += $bias;
20328 elsif (DEVEL_MODE) {
20329 my $KK = $K_to_go[0];
20330 my $lno = $self->[_rLL_]->[$KK]->[_LINE_INDEX_];
20332 "Bad bond strength bias near line $lno: i=$ii must be between 0 and $max_index_to_go\n"
20339 my $imax = $max_index_to_go;
20340 if ( $types_to_go[$imin] eq 'b' ) { $imin++ }
20341 if ( $types_to_go[$imax] eq 'b' ) { $imax-- }
20343 my $i_begin = $imin;
20344 my $last_break_strength = NO_BREAK;
20345 my $i_last_break = -1;
20346 my $line_count = 0;
20348 # see if any ?/:'s are in order
20349 my $colons_in_order = 1;
20350 my $last_tok = EMPTY_STRING;
20351 foreach ( @{$rcolon_list} ) {
20352 if ( $_ eq $last_tok ) { $colons_in_order = 0; last }
20356 # This is a sufficient but not necessary condition for colon chain
20357 my $is_colon_chain = ( $colons_in_order && @{$rcolon_list} > 2 );
20359 #------------------------------------------
20360 # BEGINNING of main loop to set breakpoints
20361 # Keep iterating until we reach the end
20362 #------------------------------------------
20363 while ( $i_begin <= $imax ) {
20365 #------------------------------------------------------------------
20366 # Find the best next breakpoint based on token-token bond strengths
20367 #------------------------------------------------------------------
20368 my ( $i_lowest, $lowest_strength, $leading_alignment_type, $Msg ) =
20369 $self->break_lines_inner_loop(
20374 $last_break_strength,
20376 $rbond_strength_to_go,
20381 # Now make any adjustments required by ternary breakpoint rules
20382 if ( @{$rcolon_list} ) {
20384 my $i_next_nonblank = $inext_to_go[$i_lowest];
20386 #-------------------------------------------------------
20387 # ?/: rule 1 : if a break here will separate a '?' on this
20388 # line from its closing ':', then break at the '?' instead.
20389 # But do not break a sequential chain of ?/: statements
20390 #-------------------------------------------------------
20391 if ( !$is_colon_chain ) {
20392 foreach my $i ( $i_begin + 1 .. $i_lowest - 1 ) {
20393 next unless ( $tokens_to_go[$i] eq '?' );
20395 # do not break if statement is broken by side comment
20397 if ( $tokens_to_go[$max_index_to_go] eq '#'
20398 && terminal_type_i( 0, $max_index_to_go ) !~
20401 # no break needed if matching : is also on the line
20403 if ( defined( $mate_index_to_go[$i] )
20404 && $mate_index_to_go[$i] <= $i_next_nonblank );
20407 if ( $want_break_before{'?'} ) { $i_lowest-- }
20408 $i_next_nonblank = $inext_to_go[$i_lowest];
20413 my $next_nonblank_type = $types_to_go[$i_next_nonblank];
20415 #-------------------------------------------------------------
20416 # ?/: rule 2 : if we break at a '?', then break at its ':'
20418 # Note: this rule is also in sub break_lists to handle a break
20419 # at the start and end of a line (in case breaks are dictated
20420 # by side comments).
20421 #-------------------------------------------------------------
20422 if ( $next_nonblank_type eq '?' ) {
20423 $self->set_closing_breakpoint($i_next_nonblank);
20425 elsif ( $types_to_go[$i_lowest] eq '?' ) {
20426 $self->set_closing_breakpoint($i_lowest);
20429 #--------------------------------------------------------
20430 # ?/: rule 3 : if we break at a ':' then we save
20431 # its location for further work below. We may need to go
20432 # back and break at its '?'.
20433 #--------------------------------------------------------
20434 if ( $next_nonblank_type eq ':' ) {
20435 push @i_colon_breaks, $i_next_nonblank;
20437 elsif ( $types_to_go[$i_lowest] eq ':' ) {
20438 push @i_colon_breaks, $i_lowest;
20441 # here we should set breaks for all '?'/':' pairs which are
20442 # separated by this line
20445 # guard against infinite loop (should never happen)
20446 if ( $i_lowest <= $i_last_break ) {
20448 && Fault("i_lowest=$i_lowest <= i_last_break=$i_last_break\n");
20454 "BREAK: best is i = $i_lowest strength = $lowest_strength;\nReason>> $Msg\n";
20458 # save this line segment, after trimming blanks at the ends
20460 ( $types_to_go[$i_begin] eq 'b' ) ? $i_begin + 1 : $i_begin );
20462 ( $types_to_go[$i_lowest] eq 'b' ) ? $i_lowest - 1 : $i_lowest );
20464 # set a forced breakpoint at a container opening, if necessary, to
20465 # signal a break at a closing container. Excepting '(' for now.
20468 $tokens_to_go[$i_lowest] eq '{'
20469 || $tokens_to_go[$i_lowest] eq '['
20471 && !$forced_breakpoint_to_go[$i_lowest]
20474 $self->set_closing_breakpoint($i_lowest);
20477 # get ready to find the next breakpoint
20478 $last_break_strength = $lowest_strength;
20479 $i_last_break = $i_lowest;
20480 $i_begin = $i_lowest + 1;
20482 # skip past a blank
20483 if ( ( $i_begin <= $imax ) && ( $types_to_go[$i_begin] eq 'b' ) ) {
20488 #-------------------------------------------------
20489 # END of main loop to set continuation breakpoints
20490 #-------------------------------------------------
20492 #-----------------------------------------------------------
20493 # ?/: rule 4 -- if we broke at a ':', then break at
20494 # corresponding '?' unless this is a chain of ?: expressions
20495 #-----------------------------------------------------------
20496 if (@i_colon_breaks) {
20497 my $is_chain = ( $colons_in_order && @i_colon_breaks > 1 );
20498 if ( !$is_chain ) {
20499 $self->do_colon_breaks( \@i_colon_breaks, \@i_first, \@i_last );
20503 return ( \@i_first, \@i_last, $rbond_strength_to_go );
20504 } ## end sub break_long_lines
20506 # small bond strength numbers to help break ties
20507 use constant TINY_BIAS => 0.0001;
20508 use constant MAX_BIAS => 0.001;
20510 sub break_lines_inner_loop {
20512 #-----------------------------------------------------------------
20513 # Find the best next breakpoint in index range ($i_begin .. $imax)
20514 # which, if possible, does not exceed the maximum line length.
20515 #-----------------------------------------------------------------
20523 $last_break_strength,
20525 $rbond_strength_to_go,
20531 # $i_begin = first index of range
20532 # $i_last_break = index of previous break
20533 # $imax = last index of range
20534 # $last_break_strength = bond strength of last break
20535 # $line_count = number of output lines so far
20536 # $rbond_strength_to_go = ref to array of bond strengths
20537 # $saw_good_break = true if old line had a good breakpoint
20540 # $i_lowest = index of best breakpoint
20541 # $lowest_strength = 'bond strength' at best breakpoint
20542 # $leading_alignment_type = special token type after break
20543 # $Msg = string of debug info
20545 my $Msg = EMPTY_STRING;
20546 my $strength = NO_BREAK;
20547 my $i_test = $i_begin - 1;
20549 my $starting_sum = $summed_lengths_to_go[$i_begin];
20550 my $lowest_strength = NO_BREAK;
20551 my $leading_alignment_type = EMPTY_STRING;
20552 my $leading_spaces = leading_spaces_to_go($i_begin);
20553 my $maximum_line_length =
20554 $maximum_line_length_at_level[ $levels_to_go[$i_begin] ];
20557 $Msg .= "updating leading spaces to be $leading_spaces at i=$i_begin\n";
20560 # Do not separate an isolated bare word from an opening paren.
20561 # Alternate Fix #2 for issue b1299. This waits as long as possible
20562 # to make the decision.
20563 if ( $types_to_go[$i_begin] eq 'i'
20564 && substr( $tokens_to_go[$i_begin], 0, 1 ) =~ /\w/ )
20566 my $i_next_nonblank = $inext_to_go[$i_begin];
20567 if ( $tokens_to_go[$i_next_nonblank] eq '(' ) {
20568 $rbond_strength_to_go->[$i_begin] = NO_BREAK;
20572 # Avoid a break which would strand a single punctuation
20573 # token. For example, we do not want to strand a leading
20574 # '.' which is followed by a long quoted string.
20575 # But note that we do want to do this with -extrude (l=1)
20576 # so please test any changes to this code on -extrude.
20578 ( $i_begin < $imax )
20579 && ( $tokens_to_go[$i_begin] eq $types_to_go[$i_begin] )
20580 && !$forced_breakpoint_to_go[$i_begin]
20583 # Allow break after a closing eval brace. This is an
20584 # approximate way to simulate a forced breakpoint made in
20585 # Section B below. No differences have been found, but if
20586 # necessary the full logic of Section B could be used here
20588 $tokens_to_go[$i_begin] eq '}'
20589 && $block_type_to_go[$i_begin]
20590 && $block_type_to_go[$i_begin] eq 'eval'
20595 $summed_lengths_to_go[ $i_begin + 1 ] -
20597 ) < $maximum_line_length
20601 $i_test = min( $imax, $inext_to_go[$i_begin] ) - 1;
20602 DEBUG_BREAK_LINES && do {
20603 $Msg .= " :skip ahead at i=$i_test";
20607 #-------------------------------------------------------
20608 # Begin INNER_LOOP over the indexes in the _to_go arrays
20609 #-------------------------------------------------------
20610 while ( ++$i_test <= $imax ) {
20611 my $type = $types_to_go[$i_test];
20612 my $token = $tokens_to_go[$i_test];
20613 my $i_next_nonblank = $inext_to_go[$i_test];
20614 my $next_nonblank_type = $types_to_go[$i_next_nonblank];
20615 my $next_nonblank_token = $tokens_to_go[$i_next_nonblank];
20616 my $next_nonblank_block_type = $block_type_to_go[$i_next_nonblank];
20618 #---------------------------------------------------------------
20619 # Section A: Get token-token strength and handle any adjustments
20620 #---------------------------------------------------------------
20622 # adjustments to the previous bond strength may have been made, and
20623 # we must keep the bond strength of a token and its following blank
20625 my $last_strength = $strength;
20626 $strength = $rbond_strength_to_go->[$i_test];
20627 if ( $type eq 'b' ) { $strength = $last_strength }
20629 # reduce strength a bit to break ties at an old comma breakpoint ...
20632 $old_breakpoint_to_go[$i_test]
20634 # Patch: limited to just commas to avoid blinking states
20637 # which is a 'good' breakpoint, meaning ...
20638 # we don't want to break before it
20639 && !$want_break_before{$type}
20641 # and either we want to break before the next token
20642 # or the next token is not short (i.e. not a '*', '/' etc.)
20643 && $i_next_nonblank <= $imax
20644 && ( $want_break_before{$next_nonblank_type}
20645 || $token_lengths_to_go[$i_next_nonblank] > 2
20646 || $next_nonblank_type eq ','
20647 || $is_opening_type{$next_nonblank_type} )
20650 $strength -= TINY_BIAS;
20651 DEBUG_BREAK_LINES && do { $Msg .= " :-bias at i=$i_test" };
20654 # otherwise increase strength a bit if this token would be at the
20655 # maximum line length. This is necessary to avoid blinking
20656 # in the above example when the -iob flag is added.
20660 $summed_lengths_to_go[ $i_test + 1 ] -
20662 if ( $len >= $maximum_line_length ) {
20663 $strength += TINY_BIAS;
20664 DEBUG_BREAK_LINES && do { $Msg .= " :+bias at i=$i_test" };
20668 #-------------------------------------
20669 # Section B: Handle forced breakpoints
20670 #-------------------------------------
20673 # Force an immediate break at certain operators
20674 # with lower level than the start of the line,
20675 # unless we've already seen a better break.
20677 # Note on an issue with a preceding '?' :
20679 # There may be a break at a previous ? if the line is long. Because
20680 # of this we do not want to force a break if there is a previous ? on
20681 # this line. For now the best way to do this is to not break if we
20682 # have seen a lower strength point, which is probably a ?.
20684 # Example of unwanted breaks we are avoiding at a '.' following a ?
20685 # from pod2html using perltidy -gnu:
20687 # ? "\n<A NAME=\""
20689 # . "\">\n$text</A>\n"
20690 # : "\n$type$pod2.html\#" . $value . "\">$text<\/A>\n";
20692 ( $strength <= $lowest_strength )
20693 && ( $nesting_depth_to_go[$i_begin] >
20694 $nesting_depth_to_go[$i_next_nonblank] )
20696 $next_nonblank_type =~ /^(\.|\&\&|\|\|)$/
20698 $next_nonblank_type eq 'k'
20700 ## /^(and|or)$/ # note: includes 'xor' now
20701 && $is_and_or{$next_nonblank_token}
20706 $self->set_forced_breakpoint($i_next_nonblank);
20708 && do { $Msg .= " :Forced break at i=$i_next_nonblank" };
20713 # Try to put a break where requested by break_lists
20714 $forced_breakpoint_to_go[$i_test]
20716 # break between ) { in a continued line so that the '{' can
20718 # See similar logic in break_lists which catches instances
20719 # where a line is just something like ') {'. We have to
20720 # be careful because the corresponding block keyword might
20721 # not be on the first line, such as 'for' here:
20725 # for $x ( 1, 2 ) { local $_ = "b"; s/(.*)/+$1/ }
20731 && ( $token eq ')' )
20732 && ( $next_nonblank_type eq '{' )
20733 && ($next_nonblank_block_type)
20734 && ( $next_nonblank_block_type ne $tokens_to_go[$i_begin] )
20736 # RT #104427: Dont break before opening sub brace because
20737 # sub block breaks handled at higher level, unless
20738 # it looks like the preceding list is long and broken
20742 $next_nonblank_block_type =~ /$SUB_PATTERN/
20743 || $next_nonblank_block_type =~ /$ASUB_PATTERN/
20745 && ( $nesting_depth_to_go[$i_begin] ==
20746 $nesting_depth_to_go[$i_next_nonblank] )
20749 && !$rOpts_opening_brace_always_on_right
20752 # There is an implied forced break at a terminal opening brace
20753 || ( ( $type eq '{' ) && ( $i_test == $imax ) )
20757 # Forced breakpoints must sometimes be overridden, for example
20758 # because of a side comment causing a NO_BREAK. It is easier
20759 # to catch this here than when they are set.
20760 if ( $strength < NO_BREAK - 1 ) {
20761 $strength = $lowest_strength - TINY_BIAS;
20764 && do { $Msg .= " :set must_break at i=$i_next_nonblank" };
20768 # quit if a break here would put a good terminal token on
20769 # the next line and we already have a possible break
20771 ( $next_nonblank_type eq ';' || $next_nonblank_type eq ',' )
20776 $summed_lengths_to_go[ $i_next_nonblank + 1 ] -
20778 ) > $maximum_line_length
20782 if ( $i_lowest >= 0 ) {
20783 DEBUG_BREAK_LINES && do {
20784 $Msg .= " :quit at good terminal='$next_nonblank_type'";
20790 #------------------------------------------------------------
20791 # Section C: Look for the lowest bond strength between tokens
20792 #------------------------------------------------------------
20793 if ( ( $strength <= $lowest_strength ) && ( $strength < NO_BREAK ) ) {
20795 # break at previous best break if it would have produced
20796 # a leading alignment of certain common tokens, and it
20797 # is different from the latest candidate break
20798 if ($leading_alignment_type) {
20799 DEBUG_BREAK_LINES && do {
20801 " :last at leading_alignment='$leading_alignment_type'";
20806 # Force at least one breakpoint if old code had good
20807 # break It is only called if a breakpoint is required or
20808 # desired. This will probably need some adjustments
20809 # over time. A goal is to try to be sure that, if a new
20810 # side comment is introduced into formatted text, then
20811 # the same breakpoints will occur. scbreak.t
20813 $i_test == $imax # we are at the end
20814 && !$forced_breakpoint_count
20815 && $saw_good_break # old line had good break
20816 && $type =~ /^[#;\{]$/ # and this line ends in
20817 # ';' or side comment
20818 && $i_last_break < 0 # and we haven't made a break
20819 && $i_lowest >= 0 # and we saw a possible break
20820 && $i_lowest < $imax - 1 # (but not just before this ;)
20821 && $strength - $lowest_strength < 0.5 * WEAK # and it's good
20825 DEBUG_BREAK_LINES && do {
20826 $Msg .= " :last at good old break\n";
20831 # Do not skip past an important break point in a short final
20832 # segment. For example, without this check we would miss the
20833 # break at the final / in the following code:
20836 # ( $tau * $mass_pellet * $q_0 *
20837 # ( 1. - exp( -$t_stop / $tau ) ) -
20838 # 4. * $pi * $factor * $k_ice *
20839 # ( $t_melt - $t_ice ) *
20842 # ( $rho_ice * $Qs * $pi * $r_pellet**2 );
20846 && $i_lowest >= 0 # and we saw a possible break
20847 && $i_lowest < $i_test
20848 && $i_test > $imax - 2
20849 && $nesting_depth_to_go[$i_begin] >
20850 $nesting_depth_to_go[$i_lowest]
20851 && $lowest_strength < $last_break_strength - .5 * WEAK
20854 # Make this break for math operators for now
20855 my $ir = $inext_to_go[$i_lowest];
20856 my $il = iprev_to_go($ir);
20857 if ( $types_to_go[$il] =~ /^[\/\*\+\-\%]$/
20858 || $types_to_go[$ir] =~ /^[\/\*\+\-\%]$/ )
20860 DEBUG_BREAK_LINES && do {
20861 $Msg .= " :last-noskip_short";
20867 # Update the minimum bond strength location
20868 $lowest_strength = $strength;
20869 $i_lowest = $i_test;
20871 DEBUG_BREAK_LINES && do {
20872 $Msg .= " :last-must_break";
20877 # set flags to remember if a break here will produce a
20878 # leading alignment of certain common tokens
20879 if ( $line_count > 0
20881 && ( $lowest_strength - $last_break_strength <= MAX_BIAS ) )
20883 my $i_last_end = iprev_to_go($i_begin);
20884 my $tok_beg = $tokens_to_go[$i_begin];
20885 my $type_beg = $types_to_go[$i_begin];
20888 # check for leading alignment of certain tokens
20890 $tok_beg eq $next_nonblank_token
20891 && $is_chain_operator{$tok_beg}
20892 && ( $type_beg eq 'k'
20893 || $type_beg eq $tok_beg )
20894 && $nesting_depth_to_go[$i_begin] >=
20895 $nesting_depth_to_go[$i_next_nonblank]
20898 || ( $tokens_to_go[$i_last_end] eq $token
20899 && $is_chain_operator{$token}
20900 && ( $type eq 'k' || $type eq $token )
20901 && $nesting_depth_to_go[$i_last_end] >=
20902 $nesting_depth_to_go[$i_test] )
20905 $leading_alignment_type = $next_nonblank_type;
20910 #-----------------------------------------------------------
20911 # Section D: See if the maximum line length will be exceeded
20912 #-----------------------------------------------------------
20914 # Quit if there are no more tokens to test
20915 last if ( $i_test >= $imax );
20917 # Keep going if we have not reached the limit
20920 $summed_lengths_to_go[ $i_test + 2 ] -
20922 $maximum_line_length;
20924 if ( $excess < 0 ) {
20927 elsif ( $excess == 0 ) {
20929 # To prevent blinkers we will avoid leaving a token exactly at
20930 # the line length limit unless it is the last token or one of
20931 # several "good" types.
20933 # The following code was a blinker with -pbp before this
20935 # $last_nonblank_token eq '('
20936 # && $is_indirect_object_taker{ $paren_type
20938 # The issue causing the problem is that if the
20939 # term [$paren_depth] gets broken across a line then
20940 # the whitespace routine doesn't see both opening and closing
20941 # brackets and will format like '[ $paren_depth ]'. This
20942 # leads to an oscillation in length depending if we break
20943 # before the closing bracket or not.
20944 if ( $i_test + 1 < $imax
20945 && $next_nonblank_type ne ','
20946 && !$is_closing_type{$next_nonblank_type} )
20949 DEBUG_BREAK_LINES && do {
20950 $Msg .= " :too_long";
20961 # a break here makes the line too long ...
20963 DEBUG_BREAK_LINES && do {
20966 $next_nonblank_token ? $next_nonblank_token : EMPTY_STRING;
20967 my $i_testp2 = $i_test + 2;
20968 if ( $i_testp2 > $max_index_to_go + 1 ) {
20969 $i_testp2 = $max_index_to_go + 1;
20971 if ( length($ltok) > 6 ) { $ltok = substr( $ltok, 0, 8 ) }
20972 if ( length($rtok) > 6 ) { $rtok = substr( $rtok, 0, 8 ) }
20974 "BREAK: i=$i_test imax=$imax $types_to_go[$i_test] $next_nonblank_type sp=($leading_spaces) lnext= $summed_lengths_to_go[$i_testp2] str=$strength $ltok $rtok\n";
20977 # Exception: allow one extra terminal token after exceeding line length
20978 # if it would strand this token.
20979 if ( $i_lowest == $i_test
20980 && $token_lengths_to_go[$i_test] > 1
20981 && ( $next_nonblank_type eq ';' || $next_nonblank_type eq ',' )
20982 && $rOpts_fuzzy_line_length )
20984 DEBUG_BREAK_LINES && do {
20985 $Msg .= " :do_not_strand next='$next_nonblank_type'";
20990 # Stop if here if we have a solution and the line will be too long
20991 if ( $i_lowest >= 0 ) {
20992 DEBUG_BREAK_LINES && do {
20994 " :Done-too_long && i_lowest=$i_lowest at itest=$i_test, imax=$imax";
21000 #-----------------------------------------------------
21001 # End INNER_LOOP over the indexes in the _to_go arrays
21002 #-----------------------------------------------------
21004 # Be sure we return an index in the range ($ibegin .. $imax).
21005 # We will break at imax if no other break was found.
21006 if ( $i_lowest < 0 ) { $i_lowest = $imax }
21008 return ( $i_lowest, $lowest_strength, $leading_alignment_type, $Msg );
21009 } ## end sub break_lines_inner_loop
21011 sub do_colon_breaks {
21012 my ( $self, $ri_colon_breaks, $ri_first, $ri_last ) = @_;
21014 # using a simple method for deciding if we are in a ?/: chain --
21015 # this is a chain if it has multiple ?/: pairs all in order;
21017 # Note that if line starts in a ':' we count that above as a break
21019 my @insert_list = ();
21020 foreach ( @{$ri_colon_breaks} ) {
21021 my $i_question = $mate_index_to_go[$_];
21022 if ( defined($i_question) ) {
21023 if ( $want_break_before{'?'} ) {
21024 $i_question = iprev_to_go($i_question);
21027 if ( $i_question >= 0 ) {
21028 push @insert_list, $i_question;
21031 $self->insert_additional_breaks( \@insert_list, $ri_first, $ri_last );
21034 } ## end sub do_colon_breaks
21036 ###########################################
21037 # CODE SECTION 11: Code to break long lists
21038 ###########################################
21040 { ## begin closure break_lists
21042 # These routines and variables are involved in finding good
21043 # places to break long lists.
21045 use constant DEBUG_BREAK_LISTS => 0;
21056 $i_last_nonblank_token,
21057 $last_nonblank_block_type,
21058 $last_nonblank_token,
21059 $last_nonblank_type,
21060 $last_old_breakpoint_count,
21062 $next_nonblank_block_type,
21063 $next_nonblank_token,
21064 $next_nonblank_type,
21065 $old_breakpoint_count,
21066 $starting_breakpoint_count,
21077 @breakpoint_undo_stack,
21080 @identifier_count_stack,
21081 @index_before_arrow,
21086 @last_nonblank_type,
21087 @old_breakpoint_count_stack,
21088 @opening_structure_index_stack,
21089 @rfor_semicolon_list,
21090 @has_old_logical_breakpoints,
21094 @type_sequence_stack,
21098 # these arrays must retain values between calls
21099 my ( @has_broken_sublist, @dont_align, @want_comma_break );
21104 sub initialize_break_lists {
21106 @has_broken_sublist = ();
21107 @want_comma_break = ();
21109 #---------------------------------------------------
21110 # Set tolerances to prevent formatting instabilities
21111 #---------------------------------------------------
21113 # Define tolerances to use when checking if closed
21114 # containers will fit on one line. This is necessary to avoid
21115 # formatting instability. The basic tolerance is based on the
21118 # - Always allow for at least one extra space after a closing token so
21119 # that we do not strand a comma or semicolon. (oneline.t).
21121 # - Use an increased line length tolerance when -ci > -i to avoid
21122 # blinking states (case b923 and others).
21124 1 + max( 0, $rOpts_continuation_indentation - $rOpts_indent_columns );
21126 # In addition, it may be necessary to use a few extra tolerance spaces
21127 # when -lp is used and/or when -xci is used. The history of this
21128 # so far is as follows:
21130 # FIX1: At least 3 characters were been found to be required for -lp
21131 # to fixes cases b1059 b1063 b1117.
21133 # FIX2: Further testing showed that we need a total of 3 extra spaces
21134 # when -lp is set for non-lists, and at least 2 spaces when -lp and
21136 # Fixes cases b1063 b1103 b1134 b1135 b1136 b1138 b1140 b1143 b1144
21137 # b1145 b1146 b1147 b1148 b1151 b1152 b1153 b1154 b1156 b1157 b1164
21140 # FIX3: To fix cases b1169 b1170 b1171, an update was made in sub
21141 # 'find_token_starting_list' to go back before an initial blank space.
21142 # This fixed these three cases, and allowed the tolerances to be
21143 # reduced to continue to fix all other known cases of instability.
21144 # This gives the current tolerance formulation.
21148 if ($rOpts_line_up_parentheses) {
21150 # boost tol for combination -lp -xci
21151 if ($rOpts_extended_continuation_indentation) {
21155 # boost tol for combination -lp and any -vtc > 0, but only for
21156 # non-list containers
21158 foreach ( keys %closing_vertical_tightness ) {
21160 unless ( $closing_vertical_tightness{$_} );
21161 $lp_tol_boost = 1; # Fixes B1193;
21167 # Define a level where list formatting becomes highly stressed and
21168 # needs to be simplified. Introduced for case b1262.
21169 # $list_stress_level = min($stress_level_alpha, $stress_level_beta + 2);
21170 # This is now '$high_stress_level'.
21173 } ## end sub initialize_break_lists
21175 # routine to define essential variables when we go 'up' to
21177 sub check_for_new_minimum_depth {
21178 my ( $self, $depth_t, $seqno ) = @_;
21179 if ( $depth_t < $minimum_depth ) {
21181 $minimum_depth = $depth_t;
21183 # these arrays need not retain values between calls
21184 $type_sequence_stack[$depth_t] = $seqno;
21185 $override_cab3[$depth_t] = undef;
21186 if ( $rOpts_comma_arrow_breakpoints == 3 && $seqno ) {
21187 $override_cab3[$depth_t] = $self->[_roverride_cab3_]->{$seqno};
21189 $breakpoint_stack[$depth_t] = $starting_breakpoint_count;
21190 $container_type[$depth_t] = EMPTY_STRING;
21191 $identifier_count_stack[$depth_t] = 0;
21192 $index_before_arrow[$depth_t] = -1;
21193 $interrupted_list[$depth_t] = 1;
21194 $item_count_stack[$depth_t] = 0;
21195 $last_nonblank_type[$depth_t] = EMPTY_STRING;
21196 $opening_structure_index_stack[$depth_t] = -1;
21198 $breakpoint_undo_stack[$depth_t] = undef;
21199 $comma_index[$depth_t] = undef;
21200 $last_comma_index[$depth_t] = undef;
21201 $last_dot_index[$depth_t] = undef;
21202 $old_breakpoint_count_stack[$depth_t] = undef;
21203 $has_old_logical_breakpoints[$depth_t] = 0;
21204 $rand_or_list[$depth_t] = [];
21205 $rfor_semicolon_list[$depth_t] = [];
21206 $i_equals[$depth_t] = -1;
21208 # these arrays must retain values between calls
21209 if ( !defined( $has_broken_sublist[$depth_t] ) ) {
21210 $dont_align[$depth_t] = 0;
21211 $has_broken_sublist[$depth_t] = 0;
21212 $want_comma_break[$depth_t] = 0;
21216 } ## end sub check_for_new_minimum_depth
21218 # routine to decide which commas to break at within a container;
21220 # $bp_count = number of comma breakpoints set
21221 # $do_not_break_apart = a flag indicating if container need not
21223 sub set_comma_breakpoints {
21225 my ( $self, $dd, $rbond_strength_bias ) = @_;
21227 my $do_not_break_apart = 0;
21230 if ( $item_count_stack[$dd] ) {
21232 # Do not break a list unless there are some non-line-ending commas.
21233 # This avoids getting different results with only non-essential
21234 # commas, and fixes b1192.
21235 my $seqno = $type_sequence_stack[$dd];
21237 my $real_comma_count =
21238 $seqno ? $self->[_rtype_count_by_seqno_]->{$seqno}->{','} : 1;
21240 # handle commas not in containers...
21241 if ( $dont_align[$dd] ) {
21242 $self->do_uncontained_comma_breaks( $dd, $rbond_strength_bias );
21245 # handle commas within containers...
21246 elsif ($real_comma_count) {
21247 my $fbc = $forced_breakpoint_count;
21249 # always open comma lists not preceded by keywords,
21250 # barewords, identifiers (that is, anything that doesn't
21251 # look like a function call)
21252 my $must_break_open = $last_nonblank_type[$dd] !~ /^[kwiU]$/;
21254 $self->table_maker(
21257 i_opening_paren => $opening_structure_index_stack[$dd],
21258 i_closing_paren => $i,
21259 item_count => $item_count_stack[$dd],
21260 identifier_count => $identifier_count_stack[$dd],
21261 rcomma_index => $comma_index[$dd],
21262 next_nonblank_type => $next_nonblank_type,
21263 list_type => $container_type[$dd],
21264 interrupted => $interrupted_list[$dd],
21265 rdo_not_break_apart => \$do_not_break_apart,
21266 must_break_open => $must_break_open,
21267 has_broken_sublist => $has_broken_sublist[$dd],
21270 $bp_count = $forced_breakpoint_count - $fbc;
21271 $do_not_break_apart = 0 if $must_break_open;
21274 return ( $bp_count, $do_not_break_apart );
21275 } ## end sub set_comma_breakpoints
21277 # These types are excluded at breakpoints to prevent blinking
21278 # Switched from excluded to included as part of fix for b1214
21279 my %is_uncontained_comma_break_included_type;
21283 my @q = qw< k R } ) ] Y Z U w i q Q .
21284 = **= += *= &= <<= &&= -= /= |= >>= ||= //= .= %= ^= x=>;
21285 @is_uncontained_comma_break_included_type{@q} = (1) x scalar(@q);
21288 sub do_uncontained_comma_breaks {
21290 # Handle commas not in containers...
21291 # This is a catch-all routine for commas that we
21292 # don't know what to do with because the don't fall
21293 # within containers. We will bias the bond strength
21294 # to break at commas which ended lines in the input
21295 # file. This usually works better than just trying
21296 # to put as many items on a line as possible. A
21297 # downside is that if the input file is garbage it
21298 # won't work very well. However, the user can always
21299 # prevent following the old breakpoints with the
21301 my ( $self, $dd, $rbond_strength_bias ) = @_;
21303 # Check added for issue c131; an error here would be due to an
21304 # error initializing @comma_index when entering depth $dd.
21306 foreach my $ii ( @{ $comma_index[$dd] } ) {
21307 if ( $ii < 0 || $ii > $max_index_to_go ) {
21308 my $KK = $K_to_go[0];
21309 my $lno = $self->[_rLL_]->[$KK]->[_LINE_INDEX_];
21311 Bad comma index near line $lno: i=$ii must be between 0 and $max_index_to_go
21318 my $old_comma_break_count = 0;
21319 foreach my $ii ( @{ $comma_index[$dd] } ) {
21321 if ( $old_breakpoint_to_go[$ii] ) {
21322 $old_comma_break_count++;
21324 # Store the bias info for use by sub set_bond_strength
21325 push @{$rbond_strength_bias}, [ $ii, $bias ];
21327 # reduce bias magnitude to force breaks in order
21332 # Also put a break before the first comma if
21333 # (1) there was a break there in the input, and
21334 # (2) there was exactly one old break before the first comma break
21335 # (3) OLD: there are multiple old comma breaks
21336 # (3) NEW: there are one or more old comma breaks (see return example)
21337 # (4) the first comma is at the starting level ...
21338 # ... fixes cases b064 b065 b068 b210 b747
21339 # (5) the batch does not start with a ci>0 [ignore a ci change by -xci]
21340 # ... fixes b1220. If ci>0 we are in the middle of a snippet,
21341 # maybe because -boc has been forcing out previous lines.
21343 # For example, we will follow the user and break after
21344 # 'print' in this snippet:
21346 # "conformability (Not the same dimension)\n",
21347 # "\t", $have, " is ", text_unit($hu), "\n",
21348 # "\t", $want, " is ", text_unit($wu), "\n",
21351 # Another example, just one comma, where we will break after
21354 # $x * cos($a) - $y * sin($a),
21355 # $x * sin($a) + $y * cos($a);
21357 # Breaking a print statement:
21359 # ( $? & 127 ) ? " (SIG#" . ( $? & 127 ) . ")" : "",
21360 # ( $? & 128 ) ? " -- core dumped" : "", "\n";
21362 # But we will not force a break after the opening paren here
21363 # (causes a blinker):
21364 # $heap->{stream}->set_output_filter(
21365 # poe::filter::reference->new('myotherfreezer') ),
21368 my $i_first_comma = $comma_index[$dd]->[0];
21369 my $level_comma = $levels_to_go[$i_first_comma];
21370 my $ci_start = $ci_levels_to_go[0];
21372 # Here we want to use the value of ci before any -xci adjustment
21373 if ( $ci_start && $rOpts_extended_continuation_indentation ) {
21374 my $K0 = $K_to_go[0];
21375 if ( $self->[_rseqno_controlling_my_ci_]->{$K0} ) { $ci_start = 0 }
21378 && $old_breakpoint_to_go[$i_first_comma]
21379 && $level_comma == $levels_to_go[0] )
21383 foreach my $ii ( reverse( 0 .. $i_first_comma - 1 ) ) {
21384 if ( $old_breakpoint_to_go[$ii] ) {
21386 last if ( $obp_count > 1 );
21388 if ( $levels_to_go[$ii] == $level_comma );
21392 # Changed rule from multiple old commas to just one here:
21393 if ( $ibreak >= 0 && $obp_count == 1 && $old_comma_break_count > 0 )
21395 my $ibreak_m = $ibreak;
21396 $ibreak_m-- if ( $types_to_go[$ibreak_m] eq 'b' );
21397 if ( $ibreak_m >= 0 ) {
21399 # In order to avoid blinkers we have to be fairly
21403 # Rule 1: Do not to break before an opening token
21404 # Rule 2: avoid breaking at ternary operators
21405 # (see b931, which is similar to the above print example)
21406 # Rule 3: Do not break at chain operators to fix case b1119
21407 # - The previous test was '$typem !~ /^[\(\{\[L\?\:]$/'
21409 # NEW Rule, replaced above rules after case b1214:
21410 # only break at one of the included types
21412 # Be sure to test any changes to these rules against runs
21413 # with -l=0 such as the 'bbvt' test (perltidyrc_colin)
21415 my $type_m = $types_to_go[$ibreak_m];
21417 # Switched from excluded to included for b1214. If necessary
21418 # the token could also be checked if type_m eq 'k'
21419 if ( $is_uncontained_comma_break_included_type{$type_m} ) {
21421 # Rule added to fix b1449:
21422 # Do not break before a '?' if -nbot is set
21423 # Otherwise, we may alternately arrive here and
21424 # set the break, or not, depending on the input.
21426 my $ibreak_p = $inext_to_go[$ibreak_m];
21427 if ( !$rOpts_break_at_old_ternary_breakpoints
21428 && $ibreak_p <= $max_index_to_go )
21430 my $type_p = $types_to_go[$ibreak_p];
21431 $no_break = $type_p eq '?';
21434 $self->set_forced_breakpoint($ibreak)
21441 } ## end sub do_uncontained_comma_breaks
21443 my %is_logical_container;
21447 my @q = qw# if elsif unless while and or err not && | || ? : ! #;
21448 @is_logical_container{@q} = (1) x scalar(@q);
21450 # This filter will allow most tokens to skip past a section of code
21451 %quick_filter = %is_assignment;
21452 @q = qw# => . ; < > ~ #;
21454 push @q, 'f'; # added for ';' for issue c154
21455 @quick_filter{@q} = (1) x scalar(@q);
21458 sub set_for_semicolon_breakpoints {
21459 my ( $self, $dd ) = @_;
21460 foreach ( @{ $rfor_semicolon_list[$dd] } ) {
21461 $self->set_forced_breakpoint($_);
21464 } ## end sub set_for_semicolon_breakpoints
21466 sub set_logical_breakpoints {
21467 my ( $self, $dd ) = @_;
21469 $item_count_stack[$dd] == 0
21470 && $is_logical_container{ $container_type[$dd] }
21472 || $has_old_logical_breakpoints[$dd]
21476 # Look for breaks in this order:
21479 foreach my $i ( 0 .. 3 ) {
21480 if ( $rand_or_list[$dd][$i] ) {
21481 foreach ( @{ $rand_or_list[$dd][$i] } ) {
21482 $self->set_forced_breakpoint($_);
21485 # break at any 'if' and 'unless' too
21486 foreach ( @{ $rand_or_list[$dd][4] } ) {
21487 $self->set_forced_breakpoint($_);
21489 $rand_or_list[$dd] = [];
21495 } ## end sub set_logical_breakpoints
21497 sub is_unbreakable_container {
21499 # never break a container of one of these types
21500 # because bad things can happen (map1.t)
21502 return $is_sort_map_grep{ $container_type[$dd] };
21503 } ## end sub is_unbreakable_container
21507 my ( $self, $is_long_line, $rbond_strength_bias ) = @_;
21509 #--------------------------------------------------------------------
21510 # This routine is called once per batch, if the batch is a list, to
21511 # set line breaks so that hierarchical structure can be displayed and
21512 # so that list items can be vertically aligned. The output of this
21513 # routine is stored in the array @forced_breakpoint_to_go, which is
21514 # used by sub 'break_long_lines' to set final breakpoints. This is
21515 # probably the most complex routine in perltidy, so I have
21516 # broken it into pieces and over-commented it.
21517 #--------------------------------------------------------------------
21519 $starting_depth = $nesting_depth_to_go[0];
21521 $block_type = SPACE;
21522 $current_depth = $starting_depth;
21524 $i_last_colon = -1;
21526 $i_line_start = -1;
21527 $last_nonblank_token = ';';
21528 $last_nonblank_type = ';';
21529 $last_nonblank_block_type = SPACE;
21530 $last_old_breakpoint_count = 0;
21531 $minimum_depth = $current_depth + 1; # forces update in check below
21532 $old_breakpoint_count = 0;
21533 $starting_breakpoint_count = $forced_breakpoint_count;
21536 $type_sequence = EMPTY_STRING;
21538 my $total_depth_variation = 0;
21539 my $i_old_assignment_break;
21540 my $depth_last = $starting_depth;
21541 my $comma_follows_last_closing_token;
21543 $self->check_for_new_minimum_depth( $current_depth,
21544 $parent_seqno_to_go[0] )
21545 if ( $current_depth < $minimum_depth );
21547 my $i_want_previous_break = -1;
21549 my $saw_good_breakpoint;
21551 #----------------------------------------
21552 # Main loop over all tokens in this batch
21553 #----------------------------------------
21554 while ( ++$i <= $max_index_to_go ) {
21555 if ( $type ne 'b' ) {
21556 $i_last_nonblank_token = $i - 1;
21557 $last_nonblank_type = $type;
21558 $last_nonblank_token = $token;
21559 $last_nonblank_block_type = $block_type;
21561 $type = $types_to_go[$i];
21562 $block_type = $block_type_to_go[$i];
21563 $token = $tokens_to_go[$i];
21564 $type_sequence = $type_sequence_to_go[$i];
21566 my $i_next_nonblank = $inext_to_go[$i];
21567 $next_nonblank_type = $types_to_go[$i_next_nonblank];
21568 $next_nonblank_token = $tokens_to_go[$i_next_nonblank];
21569 $next_nonblank_block_type = $block_type_to_go[$i_next_nonblank];
21571 #-------------------------------------------
21572 # Loop Section A: Look for special breakpoints...
21573 #-------------------------------------------
21575 # set break if flag was set
21576 if ( $i_want_previous_break >= 0 ) {
21577 $self->set_forced_breakpoint($i_want_previous_break);
21578 $i_want_previous_break = -1;
21581 $last_old_breakpoint_count = $old_breakpoint_count;
21583 # Check for a good old breakpoint ..
21584 if ( $old_breakpoint_to_go[$i] ) {
21585 ( $i_want_previous_break, $i_old_assignment_break ) =
21586 $self->examine_old_breakpoint( $i_next_nonblank,
21587 $i_want_previous_break, $i_old_assignment_break );
21590 next if ( $type eq 'b' );
21592 $depth = $nesting_depth_to_go[ $i + 1 ];
21594 $total_depth_variation += abs( $depth - $depth_last );
21595 $depth_last = $depth;
21597 # safety check - be sure we always break after a comment
21598 # Shouldn't happen .. an error here probably means that the
21599 # nobreak flag did not get turned off correctly during
21601 if ( $type eq '#' ) {
21602 if ( $i != $max_index_to_go ) {
21605 Non-fatal program bug: backup logic required to break after a comment
21608 $nobreak_to_go[$i] = 0;
21609 $self->set_forced_breakpoint($i);
21610 } ## end if ( $i != $max_index_to_go)
21611 } ## end if ( $type eq '#' )
21613 # Force breakpoints at certain tokens in long lines.
21614 # Note that such breakpoints will be undone later if these tokens
21615 # are fully contained within parens on a line.
21618 # break before a keyword within a line
21622 # if one of these keywords:
21623 && $is_if_unless_while_until_for_foreach{$token}
21625 # but do not break at something like '1 while'
21626 && ( $last_nonblank_type ne 'n' || $i > 2 )
21628 # and let keywords follow a closing 'do' brace
21629 && ( !$last_nonblank_block_type
21630 || $last_nonblank_block_type ne 'do' )
21635 # or container is broken (by side-comment, etc)
21637 $next_nonblank_token eq '('
21638 && ( !defined( $mate_index_to_go[$i_next_nonblank] )
21639 || $mate_index_to_go[$i_next_nonblank] < $i )
21644 $self->set_forced_breakpoint( $i - 1 );
21647 # remember locations of '||' and '&&' for possible breaks if we
21648 # decide this is a long logical expression.
21649 if ( $type eq '||' ) {
21650 push @{ $rand_or_list[$depth][2] }, $i;
21651 ++$has_old_logical_breakpoints[$depth]
21652 if ( ( $i == $i_line_start || $i == $i_line_end )
21653 && $rOpts_break_at_old_logical_breakpoints );
21655 elsif ( $type eq '&&' ) {
21656 push @{ $rand_or_list[$depth][3] }, $i;
21657 ++$has_old_logical_breakpoints[$depth]
21658 if ( ( $i == $i_line_start || $i == $i_line_end )
21659 && $rOpts_break_at_old_logical_breakpoints );
21661 elsif ( $type eq 'f' ) {
21662 push @{ $rfor_semicolon_list[$depth] }, $i;
21664 elsif ( $type eq 'k' ) {
21665 if ( $token eq 'and' ) {
21666 push @{ $rand_or_list[$depth][1] }, $i;
21667 ++$has_old_logical_breakpoints[$depth]
21668 if ( ( $i == $i_line_start || $i == $i_line_end )
21669 && $rOpts_break_at_old_logical_breakpoints );
21672 # break immediately at 'or's which are probably not in a logical
21673 # block -- but we will break in logical breaks below so that
21674 # they do not add to the forced_breakpoint_count
21675 elsif ( $token eq 'or' ) {
21676 push @{ $rand_or_list[$depth][0] }, $i;
21677 ++$has_old_logical_breakpoints[$depth]
21678 if ( ( $i == $i_line_start || $i == $i_line_end )
21679 && $rOpts_break_at_old_logical_breakpoints );
21680 if ( $is_logical_container{ $container_type[$depth] } ) {
21683 if ($is_long_line) { $self->set_forced_breakpoint($i) }
21684 elsif ( ( $i == $i_line_start || $i == $i_line_end )
21685 && $rOpts_break_at_old_logical_breakpoints )
21687 $saw_good_breakpoint = 1;
21691 elsif ( $token eq 'if' || $token eq 'unless' ) {
21692 push @{ $rand_or_list[$depth][4] }, $i;
21693 if ( ( $i == $i_line_start || $i == $i_line_end )
21694 && $rOpts_break_at_old_logical_breakpoints )
21696 $self->set_forced_breakpoint($i);
21700 elsif ( $is_assignment{$type} ) {
21701 $i_equals[$depth] = $i;
21704 #-----------------------------------------
21705 # Loop Section B: Handle a sequenced token
21706 #-----------------------------------------
21707 if ($type_sequence) {
21708 $self->break_lists_type_sequence;
21711 #------------------------------------------
21712 # Loop Section C: Handle Increasing Depth..
21713 #------------------------------------------
21715 # hardened against bad input syntax: depth jump must be 1 and type
21716 # must be opening..fixes c102
21717 if ( $depth == $current_depth + 1 && $is_opening_type{$type} ) {
21718 $self->break_lists_increasing_depth();
21721 #------------------------------------------
21722 # Loop Section D: Handle Decreasing Depth..
21723 #------------------------------------------
21725 # hardened against bad input syntax: depth jump must be 1 and type
21726 # must be closing .. fixes c102
21727 elsif ( $depth == $current_depth - 1 && $is_closing_type{$type} ) {
21729 $self->break_lists_decreasing_depth();
21731 $comma_follows_last_closing_token =
21732 $next_nonblank_type eq ',' || $next_nonblank_type eq '=>';
21736 #----------------------------------
21737 # Loop Section E: Handle this token
21738 #----------------------------------
21740 $current_depth = $depth;
21742 # most token types can skip the rest of this loop
21743 next unless ( $quick_filter{$type} );
21745 # handle comma-arrow
21746 if ( $type eq '=>' ) {
21747 next if ( $last_nonblank_type eq '=>' );
21748 next if $rOpts_break_at_old_comma_breakpoints;
21750 if ( $rOpts_comma_arrow_breakpoints == 3
21751 && !defined( $override_cab3[$depth] ) );
21752 $want_comma_break[$depth] = 1;
21753 $index_before_arrow[$depth] = $i_last_nonblank_token;
21757 elsif ( $type eq '.' ) {
21758 $last_dot_index[$depth] = $i;
21761 # Turn off comma alignment if we are sure that this is not a list
21762 # environment. To be safe, we will do this if we see certain
21763 # non-list tokens, such as ';', '=', and also the environment is
21765 ## $type =~ /^[\;\<\>\~f]$/ || $is_assignment{$type}
21766 elsif ( $is_non_list_type{$type}
21767 && !$self->is_in_list_by_i($i) )
21769 $dont_align[$depth] = 1;
21770 $want_comma_break[$depth] = 0;
21771 $index_before_arrow[$depth] = -1;
21773 # no special comma breaks in C-style 'for' terms (c154)
21774 if ( $type eq 'f' ) { $last_comma_index[$depth] = undef }
21777 # now just handle any commas
21778 next if ( $type ne ',' );
21779 $self->study_comma($comma_follows_last_closing_token);
21781 } ## end while ( ++$i <= $max_index_to_go)
21783 #-------------------------------------------
21784 # END of loop over all tokens in this batch
21785 # Now set breaks for any unfinished lists ..
21786 #-------------------------------------------
21788 foreach my $dd ( reverse( $minimum_depth .. $current_depth ) ) {
21790 $interrupted_list[$dd] = 1;
21791 $has_broken_sublist[$dd] = 1 if ( $dd < $current_depth );
21792 $self->set_comma_breakpoints( $dd, $rbond_strength_bias )
21793 if ( $item_count_stack[$dd] );
21794 $self->set_logical_breakpoints($dd)
21795 if ( $has_old_logical_breakpoints[$dd] );
21796 $self->set_for_semicolon_breakpoints($dd);
21798 # break open container...
21799 my $i_opening = $opening_structure_index_stack[$dd];
21800 if ( defined($i_opening) && $i_opening >= 0 ) {
21801 $self->set_forced_breakpoint($i_opening)
21803 is_unbreakable_container($dd)
21805 # Avoid a break which would place an isolated ' or "
21808 && $i_opening >= $max_index_to_go - 2
21809 && ( $token eq "'" || $token eq '"' ) )
21812 } ## end for ( my $dd = $current_depth...)
21814 #----------------------------------------
21815 # Return the flag '$saw_good_breakpoint'.
21816 #----------------------------------------
21817 # This indicates if the input file had some good breakpoints. This
21818 # flag will be used to force a break in a line shorter than the
21819 # allowed line length.
21820 if ( $has_old_logical_breakpoints[$current_depth] ) {
21821 $saw_good_breakpoint = 1;
21824 # A complex line with one break at an = has a good breakpoint.
21825 # This is not complex ($total_depth_variation=0):
21829 # This is complex ($total_depth_variation=6):
21831 # (is_boundp("a", 'self-insert') && is_boundp("b", 'self-insert'));
21833 # The check ($i_old_.. < $max_index_to_go) was added to fix b1333
21834 elsif ($i_old_assignment_break
21835 && $total_depth_variation > 4
21836 && $old_breakpoint_count == 1
21837 && $i_old_assignment_break < $max_index_to_go )
21839 $saw_good_breakpoint = 1;
21842 return $saw_good_breakpoint;
21843 } ## end sub break_lists
21847 # study and store info for a list comma
21849 my ( $self, $comma_follows_last_closing_token ) = @_;
21851 $last_dot_index[$depth] = undef;
21852 $last_comma_index[$depth] = $i;
21854 # break here if this comma follows a '=>'
21855 # but not if there is a side comment after the comma
21856 if ( $want_comma_break[$depth] ) {
21858 if ( $next_nonblank_type =~ /^[\)\}\]R]$/ ) {
21859 if ($rOpts_comma_arrow_breakpoints) {
21860 $want_comma_break[$depth] = 0;
21865 $self->set_forced_breakpoint($i)
21866 unless ( $next_nonblank_type eq '#' );
21868 # break before the previous token if it looks safe
21869 # Example of something that we will not try to break before:
21870 # DBI::SQL_SMALLINT() => $ado_consts->{adSmallInt},
21871 # Also we don't want to break at a binary operator (like +):
21875 # $y - $R, -fill => 'black',
21877 my $ibreak = $index_before_arrow[$depth] - 1;
21879 && $tokens_to_go[ $ibreak + 1 ] !~ /^[\)\}\]]$/ )
21881 if ( $tokens_to_go[$ibreak] eq '-' ) { $ibreak-- }
21882 if ( $types_to_go[$ibreak] eq 'b' ) { $ibreak-- }
21883 if ( $types_to_go[$ibreak] =~ /^[,wiZCUG\(\{\[]$/ ) {
21885 # don't break before a comma, as in the following:
21886 # ( LONGER_THAN,=> 1,
21887 # EIGHTY_CHARACTERS,=> 2,
21888 # CAUSES_FORMATTING,=> 3,
21891 # This example is for -tso but should be general rule
21892 if ( $tokens_to_go[ $ibreak + 1 ] ne '->'
21893 && $tokens_to_go[ $ibreak + 1 ] ne ',' )
21895 $self->set_forced_breakpoint($ibreak);
21900 $want_comma_break[$depth] = 0;
21901 $index_before_arrow[$depth] = -1;
21903 # handle list which mixes '=>'s and ','s:
21904 # treat any list items so far as an interrupted list
21905 $interrupted_list[$depth] = 1;
21909 # Break after all commas above starting depth...
21910 # But only if the last closing token was followed by a comma,
21911 # to avoid breaking a list operator (issue c119)
21912 if ( $depth < $starting_depth
21913 && $comma_follows_last_closing_token
21914 && !$dont_align[$depth] )
21916 $self->set_forced_breakpoint($i)
21917 unless ( $next_nonblank_type eq '#' );
21921 # add this comma to the list..
21922 my $item_count = $item_count_stack[$depth];
21923 if ( $item_count == 0 ) {
21925 # but do not form a list with no opening structure
21928 # open INFILE_COPY, ">$input_file_copy"
21929 # or die ("very long message");
21930 if ( ( $opening_structure_index_stack[$depth] < 0 )
21931 && $self->is_in_block_by_i($i) )
21933 $dont_align[$depth] = 1;
21937 $comma_index[$depth][$item_count] = $i;
21938 ++$item_count_stack[$depth];
21939 if ( $last_nonblank_type =~ /^[iR\]]$/ ) {
21940 $identifier_count_stack[$depth]++;
21943 } ## end sub study_comma
21947 my %poor_next_types;
21948 my %poor_next_keywords;
21952 # Setup filters for detecting very poor breaks to ignore.
21953 # b1097: old breaks after type 'L' and before 'R' are poor
21954 # b1450: old breaks at 'eq' and related operators are poor
21955 my @q = qw(== <= >= !=);
21957 @{poor_types}{@q} = (1) x scalar(@q);
21958 @{poor_next_types}{@q} = (1) x scalar(@q);
21959 $poor_types{'L'} = 1;
21960 $poor_next_types{'R'} = 1;
21962 @q = qw(eq ne le ge lt gt);
21963 @{poor_keywords}{@q} = (1) x scalar(@q);
21964 @{poor_next_keywords}{@q} = (1) x scalar(@q);
21967 sub examine_old_breakpoint {
21969 my ( $self, $i_next_nonblank, $i_want_previous_break,
21970 $i_old_assignment_break )
21973 # Look at an old breakpoint and set/update certain flags:
21975 # Given indexes of three tokens in this batch:
21976 # $i_next_nonblank - index of the next nonblank token
21977 # $i_want_previous_break - we want a break before this index
21978 # $i_old_assignment_break - the index of an '=' or equivalent
21980 # $old_breakpoint_count - a counter to increment unless poor break
21981 # Update and return:
21982 # $i_want_previous_break
21983 # $i_old_assignment_break
21985 #-----------------------
21986 # Filter out poor breaks
21987 #-----------------------
21988 # Just return if this is a poor break and pretend it does not exist.
21989 # Otherwise, poor breaks made under stress can cause instability.
21991 if ( $type eq 'k' ) { $poor_break ||= $poor_keywords{$token} }
21992 else { $poor_break ||= $poor_types{$type} }
21994 if ( $next_nonblank_type eq 'k' ) {
21995 $poor_break ||= $poor_next_keywords{$next_nonblank_token};
21997 else { $poor_break ||= $poor_next_types{$next_nonblank_type} }
21999 # Also ignore any high stress level breaks; fixes b1395
22000 $poor_break ||= $levels_to_go[$i] >= $high_stress_level;
22001 if ($poor_break) { goto RETURN }
22003 #--------------------------------------------
22004 # Not a poor break, so continue to examine it
22005 #--------------------------------------------
22006 $old_breakpoint_count++;
22008 $i_line_start = $i_next_nonblank;
22010 #---------------------------------------
22011 # Do we want to break before this token?
22012 #---------------------------------------
22014 # Break before certain keywords if user broke there and
22015 # this is a 'safe' break point. The idea is to retain
22016 # any preferred breaks for sequential list operations,
22017 # like a schwartzian transform.
22018 if ($rOpts_break_at_old_keyword_breakpoints) {
22020 $next_nonblank_type eq 'k'
22021 && $is_keyword_returning_list{$next_nonblank_token}
22022 && ( $type =~ /^[=\)\]\}Riw]$/
22023 || $type eq 'k' && $is_keyword_returning_list{$token} )
22027 # we actually have to set this break next time through
22028 # the loop because if we are at a closing token (such
22029 # as '}') which forms a one-line block, this break might
22032 # But do not do this at an '=' if:
22033 # - the user wants breaks before an equals (b434 b903)
22034 # - or -naws is set (can be unstable, see b1354)
22035 my $skip = $type eq '='
22036 && ( $want_break_before{$type}
22037 || !$rOpts_add_whitespace );
22039 $i_want_previous_break = $i
22045 # Break before attributes if user broke there
22046 if ($rOpts_break_at_old_attribute_breakpoints) {
22047 if ( $next_nonblank_type eq 'A' ) {
22048 $i_want_previous_break = $i;
22052 #---------------------------------
22053 # Is this an old assignment break?
22054 #---------------------------------
22055 if ( $is_assignment{$type} ) {
22056 $i_old_assignment_break = $i;
22058 elsif ( $is_assignment{$next_nonblank_type} ) {
22059 $i_old_assignment_break = $i_next_nonblank;
22063 return ( $i_want_previous_break, $i_old_assignment_break );
22064 } ## end sub examine_old_breakpoint
22066 sub break_lists_type_sequence {
22070 # We have encountered a sequenced token while setting list breakpoints
22072 # if closing type, one of } ) ] :
22073 if ( $is_closing_sequence_token{$token} ) {
22075 if ( $type eq ':' ) {
22076 $i_last_colon = $i;
22078 # retain break at a ':' line break
22079 if ( ( $i == $i_line_start || $i == $i_line_end )
22080 && $rOpts_break_at_old_ternary_breakpoints
22081 && $levels_to_go[$i] < $high_stress_level )
22084 $self->set_forced_breakpoint($i);
22086 # Break at a previous '=', but only if it is before
22087 # the mating '?'. Mate_index test fixes b1287.
22088 my $ieq = $i_equals[$depth];
22089 my $mix = $mate_index_to_go[$i];
22090 if ( !defined($mix) ) { $mix = -1 }
22091 if ( $ieq > 0 && $ieq < $mix ) {
22092 $self->set_forced_breakpoint( $i_equals[$depth] );
22093 $i_equals[$depth] = -1;
22098 # handle any postponed closing breakpoints
22099 if ( has_postponed_breakpoint($type_sequence) ) {
22100 my $inc = ( $type eq ':' ) ? 0 : 1;
22101 if ( $i >= $inc ) {
22102 $self->set_forced_breakpoint( $i - $inc );
22107 # must be opening token, one of { ( [ ?
22110 # set breaks at ?/: if they will get separated (and are
22111 # not a ?/: chain), or if the '?' is at the end of the
22113 if ( $token eq '?' ) {
22114 my $i_colon = $mate_index_to_go[$i];
22116 !defined($i_colon) # the ':' is not in this batch
22117 || $i == 0 # this '?' is the first token of the line
22118 || $i == $max_index_to_go # or this '?' is the last token
22122 # don't break if # this has a side comment, and
22123 # don't break at a '?' if preceded by ':' on
22124 # this line of previous ?/: pair on this line.
22125 # This is an attempt to preserve a chain of ?/:
22126 # expressions (elsif2.t).
22130 || $parent_seqno_to_go[$i_last_colon] !=
22131 $parent_seqno_to_go[$i]
22133 && $tokens_to_go[$max_index_to_go] ne '#'
22136 $self->set_forced_breakpoint($i);
22138 $self->set_closing_breakpoint($i);
22142 # must be one of { ( [
22145 # do requested -lp breaks at the OPENING token for BROKEN
22146 # blocks. NOTE: this can be done for both -lp and -xlp,
22147 # but only -xlp can really take advantage of this. So this
22148 # is currently restricted to -xlp to avoid excess changes to
22149 # existing -lp formatting.
22150 if ( $rOpts_extended_line_up_parentheses
22151 && !defined( $mate_index_to_go[$i] ) )
22154 $self->[_rlp_object_by_seqno_]->{$type_sequence};
22156 my $K_begin_line = $lp_object->get_K_begin_line();
22157 my $i_begin_line = $K_begin_line - $K_to_go[0];
22158 $self->set_forced_lp_break( $i_begin_line, $i );
22164 } ## end sub break_lists_type_sequence
22166 sub break_lists_increasing_depth {
22170 #--------------------------------------------
22171 # prepare for a new list when depth increases
22172 # token $i is a '(','{', or '['
22173 #--------------------------------------------
22175 #----------------------------------------------------------
22176 # BEGIN initialize depth arrays
22177 # ... use the same order as sub check_for_new_minimum_depth
22178 #----------------------------------------------------------
22179 $type_sequence_stack[$depth] = $type_sequence;
22181 $override_cab3[$depth] = undef;
22182 if ( $rOpts_comma_arrow_breakpoints == 3 && $type_sequence ) {
22183 $override_cab3[$depth] =
22184 $self->[_roverride_cab3_]->{$type_sequence};
22187 $breakpoint_stack[$depth] = $forced_breakpoint_count;
22188 $container_type[$depth] =
22191 $is_container_label_type{$last_nonblank_type}
22192 ? $last_nonblank_token
22194 $identifier_count_stack[$depth] = 0;
22195 $index_before_arrow[$depth] = -1;
22196 $interrupted_list[$depth] = 0;
22197 $item_count_stack[$depth] = 0;
22198 $last_nonblank_type[$depth] = $last_nonblank_type;
22199 $opening_structure_index_stack[$depth] = $i;
22201 $breakpoint_undo_stack[$depth] = $forced_breakpoint_undo_count;
22202 $comma_index[$depth] = undef;
22203 $last_comma_index[$depth] = undef;
22204 $last_dot_index[$depth] = undef;
22205 $old_breakpoint_count_stack[$depth] = $old_breakpoint_count;
22206 $has_old_logical_breakpoints[$depth] = 0;
22207 $rand_or_list[$depth] = [];
22208 $rfor_semicolon_list[$depth] = [];
22209 $i_equals[$depth] = -1;
22211 # if line ends here then signal closing token to break
22212 if ( $next_nonblank_type eq 'b' || $next_nonblank_type eq '#' ) {
22213 $self->set_closing_breakpoint($i);
22216 # Not all lists of values should be vertically aligned..
22217 $dont_align[$depth] =
22219 # code BLOCKS are handled at a higher level
22220 ##( $block_type ne EMPTY_STRING )
22223 # certain paren lists
22224 || ( $type eq '(' ) && (
22226 # it does not usually look good to align a list of
22227 # identifiers in a parameter list, as in:
22228 # my($var1, $var2, ...)
22229 # (This test should probably be refined, for now I'm just
22230 # testing for any keyword)
22231 ( $last_nonblank_type eq 'k' )
22233 # a trailing '(' usually indicates a non-list
22234 || ( $next_nonblank_type eq '(' )
22236 $has_broken_sublist[$depth] = 0;
22237 $want_comma_break[$depth] = 0;
22239 #----------------------------
22240 # END initialize depth arrays
22241 #----------------------------
22243 # patch to outdent opening brace of long if/for/..
22244 # statements (like this one). See similar coding in
22245 # set_continuation breaks. We have also catch it here for
22246 # short line fragments which otherwise will not go through
22247 # break_long_lines.
22251 # if we have the ')' but not its '(' in this batch..
22252 && ( $last_nonblank_token eq ')' )
22253 && !defined( $mate_index_to_go[$i_last_nonblank_token] )
22255 # and user wants brace to left
22256 && !$rOpts_opening_brace_always_on_right
22258 && ( $type eq '{' ) # should be true
22259 && ( $token eq '{' ) # should be true
22262 $self->set_forced_breakpoint( $i - 1 );
22266 } ## end sub break_lists_increasing_depth
22268 sub break_lists_decreasing_depth {
22270 my ( $self, $rbond_strength_bias ) = @_;
22272 # We have arrived at a closing container token in sub break_lists:
22273 # the token at index $i is one of these: ')','}', ']'
22274 # A number of important breakpoints for this container can now be set
22275 # based on the information that we have collected. This includes:
22276 # - breaks at commas to format tables
22277 # - breaks at certain logical operators and other good breakpoints
22278 # - breaks at opening and closing containers if needed by selected
22279 # formatting styles
22280 # These breaks are made by calling sub 'set_forced_breakpoint'
22282 $self->check_for_new_minimum_depth( $depth, $parent_seqno_to_go[$i] )
22283 if ( $depth < $minimum_depth );
22285 # force all outer logical containers to break after we see on
22287 $has_old_logical_breakpoints[$depth] ||=
22288 $has_old_logical_breakpoints[$current_depth];
22290 # Patch to break between ') {' if the paren list is broken.
22291 # There is similar logic in break_long_lines for
22292 # non-broken lists.
22294 && $next_nonblank_block_type
22295 && $interrupted_list[$current_depth]
22296 && $next_nonblank_type eq '{'
22297 && !$rOpts_opening_brace_always_on_right )
22299 $self->set_forced_breakpoint($i);
22302 #print "LISTY sees: i=$i type=$type tok=$token block=$block_type depth=$depth next=$next_nonblank_type next_block=$next_nonblank_block_type inter=$interrupted_list[$current_depth]\n";
22304 #-----------------------------------------------------------------
22305 # Set breaks at commas to display a table of values if appropriate
22306 #-----------------------------------------------------------------
22307 my ( $bp_count, $do_not_break_apart ) = ( 0, 0 );
22308 ( $bp_count, $do_not_break_apart ) =
22309 $self->set_comma_breakpoints( $current_depth, $rbond_strength_bias )
22310 if ( $item_count_stack[$current_depth] );
22312 #-----------------------------------------------------------
22313 # Now set flags needed to decide if we should break open the
22314 # container ... This is a long rambling section which has
22315 # grown over time to handle all situations.
22316 #-----------------------------------------------------------
22317 my $i_opening = $opening_structure_index_stack[$current_depth];
22318 my $saw_opening_structure = ( $i_opening >= 0 );
22320 if ( $rOpts_line_up_parentheses && $saw_opening_structure ) {
22321 $lp_object = $self->[_rlp_object_by_seqno_]
22322 ->{ $type_sequence_to_go[$i_opening] };
22325 # this term is long if we had to break at interior commas..
22326 my $is_long_term = $bp_count > 0;
22328 # If this is a short container with one or more comma arrows,
22329 # then we will mark it as a long term to open it if requested.
22330 # $rOpts_comma_arrow_breakpoints =
22331 # 0 - open only if comma precedes closing brace
22332 # 1 - stable: except for one line blocks
22333 # 2 - try to form 1 line blocks
22335 # 4 - always open up if vt=0
22336 # 5 - stable: even for one line blocks if vt=0
22338 my $cab_flag = $rOpts_comma_arrow_breakpoints;
22340 # replace -cab=3 if overriden
22341 if ( $cab_flag == 3 && $type_sequence ) {
22342 my $test_cab = $self->[_roverride_cab3_]->{$type_sequence};
22343 if ( defined($test_cab) ) { $cab_flag = $test_cab }
22346 # PATCH: Modify the -cab flag if we are not processing a list:
22347 # We only want the -cab flag to apply to list containers, so
22348 # for non-lists we use the default and stable -cab=5 value.
22349 # Fixes case b939a.
22350 if ( $type_sequence && !$self->[_ris_list_by_seqno_]->{$type_sequence} )
22355 # Ignore old breakpoints when under stress.
22356 # Fixes b1203 b1204 as well as b1197-b1200.
22357 # But not if -lp: fixes b1264, b1265. NOTE: rechecked with
22358 # b1264 to see if this check is still required at all, and
22359 # these still require a check, but at higher level beta+3
22360 # instead of beta: b1193 b780
22361 if ( $saw_opening_structure
22363 && $levels_to_go[$i_opening] >= $high_stress_level )
22367 # Do not break hash braces under stress (fixes b1238)
22368 $do_not_break_apart ||= $types_to_go[$i_opening] eq 'L';
22370 # This option fixes b1235, b1237, b1240 with old and new
22371 # -lp, but formatting is nicer with next option.
22372 ## $is_long_term ||=
22373 ## $levels_to_go[$i_opening] > $stress_level_beta + 1;
22375 # This option fixes b1240 but not b1235, b1237 with new -lp,
22376 # but this gives better formatting than the previous option.
22377 # TODO: see if stress_level_alpha should also be considered
22378 $do_not_break_apart ||=
22379 $levels_to_go[$i_opening] > $stress_level_beta;
22382 if ( !$is_long_term
22383 && $saw_opening_structure
22384 && $is_opening_token{ $tokens_to_go[$i_opening] }
22385 && $index_before_arrow[ $depth + 1 ] > 0
22386 && !$opening_vertical_tightness{ $tokens_to_go[$i_opening] } )
22390 || $cab_flag == 0 && $last_nonblank_token eq ','
22391 || $cab_flag == 5 && $old_breakpoint_to_go[$i_opening];
22394 # mark term as long if the length between opening and closing
22395 # parens exceeds allowed line length
22396 if ( !$is_long_term && $saw_opening_structure ) {
22398 my $i_opening_minus = $self->find_token_starting_list($i_opening);
22400 my $excess = $self->excess_line_length( $i_opening_minus, $i );
22402 # Use standard spaces for indentation of lists in -lp mode
22403 # if it gives a longer line length. This helps to avoid an
22404 # instability due to forming and breaking one-line blocks.
22405 # This fixes case b1314.
22406 my $indentation = $leading_spaces_to_go[$i_opening_minus];
22407 if ( ref($indentation)
22408 && $self->[_ris_broken_container_]->{$type_sequence} )
22410 my $lp_spaces = $indentation->get_spaces();
22411 my $std_spaces = $indentation->get_standard_spaces();
22412 my $diff = $std_spaces - $lp_spaces;
22413 if ( $diff > 0 ) { $excess += $diff }
22416 my $tol = $length_tol;
22418 # boost tol for an -lp container
22422 && ( $rOpts_extended_continuation_indentation
22423 || !$self->[_ris_list_by_seqno_]->{$type_sequence} )
22426 $tol += $lp_tol_boost;
22429 # Patch to avoid blinking with -bbxi=2 and -cab=2
22430 # in which variations in -ci cause unstable formatting
22431 # in edge cases. We just always add one ci level so that
22432 # the formatting is independent of the -BBX results.
22433 # Fixes cases b1137 b1149 b1150 b1155 b1158 b1159 b1160
22434 # b1161 b1166 b1167 b1168
22435 if ( !$ci_levels_to_go[$i_opening]
22436 && $self->[_rbreak_before_container_by_seqno_]->{$type_sequence}
22439 $tol += $rOpts_continuation_indentation;
22442 $is_long_term = $excess + $tol > 0;
22446 # We've set breaks after all comma-arrows. Now we have to
22447 # undo them if this can be a one-line block
22448 # (the only breakpoints set will be due to comma-arrows)
22452 # user doesn't require breaking after all comma-arrows
22453 ( $cab_flag != 0 ) && ( $cab_flag != 4 )
22455 # and if the opening structure is in this batch
22456 && $saw_opening_structure
22458 # and either on the same old line
22460 $old_breakpoint_count_stack[$current_depth] ==
22461 $last_old_breakpoint_count
22463 # or user wants to form long blocks with arrows
22467 # and we made breakpoints between the opening and closing
22468 && ( $breakpoint_undo_stack[$current_depth] <
22469 $forced_breakpoint_undo_count )
22471 # and this block is short enough to fit on one line
22472 # Note: use < because need 1 more space for possible comma
22477 $self->undo_forced_breakpoint_stack(
22478 $breakpoint_undo_stack[$current_depth] );
22481 # now see if we have any comma breakpoints left
22482 my $has_comma_breakpoints =
22483 ( $breakpoint_stack[$current_depth] != $forced_breakpoint_count );
22485 # update broken-sublist flag of the outer container
22486 $has_broken_sublist[$depth] =
22487 $has_broken_sublist[$depth]
22488 || $has_broken_sublist[$current_depth]
22490 || $has_comma_breakpoints;
22492 # Having come to the closing ')', '}', or ']', now we have to decide
22493 # if we should 'open up' the structure by placing breaks at the
22494 # opening and closing containers. This is a tricky decision. Here
22495 # are some of the basic considerations:
22497 # -If this is a BLOCK container, then any breakpoints will have
22498 # already been set (and according to user preferences), so we need do
22501 # -If we have a comma-separated list for which we can align the list
22502 # items, then we need to do so because otherwise the vertical aligner
22503 # cannot currently do the alignment.
22505 # -If this container does itself contain a container which has been
22506 # broken open, then it should be broken open to properly show the
22509 # -If there is nothing to align, and no other reason to break apart,
22510 # then do not do it.
22512 # We will not break open the parens of a long but 'simple' logical
22513 # expression. For example:
22515 # This is an example of a simple logical expression and its formatting:
22517 # if ( $bigwasteofspace1 && $bigwasteofspace2
22518 # || $bigwasteofspace3 && $bigwasteofspace4 )
22520 # Most people would prefer this than the 'spacey' version:
22523 # $bigwasteofspace1 && $bigwasteofspace2
22524 # || $bigwasteofspace3 && $bigwasteofspace4
22527 # To illustrate the rules for breaking logical expressions, consider:
22531 # and ( exists $ids_excl_uc{$id_uc}
22532 # or grep $id_uc =~ /$_/, @ids_excl_uc ))
22534 # This is on the verge of being difficult to read. The current
22535 # default is to open it up like this:
22540 # and ( exists $ids_excl_uc{$id_uc}
22541 # or grep $id_uc =~ /$_/, @ids_excl_uc )
22544 # This is a compromise which tries to avoid being too dense and to
22545 # spacey. A more spaced version would be:
22551 # exists $ids_excl_uc{$id_uc}
22552 # or grep $id_uc =~ /$_/, @ids_excl_uc
22556 # Some people might prefer the spacey version -- an option could be
22557 # added. The innermost expression contains a long block '( exists
22560 # Here is how the logic goes: We will force a break at the 'or' that
22561 # the innermost expression contains, but we will not break apart its
22562 # opening and closing containers because (1) it contains no
22563 # multi-line sub-containers itself, and (2) there is no alignment to
22564 # be gained by breaking it open like this
22567 # exists $ids_excl_uc{$id_uc}
22568 # or grep $id_uc =~ /$_/, @ids_excl_uc
22571 # (although this looks perfectly ok and might be good for long
22572 # expressions). The outer 'if' container, though, contains a broken
22573 # sub-container, so it will be broken open to avoid too much density.
22574 # Also, since it contains no 'or's, there will be a forced break at
22577 # Handle the experimental flag --break-open-compact-parens
22578 # NOTE: This flag is not currently used and may eventually be removed.
22579 # If this flag is set, we will implement it by
22580 # pretending we did not see the opening structure, since in that case
22581 # parens always get opened up.
22582 if ( $saw_opening_structure
22583 && $rOpts_break_open_compact_parens )
22586 # This parameter is a one-character flag, as follows:
22587 # '0' matches no parens -> break open NOT OK
22588 # '1' matches all parens -> break open OK
22589 # Other values are same as used by the weld-exclusion-list
22590 my $flag = $rOpts_break_open_compact_parens;
22594 $saw_opening_structure = 0;
22598 # NOTE: $seqno will be equal to closure var $type_sequence here
22599 my $seqno = $type_sequence_to_go[$i_opening];
22600 $saw_opening_structure =
22601 !$self->match_paren_control_flag( $seqno, $flag );
22605 # Set some more flags telling something about this container..
22606 my $is_simple_logical_expression;
22607 if ( $item_count_stack[$current_depth] == 0
22608 && $saw_opening_structure
22609 && $tokens_to_go[$i_opening] eq '('
22610 && $is_logical_container{ $container_type[$current_depth] } )
22613 # This seems to be a simple logical expression with
22614 # no existing breakpoints. Set a flag to prevent
22616 if ( !$has_comma_breakpoints ) {
22617 $is_simple_logical_expression = 1;
22620 #---------------------------------------------------
22621 # This seems to be a simple logical expression with
22622 # breakpoints (broken sublists, for example). Break
22623 # at all 'or's and '||'s.
22624 #---------------------------------------------------
22626 $self->set_logical_breakpoints($current_depth);
22630 # break long terms at any C-style for semicolons (c154)
22632 && @{ $rfor_semicolon_list[$current_depth] } )
22634 $self->set_for_semicolon_breakpoints($current_depth);
22636 # and open up a long 'for' or 'foreach' container to allow
22637 # leading term alignment unless -lp is used.
22638 $has_comma_breakpoints = 1 unless ($lp_object);
22641 #----------------------------------------------------------------
22642 # FINALLY: Break open container according to the flags which have
22644 #----------------------------------------------------------------
22647 # breaks for code BLOCKS are handled at a higher level
22650 # we do not need to break at the top level of an 'if'
22652 && !$is_simple_logical_expression
22654 ## modification to keep ': (' containers vertically tight;
22655 ## but probably better to let user set -vt=1 to avoid
22656 ## inconsistency with other paren types
22657 ## && ($container_type[$current_depth] ne ':')
22659 # otherwise, we require one of these reasons for breaking:
22662 # - this term has forced line breaks
22663 $has_comma_breakpoints
22665 # - the opening container is separated from this batch
22666 # for some reason (comment, blank line, code block)
22667 # - this is a non-paren container spanning multiple lines
22668 || !$saw_opening_structure
22670 # - this is a long block contained in another breakable
22672 || $is_long_term && !$self->is_in_block_by_i($i_opening)
22677 # do special -lp breaks at the CLOSING token for INTACT
22678 # blocks (because we might not do them if the block does
22681 my $K_begin_line = $lp_object->get_K_begin_line();
22682 my $i_begin_line = $K_begin_line - $K_to_go[0];
22683 $self->set_forced_lp_break( $i_begin_line, $i_opening );
22686 # break after opening structure.
22687 # note: break before closing structure will be automatic
22688 if ( $minimum_depth <= $current_depth ) {
22690 if ( $i_opening >= 0 ) {
22691 if ( !$do_not_break_apart
22692 && !is_unbreakable_container($current_depth) )
22694 $self->set_forced_breakpoint($i_opening);
22696 # Do not let brace types L/R use vertical tightness
22697 # flags to recombine if we have to break on length
22698 # because instability is possible if both vt and vtc
22699 # flags are set ... see issue b1444.
22701 && $types_to_go[$i_opening] eq 'L'
22702 && $opening_vertical_tightness{'{'}
22703 && $closing_vertical_tightness{'}'} )
22705 my $seqno = $type_sequence_to_go[$i_opening];
22707 $self->[_rbreak_container_]->{$seqno} = 1;
22713 # break at ',' of lower depth level before opening token
22714 if ( $last_comma_index[$depth] ) {
22715 $self->set_forced_breakpoint( $last_comma_index[$depth] );
22718 # break at '.' of lower depth level before opening token
22719 if ( $last_dot_index[$depth] ) {
22720 $self->set_forced_breakpoint( $last_dot_index[$depth] );
22723 # break before opening structure if preceded by another
22724 # closing structure and a comma. This is normally
22725 # done by the previous closing brace, but not
22726 # if it was a one-line block.
22727 if ( $i_opening > 2 ) {
22729 ( $types_to_go[ $i_opening - 1 ] eq 'b' )
22733 my $type_prev = $types_to_go[$i_prev];
22734 my $token_prev = $tokens_to_go[$i_prev];
22737 && ( $types_to_go[ $i_prev - 1 ] eq ')'
22738 || $types_to_go[ $i_prev - 1 ] eq '}' )
22741 $self->set_forced_breakpoint($i_prev);
22744 # also break before something like ':(' or '?('
22746 elsif ($type_prev =~ /^([k\:\?]|&&|\|\|)$/
22747 && $want_break_before{$token_prev} )
22749 $self->set_forced_breakpoint($i_prev);
22754 # break after comma following closing structure
22755 if ( $types_to_go[ $i + 1 ] eq ',' ) {
22756 $self->set_forced_breakpoint( $i + 1 );
22759 # break before an '=' following closing structure
22761 $is_assignment{$next_nonblank_type}
22762 && ( $breakpoint_stack[$current_depth] !=
22763 $forced_breakpoint_count )
22766 $self->set_forced_breakpoint($i);
22769 # break at any comma before the opening structure Added
22770 # for -lp, but seems to be good in general. It isn't
22771 # obvious how far back to look; the '5' below seems to
22772 # work well and will catch the comma in something like
22773 # push @list, myfunc( $param, $param, ..
22775 my $icomma = $last_comma_index[$depth];
22776 if ( defined($icomma) && ( $i_opening - $icomma ) < 5 ) {
22777 unless ( $forced_breakpoint_to_go[$icomma] ) {
22778 $self->set_forced_breakpoint($icomma);
22783 #-----------------------------------------------------------
22784 # Break open a logical container open if it was already open
22785 #-----------------------------------------------------------
22786 elsif ($is_simple_logical_expression
22787 && $has_old_logical_breakpoints[$current_depth] )
22789 $self->set_logical_breakpoints($current_depth);
22792 # Handle long container which does not get opened up
22793 elsif ($is_long_term) {
22795 # must set fake breakpoint to alert outer containers that
22797 set_fake_breakpoint();
22801 } ## end sub break_lists_decreasing_depth
22802 } ## end closure break_lists
22809 # Added 'w' to fix b1172
22810 my @q = qw(k w i Z ->);
22811 @is_kwiZ{@q} = (1) x scalar(@q);
22813 # added = for b1211
22814 @q = qw<( [ { L R } ] ) = b>;
22816 @is_key_type{@q} = (1) x scalar(@q);
22819 use constant DEBUG_FIND_START => 0;
22821 sub find_token_starting_list {
22823 # When testing to see if a block will fit on one line, some
22824 # previous token(s) may also need to be on the line; particularly
22825 # if this is a sub call. So we will look back at least one
22827 my ( $self, $i_opening_paren ) = @_;
22829 # This will be the return index
22830 my $i_opening_minus = $i_opening_paren;
22832 if ( $i_opening_minus <= 0 ) {
22833 return $i_opening_minus;
22836 my $im1 = $i_opening_paren - 1;
22837 my ( $iprev_nb, $type_prev_nb ) = ( $im1, $types_to_go[$im1] );
22838 if ( $type_prev_nb eq 'b' && $iprev_nb > 0 ) {
22840 $type_prev_nb = $types_to_go[$iprev_nb];
22843 if ( $type_prev_nb eq ',' ) {
22845 # a previous comma is a good break point
22846 # $i_opening_minus = $i_opening_paren;
22850 $tokens_to_go[$i_opening_paren] eq '('
22852 # non-parens added here to fix case b1186
22853 || $is_kwiZ{$type_prev_nb}
22856 $i_opening_minus = $im1;
22858 # Walk back to improve length estimate...
22859 # FIX for cases b1169 b1170 b1171: start walking back
22860 # at the previous nonblank. This makes the result insensitive
22861 # to the flag --space-function-paren, and similar.
22862 # previous loop: for ( my $j = $im1 ; $j >= 0 ; $j-- ) {
22863 foreach my $j ( reverse( 0 .. $iprev_nb ) ) {
22864 if ( $is_key_type{ $types_to_go[$j] } ) {
22867 if ( $types_to_go[$j] eq '=' ) { $i_opening_minus = $j }
22870 $i_opening_minus = $j;
22872 if ( $types_to_go[$i_opening_minus] eq 'b' ) { $i_opening_minus++ }
22875 DEBUG_FIND_START && print <<EOM;
22876 FIND_START: i=$i_opening_paren tok=$tokens_to_go[$i_opening_paren] => im=$i_opening_minus tok=$tokens_to_go[$i_opening_minus]
22879 return $i_opening_minus;
22880 } ## end sub find_token_starting_list
22882 { ## begin closure table_maker
22884 my %is_keyword_with_special_leading_term;
22888 # These keywords have prototypes which allow a special leading item
22889 # followed by a list
22903 @is_keyword_with_special_leading_term{@q} = (1) x scalar(@q);
22906 use constant DEBUG_SPARSE => 0;
22910 # Given a list of comma-separated items, set breakpoints at some of
22911 # the commas, if necessary, to make it easy to read.
22912 # This is done by making calls to 'set_forced_breakpoint'.
22913 # This is a complex routine because there are many special cases.
22917 # The numerous variables involved are contained three hashes:
22918 # $rhash_IN : For contents see the calling routine
22919 # $rhash_A: For contents see return from sub 'table_layout_A'
22920 # $rhash_B: For contents see return from sub 'table_layout_B'
22922 my ( $self, $rhash_IN ) = @_;
22924 # Find lengths of all list items needed for calculating page layout
22925 my $rhash_A = table_layout_A($rhash_IN);
22926 return if ( !defined($rhash_A) );
22928 # Some variables received from caller...
22929 my $i_closing_paren = $rhash_IN->{i_closing_paren};
22930 my $i_opening_paren = $rhash_IN->{i_opening_paren};
22931 my $has_broken_sublist = $rhash_IN->{has_broken_sublist};
22932 my $interrupted = $rhash_IN->{interrupted};
22934 #-----------------------------------------
22935 # Section A: Handle some special cases ...
22936 #-----------------------------------------
22938 #-------------------------------------------------------------
22939 # Special Case A1: Compound List Rule 1:
22940 # Break at (almost) every comma for a list containing a broken
22941 # sublist. This has higher priority than the Interrupted List
22943 #-------------------------------------------------------------
22944 if ($has_broken_sublist) {
22946 $self->apply_broken_sublist_rule( $rhash_A, $interrupted );
22951 #--------------------------------------------------------------
22952 # Special Case A2: Interrupted List Rule:
22953 # A list is forced to use old breakpoints if it was interrupted
22954 # by side comments or blank lines, or requested by user.
22955 #--------------------------------------------------------------
22956 if ( $rOpts_break_at_old_comma_breakpoints
22958 || $i_opening_paren < 0 )
22960 my $i_first_comma = $rhash_A->{_i_first_comma};
22961 my $i_true_last_comma = $rhash_A->{_i_true_last_comma};
22962 $self->copy_old_breakpoints( $i_first_comma, $i_true_last_comma );
22966 #-----------------------------------------------------------------
22967 # Special Case A3: If it fits on one line, return and let the line
22968 # break logic decide if and where to break.
22969 #-----------------------------------------------------------------
22971 # The -bbxi=2 parameters can add an extra hidden level of indentation
22972 # so they need a tolerance to avoid instability. Fixes b1259, 1260.
22973 my $opening_token = $tokens_to_go[$i_opening_paren];
22975 if ( $break_before_container_types{$opening_token}
22976 && $container_indentation_options{$opening_token}
22977 && $container_indentation_options{$opening_token} == 2 )
22979 $tol = $rOpts_indent_columns;
22981 # use greater of -ci and -i (fix for case b1334)
22982 if ( $tol < $rOpts_continuation_indentation ) {
22983 $tol = $rOpts_continuation_indentation;
22987 my $i_opening_minus = $self->find_token_starting_list($i_opening_paren);
22989 $self->excess_line_length( $i_opening_minus, $i_closing_paren );
22990 return if ( $excess + $tol <= 0 );
22992 #---------------------------------------
22993 # Section B: Handle a multiline list ...
22994 #---------------------------------------
22996 $self->break_multiline_list( $rhash_IN, $rhash_A, $i_opening_minus );
22999 } ## end sub table_maker
23001 sub apply_broken_sublist_rule {
23003 my ( $self, $rhash_A, $interrupted ) = @_;
23005 my $ritem_lengths = $rhash_A->{_ritem_lengths};
23006 my $ri_term_begin = $rhash_A->{_ri_term_begin};
23007 my $ri_term_end = $rhash_A->{_ri_term_end};
23008 my $ri_term_comma = $rhash_A->{_ri_term_comma};
23009 my $item_count = $rhash_A->{_item_count_A};
23010 my $i_first_comma = $rhash_A->{_i_first_comma};
23011 my $i_true_last_comma = $rhash_A->{_i_true_last_comma};
23013 # Break at every comma except for a comma between two
23014 # simple, small terms. This prevents long vertical
23015 # columns of, say, just 0's.
23016 my $small_length = 10; # 2 + actual maximum length wanted
23018 # We'll insert a break in long runs of small terms to
23019 # allow alignment in uniform tables.
23020 my $skipped_count = 0;
23021 my $columns = table_columns_available($i_first_comma);
23022 my $fields = int( $columns / $small_length );
23023 if ( $rOpts_maximum_fields_per_table
23024 && $fields > $rOpts_maximum_fields_per_table )
23026 $fields = $rOpts_maximum_fields_per_table;
23028 my $max_skipped_count = $fields - 1;
23030 my $is_simple_last_term = 0;
23031 my $is_simple_next_term = 0;
23032 foreach my $j ( 0 .. $item_count ) {
23033 $is_simple_last_term = $is_simple_next_term;
23034 $is_simple_next_term = 0;
23035 if ( $j < $item_count
23036 && $ri_term_end->[$j] == $ri_term_begin->[$j]
23037 && $ritem_lengths->[$j] <= $small_length )
23039 $is_simple_next_term = 1;
23042 if ( $is_simple_last_term
23043 && $is_simple_next_term
23044 && $skipped_count < $max_skipped_count )
23049 $skipped_count = 0;
23050 my $i_tc = $ri_term_comma->[ $j - 1 ];
23051 last unless defined $i_tc;
23052 $self->set_forced_breakpoint($i_tc);
23056 # always break at the last comma if this list is
23057 # interrupted; we wouldn't want to leave a terminal '{', for
23059 if ($interrupted) {
23060 $self->set_forced_breakpoint($i_true_last_comma);
23063 } ## end sub apply_broken_sublist_rule
23065 sub set_emergency_comma_breakpoints {
23071 $number_of_fields_best,
23078 # The number of fields worked out to be negative, so we
23079 # have to make an emergency fix.
23081 my $rcomma_index = $rhash_IN->{rcomma_index};
23082 my $next_nonblank_type = $rhash_IN->{next_nonblank_type};
23083 my $rdo_not_break_apart = $rhash_IN->{rdo_not_break_apart};
23084 my $must_break_open = $rhash_IN->{must_break_open};
23086 # are we an item contained in an outer list?
23087 my $in_hierarchical_list = $next_nonblank_type =~ /^[\}\,]$/;
23089 # In many cases, it may be best to not force a break if there is just
23090 # one comma, because the standard continuation break logic will do a
23091 # better job without it.
23093 # In the common case that all but one of the terms can fit
23094 # on a single line, it may look better not to break open the
23095 # containing parens. Consider, for example
23099 # sort { $color_value{$::a} <=> $color_value{$::b}; }
23102 # which will look like this with the container broken:
23106 # sort { $color_value{$::a} <=> $color_value{$::b}; } keys %colors
23109 # Here is an example of this rule for a long last term:
23111 # log_message( 0, 256, 128,
23112 # "Number of routes in adj-RIB-in to be considered: $peercount" );
23114 # And here is an example with a long first term:
23117 # "%2d wallclock secs (%$f usr %$f sys + %$f cusr %$f csys = %$f CPU)",
23118 # $r, $pu, $ps, $cu, $cs, $tt
23120 # if $style eq 'all';
23122 my $i_last_comma = $rcomma_index->[ $comma_count - 1 ];
23124 my $long_last_term = $self->excess_line_length( 0, $i_last_comma ) <= 0;
23125 my $long_first_term =
23126 $self->excess_line_length( $i_first_comma + 1, $max_index_to_go ) <=
23129 # break at every comma ...
23132 # if requested by user or is best looking
23133 $number_of_fields_best == 1
23135 # or if this is a sublist of a larger list
23136 || $in_hierarchical_list
23138 # or if multiple commas and we don't have a long first or last
23140 || ( $comma_count > 1
23141 && !( $long_last_term || $long_first_term ) )
23144 foreach ( 0 .. $comma_count - 1 ) {
23145 $self->set_forced_breakpoint( $rcomma_index->[$_] );
23148 elsif ($long_last_term) {
23150 $self->set_forced_breakpoint($i_last_comma);
23151 ${$rdo_not_break_apart} = 1 unless $must_break_open;
23153 elsif ($long_first_term) {
23155 $self->set_forced_breakpoint($i_first_comma);
23159 # let breaks be defined by default bond strength logic
23162 } ## end sub set_emergency_comma_breakpoints
23164 sub break_multiline_list {
23165 my ( $self, $rhash_IN, $rhash_A, $i_opening_minus ) = @_;
23167 # Overriden variables
23168 my $item_count = $rhash_A->{_item_count_A};
23169 my $identifier_count = $rhash_A->{_identifier_count_A};
23171 # Derived variables:
23172 my $ritem_lengths = $rhash_A->{_ritem_lengths};
23173 my $ri_term_begin = $rhash_A->{_ri_term_begin};
23174 my $ri_term_end = $rhash_A->{_ri_term_end};
23175 my $ri_term_comma = $rhash_A->{_ri_term_comma};
23176 my $rmax_length = $rhash_A->{_rmax_length};
23177 my $comma_count = $rhash_A->{_comma_count};
23178 my $i_effective_last_comma = $rhash_A->{_i_effective_last_comma};
23179 my $first_term_length = $rhash_A->{_first_term_length};
23180 my $i_first_comma = $rhash_A->{_i_first_comma};
23181 my $i_last_comma = $rhash_A->{_i_last_comma};
23182 my $i_true_last_comma = $rhash_A->{_i_true_last_comma};
23184 # Veriables received from caller
23185 my $i_opening_paren = $rhash_IN->{i_opening_paren};
23186 my $i_closing_paren = $rhash_IN->{i_closing_paren};
23187 my $rcomma_index = $rhash_IN->{rcomma_index};
23188 my $next_nonblank_type = $rhash_IN->{next_nonblank_type};
23189 my $list_type = $rhash_IN->{list_type};
23190 my $interrupted = $rhash_IN->{interrupted};
23191 my $rdo_not_break_apart = $rhash_IN->{rdo_not_break_apart};
23192 my $must_break_open = $rhash_IN->{must_break_open};
23193 ## NOTE: these input vars from caller use the values from rhash_A (see above):
23194 ## my $item_count = $rhash_IN->{item_count};
23195 ## my $identifier_count = $rhash_IN->{identifier_count};
23197 # NOTE: i_opening_paren changes value below so we need to get these here
23198 my $opening_is_in_block = $self->is_in_block_by_i($i_opening_paren);
23199 my $opening_token = $tokens_to_go[$i_opening_paren];
23201 #---------------------------------------------------------------
23202 # Section B1: Determine '$number_of_fields' = the best number of
23203 # fields to use if this is to be formatted as a table.
23204 #---------------------------------------------------------------
23206 # Now we know that this block spans multiple lines; we have to set
23207 # at least one breakpoint -- real or fake -- as a signal to break
23208 # open any outer containers.
23209 set_fake_breakpoint();
23211 # Set a flag indicating if we need to break open to keep -lp
23212 # items aligned. This is necessary if any of the list terms
23213 # exceeds the available space after the '('.
23214 my $need_lp_break_open = $must_break_open;
23215 my $is_lp_formatting = ref( $leading_spaces_to_go[$i_first_comma] );
23216 if ( $is_lp_formatting && !$must_break_open ) {
23217 my $columns_if_unbroken =
23218 $maximum_line_length_at_level[ $levels_to_go[$i_opening_minus] ]
23219 - total_line_length( $i_opening_minus, $i_opening_paren );
23220 $need_lp_break_open =
23221 ( $rmax_length->[0] > $columns_if_unbroken )
23222 || ( $rmax_length->[1] > $columns_if_unbroken )
23223 || ( $first_term_length > $columns_if_unbroken );
23227 $self->table_layout_B( $rhash_IN, $rhash_A, $is_lp_formatting );
23228 return if ( !defined($hash_B) );
23230 # Updated variables
23231 $i_first_comma = $hash_B->{_i_first_comma_B};
23232 $i_opening_paren = $hash_B->{_i_opening_paren_B};
23233 $item_count = $hash_B->{_item_count_B};
23236 my $columns = $hash_B->{_columns};
23237 my $formatted_columns = $hash_B->{_formatted_columns};
23238 my $formatted_lines = $hash_B->{_formatted_lines};
23239 my $max_width = $hash_B->{_max_width};
23240 my $new_identifier_count = $hash_B->{_new_identifier_count};
23241 my $number_of_fields = $hash_B->{_number_of_fields};
23242 my $odd_or_even = $hash_B->{_odd_or_even};
23243 my $packed_columns = $hash_B->{_packed_columns};
23244 my $packed_lines = $hash_B->{_packed_lines};
23245 my $pair_width = $hash_B->{_pair_width};
23246 my $ri_ragged_break_list = $hash_B->{_ri_ragged_break_list};
23247 my $use_separate_first_term = $hash_B->{_use_separate_first_term};
23249 # are we an item contained in an outer list?
23250 my $in_hierarchical_list = $next_nonblank_type =~ /^[\}\,]$/;
23252 my $unused_columns = $formatted_columns - $packed_columns;
23254 # set some empirical parameters to help decide if we should try to
23255 # align; high sparsity does not look good, especially with few lines
23256 my $sparsity = ($unused_columns) / ($formatted_columns);
23257 my $max_allowed_sparsity =
23258 ( $item_count < 3 ) ? 0.1
23259 : ( $packed_lines == 1 ) ? 0.15
23260 : ( $packed_lines == 2 ) ? 0.4
23263 my $two_line_word_wrap_ok;
23264 if ( $opening_token eq '(' ) {
23266 # default is to allow wrapping of short paren lists
23267 $two_line_word_wrap_ok = 1;
23269 # but turn off word wrap where requested
23270 if ($rOpts_break_open_compact_parens) {
23272 # This parameter is a one-character flag, as follows:
23273 # '0' matches no parens -> break open NOT OK -> word wrap OK
23274 # '1' matches all parens -> break open OK -> word wrap NOT OK
23275 # Other values are the same as used by the weld-exclusion-list
23276 my $flag = $rOpts_break_open_compact_parens;
23280 $two_line_word_wrap_ok = 0;
23282 elsif ( $flag eq '0' ) {
23283 $two_line_word_wrap_ok = 1;
23286 my $seqno = $type_sequence_to_go[$i_opening_paren];
23287 $two_line_word_wrap_ok =
23288 !$self->match_paren_control_flag( $seqno, $flag );
23293 #-------------------------------------------------------------------
23294 # Section B2: Check for shortcut methods, which avoid treating
23295 # a list as a table for relatively small parenthesized lists. These
23296 # are usually easier to read if not formatted as tables.
23297 #-------------------------------------------------------------------
23299 $packed_lines <= 2 # probably can fit in 2 lines
23300 && $item_count < 9 # doesn't have too many items
23301 && $opening_is_in_block # not a sub-container
23302 && $two_line_word_wrap_ok # ok to wrap this paren list
23306 # Section B2A: Shortcut method 1: for -lp and just one comma:
23307 # This is a no-brainer, just break at the comma.
23309 $is_lp_formatting # -lp
23310 && $item_count == 2 # two items, one comma
23311 && !$must_break_open
23314 my $i_break = $rcomma_index->[0];
23315 $self->set_forced_breakpoint($i_break);
23316 ${$rdo_not_break_apart} = 1;
23321 # Section B2B: Shortcut method 2 is for most small ragged lists
23322 # which might look best if not displayed as a table.
23324 ( $number_of_fields == 2 && $item_count == 3 )
23326 $new_identifier_count > 0 # isn't all quotes
23327 && $sparsity > 0.15
23328 ) # would be fairly spaced gaps if aligned
23332 my $break_count = $self->set_ragged_breakpoints( $ri_term_comma,
23333 $ri_ragged_break_list );
23334 ++$break_count if ($use_separate_first_term);
23336 # NOTE: we should really use the true break count here,
23337 # which can be greater if there are large terms and
23338 # little space, but usually this will work well enough.
23339 unless ($must_break_open) {
23341 if ( $break_count <= 1 ) {
23342 ${$rdo_not_break_apart} = 1;
23344 elsif ( $is_lp_formatting && !$need_lp_break_open ) {
23345 ${$rdo_not_break_apart} = 1;
23351 } ## end shortcut methods
23354 DEBUG_SPARSE && do {
23356 # How many spaces across the page will we fill?
23357 my $columns_per_line =
23358 ( int $number_of_fields / 2 ) * $pair_width +
23359 ( $number_of_fields % 2 ) * $max_width;
23362 "SPARSE:cols=$columns commas=$comma_count items:$item_count ids=$identifier_count pairwidth=$pair_width fields=$number_of_fields lines packed: $packed_lines packed_cols=$packed_columns fmtd:$formatted_lines cols /line:$columns_per_line unused:$unused_columns fmtd:$formatted_columns sparsity=$sparsity allow=$max_allowed_sparsity\n";
23366 #------------------------------------------------------------------
23367 # Section B3: Compound List Rule 2:
23368 # If this list is too long for one line, and it is an item of a
23369 # larger list, then we must format it, regardless of sparsity
23370 # (ian.t). One reason that we have to do this is to trigger
23371 # Compound List Rule 1, above, which causes breaks at all commas of
23372 # all outer lists. In this way, the structure will be properly
23374 #------------------------------------------------------------------
23376 # Decide if this list is too long for one line unless broken
23377 my $total_columns = table_columns_available($i_opening_paren);
23378 my $too_long = $packed_columns > $total_columns;
23380 # For a paren list, include the length of the token just before the
23381 # '(' because this is likely a sub call, and we would have to
23382 # include the sub name on the same line as the list. This is still
23383 # imprecise, but not too bad. (steve.t)
23384 if ( !$too_long && $i_opening_paren > 0 && $opening_token eq '(' ) {
23386 $too_long = $self->excess_line_length( $i_opening_minus,
23387 $i_effective_last_comma + 1 ) > 0;
23390 # TODO: For an item after a '=>', try to include the length of the
23391 # thing before the '=>'. This is crude and should be improved by
23392 # actually looking back token by token.
23393 if ( !$too_long && $i_opening_paren > 0 && $list_type eq '=>' ) {
23394 my $i_opening_minus_test = $i_opening_paren - 4;
23395 if ( $i_opening_minus >= 0 ) {
23396 $too_long = $self->excess_line_length( $i_opening_minus_test,
23397 $i_effective_last_comma + 1 ) > 0;
23401 # Always break lists contained in '[' and '{' if too long for 1 line,
23402 # and always break lists which are too long and part of a more complex
23404 my $must_break_open_container = $must_break_open
23406 && ( $in_hierarchical_list || !$two_line_word_wrap_ok ) );
23408 #--------------------------------------------------------------------
23409 # Section B4: A table will work here. But do not attempt to align
23410 # columns if this is a tiny table or it would be too spaced. It
23411 # seems that the more packed lines we have, the sparser the list that
23412 # can be allowed and still look ok.
23413 #--------------------------------------------------------------------
23415 if ( ( $formatted_lines < 3 && $packed_lines < $formatted_lines )
23416 || ( $formatted_lines < 2 )
23417 || ( $unused_columns > $max_allowed_sparsity * $formatted_columns )
23420 #----------------------------------------------------------------
23421 # Section B4A: too sparse: would not look good aligned in a table
23422 #----------------------------------------------------------------
23424 # use old breakpoints if this is a 'big' list
23425 if ( $packed_lines > 2 && $item_count > 10 ) {
23426 write_logfile_entry("List sparse: using old breakpoints\n");
23427 $self->copy_old_breakpoints( $i_first_comma, $i_last_comma );
23430 # let the continuation logic handle it if 2 lines
23433 my $break_count = $self->set_ragged_breakpoints( $ri_term_comma,
23434 $ri_ragged_break_list );
23435 ++$break_count if ($use_separate_first_term);
23437 unless ($must_break_open_container) {
23438 if ( $break_count <= 1 ) {
23439 ${$rdo_not_break_apart} = 1;
23441 elsif ( $is_lp_formatting && !$need_lp_break_open ) {
23442 ${$rdo_not_break_apart} = 1;
23449 #--------------------------------------------
23450 # Section B4B: Go ahead and format as a table
23451 #--------------------------------------------
23452 $self->write_formatted_table( $number_of_fields, $comma_count,
23453 $rcomma_index, $use_separate_first_term );
23456 } ## end sub break_multiline_list
23458 sub table_layout_A {
23460 my ($rhash_IN) = @_;
23462 # Find lengths of all list items needed to calculate page layout
23465 # - nothing if this list is empty, or
23466 # - a ref to a hash containg some derived parameters
23468 my $i_opening_paren = $rhash_IN->{i_opening_paren};
23469 my $i_closing_paren = $rhash_IN->{i_closing_paren};
23470 my $identifier_count = $rhash_IN->{identifier_count};
23471 my $rcomma_index = $rhash_IN->{rcomma_index};
23472 my $item_count = $rhash_IN->{item_count};
23474 # nothing to do if no commas seen
23475 return if ( $item_count < 1 );
23477 my $i_first_comma = $rcomma_index->[0];
23478 my $i_true_last_comma = $rcomma_index->[ $item_count - 1 ];
23479 my $i_last_comma = $i_true_last_comma;
23480 if ( $i_last_comma >= $max_index_to_go ) {
23482 return if ( $item_count < 1 );
23483 $i_last_comma = $rcomma_index->[ $item_count - 1 ];
23486 my $comma_count = $item_count;
23488 my $ritem_lengths = [];
23489 my $ri_term_begin = [];
23490 my $ri_term_end = [];
23491 my $ri_term_comma = [];
23493 my $rmax_length = [ 0, 0 ];
23496 my $first_term_length;
23497 my $i = $i_opening_paren;
23500 foreach my $j ( 0 .. $comma_count - 1 ) {
23501 $is_odd = 1 - $is_odd;
23502 $i_prev_plus = $i + 1;
23503 $i = $rcomma_index->[$j];
23506 ( $i == 0 || $types_to_go[ $i - 1 ] eq 'b' )
23510 ( $types_to_go[$i_prev_plus] eq 'b' )
23513 push @{$ri_term_begin}, $i_term_begin;
23514 push @{$ri_term_end}, $i_term_end;
23515 push @{$ri_term_comma}, $i;
23517 # note: currently adding 2 to all lengths (for comma and space)
23519 2 + token_sequence_length( $i_term_begin, $i_term_end );
23520 push @{$ritem_lengths}, $length;
23523 $first_term_length = $length;
23527 if ( $length > $rmax_length->[$is_odd] ) {
23528 $rmax_length->[$is_odd] = $length;
23533 # now we have to make a distinction between the comma count and item
23534 # count, because the item count will be one greater than the comma
23535 # count if the last item is not terminated with a comma
23537 ( $types_to_go[ $i_last_comma + 1 ] eq 'b' )
23538 ? $i_last_comma + 1
23541 ( $types_to_go[ $i_closing_paren - 1 ] eq 'b' )
23542 ? $i_closing_paren - 2
23543 : $i_closing_paren - 1;
23544 my $i_effective_last_comma = $i_last_comma;
23546 my $last_item_length = token_sequence_length( $i_b + 1, $i_e );
23548 if ( $last_item_length > 0 ) {
23550 # add 2 to length because other lengths include a comma and a blank
23551 $last_item_length += 2;
23552 push @{$ritem_lengths}, $last_item_length;
23553 push @{$ri_term_begin}, $i_b + 1;
23554 push @{$ri_term_end}, $i_e;
23555 push @{$ri_term_comma}, undef;
23557 my $i_odd = $item_count % 2;
23559 if ( $last_item_length > $rmax_length->[$i_odd] ) {
23560 $rmax_length->[$i_odd] = $last_item_length;
23564 $i_effective_last_comma = $i_e + 1;
23566 if ( $types_to_go[ $i_b + 1 ] =~ /^[iR\]]$/ ) {
23567 $identifier_count++;
23571 # be sure we do not extend beyond the current list length
23572 if ( $i_effective_last_comma >= $max_index_to_go ) {
23573 $i_effective_last_comma = $max_index_to_go - 1;
23576 # Return the hash of derived variables.
23579 # Updated variables
23580 _item_count_A => $item_count,
23581 _identifier_count_A => $identifier_count,
23584 _ritem_lengths => $ritem_lengths,
23585 _ri_term_begin => $ri_term_begin,
23586 _ri_term_end => $ri_term_end,
23587 _ri_term_comma => $ri_term_comma,
23588 _rmax_length => $rmax_length,
23589 _comma_count => $comma_count,
23590 _i_effective_last_comma => $i_effective_last_comma,
23591 _first_term_length => $first_term_length,
23592 _i_first_comma => $i_first_comma,
23593 _i_last_comma => $i_last_comma,
23594 _i_true_last_comma => $i_true_last_comma,
23597 } ## end sub table_layout_A
23599 sub table_layout_B {
23601 my ( $self, $rhash_IN, $rhash_A, $is_lp_formatting ) = @_;
23603 # Determine variables for the best table layout, including
23604 # the best number of fields.
23607 # - nothing if nothing more to do
23608 # - a ref to a hash containg some derived parameters
23610 # Variables from caller
23611 my $i_opening_paren = $rhash_IN->{i_opening_paren};
23612 my $list_type = $rhash_IN->{list_type};
23613 my $next_nonblank_type = $rhash_IN->{next_nonblank_type};
23614 my $rcomma_index = $rhash_IN->{rcomma_index};
23615 my $rdo_not_break_apart = $rhash_IN->{rdo_not_break_apart};
23617 # Table size variables
23618 my $comma_count = $rhash_A->{_comma_count};
23619 my $first_term_length = $rhash_A->{_first_term_length};
23620 my $i_effective_last_comma = $rhash_A->{_i_effective_last_comma};
23621 my $i_first_comma = $rhash_A->{_i_first_comma};
23622 my $identifier_count = $rhash_A->{_identifier_count_A};
23623 my $item_count = $rhash_A->{_item_count_A};
23624 my $ri_term_begin = $rhash_A->{_ri_term_begin};
23625 my $ri_term_comma = $rhash_A->{_ri_term_comma};
23626 my $ri_term_end = $rhash_A->{_ri_term_end};
23627 my $ritem_lengths = $rhash_A->{_ritem_lengths};
23628 my $rmax_length = $rhash_A->{_rmax_length};
23630 # Specify if the list must have an even number of fields or not.
23631 # It is generally safest to assume an even number, because the
23632 # list items might be a hash list. But if we can be sure that
23633 # it is not a hash, then we can allow an odd number for more
23635 # 1 = odd field count ok, 2 = want even count
23636 my $odd_or_even = 2;
23638 $identifier_count >= $item_count - 1
23639 || $is_assignment{$next_nonblank_type}
23641 && $list_type ne '=>'
23642 && $list_type !~ /^[\:\?]$/ )
23648 # do we have a long first term which should be
23649 # left on a line by itself?
23650 my $use_separate_first_term = (
23651 $odd_or_even == 1 # only if we can use 1 field/line
23652 && $item_count > 3 # need several items
23653 && $first_term_length >
23654 2 * $rmax_length->[0] - 2 # need long first term
23655 && $first_term_length >
23656 2 * $rmax_length->[1] - 2 # need long first term
23659 # or do we know from the type of list that the first term should
23661 if ( !$use_separate_first_term ) {
23662 if ( $is_keyword_with_special_leading_term{$list_type} ) {
23663 $use_separate_first_term = 1;
23665 # should the container be broken open?
23666 if ( $item_count < 3 ) {
23667 if ( $i_first_comma - $i_opening_paren < 4 ) {
23668 ${$rdo_not_break_apart} = 1;
23671 elsif ($first_term_length < 20
23672 && $i_first_comma - $i_opening_paren < 4 )
23674 my $columns = table_columns_available($i_first_comma);
23675 if ( $first_term_length < $columns ) {
23676 ${$rdo_not_break_apart} = 1;
23683 if ($use_separate_first_term) {
23685 # ..set a break and update starting values
23686 $self->set_forced_breakpoint($i_first_comma);
23689 #---------------------------------------------------------------
23690 # Section B1A: Stop if one item remains ($i_first_comma = undef)
23691 #---------------------------------------------------------------
23692 # Fix for b1442: use '$item_count' here instead of '$comma_count'
23693 # to make the result independent of any trailing comma.
23694 return if ( $item_count <= 1 );
23696 $i_opening_paren = $i_first_comma;
23697 $i_first_comma = $rcomma_index->[1];
23698 shift @{$ritem_lengths};
23699 shift @{$ri_term_begin};
23700 shift @{$ri_term_end};
23701 shift @{$ri_term_comma};
23704 # if not, update the metrics to include the first term
23706 if ( $first_term_length > $rmax_length->[0] ) {
23707 $rmax_length->[0] = $first_term_length;
23711 # Field width parameters
23712 my $pair_width = ( $rmax_length->[0] + $rmax_length->[1] );
23714 ( $rmax_length->[0] > $rmax_length->[1] )
23715 ? $rmax_length->[0]
23716 : $rmax_length->[1];
23718 # Number of free columns across the page width for laying out tables
23719 my $columns = table_columns_available($i_first_comma);
23721 # Patch for b1210 and b1216-b1218 when -vmll is set. If we are unable
23722 # to break after an opening paren, then the maximum line length for the
23723 # first line could be less than the later lines. So we need to reduce
23724 # the line length. Normally, we will get a break after an opening
23725 # paren, but in some cases we might not.
23726 if ( $rOpts_variable_maximum_line_length
23727 && $tokens_to_go[$i_opening_paren] eq '('
23728 && @{$ri_term_begin} )
23730 my $ib = $ri_term_begin->[0];
23731 my $type = $types_to_go[$ib];
23733 # So far, the only known instance of this problem is when
23734 # a bareword follows an opening paren with -vmll
23735 if ( $type eq 'w' ) {
23737 # If a line starts with paren+space+terms, then its max length
23738 # could be up to ci+2-i spaces less than if the term went out
23739 # on a line after the paren. So..
23740 my $tol_w = max( 0,
23741 2 + $rOpts_continuation_indentation -
23742 $rOpts_indent_columns );
23743 $columns = max( 0, $columns - $tol_w );
23745 ## Here is the original b1210 fix, but it failed on b1216-b1218
23746 ##my $columns2 = table_columns_available($i_opening_paren);
23747 ##$columns = min( $columns, $columns2 );
23751 # Estimated maximum number of fields which fit this space.
23752 # This will be our first guess:
23753 my $number_of_fields_max =
23754 maximum_number_of_fields( $columns, $odd_or_even, $max_width,
23756 my $number_of_fields = $number_of_fields_max;
23758 # Find the best-looking number of fields.
23759 # This will be our second guess, if possible.
23760 my ( $number_of_fields_best, $ri_ragged_break_list,
23761 $new_identifier_count )
23762 = $self->study_list_complexity( $ri_term_begin, $ri_term_end,
23763 $ritem_lengths, $max_width );
23765 if ( $number_of_fields_best != 0
23766 && $number_of_fields_best < $number_of_fields_max )
23768 $number_of_fields = $number_of_fields_best;
23772 elsif ($number_of_fields_best > 1
23773 && $number_of_fields_best > $number_of_fields_max )
23775 $number_of_fields_best = $number_of_fields_max;
23778 # If we are crowded and the -lp option is being used, try
23779 # to undo some indentation
23783 $number_of_fields == 0
23784 || ( $number_of_fields == 1
23785 && $number_of_fields != $number_of_fields_best )
23789 ( $number_of_fields, $number_of_fields_best, $columns ) =
23790 $self->lp_table_fix(
23796 $number_of_fields_best,
23804 # try for one column if two won't work
23805 if ( $number_of_fields <= 0 ) {
23806 $number_of_fields = int( $columns / $max_width );
23809 # The user can place an upper bound on the number of fields,
23810 # which can be useful for doing maintenance on tables
23811 if ( $rOpts_maximum_fields_per_table
23812 && $number_of_fields > $rOpts_maximum_fields_per_table )
23814 $number_of_fields = $rOpts_maximum_fields_per_table;
23817 # How many columns (characters) and lines would this container take
23818 # if no additional whitespace were added?
23819 my $packed_columns = token_sequence_length( $i_opening_paren + 1,
23820 $i_effective_last_comma + 1 );
23821 if ( $columns <= 0 ) { $columns = 1 } # avoid divide by zero
23822 my $packed_lines = 1 + int( $packed_columns / $columns );
23824 #-----------------------------------------------------------------
23825 # Section B1B: Stop here if we did not compute a positive number of
23826 # fields. In this case we just have to bail out.
23827 #-----------------------------------------------------------------
23828 if ( $number_of_fields <= 0 ) {
23830 $self->set_emergency_comma_breakpoints(
23832 $number_of_fields_best,
23841 #------------------------------------------------------------------
23842 # Section B1B: We have a tentative field count that seems to work.
23843 # Now we must look more closely to determine if a table layout will
23844 # actually look okay.
23845 #------------------------------------------------------------------
23847 # How many lines will this require?
23848 my $formatted_lines = $item_count / ($number_of_fields);
23849 if ( $formatted_lines != int $formatted_lines ) {
23850 $formatted_lines = 1 + int $formatted_lines;
23853 # So far we've been trying to fill out to the right margin. But
23854 # compact tables are easier to read, so let's see if we can use fewer
23855 # fields without increasing the number of lines.
23856 $number_of_fields = compactify_table( $item_count, $number_of_fields,
23857 $formatted_lines, $odd_or_even );
23859 my $formatted_columns;
23861 if ( $number_of_fields > 1 ) {
23862 $formatted_columns =
23863 ( $pair_width * ( int( $item_count / 2 ) ) +
23864 ( $item_count % 2 ) * $max_width );
23867 $formatted_columns = $max_width * $item_count;
23869 if ( $formatted_columns < $packed_columns ) {
23870 $formatted_columns = $packed_columns;
23873 # Construce hash_B:
23876 # Updated variables
23877 _i_first_comma_B => $i_first_comma,
23878 _i_opening_paren_B => $i_opening_paren,
23879 _item_count_B => $item_count,
23882 _columns => $columns,
23883 _formatted_columns => $formatted_columns,
23884 _formatted_lines => $formatted_lines,
23885 _max_width => $max_width,
23886 _new_identifier_count => $new_identifier_count,
23887 _number_of_fields => $number_of_fields,
23888 _odd_or_even => $odd_or_even,
23889 _packed_columns => $packed_columns,
23890 _packed_lines => $packed_lines,
23891 _pair_width => $pair_width,
23892 _ri_ragged_break_list => $ri_ragged_break_list,
23893 _use_separate_first_term => $use_separate_first_term,
23895 } ## end sub table_layout_B
23899 # try to undo some -lp indentation to improve table formatting
23909 $number_of_fields_best,
23916 my $available_spaces =
23917 $self->get_available_spaces_to_go($i_first_comma);
23918 if ( $available_spaces > 0 ) {
23920 my $spaces_wanted = $max_width - $columns; # for 1 field
23922 if ( $number_of_fields_best == 0 ) {
23923 $number_of_fields_best =
23924 get_maximum_fields_wanted($ritem_lengths);
23927 if ( $number_of_fields_best != 1 ) {
23928 my $spaces_wanted_2 = 1 + $pair_width - $columns; # for 2 fields
23929 if ( $available_spaces > $spaces_wanted_2 ) {
23930 $spaces_wanted = $spaces_wanted_2;
23934 if ( $spaces_wanted > 0 ) {
23935 my $deleted_spaces =
23936 $self->reduce_lp_indentation( $i_first_comma,
23940 if ( $deleted_spaces > 0 ) {
23941 $columns = table_columns_available($i_first_comma);
23942 $number_of_fields =
23943 maximum_number_of_fields( $columns, $odd_or_even,
23944 $max_width, $pair_width );
23946 if ( $number_of_fields_best == 1
23947 && $number_of_fields >= 1 )
23949 $number_of_fields = $number_of_fields_best;
23954 return ( $number_of_fields, $number_of_fields_best, $columns );
23955 } ## end sub lp_table_fix
23957 sub write_formatted_table {
23959 # Write a table of comma separated items with fixed number of fields
23960 my ( $self, $number_of_fields, $comma_count, $rcomma_index,
23961 $use_separate_first_term )
23964 write_logfile_entry(
23965 "List: auto formatting with $number_of_fields fields/row\n");
23967 my $j_first_break =
23968 $use_separate_first_term
23969 ? $number_of_fields
23970 : $number_of_fields - 1;
23972 my $j = $j_first_break;
23973 while ( $j < $comma_count ) {
23974 my $i_comma = $rcomma_index->[$j];
23975 $self->set_forced_breakpoint($i_comma);
23976 $j += $number_of_fields;
23979 } ## end sub write_formatted_table
23981 } ## end closure set_comma_breakpoint_final
23983 sub study_list_complexity {
23985 # Look for complex tables which should be formatted with one term per line.
23986 # Returns the following:
23988 # \@i_ragged_break_list = list of good breakpoints to avoid lines
23989 # which are hard to read
23990 # $number_of_fields_best = suggested number of fields based on
23991 # complexity; = 0 if any number may be used.
23993 my ( $self, $ri_term_begin, $ri_term_end, $ritem_lengths, $max_width ) = @_;
23994 my $item_count = @{$ri_term_begin};
23995 my $complex_item_count = 0;
23996 my $number_of_fields_best = $rOpts_maximum_fields_per_table;
23997 my $i_max = @{$ritem_lengths} - 1;
23998 ##my @item_complexity;
24000 my $i_last_last_break = -3;
24001 my $i_last_break = -2;
24002 my @i_ragged_break_list;
24004 my $definitely_complex = 30;
24005 my $definitely_simple = 12;
24006 my $quote_count = 0;
24008 for my $i ( 0 .. $i_max ) {
24009 my $ib = $ri_term_begin->[$i];
24010 my $ie = $ri_term_end->[$i];
24012 # define complexity: start with the actual term length
24013 my $weighted_length = ( $ritem_lengths->[$i] - 2 );
24015 ##TBD: join types here and check for variations
24016 ##my $str=join "", @tokens_to_go[$ib..$ie];
24019 if ( $types_to_go[$ib] =~ /^[qQ]$/ ) {
24023 elsif ( $types_to_go[$ib] =~ /^[w\-]$/ ) {
24027 if ( $ib eq $ie ) {
24028 if ( $is_quote && $tokens_to_go[$ib] =~ /\s/ ) {
24029 $complex_item_count++;
24030 $weighted_length *= 2;
24036 if ( grep { $_ eq 'b' } @types_to_go[ $ib .. $ie ] ) {
24037 $complex_item_count++;
24038 $weighted_length *= 2;
24040 if ( grep { $_ eq '..' } @types_to_go[ $ib .. $ie ] ) {
24041 $weighted_length += 4;
24045 # add weight for extra tokens.
24046 $weighted_length += 2 * ( $ie - $ib );
24048 ## my $BUB = join '', @tokens_to_go[$ib..$ie];
24049 ## print "# COMPLEXITY:$weighted_length $BUB\n";
24051 ##push @item_complexity, $weighted_length;
24053 # now mark a ragged break after this item it if it is 'long and
24055 if ( $weighted_length >= $definitely_complex ) {
24057 # if we broke after the previous term
24058 # then break before it too
24059 if ( $i_last_break == $i - 1
24061 && $i_last_last_break != $i - 2 )
24064 ## TODO: don't strand a small term
24065 pop @i_ragged_break_list;
24066 push @i_ragged_break_list, $i - 2;
24067 push @i_ragged_break_list, $i - 1;
24070 push @i_ragged_break_list, $i;
24071 $i_last_last_break = $i_last_break;
24072 $i_last_break = $i;
24075 # don't break before a small last term -- it will
24076 # not look good on a line by itself.
24077 elsif ($i == $i_max
24078 && $i_last_break == $i - 1
24079 && $weighted_length <= $definitely_simple )
24081 pop @i_ragged_break_list;
24085 my $identifier_count = $i_max + 1 - $quote_count;
24087 # Need more tuning here..
24088 if ( $max_width > 12
24089 && $complex_item_count > $item_count / 2
24090 && $number_of_fields_best != 2 )
24092 $number_of_fields_best = 1;
24095 return ( $number_of_fields_best, \@i_ragged_break_list, $identifier_count );
24096 } ## end sub study_list_complexity
24098 sub get_maximum_fields_wanted {
24100 # Not all tables look good with more than one field of items.
24101 # This routine looks at a table and decides if it should be
24102 # formatted with just one field or not.
24103 # This coding is still under development.
24104 my ($ritem_lengths) = @_;
24106 my $number_of_fields_best = 0;
24108 # For just a few items, we tentatively assume just 1 field.
24109 my $item_count = @{$ritem_lengths};
24110 if ( $item_count <= 5 ) {
24111 $number_of_fields_best = 1;
24114 # For larger tables, look at it both ways and see what looks best
24118 my @max_length = ( 0, 0 );
24119 my @last_length_2 = ( undef, undef );
24120 my @first_length_2 = ( undef, undef );
24121 my $last_length = undef;
24122 my $total_variation_1 = 0;
24123 my $total_variation_2 = 0;
24124 my @total_variation_2 = ( 0, 0 );
24126 foreach my $j ( 0 .. $item_count - 1 ) {
24128 $is_odd = 1 - $is_odd;
24129 my $length = $ritem_lengths->[$j];
24130 if ( $length > $max_length[$is_odd] ) {
24131 $max_length[$is_odd] = $length;
24134 if ( defined($last_length) ) {
24135 my $dl = abs( $length - $last_length );
24136 $total_variation_1 += $dl;
24138 $last_length = $length;
24140 my $ll = $last_length_2[$is_odd];
24141 if ( defined($ll) ) {
24142 my $dl = abs( $length - $ll );
24143 $total_variation_2[$is_odd] += $dl;
24146 $first_length_2[$is_odd] = $length;
24148 $last_length_2[$is_odd] = $length;
24150 $total_variation_2 = $total_variation_2[0] + $total_variation_2[1];
24152 my $factor = ( $item_count > 10 ) ? 1 : ( $item_count > 5 ) ? 0.75 : 0;
24153 unless ( $total_variation_2 < $factor * $total_variation_1 ) {
24154 $number_of_fields_best = 1;
24157 return ($number_of_fields_best);
24158 } ## end sub get_maximum_fields_wanted
24160 sub table_columns_available {
24161 my $i_first_comma = shift;
24163 $maximum_line_length_at_level[ $levels_to_go[$i_first_comma] ] -
24164 leading_spaces_to_go($i_first_comma);
24166 # Patch: the vertical formatter does not line up lines whose lengths
24167 # exactly equal the available line length because of allowances
24168 # that must be made for side comments. Therefore, the number of
24169 # available columns is reduced by 1 character.
24172 } ## end sub table_columns_available
24174 sub maximum_number_of_fields {
24176 # how many fields will fit in the available space?
24177 my ( $columns, $odd_or_even, $max_width, $pair_width ) = @_;
24178 my $max_pairs = int( $columns / $pair_width );
24179 my $number_of_fields = $max_pairs * 2;
24180 if ( $odd_or_even == 1
24181 && $max_pairs * $pair_width + $max_width <= $columns )
24183 $number_of_fields++;
24185 return $number_of_fields;
24186 } ## end sub maximum_number_of_fields
24188 sub compactify_table {
24190 # given a table with a certain number of fields and a certain number
24191 # of lines, see if reducing the number of fields will make it look
24193 my ( $item_count, $number_of_fields, $formatted_lines, $odd_or_even ) = @_;
24194 if ( $number_of_fields >= $odd_or_even * 2 && $formatted_lines > 0 ) {
24196 my $min_fields = $number_of_fields;
24198 while ($min_fields >= $odd_or_even
24199 && $min_fields * $formatted_lines >= $item_count )
24201 $number_of_fields = $min_fields;
24202 $min_fields -= $odd_or_even;
24205 return $number_of_fields;
24206 } ## end sub compactify_table
24208 sub set_ragged_breakpoints {
24210 # Set breakpoints in a list that cannot be formatted nicely as a
24212 my ( $self, $ri_term_comma, $ri_ragged_break_list ) = @_;
24214 my $break_count = 0;
24215 foreach ( @{$ri_ragged_break_list} ) {
24216 my $j = $ri_term_comma->[$_];
24218 $self->set_forced_breakpoint($j);
24222 return $break_count;
24223 } ## end sub set_ragged_breakpoints
24225 sub copy_old_breakpoints {
24226 my ( $self, $i_first_comma, $i_last_comma ) = @_;
24227 for my $i ( $i_first_comma .. $i_last_comma ) {
24228 if ( $old_breakpoint_to_go[$i] ) {
24230 # If the comma style is under certain controls, and if this is a
24231 # comma breakpoint with the comma is at the beginning of the next
24232 # line, then we must pass that index instead. This will allow sub
24233 # set_forced_breakpoints to check and follow the user settings. This
24234 # produces a uniform style and can prevent instability (b1422).
24236 # The flag '$controlled_comma_style' will be set if the user
24237 # entered any of -wbb=',' -wba=',' -kbb=',' -kba=','. It is not
24238 # needed or set for the -boc flag.
24240 if ( $types_to_go[$ibreak] ne ',' && $controlled_comma_style ) {
24241 my $index = $inext_to_go[$ibreak];
24242 if ( $index > $ibreak && $types_to_go[$index] eq ',' ) {
24246 $self->set_forced_breakpoint($ibreak);
24250 } ## end sub copy_old_breakpoints
24253 my ( $self, $i, $j ) = @_;
24254 if ( $i >= 0 && $i <= $j && $j <= $max_index_to_go ) {
24257 my ( $a, $b, $c ) = caller();
24259 "NOBREAK: forced_breakpoint $forced_breakpoint_count from $a $c with i=$i max=$max_index_to_go type=$types_to_go[$i]\n";
24262 @nobreak_to_go[ $i .. $j ] = (1) x ( $j - $i + 1 );
24265 # shouldn't happen; non-critical error
24268 my ( $a, $b, $c ) = caller();
24270 NOBREAK ERROR: from $a $c with i=$i j=$j max=$max_index_to_go
24275 } ## end sub set_nobreaks
24277 ###############################################
24278 # CODE SECTION 12: Code for setting indentation
24279 ###############################################
24281 sub token_sequence_length {
24283 # return length of tokens ($ibeg .. $iend) including $ibeg & $iend
24284 my ( $ibeg, $iend ) = @_;
24286 # fix possible negative starting index
24287 if ( $ibeg < 0 ) { $ibeg = 0 }
24289 # returns 0 if index range is empty (some subs assume this)
24290 if ( $ibeg > $iend ) {
24294 return $summed_lengths_to_go[ $iend + 1 ] - $summed_lengths_to_go[$ibeg];
24295 } ## end sub token_sequence_length
24297 sub total_line_length {
24299 # return length of a line of tokens ($ibeg .. $iend)
24300 my ( $ibeg, $iend ) = @_;
24302 # Start with the leading spaces on this line ...
24303 my $length = $leading_spaces_to_go[$ibeg];
24304 if ( ref($length) ) { $length = $length->get_spaces() }
24306 # ... then add the net token length
24308 $summed_lengths_to_go[ $iend + 1 ] - $summed_lengths_to_go[$ibeg];
24311 } ## end sub total_line_length
24313 sub excess_line_length {
24315 # return number of characters by which a line of tokens ($ibeg..$iend)
24316 # exceeds the allowable line length.
24317 # NOTE: profiling shows that efficiency of this routine is essential.
24319 my ( $self, $ibeg, $iend, $ignore_right_weld ) = @_;
24321 # Start with the leading spaces on this line ...
24322 my $excess = $leading_spaces_to_go[$ibeg];
24323 if ( ref($excess) ) { $excess = $excess->get_spaces() }
24325 # ... then add the net token length, minus the maximum length
24327 $summed_lengths_to_go[ $iend + 1 ] -
24328 $summed_lengths_to_go[$ibeg] -
24329 $maximum_line_length_at_level[ $levels_to_go[$ibeg] ];
24331 # ... and include right weld lengths unless requested not to
24332 if ( $total_weld_count
24333 && $type_sequence_to_go[$iend]
24334 && !$ignore_right_weld )
24336 my $wr = $self->[_rweld_len_right_at_K_]->{ $K_to_go[$iend] };
24337 $excess += $wr if defined($wr);
24341 } ## end sub excess_line_length
24345 # return the number of leading spaces associated with an indentation
24346 # variable $indentation is either a constant number of spaces or an object
24347 # with a get_spaces method.
24348 my $indentation = shift;
24349 return ref($indentation) ? $indentation->get_spaces() : $indentation;
24350 } ## end sub get_spaces
24352 sub get_recoverable_spaces {
24354 # return the number of spaces (+ means shift right, - means shift left)
24355 # that we would like to shift a group of lines with the same indentation
24356 # to get them to line up with their opening parens
24357 my $indentation = shift;
24358 return ref($indentation) ? $indentation->get_recoverable_spaces() : 0;
24359 } ## end sub get_recoverable_spaces
24361 sub get_available_spaces_to_go {
24363 my ( $self, $ii ) = @_;
24364 my $item = $leading_spaces_to_go[$ii];
24366 # return the number of available leading spaces associated with an
24367 # indentation variable. $indentation is either a constant number of
24368 # spaces or an object with a get_available_spaces method.
24369 return ref($item) ? $item->get_available_spaces() : 0;
24370 } ## end sub get_available_spaces_to_go
24372 { ## begin closure set_lp_indentation
24374 use constant DEBUG_LP => 0;
24376 # Stack of -lp index objects which survives between batches.
24380 # The predicted position of the next opening container which may start
24381 # an -lp indentation level. This survives between batches.
24382 my $lp_position_predictor;
24386 # Index names for the -lp stack variables.
24387 # Do not combine with other BEGIN blocks (c101).
24391 _lp_ci_level_ => $i++,
24392 _lp_level_ => $i++,
24393 _lp_object_ => $i++,
24394 _lp_container_seqno_ => $i++,
24395 _lp_space_count_ => $i++,
24399 sub initialize_lp_vars {
24401 # initialize gnu variables for a new file;
24402 # must be called once at the start of a new file.
24404 $lp_position_predictor = 0;
24407 # we can turn off -lp if all levels will be at or above the cutoff
24408 if ( $high_stress_level <= 1 ) {
24409 $rOpts_line_up_parentheses = 0;
24410 $rOpts_extended_line_up_parentheses = 0;
24415 # initialize the leading whitespace stack to negative levels
24416 # so that we can never run off the end of the stack
24417 $rLP->[$max_lp_stack]->[_lp_ci_level_] = -1;
24418 $rLP->[$max_lp_stack]->[_lp_level_] = -1;
24419 $rLP->[$max_lp_stack]->[_lp_object_] = undef;
24420 $rLP->[$max_lp_stack]->[_lp_container_seqno_] = SEQ_ROOT;
24421 $rLP->[$max_lp_stack]->[_lp_space_count_] = 0;
24424 } ## end sub initialize_lp_vars
24426 # hashes for efficient testing
24432 my @q = qw< } ) ] >;
24433 @hash_test1{@q} = (1) x scalar(@q);
24436 @hash_test2{@q} = (1) x scalar(@q);
24437 @q = qw( . || && );
24438 @hash_test3{@q} = (1) x scalar(@q);
24441 # shared variables, re-initialized for each batch
24442 my $rlp_object_list;
24443 my $max_lp_object_list;
24444 my %lp_comma_count;
24445 my %lp_arrow_count;
24448 my $current_ci_level;
24452 my $K_last_nonblank;
24453 my $last_nonblank_token;
24454 my $last_nonblank_type;
24455 my $last_last_nonblank_type;
24457 sub set_lp_indentation {
24461 #------------------------------------------------------------------
24462 # Define the leading whitespace for all tokens in the current batch
24463 # when the -lp formatting is selected.
24464 #------------------------------------------------------------------
24466 return unless ($rOpts_line_up_parentheses);
24467 return unless ( defined($max_index_to_go) && $max_index_to_go >= 0 );
24469 # List of -lp indentation objects created in this batch
24470 $rlp_object_list = [];
24471 $max_lp_object_list = -1;
24473 %lp_comma_count = ();
24474 %lp_arrow_count = ();
24475 $space_count = undef;
24476 $current_level = undef;
24477 $current_ci_level = undef;
24478 $ii_begin_line = 0;
24480 $stack_changed = 1;
24481 $K_last_nonblank = undef;
24482 $last_nonblank_token = EMPTY_STRING;
24483 $last_nonblank_type = EMPTY_STRING;
24484 $last_last_nonblank_type = EMPTY_STRING;
24486 my %last_lp_equals = ();
24488 my $rLL = $self->[_rLL_];
24489 my $starting_in_quote = $self->[_this_batch_]->[_starting_in_quote_];
24493 # The 'starting_in_quote' flag means that the first token is the first
24494 # token of a line and it is also the continuation of some kind of
24495 # multi-line quote or pattern. It must have no added leading
24496 # whitespace, so we can skip it.
24497 if ($starting_in_quote) {
24501 my $Kpnb = $K_to_go[0] - 1;
24502 if ( $Kpnb > 0 && $rLL->[$Kpnb]->[_TYPE_] eq 'b' ) {
24505 if ( $Kpnb >= 0 && $rLL->[$Kpnb]->[_TYPE_] ne 'b' ) {
24506 $K_last_nonblank = $Kpnb;
24509 if ( defined($K_last_nonblank) ) {
24510 $last_nonblank_token = $rLL->[$K_last_nonblank]->[_TOKEN_];
24511 $last_nonblank_type = $rLL->[$K_last_nonblank]->[_TYPE_];
24514 #-----------------------------------
24515 # Loop over all tokens in this batch
24516 #-----------------------------------
24517 foreach my $ii ( $imin .. $max_index_to_go ) {
24519 my $type = $types_to_go[$ii];
24520 my $token = $tokens_to_go[$ii];
24521 my $level = $levels_to_go[$ii];
24522 my $ci_level = $ci_levels_to_go[$ii];
24523 my $total_depth = $nesting_depth_to_go[$ii];
24525 # get the top state from the stack if it has changed
24526 if ($stack_changed) {
24527 my $rLP_top = $rLP->[$max_lp_stack];
24528 my $lp_object = $rLP_top->[_lp_object_];
24530 ( $space_count, $current_level, $current_ci_level ) =
24531 @{ $lp_object->get_spaces_level_ci() };
24534 $current_ci_level = $rLP_top->[_lp_ci_level_];
24535 $current_level = $rLP_top->[_lp_level_];
24536 $space_count = $rLP_top->[_lp_space_count_];
24538 $stack_changed = 0;
24541 #------------------------------------------------------------
24542 # Break at a previous '=' if necessary to control line length
24543 #------------------------------------------------------------
24544 if ( $type eq '{' || $type eq '(' ) {
24545 $lp_comma_count{ $total_depth + 1 } = 0;
24546 $lp_arrow_count{ $total_depth + 1 } = 0;
24548 # If we come to an opening token after an '=' token of some
24549 # type, see if it would be helpful to 'break' after the '=' to
24551 my $ii_last_equals = $last_lp_equals{$total_depth};
24552 if ($ii_last_equals) {
24553 $self->lp_equals_break_check( $ii, $ii_last_equals );
24557 #------------------------
24558 # Handle decreasing depth
24559 #------------------------
24560 # Note that one token may have both decreasing and then increasing
24561 # depth. For example, (level, ci) can go from (1,1) to (2,0). So,
24562 # in this example we would first go back to (1,0) then up to (2,0)
24563 # in a single call.
24564 if ( $level < $current_level || $ci_level < $current_ci_level ) {
24565 $self->lp_decreasing_depth($ii);
24568 #------------------------
24569 # handle increasing depth
24570 #------------------------
24571 if ( $level > $current_level || $ci_level > $current_ci_level ) {
24572 $self->lp_increasing_depth($ii);
24575 #------------------
24576 # Handle all tokens
24577 #------------------
24578 if ( $type ne 'b' ) {
24580 # Count commas and look for non-list characters. Once we see a
24581 # non-list character, we give up and don't look for any more
24583 if ( $type eq '=>' ) {
24584 $lp_arrow_count{$total_depth}++;
24586 # remember '=>' like '=' for estimating breaks (but see
24587 # above note for b1035)
24588 $last_lp_equals{$total_depth} = $ii;
24591 elsif ( $type eq ',' ) {
24592 $lp_comma_count{$total_depth}++;
24595 elsif ( $is_assignment{$type} ) {
24596 $last_lp_equals{$total_depth} = $ii;
24599 # this token might start a new line if ..
24601 $ii > $ii_begin_line
24605 # this is the first nonblank token of the line
24606 $ii == 1 && $types_to_go[0] eq 'b'
24608 # or previous character was one of these:
24610 || $hash_test2{$last_nonblank_type}
24612 # or previous character was opening and this is not
24614 || ( $last_nonblank_type eq '{' && $type ne '}' )
24615 || ( $last_nonblank_type eq '(' and $type ne ')' )
24617 # or this token is one of these:
24618 # /^([\.]|\|\||\&\&)$/
24619 || $hash_test3{$type}
24621 # or this is a closing structure
24622 || ( $last_nonblank_type eq '}'
24623 && $last_nonblank_token eq $last_nonblank_type )
24625 # or previous token was keyword 'return'
24627 $last_nonblank_type eq 'k'
24628 && ( $last_nonblank_token eq 'return'
24632 # or starting a new line at certain keywords is fine
24634 && $is_if_unless_and_or_last_next_redo_return{
24637 # or this is after an assignment after a closing
24640 $is_assignment{$last_nonblank_type}
24643 $hash_test1{$last_last_nonblank_type}
24645 # and it is significantly to the right
24646 || $lp_position_predictor > (
24647 $maximum_line_length_at_level[$level] -
24648 $rOpts_maximum_line_length / 2
24655 check_for_long_gnu_style_lines($ii);
24656 $ii_begin_line = $ii;
24658 # back up 1 token if we want to break before that type
24659 # otherwise, we may strand tokens like '?' or ':' on a line
24660 if ( $ii_begin_line > 0 ) {
24662 $last_nonblank_type eq 'k'
24663 ? $want_break_before{$last_nonblank_token}
24664 : $want_break_before{$last_nonblank_type};
24665 $ii_begin_line-- if ($wbb);
24669 $K_last_nonblank = $K_to_go[$ii];
24670 $last_last_nonblank_type = $last_nonblank_type;
24671 $last_nonblank_type = $type;
24672 $last_nonblank_token = $token;
24674 } ## end if ( $type ne 'b' )
24676 # remember the predicted position of this token on the output line
24677 if ( $ii > $ii_begin_line ) {
24679 ## NOTE: this is a critical loop - the following call has been
24680 ## expanded for about 2x speedup:
24681 ## $lp_position_predictor =
24682 ## total_line_length( $ii_begin_line, $ii );
24684 my $indentation = $leading_spaces_to_go[$ii_begin_line];
24685 if ( ref($indentation) ) {
24686 $indentation = $indentation->get_spaces();
24688 $lp_position_predictor =
24690 $summed_lengths_to_go[ $ii + 1 ] -
24691 $summed_lengths_to_go[$ii_begin_line];
24694 $lp_position_predictor =
24695 $space_count + $token_lengths_to_go[$ii];
24698 # Store the indentation object for this token.
24699 # This allows us to manipulate the leading whitespace
24700 # (in case we have to reduce indentation to fit a line) without
24701 # having to change any token values.
24703 #---------------------------------------------------------------
24704 # replace leading whitespace with indentation objects where used
24705 #---------------------------------------------------------------
24706 if ( $rLP->[$max_lp_stack]->[_lp_object_] ) {
24707 my $lp_object = $rLP->[$max_lp_stack]->[_lp_object_];
24708 $leading_spaces_to_go[$ii] = $lp_object;
24709 if ( $max_lp_stack > 0
24711 && $rLP->[ $max_lp_stack - 1 ]->[_lp_object_] )
24713 $reduced_spaces_to_go[$ii] =
24714 $rLP->[ $max_lp_stack - 1 ]->[_lp_object_];
24717 $reduced_spaces_to_go[$ii] = $lp_object;
24720 } ## end loop over all tokens in this batch
24722 undo_incomplete_lp_indentation()
24723 if ( !$rOpts_extended_line_up_parentheses );
24726 } ## end sub set_lp_indentation
24728 sub lp_equals_break_check {
24730 my ( $self, $ii, $ii_last_equals ) = @_;
24732 # If we come to an opening token after an '=' token of some
24733 # type, see if it would be helpful to 'break' after the '=' to
24737 # $ii = index of an opening token in the output batch
24738 # $ii_begin_line = index of token starting next output line
24740 # $lp_position_predictor - updated position predictor
24741 # $ii_begin_line = updated starting token index
24743 # Skip an empty set of parens, such as after channel():
24744 # my $exchange = $self->_channel()->exchange(
24745 # This fixes issues b1318 b1322 b1323 b1328
24746 my $is_empty_container;
24747 if ( $ii_last_equals && $ii < $max_index_to_go ) {
24748 my $seqno = $type_sequence_to_go[$ii];
24749 my $inext_nb = $ii + 1;
24751 if ( $types_to_go[$inext_nb] eq 'b' );
24752 my $seqno_nb = $type_sequence_to_go[$inext_nb];
24753 $is_empty_container = $seqno && $seqno_nb && $seqno_nb == $seqno;
24756 if ( $ii_last_equals
24757 && $ii_last_equals > $ii_begin_line
24758 && !$is_empty_container )
24761 my $seqno = $type_sequence_to_go[$ii];
24763 # find the position if we break at the '='
24764 my $i_test = $ii_last_equals;
24766 # Fix for issue b1229, check if want break before this token
24767 # Fix for issue b1356, if i_test is a blank, the leading spaces may
24768 # be incorrect (if it was an interline blank).
24769 # Fix for issue b1357 .. b1370, i_test must be prev nonblank
24770 # ( the ci value for blanks can vary )
24771 # See also case b223
24772 # Fix for issue b1371-b1374 : all of these and the above are fixed
24773 # by simply backing up one index and setting the leading spaces of
24774 # a blank equal to that of the equals.
24775 if ( $want_break_before{ $types_to_go[$i_test] } ) {
24777 $leading_spaces_to_go[$i_test] =
24778 $leading_spaces_to_go[$ii_last_equals]
24779 if ( $types_to_go[$i_test] eq 'b' );
24781 elsif ( $types_to_go[ $i_test + 1 ] eq 'b' ) { $i_test++ }
24783 my $test_position = total_line_length( $i_test, $ii );
24784 my $mll = $maximum_line_length_at_level[ $levels_to_go[$i_test] ];
24786 #------------------------------------------------------
24787 # Break if structure will reach the maximum line length
24788 #------------------------------------------------------
24790 # Historically, -lp just used one-half line length here
24791 my $len_increase = $rOpts_maximum_line_length / 2;
24793 # For -xlp, we can also use the pre-computed lengths
24794 my $min_len = $self->[_rcollapsed_length_by_seqno_]->{$seqno};
24795 if ( $min_len && $min_len > $len_increase ) {
24796 $len_increase = $min_len;
24801 # if we might exceed the maximum line length
24802 $lp_position_predictor + $len_increase > $mll
24804 # if a -bbx flag WANTS a break before this opening token
24806 && $self->[_rbreak_before_container_by_seqno_]->{$seqno} )
24808 # or we are beyond the 1/4 point and there was an old
24809 # break at an assignment (not '=>') [fix for b1035]
24811 $lp_position_predictor >
24812 $mll - $rOpts_maximum_line_length * 3 / 4
24813 && $types_to_go[$ii_last_equals] ne '=>'
24815 $old_breakpoint_to_go[$ii_last_equals]
24816 || ( $ii_last_equals > 0
24817 && $old_breakpoint_to_go[ $ii_last_equals - 1 ] )
24818 || ( $ii_last_equals > 1
24819 && $types_to_go[ $ii_last_equals - 1 ] eq 'b'
24820 && $old_breakpoint_to_go[ $ii_last_equals - 2 ] )
24826 # then make the switch -- note that we do not set a
24827 # real breakpoint here because we may not really need
24828 # one; sub break_lists will do that if necessary.
24830 my $Kc = $self->[_K_closing_container_]->{$seqno};
24833 # For -lp, only if the closing token is in this
24834 # batch (c117). Otherwise it cannot be done by sub
24836 defined($Kc) && $Kc <= $K_to_go[$max_index_to_go]
24838 # For -xlp, we only need one nonblank token after
24839 # the opening token.
24840 || $rOpts_extended_line_up_parentheses
24843 $ii_begin_line = $i_test + 1;
24844 $lp_position_predictor = $test_position;
24846 #--------------------------------------------------
24847 # Fix for an opening container terminating a batch:
24848 #--------------------------------------------------
24849 # To get alignment of a -lp container with its
24850 # contents, we have to put a break after $i_test.
24851 # For $ii<$max_index_to_go, this will be done by
24852 # sub break_lists based on the indentation object.
24853 # But for $ii=$max_index_to_go, the indentation
24854 # object for this seqno will not be created until
24855 # the next batch, so we have to set a break at
24856 # $i_test right now in order to get one.
24857 if ( $ii == $max_index_to_go
24858 && !$block_type_to_go[$ii]
24859 && $types_to_go[$ii] eq '{'
24861 && !$self->[_ris_excluded_lp_container_]->{$seqno} )
24863 $self->set_forced_lp_break( $ii_begin_line, $ii );
24869 } ## end sub lp_equals_break_check
24871 sub lp_decreasing_depth {
24872 my ( $self, $ii ) = @_;
24874 my $rLL = $self->[_rLL_];
24876 my $level = $levels_to_go[$ii];
24877 my $ci_level = $ci_levels_to_go[$ii];
24879 # loop to find the first entry at or completely below this level
24882 # Be sure we have not hit the stack bottom - should never
24883 # happen because only negative levels can get here, and
24884 # $level was forced to be positive above.
24885 if ( !$max_lp_stack ) {
24887 # non-fatal, just keep going except in DEVEL_MODE
24890 program bug with -lp: stack_error. level=$level; ci_level=$ci_level; rerun with -nlp
24896 # save index of token which closes this level
24897 if ( $rLP->[$max_lp_stack]->[_lp_object_] ) {
24898 my $lp_object = $rLP->[$max_lp_stack]->[_lp_object_];
24900 $lp_object->set_closed($ii);
24902 my $comma_count = 0;
24903 my $arrow_count = 0;
24904 my $type = $types_to_go[$ii];
24905 if ( $type eq '}' || $type eq ')' ) {
24906 my $total_depth = $nesting_depth_to_go[$ii];
24907 $comma_count = $lp_comma_count{$total_depth};
24908 $arrow_count = $lp_arrow_count{$total_depth};
24909 $comma_count = 0 unless $comma_count;
24910 $arrow_count = 0 unless $arrow_count;
24913 $lp_object->set_comma_count($comma_count);
24914 $lp_object->set_arrow_count($arrow_count);
24916 # Undo any extra indentation if we saw no commas
24917 my $available_spaces = $lp_object->get_available_spaces();
24918 my $K_start = $lp_object->get_K_begin_line();
24920 if ( $available_spaces > 0
24921 && $K_start >= $K_to_go[0]
24922 && ( $comma_count <= 0 || $arrow_count > 0 ) )
24925 my $i = $lp_object->get_lp_item_index();
24927 # Safety check for a valid stack index. It
24928 # should be ok because we just checked that the
24929 # index K of the token associated with this
24930 # indentation is in this batch.
24931 if ( $i < 0 || $i > $max_lp_object_list ) {
24932 my $KK = $K_to_go[$ii];
24933 my $lno = $rLL->[$KK]->[_LINE_INDEX_];
24934 DEVEL_MODE && Fault(<<EOM);
24935 Program bug with -lp near line $lno. Stack index i=$i should be >=0 and <= max=$max_lp_object_list
24940 if ( $arrow_count == 0 ) {
24941 $rlp_object_list->[$i]
24942 ->permanently_decrease_available_spaces(
24943 $available_spaces);
24946 $rlp_object_list->[$i]
24947 ->tentatively_decrease_available_spaces(
24948 $available_spaces);
24950 foreach my $j ( $i + 1 .. $max_lp_object_list ) {
24951 $rlp_object_list->[$j]
24952 ->decrease_SPACES($available_spaces);
24957 # go down one level
24960 my $rLP_top = $rLP->[$max_lp_stack];
24961 my $ci_lev = $rLP_top->[_lp_ci_level_];
24962 my $lev = $rLP_top->[_lp_level_];
24963 my $spaces = $rLP_top->[_lp_space_count_];
24964 if ( $rLP_top->[_lp_object_] ) {
24965 my $lp_obj = $rLP_top->[_lp_object_];
24966 ( $spaces, $lev, $ci_lev ) =
24967 @{ $lp_obj->get_spaces_level_ci() };
24970 # stop when we reach a level at or below the current
24972 if ( $lev <= $level && $ci_lev <= $ci_level ) {
24973 $space_count = $spaces;
24974 $current_level = $lev;
24975 $current_ci_level = $ci_lev;
24980 } ## end sub lp_decreasing_depth
24982 sub lp_increasing_depth {
24983 my ( $self, $ii ) = @_;
24985 my $rLL = $self->[_rLL_];
24987 my $type = $types_to_go[$ii];
24988 my $level = $levels_to_go[$ii];
24989 my $ci_level = $ci_levels_to_go[$ii];
24991 $stack_changed = 1;
24993 # Compute the standard incremental whitespace. This will be
24994 # the minimum incremental whitespace that will be used. This
24995 # choice results in a smooth transition between the gnu-style
24996 # and the standard style.
24997 my $standard_increment =
24998 ( $level - $current_level ) * $rOpts_indent_columns +
24999 ( $ci_level - $current_ci_level ) * $rOpts_continuation_indentation;
25001 # Now we have to define how much extra incremental space
25002 # ("$available_space") we want. This extra space will be
25003 # reduced as necessary when long lines are encountered or when
25004 # it becomes clear that we do not have a good list.
25005 my $available_spaces = 0;
25006 my $align_seqno = 0;
25009 my $last_nonblank_seqno;
25010 my $last_nonblank_block_type;
25011 if ( defined($K_last_nonblank) ) {
25012 $last_nonblank_seqno = $rLL->[$K_last_nonblank]->[_TYPE_SEQUENCE_];
25013 $last_nonblank_block_type =
25014 $last_nonblank_seqno
25015 ? $self->[_rblock_type_of_seqno_]->{$last_nonblank_seqno}
25019 $in_lp_mode = $rLP->[$max_lp_stack]->[_lp_object_];
25021 #-----------------------------------------------
25022 # Initialize indentation spaces on empty stack..
25023 #-----------------------------------------------
25024 if ( $max_lp_stack == 0 ) {
25025 $space_count = $level * $rOpts_indent_columns;
25028 #----------------------------------------
25029 # Add the standard space increment if ...
25030 #----------------------------------------
25033 # if this is a BLOCK, add the standard increment
25034 $last_nonblank_block_type
25036 # or if this is not a sequenced item
25037 || !$last_nonblank_seqno
25039 # or this container is excluded by user rules
25040 # or contains here-docs or multiline qw text
25041 || defined($last_nonblank_seqno)
25042 && $self->[_ris_excluded_lp_container_]->{$last_nonblank_seqno}
25044 # or if last nonblank token was not structural indentation
25045 || $last_nonblank_type ne '{'
25047 # and do not start -lp under stress .. fixes b1244, b1255
25048 || !$in_lp_mode && $level >= $high_stress_level
25053 # If we have entered lp mode, use the top lp object to get
25054 # the current indentation spaces because it may have
25055 # changed. Fixes b1285, b1286.
25057 $space_count = $in_lp_mode->get_spaces();
25059 $space_count += $standard_increment;
25062 #---------------------------------------------------------------
25063 # -lp mode: try to use space to the first non-blank level change
25064 #---------------------------------------------------------------
25067 # see how much space we have available
25068 my $test_space_count = $lp_position_predictor;
25071 $self->[_rcollapsed_length_by_seqno_]->{$last_nonblank_seqno};
25072 my $next_opening_too_far;
25074 if ( defined($min_len) ) {
25076 $test_space_count +
25078 $maximum_line_length_at_level[$level];
25079 if ( $excess > 0 ) {
25080 $test_space_count -= $excess;
25082 # will the next opening token be a long way out?
25083 $next_opening_too_far =
25084 $lp_position_predictor + $excess >
25085 $maximum_line_length_at_level[$level];
25089 my $rLP_top = $rLP->[$max_lp_stack];
25090 my $min_gnu_indentation = $rLP_top->[_lp_space_count_];
25091 if ( $rLP_top->[_lp_object_] ) {
25092 $min_gnu_indentation = $rLP_top->[_lp_object_]->get_spaces();
25094 $available_spaces = $test_space_count - $min_gnu_indentation;
25096 # Do not startup -lp indentation mode if no space ...
25097 # ... or if it puts the opening far to the right
25099 && ( $available_spaces <= 0 || $next_opening_too_far ) )
25101 $space_count += $standard_increment;
25102 $available_spaces = 0;
25107 $space_count = $test_space_count;
25110 if ( $available_spaces >= $standard_increment ) {
25111 $min_gnu_indentation += $standard_increment;
25113 elsif ( $available_spaces > 1 ) {
25114 $min_gnu_indentation += $available_spaces + 1;
25116 # The "+1" space can cause mis-alignment if there is no
25117 # blank space between the opening paren and the next
25118 # nonblank token (i.e., -pt=2) and the container does not
25119 # get broken open. So we will mark this token for later
25120 # space removal by sub 'xlp_tweak' if this container
25121 # remains intact (issue git #106).
25125 # Skip if the maximum line length is exceeded here
25128 # This is only for level changes, not ci level changes.
25129 # But note: this test is here out of caution but I have
25130 # not found a case where it is actually necessary.
25131 && $is_opening_token{$last_nonblank_token}
25133 # Be sure we are at consecutive nonblanks. This test
25134 # should be true, but it guards against future coding
25135 # changes to level values assigned to blank spaces.
25137 && $types_to_go[ $ii - 1 ] ne 'b'
25141 $K_extra_space = $K_to_go[$ii];
25144 elsif ( $is_opening_token{$last_nonblank_token} ) {
25145 if ( ( $tightness{$last_nonblank_token} < 2 ) ) {
25146 $min_gnu_indentation += 2;
25149 $min_gnu_indentation += 1;
25153 $min_gnu_indentation += $standard_increment;
25155 $available_spaces = $space_count - $min_gnu_indentation;
25157 if ( $available_spaces < 0 ) {
25158 $space_count = $min_gnu_indentation;
25159 $available_spaces = 0;
25161 $align_seqno = $last_nonblank_seqno;
25165 #-------------------------------------------
25166 # update the state, but not on a blank token
25167 #-------------------------------------------
25168 if ( $type ne 'b' ) {
25170 if ( $rLP->[$max_lp_stack]->[_lp_object_] ) {
25171 $rLP->[$max_lp_stack]->[_lp_object_]->set_have_child(1);
25175 #----------------------------------------
25176 # Create indentation object if in lp-mode
25177 #----------------------------------------
25182 # A negative level implies not to store the item in the
25184 my $lp_item_index = 0;
25185 if ( $level >= 0 ) {
25186 $lp_item_index = ++$max_lp_object_list;
25189 my $K_begin_line = 0;
25190 if ( $ii_begin_line >= 0
25191 && $ii_begin_line <= $max_index_to_go )
25193 $K_begin_line = $K_to_go[$ii_begin_line];
25196 # Minor Fix: when creating indentation at a side
25197 # comment we don't know what the space to the actual
25198 # next code token will be. We will allow a space for
25199 # sub correct_lp to move it in if necessary.
25201 && $max_index_to_go > 0
25204 $available_spaces += 1;
25207 my $standard_spaces = $leading_spaces_to_go[$ii];
25208 $lp_object = Perl::Tidy::IndentationItem->new(
25209 spaces => $space_count,
25211 ci_level => $ci_level,
25212 available_spaces => $available_spaces,
25213 lp_item_index => $lp_item_index,
25214 align_seqno => $align_seqno,
25215 stack_depth => $max_lp_stack,
25216 K_begin_line => $K_begin_line,
25217 standard_spaces => $standard_spaces,
25218 K_extra_space => $K_extra_space,
25222 my $tok_beg = $rLL->[$K_begin_line]->[_TOKEN_];
25223 my $token = $tokens_to_go[$ii];
25224 print STDERR <<EOM;
25225 DEBUG_LP: Created object at tok=$token type=$type for seqno $align_seqno level=$level ci=$ci_level spaces=$space_count avail=$available_spaces kbeg=$K_begin_line tokbeg=$tok_beg lp=$lp_position_predictor
25229 if ( $level >= 0 ) {
25230 $rlp_object_list->[$max_lp_object_list] = $lp_object;
25233 if ( $is_opening_token{$last_nonblank_token}
25234 && $last_nonblank_seqno )
25236 $self->[_rlp_object_by_seqno_]->{$last_nonblank_seqno} =
25241 #------------------------------------
25242 # Store this indentation on the stack
25243 #------------------------------------
25244 $rLP->[$max_lp_stack]->[_lp_ci_level_] = $ci_level;
25245 $rLP->[$max_lp_stack]->[_lp_level_] = $level;
25246 $rLP->[$max_lp_stack]->[_lp_object_] = $lp_object;
25247 $rLP->[$max_lp_stack]->[_lp_container_seqno_] =
25248 $last_nonblank_seqno;
25249 $rLP->[$max_lp_stack]->[_lp_space_count_] = $space_count;
25251 # If the opening paren is beyond the half-line length, then
25252 # we will use the minimum (standard) indentation. This will
25253 # help avoid problems associated with running out of space
25254 # near the end of a line. As a result, in deeply nested
25255 # lists, there will be some indentations which are limited
25256 # to this minimum standard indentation. But the most deeply
25257 # nested container will still probably be able to shift its
25258 # parameters to the right for proper alignment, so in most
25259 # cases this will not be noticeable.
25260 if ( $available_spaces > 0 && $lp_object ) {
25262 $maximum_line_length_at_level[$level] -
25263 $rOpts_maximum_line_length / 2;
25264 $lp_object->tentatively_decrease_available_spaces(
25266 if ( $space_count > $halfway );
25270 } ## end sub lp_increasing_depth
25272 sub check_for_long_gnu_style_lines {
25274 # look at the current estimated maximum line length, and
25275 # remove some whitespace if it exceeds the desired maximum
25276 my ($ii_to_go) = @_;
25278 # nothing can be done if no stack items defined for this line
25279 return if ( $max_lp_object_list < 0 );
25281 # See if we have exceeded the maximum desired line length ..
25282 # keep 2 extra free because they are needed in some cases
25283 # (result of trial-and-error testing)
25286 # But reduce tol to 0 at a terminal comma; fixes b1432
25287 if ( $tokens_to_go[$ii_to_go] eq ','
25288 && $ii_to_go < $max_index_to_go )
25290 my $in = $ii_to_go + 1;
25291 if ( $types_to_go[$in] eq 'b' && $in < $max_index_to_go ) { $in++ }
25292 if ( $is_closing_token{ $tokens_to_go[$in] } ) {
25297 my $spaces_needed =
25298 $lp_position_predictor -
25299 $maximum_line_length_at_level[ $levels_to_go[$ii_to_go] ] +
25302 return if ( $spaces_needed <= 0 );
25304 # We are over the limit, so try to remove a requested number of
25305 # spaces from leading whitespace. We are only allowed to remove
25306 # from whitespace items created on this batch, since others have
25307 # already been used and cannot be undone.
25308 my @candidates = ();
25310 # loop over all whitespace items created for the current batch
25311 foreach my $i ( 0 .. $max_lp_object_list ) {
25312 my $item = $rlp_object_list->[$i];
25314 # item must still be open to be a candidate (otherwise it
25315 # cannot influence the current token)
25316 next if ( $item->get_closed() >= 0 );
25318 my $available_spaces = $item->get_available_spaces();
25320 if ( $available_spaces > 0 ) {
25321 push( @candidates, [ $i, $available_spaces ] );
25325 return unless (@candidates);
25327 # sort by available whitespace so that we can remove whitespace
25328 # from the maximum available first.
25330 sort { $b->[1] <=> $a->[1] || $a->[0] <=> $b->[0] } @candidates;
25332 # keep removing whitespace until we are done or have no more
25333 foreach my $candidate (@candidates) {
25334 my ( $i, $available_spaces ) = @{$candidate};
25335 my $deleted_spaces =
25336 ( $available_spaces > $spaces_needed )
25338 : $available_spaces;
25340 # remove the incremental space from this item
25341 $rlp_object_list->[$i]->decrease_available_spaces($deleted_spaces);
25345 # update the leading whitespace of this item and all items
25346 # that came after it
25348 while ( ++$i <= $max_lp_object_list ) {
25350 my $old_spaces = $rlp_object_list->[$i]->get_spaces();
25351 if ( $old_spaces >= $deleted_spaces ) {
25352 $rlp_object_list->[$i]->decrease_SPACES($deleted_spaces);
25355 # shouldn't happen except for code bug:
25357 # non-fatal, keep going except in DEVEL_MODE
25359 my $level = $rlp_object_list->[$i_debug]->get_level();
25361 $rlp_object_list->[$i_debug]->get_ci_level();
25362 my $old_level = $rlp_object_list->[$i]->get_level();
25364 $rlp_object_list->[$i]->get_ci_level();
25366 program bug with -lp: want to delete $deleted_spaces from item $i, but old=$old_spaces deleted: lev=$level ci=$ci_level deleted: level=$old_level ci=$ci_level
25371 $lp_position_predictor -= $deleted_spaces;
25372 $spaces_needed -= $deleted_spaces;
25373 last unless ( $spaces_needed > 0 );
25376 } ## end sub check_for_long_gnu_style_lines
25378 sub undo_incomplete_lp_indentation {
25380 #------------------------------------------------------------------
25381 # Undo indentation for all incomplete -lp indentation levels of the
25382 # current batch unless -xlp is set.
25383 #------------------------------------------------------------------
25385 # This routine is called once after each output stream batch is
25386 # finished to undo indentation for all incomplete -lp indentation
25387 # levels. If this routine is called then comments and blank lines will
25388 # disrupt this indentation style. In older versions of perltidy this
25389 # was always done because it could cause problems otherwise, but recent
25390 # improvements allow fairly good results to be obtained by skipping
25391 # this step with the -xlp flag.
25393 # nothing to do if no stack items defined for this line
25394 return if ( $max_lp_object_list < 0 );
25396 # loop over all whitespace items created for the current batch
25397 foreach my $i ( 0 .. $max_lp_object_list ) {
25398 my $item = $rlp_object_list->[$i];
25400 # only look for open items
25401 next if ( $item->get_closed() >= 0 );
25403 # Tentatively remove all of the available space
25404 # (The vertical aligner will try to get it back later)
25405 my $available_spaces = $item->get_available_spaces();
25406 if ( $available_spaces > 0 ) {
25408 # delete incremental space for this item
25409 $rlp_object_list->[$i]
25410 ->tentatively_decrease_available_spaces($available_spaces);
25412 # Reduce the total indentation space of any nodes that follow
25413 # Note that any such nodes must necessarily be dependents
25415 foreach ( $i + 1 .. $max_lp_object_list ) {
25416 $rlp_object_list->[$_]->decrease_SPACES($available_spaces);
25421 } ## end sub undo_incomplete_lp_indentation
25422 } ## end closure set_lp_indentation
25424 #----------------------------------------------------------------------
25425 # sub to set a requested break before an opening container in -lp mode.
25426 #----------------------------------------------------------------------
25427 sub set_forced_lp_break {
25429 my ( $self, $i_begin_line, $i_opening ) = @_;
25432 # $i_begin_line = index of break in the _to_go arrays
25433 # $i_opening = index of the opening container
25435 # Set any requested break at a token before this opening container
25436 # token. This is often an '=' or '=>' but can also be things like
25437 # '.', ',', 'return'. It was defined by sub set_lp_indentation.
25440 # For intact containers, call this at the closing token.
25441 # For broken containers, call this at the opening token.
25442 # This will avoid needless breaks when it turns out that the
25443 # container does not actually get broken. This isn't known until
25444 # the closing container for intact blocks.
25447 if ( $i_begin_line < 0
25448 || $i_begin_line > $max_index_to_go );
25450 # Handle request to put a break break immediately before this token.
25451 # We may not want to do that since we are also breaking after it.
25452 if ( $i_begin_line == $i_opening ) {
25454 # The following rules should be reviewed. We may want to always
25455 # allow the break. If we do not do the break, the indentation
25458 # RULE: don't break before it unless it is welded to a qw.
25459 # This works well, but we may want to relax this to allow
25460 # breaks in additional cases.
25462 if ( !$self->[_rK_weld_right_]->{ $K_to_go[$i_opening] } );
25463 return unless ( $types_to_go[$max_index_to_go] eq 'q' );
25466 # Only break for breakpoints at the same
25467 # indentation level as the opening paren
25468 my $test1 = $nesting_depth_to_go[$i_opening];
25469 my $test2 = $nesting_depth_to_go[$i_begin_line];
25470 return if ( $test2 != $test1 );
25472 # Back up at a blank (fixes case b932)
25473 my $ibr = $i_begin_line - 1;
25475 && $types_to_go[$ibr] eq 'b' )
25480 my $i_nonblank = $self->set_forced_breakpoint($ibr);
25482 # Crude patch to prevent sub recombine_breakpoints from undoing
25483 # this break, especially after an '='. It will leave old
25484 # breakpoints alone. See c098/x045 for some examples.
25485 if ( defined($i_nonblank) ) {
25486 $old_breakpoint_to_go[$i_nonblank] = 1;
25490 } ## end sub set_forced_lp_break
25492 sub reduce_lp_indentation {
25494 # reduce the leading whitespace at token $i if possible by $spaces_needed
25495 # (a large value of $spaces_needed will remove all excess space)
25496 # NOTE: to be called from break_lists only for a sequence of tokens
25497 # contained between opening and closing parens/braces/brackets
25499 my ( $self, $i, $spaces_wanted ) = @_;
25500 my $deleted_spaces = 0;
25502 my $item = $leading_spaces_to_go[$i];
25503 my $available_spaces = $item->get_available_spaces();
25506 $available_spaces > 0
25507 && ( ( $spaces_wanted <= $available_spaces )
25508 || !$item->get_have_child() )
25512 # we'll remove these spaces, but mark them as recoverable
25514 $item->tentatively_decrease_available_spaces($spaces_wanted);
25517 return $deleted_spaces;
25518 } ## end sub reduce_lp_indentation
25520 ###########################################################
25521 # CODE SECTION 13: Preparing batches for vertical alignment
25522 ###########################################################
25524 sub check_convey_batch_input {
25526 # Check for valid input to sub convey_batch_to_vertical_aligner. An
25527 # error here would most likely be due to an error in the calling
25528 # routine 'sub grind_batch_of_CODE'.
25529 my ( $self, $ri_first, $ri_last ) = @_;
25531 if ( !defined($ri_first) || !defined($ri_last) ) {
25533 Undefined line ranges ri_first and/r ri_last
25537 my $nmax = @{$ri_first} - 1;
25538 my $nmax_check = @{$ri_last} - 1;
25539 if ( $nmax < 0 || $nmax_check < 0 || $nmax != $nmax_check ) {
25541 Line range index error: nmax=$nmax but nmax_check=$nmax_check
25542 These should be equal and >=0
25545 my ( $ibeg, $iend );
25546 foreach my $n ( 0 .. $nmax ) {
25547 my $ibeg_m = $ibeg;
25548 my $iend_m = $iend;
25549 $ibeg = $ri_first->[$n];
25550 $iend = $ri_last->[$n];
25551 if ( $ibeg < 0 || $iend < $ibeg || $iend > $max_index_to_go ) {
25553 Bad line range at line index $n of $nmax: ibeg=$ibeg, iend=$iend
25554 These should have iend >= ibeg and be in the range (0..$max_index_to_go)
25557 next if ( $n == 0 );
25558 if ( $ibeg <= $iend_m ) {
25560 Line ranges overlap: iend=$iend_m at line $n-1 but ibeg=$ibeg for line $n
25565 } ## end sub check_convey_batch_input
25567 sub convey_batch_to_vertical_aligner {
25571 # This routine receives a batch of code for which the final line breaks
25572 # have been defined. Here we prepare the lines for passing to the vertical
25573 # aligner. We do the following tasks:
25574 # - mark certain vertical alignment tokens, such as '=', in each line
25575 # - make final indentation adjustments
25576 # - do logical padding: insert extra blank spaces to help display certain
25577 # logical constructions
25578 # - send the line to the vertical aligner
25580 my $rLL = $self->[_rLL_];
25581 my $Klimit = $self->[_Klimit_];
25582 my $ris_list_by_seqno = $self->[_ris_list_by_seqno_];
25583 my $this_batch = $self->[_this_batch_];
25585 my $do_not_pad = $this_batch->[_do_not_pad_];
25586 my $starting_in_quote = $this_batch->[_starting_in_quote_];
25587 my $ending_in_quote = $this_batch->[_ending_in_quote_];
25588 my $is_static_block_comment = $this_batch->[_is_static_block_comment_];
25589 my $batch_CODE_type = $this_batch->[_batch_CODE_type_];
25590 my $ri_first = $this_batch->[_ri_first_];
25591 my $ri_last = $this_batch->[_ri_last_];
25593 $self->check_convey_batch_input( $ri_first, $ri_last ) if (DEVEL_MODE);
25595 my $n_last_line = @{$ri_first} - 1;
25597 my $ibeg_next = $ri_first->[0];
25598 my $iend_next = $ri_last->[0];
25600 my $type_beg_next = $types_to_go[$ibeg_next];
25601 my $type_end_next = $types_to_go[$iend_next];
25602 my $token_beg_next = $tokens_to_go[$ibeg_next];
25604 my $rindentation_list = [0]; # ref to indentations for each line
25605 my ( $cscw_block_comment, $closing_side_comment, $is_block_comment );
25607 if ( !$max_index_to_go && $type_beg_next eq '#' ) {
25608 $is_block_comment = 1;
25611 if ($rOpts_closing_side_comments) {
25612 ( $closing_side_comment, $cscw_block_comment ) =
25613 $self->add_closing_side_comment( $ri_first, $ri_last );
25616 if ( $n_last_line > 0 || $rOpts_extended_continuation_indentation ) {
25617 $self->undo_ci( $ri_first, $ri_last,
25618 $this_batch->[_rix_seqno_controlling_ci_] );
25621 # for multi-line batches ...
25622 if ( $n_last_line > 0 ) {
25624 # flush before a long if statement to avoid unwanted alignment
25625 $self->flush_vertical_aligner()
25626 if ( $type_beg_next eq 'k'
25627 && $is_if_unless{$token_beg_next} );
25629 $self->set_logical_padding( $ri_first, $ri_last, $starting_in_quote )
25630 if ($rOpts_logical_padding);
25632 $self->xlp_tweak( $ri_first, $ri_last )
25633 if ($rOpts_extended_line_up_parentheses);
25636 if (DEVEL_MODE) { $self->check_batch_summed_lengths() }
25638 # ----------------------------------------------------------
25639 # define the vertical alignments for all lines of this batch
25640 # ----------------------------------------------------------
25641 my $rline_alignments =
25642 $self->make_vertical_alignments( $ri_first, $ri_last );
25644 # ----------------------------------------------
25645 # loop to send each line to the vertical aligner
25646 # ----------------------------------------------
25647 my ( $type_beg, $type_end, $token_beg, $ljump );
25649 for my $n ( 0 .. $n_last_line ) {
25651 # ----------------------------------------------------------------
25652 # This hash will hold the args for vertical alignment of this line
25653 # We will populate it as we go.
25654 # ----------------------------------------------------------------
25655 my $rvao_args = {};
25657 my $type_beg_last = $type_beg;
25658 my $type_end_last = $type_end;
25660 my $ibeg = $ibeg_next;
25661 my $iend = $iend_next;
25662 my $Kbeg = $K_to_go[$ibeg];
25663 my $Kend = $K_to_go[$iend];
25665 $type_beg = $type_beg_next;
25666 $type_end = $type_end_next;
25667 $token_beg = $token_beg_next;
25669 # ---------------------------------------------------
25670 # Define the check value 'Kend' to send for this line
25671 # ---------------------------------------------------
25672 # The 'Kend' value is an integer for checking that lines come out of
25673 # the far end of the pipeline in the right order. It increases
25674 # linearly along the token stream. But we only send ending K values of
25675 # non-comments down the pipeline. This is equivalent to checking that
25676 # the last CODE_type is blank or equal to 'VER'. See also sub
25677 # resync_lines_and_tokens for related coding. Note that
25678 # '$batch_CODE_type' is the code type of the line to which the ending
25681 $batch_CODE_type && $batch_CODE_type ne 'VER' ? undef : $Kend;
25683 # Get some vars on line [n+1], if any,
25684 # and define $ljump = level jump needed by 'sub get_final_indentation'
25685 if ( $n < $n_last_line ) {
25686 $ibeg_next = $ri_first->[ $n + 1 ];
25687 $iend_next = $ri_last->[ $n + 1 ];
25689 $type_beg_next = $types_to_go[$ibeg_next];
25690 $type_end_next = $types_to_go[$iend_next];
25691 $token_beg_next = $tokens_to_go[$ibeg_next];
25693 my $Kbeg_next = $K_to_go[$ibeg_next];
25694 $ljump = $rLL->[$Kbeg_next]->[_LEVEL_] - $rLL->[$Kend]->[_LEVEL_];
25696 elsif ( !$is_block_comment && $Kend < $Klimit ) {
25698 # Patch for git #51, a bare closing qw paren was not outdented
25699 # if the flag '-nodelete-old-newlines is set
25700 # Note that we are just looking ahead for the next nonblank
25701 # character. We could scan past an arbitrary number of block
25702 # comments or hanging side comments by calling K_next_code, but it
25703 # could add significant run time with very little to be gained.
25704 my $Kbeg_next = $Kend + 1;
25705 if ( $Kbeg_next < $Klimit
25706 && $rLL->[$Kbeg_next]->[_TYPE_] eq 'b' )
25711 $rLL->[$Kbeg_next]->[_LEVEL_] - $rLL->[$Kend]->[_LEVEL_];
25717 # ---------------------------------------------
25718 # get the vertical alignment info for this line
25719 # ---------------------------------------------
25721 # The lines are broken into fields which can be spaced by the vertical
25722 # to achieve vertical alignment. These fields are the actual text
25723 # which will be output, so from here on no more changes can be made to
25725 my $rline_alignment = $rline_alignments->[$n];
25726 my ( $rtokens, $rfields, $rpatterns, $rfield_lengths ) =
25727 @{$rline_alignment};
25729 # Programming check: (shouldn't happen)
25730 # The number of tokens which separate the fields must always be
25731 # one less than the number of fields. If this is not true then
25732 # an error has been introduced in sub make_alignment_patterns.
25734 if ( @{$rfields} && ( @{$rtokens} != ( @{$rfields} - 1 ) ) ) {
25735 my $nt = @{$rtokens};
25736 my $nf = @{$rfields};
25738 Program bug in Perl::Tidy::Formatter, probably in sub 'make_alignment_patterns':
25739 The number of tokens = $nt should be one less than number of fields: $nf
25745 # --------------------------------------
25746 # get the final indentation of this line
25747 # --------------------------------------
25754 $is_outdented_line,
25756 ) = $self->get_final_indentation(
25764 $rindentation_list,
25766 $starting_in_quote,
25767 $is_static_block_comment,
25771 # --------------------------------
25772 # define flag 'outdent_long_lines'
25773 # --------------------------------
25775 # we will allow outdenting of long lines..
25776 # which are long quotes, if allowed
25777 ( $type_beg eq 'Q' && $rOpts_outdent_long_quotes )
25779 # which are long block comments, if allowed
25782 && $rOpts_outdent_long_comments
25784 # but not if this is a static block comment
25785 && !$is_static_block_comment
25789 $rvao_args->{outdent_long_lines} = 1;
25791 # convert -lp indentation objects to spaces to allow outdenting
25792 if ( ref($indentation) ) {
25793 $indentation = $indentation->get_spaces();
25797 # --------------------------------------------------
25798 # define flags 'break_alignment_before' and '_after'
25799 # --------------------------------------------------
25801 # These flags tell the vertical aligner to stop alignment before or
25803 if ($is_outdented_line) {
25804 $rvao_args->{break_alignment_before} = 1;
25805 $rvao_args->{break_alignment_after} = 1;
25807 elsif ($do_not_pad) {
25808 $rvao_args->{break_alignment_before} = 1;
25811 # flush at an 'if' which follows a line with (1) terminal semicolon
25812 # or (2) terminal block_type which is not an 'if'. This prevents
25813 # unwanted alignment between the lines.
25814 elsif ( $type_beg eq 'k' && $token_beg eq 'if' ) {
25819 my $Km = $Kbeg - 1;
25820 $type_m = $rLL->[$Km]->[_TYPE_];
25821 if ( $type_m eq 'b' && $Km > 0 ) {
25823 $type_m = $rLL->[$Km]->[_TYPE_];
25825 if ( $type_m eq '#' && $Km > 0 ) {
25827 $type_m = $rLL->[$Km]->[_TYPE_];
25828 if ( $type_m eq 'b' && $Km > 0 ) {
25830 $type_m = $rLL->[$Km]->[_TYPE_];
25834 my $seqno_m = $rLL->[$Km]->[_TYPE_SEQUENCE_];
25836 $block_type_m = $self->[_rblock_type_of_seqno_]->{$seqno_m};
25840 # break after anything that is not if-like
25843 || ( $type_m eq '}'
25845 && $block_type_m ne 'if'
25846 && $block_type_m ne 'unless'
25847 && $block_type_m ne 'elsif'
25848 && $block_type_m ne 'else' )
25851 $rvao_args->{break_alignment_before} = 1;
25855 # ----------------------------------
25856 # define 'rvertical_tightness_flags'
25857 # ----------------------------------
25858 # These flags tell the vertical aligner if/when to combine consecutive
25859 # lines, based on the user input parameters.
25860 $rvao_args->{rvertical_tightness_flags} =
25861 $self->set_vertical_tightness_flags( $n, $n_last_line, $ibeg, $iend,
25862 $ri_first, $ri_last, $ending_in_quote, $closing_side_comment )
25863 unless ( $is_block_comment
25864 || $self->[_no_vertical_tightness_flags_] );
25866 # ----------------------------------
25867 # define 'is_terminal_ternary' flag
25868 # ----------------------------------
25870 # This flag is set at the final ':' of a ternary chain to request
25871 # vertical alignment of the final term. Here is a slightly complex
25874 # $self->{_text} = (
25876 # : $type eq 'item' ? "the $section entry"
25877 # : "the section on $section"
25881 # ? ( $section ? ' in ' : '' ) . "the $page$page_ext manpage"
25882 # : ' elsewhere in this document'
25885 if ( $type_beg eq ':' || $n > 0 && $type_end_last eq ':' ) {
25887 my $is_terminal_ternary = 0;
25888 my $last_leading_type = $n > 0 ? $type_beg_last : ':';
25889 my $terminal_type = $types_to_go[$i_terminal];
25890 if ( $terminal_type ne ';'
25891 && $n_last_line > $n
25892 && $level_end == $lev )
25894 my $Kbeg_next = $K_to_go[$ibeg_next];
25895 $level_end = $rLL->[$Kbeg_next]->[_LEVEL_];
25896 $terminal_type = $rLL->[$Kbeg_next]->[_TYPE_];
25899 $last_leading_type eq ':'
25900 && ( ( $terminal_type eq ';' && $level_end <= $lev )
25901 || ( $terminal_type ne ':' && $level_end < $lev ) )
25905 # the terminal term must not contain any ternary terms, as in
25907 # $Is_MSWin32 ? ".\\echo$$"
25908 # : $Is_MacOS ? ":echo$$"
25909 # : ( $Is_NetWare ? "echo$$" : "./echo$$" )
25911 $is_terminal_ternary = 1;
25913 my $KP = $rLL->[$Kbeg]->[_KNEXT_SEQ_ITEM_];
25914 while ( defined($KP) && $KP <= $Kend ) {
25915 my $type_KP = $rLL->[$KP]->[_TYPE_];
25916 if ( $type_KP eq '?' || $type_KP eq ':' ) {
25917 $is_terminal_ternary = 0;
25920 $KP = $rLL->[$KP]->[_KNEXT_SEQ_ITEM_];
25923 $rvao_args->{is_terminal_ternary} = $is_terminal_ternary;
25926 # -------------------------------------------------
25927 # add any new closing side comment to the last line
25928 # -------------------------------------------------
25929 if ( $closing_side_comment && $n == $n_last_line && @{$rfields} ) {
25931 $rfields->[-1] .= " $closing_side_comment";
25933 # NOTE: Patch for csc. We can just use 1 for the length of the csc
25934 # because its length should not be a limiting factor from here on.
25935 $rfield_lengths->[-1] += 2;
25939 [ $rtokens, $rfields, $rpatterns, $rfield_lengths ];
25942 # ------------------------
25943 # define flag 'list_seqno'
25944 # ------------------------
25946 # This flag indicates if this line is contained in a multi-line list
25947 if ( !$is_block_comment ) {
25948 my $parent_seqno = $parent_seqno_to_go[$ibeg];
25949 $rvao_args->{list_seqno} = $ris_list_by_seqno->{$parent_seqno};
25952 # The alignment tokens have been marked with nesting_depths, so we need
25953 # to pass nesting depths to the vertical aligner. They remain invariant
25954 # under all formatting operations. Previously, level values were sent
25955 # to the aligner. But they can be altered in welding and other
25956 # operations, and this can lead to alignment errors.
25957 my $nesting_depth_beg = $nesting_depth_to_go[$ibeg];
25958 my $nesting_depth_end = $nesting_depth_to_go[$iend];
25960 # A quirk in the definition of nesting depths is that the closing token
25961 # has the same depth as internal tokens. The vertical aligner is
25962 # programmed to expect them to have the lower depth, so we fix this.
25963 if ( $is_closing_type{ $types_to_go[$ibeg] } ) { $nesting_depth_beg-- }
25964 if ( $is_closing_type{ $types_to_go[$iend] } ) { $nesting_depth_end-- }
25966 # Adjust nesting depths to keep -lp indentation for qw lists. This is
25967 # required because qw lists contained in brackets do not get nesting
25968 # depths, but the vertical aligner is watching nesting depth changes to
25969 # decide if a -lp block is intact. Without this patch, qw lists
25970 # enclosed in angle brackets will not get the correct -lp indentation.
25972 # Looking for line with isolated qw ...
25973 if ( $rOpts_line_up_parentheses
25974 && $type_beg eq 'q'
25975 && $ibeg == $iend )
25978 # ... which is part of a multiline qw
25979 my $Km = $self->K_previous_nonblank($Kbeg);
25980 my $Kp = $self->K_next_nonblank($Kbeg);
25981 if ( defined($Km) && $rLL->[$Km]->[_TYPE_] eq 'q'
25982 || defined($Kp) && $rLL->[$Kp]->[_TYPE_] eq 'q' )
25984 $nesting_depth_beg++;
25985 $nesting_depth_end++;
25989 # ---------------------------------
25990 # define flag 'forget_side_comment'
25991 # ---------------------------------
25993 # This flag tells the vertical aligner to reset the side comment
25994 # location if we are entering a new block from level 0. This is
25995 # intended to keep side comments from drifting too far to the right.
25996 if ( $block_type_to_go[$i_terminal]
25997 && $nesting_depth_end > $nesting_depth_beg )
25999 $rvao_args->{forget_side_comment} =
26000 !$self->[_radjusted_levels_]->[$Kbeg];
26003 # -----------------------------------
26004 # Store the remaining non-flag values
26005 # -----------------------------------
26006 $rvao_args->{Kend} = $Kend_code;
26007 $rvao_args->{ci_level} = $ci_levels_to_go[$ibeg];
26008 $rvao_args->{indentation} = $indentation;
26009 $rvao_args->{level_end} = $nesting_depth_end;
26010 $rvao_args->{level} = $nesting_depth_beg;
26011 $rvao_args->{rline_alignment} = $rline_alignment;
26012 $rvao_args->{maximum_line_length} =
26013 $maximum_line_length_at_level[ $levels_to_go[$ibeg] ];
26015 # --------------------------------------
26016 # send this line to the vertical aligner
26017 # --------------------------------------
26018 my $vao = $self->[_vertical_aligner_object_];
26019 $vao->valign_input($rvao_args);
26023 } ## end of loop to output each line
26025 # Set flag indicating if the last line ends in an opening
26026 # token and is very short, so that a blank line is not
26027 # needed if the subsequent line is a comment.
26028 # Examples of what we are looking for:
26034 $self->[_last_output_short_opening_token_]
26036 # line ends in opening token
26038 = $is_opening_type{$type_end}
26042 # line has either single opening token
26043 $iend_next == $ibeg_next
26045 # or is a single token followed by opening token.
26046 # Note that sub identifiers have blanks like 'sub doit'
26047 # $token_beg !~ /\s+/
26048 || ( $iend_next - $ibeg_next <= 2 && index( $token_beg, SPACE ) < 0 )
26051 # and limit total to 10 character widths
26052 && token_sequence_length( $ibeg_next, $iend_next ) <= 10;
26054 # remember indentation of lines containing opening containers for
26055 # later use by sub get_final_indentation
26056 $self->save_opening_indentation( $ri_first, $ri_last,
26057 $rindentation_list, $this_batch->[_runmatched_opening_indexes_] )
26058 if ( $this_batch->[_runmatched_opening_indexes_]
26059 || $types_to_go[$max_index_to_go] eq 'q' );
26061 # output any new -cscw block comment
26062 if ($cscw_block_comment) {
26063 $self->flush_vertical_aligner();
26064 my $file_writer_object = $self->[_file_writer_object_];
26065 $file_writer_object->write_code_line( $cscw_block_comment . "\n" );
26068 } ## end sub convey_batch_to_vertical_aligner
26070 sub check_batch_summed_lengths {
26072 my ( $self, $msg ) = @_;
26073 $msg = EMPTY_STRING unless defined($msg);
26074 my $rLL = $self->[_rLL_];
26076 # Verify that the summed lengths are correct. We want to be sure that
26077 # errors have not been introduced by programming changes. Summed lengths
26078 # are defined in sub store_token. Operations like padding and unmasking
26079 # semicolons can change token lengths, but those operations are expected to
26080 # update the summed lengths when they make changes. So the summed lengths
26081 # should always be correct.
26082 foreach my $i ( 0 .. $max_index_to_go ) {
26084 $summed_lengths_to_go[ $i + 1 ] - $summed_lengths_to_go[$i];
26085 my $len_tok_i = $token_lengths_to_go[$i];
26086 my $KK = $K_to_go[$i];
26089 # For --indent-only, there is not always agreement between
26090 # token lengths in _rLL_ and token_lengths_to_go, so skip that check.
26091 if ( defined($KK) && !$rOpts_indent_only ) {
26092 $len_tok_K = $rLL->[$KK]->[_TOKEN_LENGTH_];
26094 if ( $len_by_sum != $len_tok_i
26095 || defined($len_tok_K) && $len_by_sum != $len_tok_K )
26097 my $lno = defined($KK) ? $rLL->[$KK]->[_LINE_INDEX_] + 1 : "undef";
26098 $KK = 'undef' unless defined($KK);
26099 my $tok = $tokens_to_go[$i];
26100 my $type = $types_to_go[$i];
26102 Summed lengths are appear to be incorrect. $msg
26103 lengths disagree: token length by sum=$len_by_sum but token_length_to_go[$i] = $len_tok_i and rLL->[$KK]->[_TOKEN_LENGTH_]=$len_tok_K
26104 near line $lno starting with '$tokens_to_go[0]..' at token i=$i K=$KK token_type='$type' token='$tok'
26109 } ## end sub check_batch_summed_lengths
26111 { ## begin closure set_vertical_alignment_markers
26112 my %is_vertical_alignment_type;
26113 my %is_not_vertical_alignment_token;
26114 my %is_vertical_alignment_keyword;
26115 my %is_terminal_alignment_type;
26116 my %is_low_level_alignment_token;
26122 # Replaced =~ and // in the list. // had been removed in RT 119588
26124 = **= += *= &= <<= &&= -= /= |= >>= ||= //= .= %= ^= x=
26125 { ? : => && || ~~ !~~ =~ !~ // <=> ->
26127 @is_vertical_alignment_type{@q} = (1) x scalar(@q);
26129 # These 'tokens' are not aligned. We need this to remove [
26130 # from the above list because it has type ='{'
26132 @is_not_vertical_alignment_token{@q} = (1) x scalar(@q);
26134 # these are the only types aligned at a line end
26136 @is_terminal_alignment_type{@q} = (1) x scalar(@q);
26138 # these tokens only align at line level
26140 @is_low_level_alignment_token{@q} = (1) x scalar(@q);
26142 # eq and ne were removed from this list to improve alignment chances
26143 @q = qw(if unless and or err for foreach while until);
26144 @is_vertical_alignment_keyword{@q} = (1) x scalar(@q);
26147 my $ralignment_type_to_go;
26148 my $ralignment_counts;
26149 my $ralignment_hash_by_line;
26151 sub set_vertical_alignment_markers {
26153 my ( $self, $ri_first, $ri_last ) = @_;
26155 #----------------------------------------------------------------------
26156 # This routine looks at output lines for certain tokens which can serve
26157 # as vertical alignment markers (such as an '=').
26158 #----------------------------------------------------------------------
26160 # Input parameters:
26161 # $ri_first = ref to list of starting line indexes in _to_go arrays
26162 # $ri_last = ref to list of ending line indexes in _to_go arrays
26164 # Method: We look at each token $i in this output batch and set
26165 # $ralignment_type_to_go->[$i] equal to those tokens at which we would
26166 # accept vertical alignment.
26168 # Initialize closure (and return) variables:
26169 $ralignment_type_to_go = [];
26170 $ralignment_counts = [];
26171 $ralignment_hash_by_line = [];
26173 # NOTE: closing side comments can insert up to 2 additional tokens
26174 # beyond the original $max_index_to_go, so we need to check ri_last for
26176 my $max_line = @{$ri_first} - 1;
26177 my $max_i = $ri_last->[$max_line];
26178 if ( $max_i < $max_index_to_go ) { $max_i = $max_index_to_go }
26180 # -----------------------------------------------------------------
26182 # - no alignments if there is only 1 token.
26183 # - and nothing to do if we aren't allowed to change whitespace.
26184 # -----------------------------------------------------------------
26185 if ( $max_i <= 0 || !$rOpts_add_whitespace ) {
26189 # -------------------------------
26190 # First handle any side comment.
26191 # -------------------------------
26192 my $i_terminal = $max_i;
26193 if ( $types_to_go[$max_i] eq '#' ) {
26195 # We know $max_i > 0 if we get here.
26197 if ( $i_terminal > 0 && $types_to_go[$i_terminal] eq 'b' ) {
26201 my $token = $tokens_to_go[$max_i];
26202 my $KK = $K_to_go[$max_i];
26204 # Do not align various special side comments
26205 my $do_not_align = (
26207 # it is any specially marked side comment
26208 ( defined($KK) && $self->[_rspecial_side_comment_type_]->{$KK} )
26210 # or it is a static side comment
26211 || ( $rOpts->{'static-side-comments'}
26212 && $token =~ /$static_side_comment_pattern/ )
26214 # or a closing side comment
26215 || ( $types_to_go[$i_terminal] eq '}'
26216 && $tokens_to_go[$i_terminal] eq '}'
26217 && $token =~ /$closing_side_comment_prefix_pattern/ )
26220 # - For the specific combination -vc -nvsc, we put all side comments
26221 # at fixed locations. Note that we will lose hanging side comment
26222 # alignments. Otherwise, hsc's can move to strange locations.
26223 # - For -nvc -nvsc we make all side comments vertical alignments
26224 # because the vertical aligner will check for -nvsc and be able
26225 # to reduce the final padding to the side comments for long lines.
26226 # and keep hanging side comments aligned.
26227 if ( !$do_not_align
26228 && !$rOpts_valign_side_comments
26229 && $rOpts_valign_code )
26233 my $ipad = $max_i - 1;
26234 if ( $types_to_go[$ipad] eq 'b' ) {
26236 $rOpts->{'minimum-space-to-comment'} -
26237 $token_lengths_to_go[$ipad];
26238 $self->pad_token( $ipad, $pad_spaces );
26242 if ( !$do_not_align ) {
26243 $ralignment_type_to_go->[$max_i] = '#';
26244 $ralignment_hash_by_line->[$max_line]->{$max_i} = '#';
26245 $ralignment_counts->[$max_line]++;
26249 # ----------------------------------------------
26250 # Nothing more to do on this line if -nvc is set
26251 # ----------------------------------------------
26252 if ( !$rOpts_valign_code ) {
26256 # -------------------------------------
26257 # Loop over each line of this batch ...
26258 # -------------------------------------
26260 foreach my $line ( 0 .. $max_line ) {
26262 my $ibeg = $ri_first->[$line];
26263 my $iend = $ri_last->[$line];
26265 next if ( $iend <= $ibeg );
26267 # back up before any side comment
26268 if ( $iend > $i_terminal ) { $iend = $i_terminal }
26270 #----------------------------------
26271 # Loop over all tokens on this line
26272 #----------------------------------
26273 $self->set_vertical_alignment_markers_token_loop( $line, $ibeg,
26278 return ( $ralignment_type_to_go, $ralignment_counts,
26279 $ralignment_hash_by_line );
26280 } ## end sub set_vertical_alignment_markers
26282 sub set_vertical_alignment_markers_token_loop {
26283 my ( $self, $line, $ibeg, $iend ) = @_;
26285 # Set vertical alignment markers for the tokens on one line
26286 # of the current output batch. This is done by updating the
26287 # three closure variables:
26288 # $ralignment_type_to_go
26289 # $ralignment_counts
26290 # $ralignment_hash_by_line
26292 # Input parameters:
26293 # $line = index of this line in the current batch
26294 # $ibeg, $iend = index range of tokens to check in the _to_go arrays
26296 my $level_beg = $levels_to_go[$ibeg];
26297 my $token_beg = $tokens_to_go[$ibeg];
26298 my $type_beg = $types_to_go[$ibeg];
26299 my $type_beg_special_char =
26300 ( $type_beg eq '.' || $type_beg eq ':' || $type_beg eq '?' );
26302 my $last_vertical_alignment_BEFORE_index = -1;
26303 my $vert_last_nonblank_type = $type_beg;
26304 my $vert_last_nonblank_token = $token_beg;
26306 # ----------------------------------------------------------------
26307 # Initialization code merged from 'sub delete_needless_alignments'
26308 # ----------------------------------------------------------------
26309 my $i_good_paren = -1;
26310 my $i_elsif_close = $ibeg - 1;
26311 my $i_elsif_open = $iend + 1;
26313 if ( $type_beg eq 'k' ) {
26315 # Initialization for paren patch: mark a location of a paren we
26316 # should keep, such as one following something like a leading
26318 $i_good_paren = $ibeg + 1;
26319 if ( $types_to_go[$i_good_paren] eq 'b' ) {
26323 # Initialization for 'elsif' patch: remember the paren range of
26324 # an elsif, and do not make alignments within them because this
26325 # can cause loss of padding and overall brace alignment in the
26326 # vertical aligner.
26327 if ( $token_beg eq 'elsif'
26328 && $i_good_paren < $iend
26329 && $tokens_to_go[$i_good_paren] eq '(' )
26331 $i_elsif_open = $i_good_paren;
26332 $i_elsif_close = $mate_index_to_go[$i_good_paren];
26333 if ( !defined($i_elsif_close) ) { $i_elsif_close = -1 }
26335 } ## end if ( $type_beg eq 'k' )
26337 # --------------------------------------------
26338 # Loop over each token in this output line ...
26339 # --------------------------------------------
26340 foreach my $i ( $ibeg + 1 .. $iend ) {
26342 next if ( $types_to_go[$i] eq 'b' );
26344 my $type = $types_to_go[$i];
26345 my $token = $tokens_to_go[$i];
26346 my $alignment_type = EMPTY_STRING;
26348 # ----------------------------------------------
26349 # Check for 'paren patch' : Remove excess parens
26350 # ----------------------------------------------
26352 # Excess alignment of parens can prevent other good alignments.
26353 # For example, note the parens in the first two rows of the
26354 # following snippet. They would normally get marked for
26355 # alignment and aligned as follows:
26357 # my $w = $columns * $cell_w + ( $columns + 1 ) * $border;
26358 # my $h = $rows * $cell_h + ( $rows + 1 ) * $border;
26359 # my $img = new Gimp::Image( $w, $h, RGB );
26361 # This causes unnecessary paren alignment and prevents the
26362 # third equals from aligning. If we remove the unwanted
26363 # alignments we get:
26365 # my $w = $columns * $cell_w + ( $columns + 1 ) * $border;
26366 # my $h = $rows * $cell_h + ( $rows + 1 ) * $border;
26367 # my $img = new Gimp::Image( $w, $h, RGB );
26369 # A rule for doing this which works well is to remove alignment
26370 # of parens whose containers do not contain other aligning
26371 # tokens, with the exception that we always keep alignment of
26372 # the first opening paren on a line (for things like 'if' and
26373 # 'elsif' statements).
26374 if ( $token eq ')' && @imatch_list ) {
26376 # undo the corresponding opening paren if:
26377 # - it is at the top of the stack
26378 # - and not the first overall opening paren
26379 # - does not follow a leading keyword on this line
26380 my $imate = $mate_index_to_go[$i];
26381 if ( !defined($imate) ) { $imate = -1 }
26382 if ( $imatch_list[-1] eq $imate
26383 && ( $ibeg > 1 || @imatch_list > 1 )
26384 && $imate > $i_good_paren )
26386 if ( $ralignment_type_to_go->[$imate] ) {
26387 $ralignment_type_to_go->[$imate] = EMPTY_STRING;
26388 $ralignment_counts->[$line]--;
26389 delete $ralignment_hash_by_line->[$line]->{$imate};
26395 # do not align tokens at lower level than start of line
26396 # except for side comments
26397 if ( $levels_to_go[$i] < $level_beg ) {
26401 #--------------------------------------------------------
26402 # First see if we want to align BEFORE this token
26403 #--------------------------------------------------------
26405 # The first possible token that we can align before
26406 # is index 2 because: 1) it doesn't normally make sense to
26407 # align before the first token and 2) the second
26408 # token must be a blank if we are to align before
26410 if ( $i < $ibeg + 2 ) { }
26412 # must follow a blank token
26413 elsif ( $types_to_go[ $i - 1 ] ne 'b' ) { }
26415 # otherwise, do not align two in a row to create a
26417 elsif ( $last_vertical_alignment_BEFORE_index == $i - 2 ) { }
26419 # align before one of these keywords
26420 # (within a line, since $i>1)
26421 elsif ( $type eq 'k' ) {
26423 # /^(if|unless|and|or|eq|ne)$/
26424 if ( $is_vertical_alignment_keyword{$token} ) {
26425 $alignment_type = $token;
26429 # align qw in a 'use' statement (issue git #93)
26430 elsif ( $type eq 'q' ) {
26431 if ( $types_to_go[0] eq 'k' && $tokens_to_go[0] eq 'use' ) {
26432 $alignment_type = $type;
26436 # align before one of these types..
26437 elsif ( $is_vertical_alignment_type{$type}
26438 && !$is_not_vertical_alignment_token{$token} )
26440 $alignment_type = $token;
26442 # Do not align a terminal token. Although it might
26443 # occasionally look ok to do this, this has been found to be
26444 # a good general rule. The main problems are:
26445 # (1) that the terminal token (such as an = or :) might get
26446 # moved far to the right where it is hard to see because
26447 # nothing follows it, and
26448 # (2) doing so may prevent other good alignments.
26449 # Current exceptions are && and || and =>
26450 if ( $i == $iend ) {
26451 $alignment_type = EMPTY_STRING
26452 unless ( $is_terminal_alignment_type{$type} );
26455 # Do not align leading ': (' or '. ('. This would prevent
26456 # alignment in something like the following:
26458 # ( $input_line_number < 10 ) ? " "
26459 # : ( $input_line_number < 100 ) ? " "
26463 # ( $case_matters ? $accessor : " lc($accessor) " )
26464 # . ( $yesno ? " eq " : " ne " )
26466 # Also, do not align a ( following a leading ? so we can
26467 # align something like this:
26468 # $converter{$_}->{ushortok} =
26469 # $PDL::IO::Pic::biggrays
26470 # ? ( m/GIF/ ? 0 : 1 )
26471 # : ( m/GIF|RAST|IFF/ ? 0 : 1 );
26472 if ( $type_beg_special_char
26474 && $types_to_go[ $i - 1 ] eq 'b' )
26476 $alignment_type = EMPTY_STRING;
26479 # Certain tokens only align at the same level as the
26480 # initial line level
26481 if ( $is_low_level_alignment_token{$token}
26482 && $levels_to_go[$i] != $level_beg )
26484 $alignment_type = EMPTY_STRING;
26487 if ( $token eq '(' ) {
26489 # For a paren after keyword, only align if-like parens,
26492 # elsif ( $b ) { &b }
26493 # ^-------------------aligned parens
26494 if ( $vert_last_nonblank_type eq 'k'
26495 && !$is_if_unless_elsif{$vert_last_nonblank_token} )
26497 $alignment_type = EMPTY_STRING;
26500 # Do not align a spaced-function-paren if requested.
26501 # Issue git #53, #73.
26502 if ( !$rOpts_function_paren_vertical_alignment ) {
26503 my $seqno = $type_sequence_to_go[$i];
26504 $alignment_type = EMPTY_STRING
26505 if ( $self->[_ris_function_call_paren_]->{$seqno} );
26508 # make () align with qw in a 'use' statement (git #93)
26509 if ( $tokens_to_go[0] eq 'use'
26510 && $types_to_go[0] eq 'k'
26511 && defined( $mate_index_to_go[$i] )
26512 && $mate_index_to_go[$i] == $i + 1 )
26514 $alignment_type = 'q';
26516 ## Note on discussion git #101. We could make this
26517 ## a separate type '()' to separate it from qw's:
26518 ## $alignment_type =
26519 ## $rOpts_valign_empty_parens_with_qw ? 'q' : '()';
26523 # be sure the alignment tokens are unique
26524 # This experiment didn't work well: reason not determined
26525 # if ($token ne $type) {$alignment_type .= $type}
26528 # NOTE: This is deactivated because it causes the previous
26529 # if/elsif alignment to fail
26530 #elsif ( $type eq '}' && $token eq '}' && $block_type_to_go[$i])
26531 #{ $alignment_type = $type; }
26533 if ($alignment_type) {
26534 $last_vertical_alignment_BEFORE_index = $i;
26537 #--------------------------------------------------------
26538 # Next see if we want to align AFTER the previous nonblank
26539 #--------------------------------------------------------
26541 # We want to line up ',' and interior ';' tokens, with the added
26542 # space AFTER these tokens. (Note: interior ';' is included
26543 # because it may occur in short blocks).
26546 # previous token IS one of these:
26548 $vert_last_nonblank_type eq ','
26549 || $vert_last_nonblank_type eq ';'
26552 # and it follows a blank
26553 && $types_to_go[ $i - 1 ] eq 'b'
26555 # and it's NOT one of these
26556 && !$is_closing_token{$type}
26558 # then go ahead and align
26562 $alignment_type = $vert_last_nonblank_type;
26565 #-----------------------
26566 # Set the alignment type
26567 #-----------------------
26568 if ($alignment_type) {
26570 # but do not align the opening brace of an anonymous sub
26572 && $block_type_to_go[$i]
26573 && $block_type_to_go[$i] =~ /$ASUB_PATTERN/ )
26578 # and do not make alignments within 'elsif' parens
26579 elsif ( $i > $i_elsif_open && $i < $i_elsif_close ) {
26583 # and ignore any tokens which have leading padded spaces
26584 # example: perl527/lop.t
26585 elsif ( substr( $alignment_type, 0, 1 ) eq SPACE ) {
26590 $ralignment_type_to_go->[$i] = $alignment_type;
26591 $ralignment_hash_by_line->[$line]->{$i} = $alignment_type;
26592 $ralignment_counts->[$line]++;
26593 push @imatch_list, $i;
26597 $vert_last_nonblank_type = $type;
26598 $vert_last_nonblank_token = $token;
26601 } ## end sub set_vertical_alignment_markers_token_loop
26603 } ## end closure set_vertical_alignment_markers
26605 sub make_vertical_alignments {
26606 my ( $self, $ri_first, $ri_last ) = @_;
26608 #----------------------------
26609 # Shortcut for a single token
26610 #----------------------------
26611 if ( $max_index_to_go == 0 ) {
26612 if ( @{$ri_first} == 1 && $ri_last->[0] == 0 ) {
26614 my $rfields = [ $tokens_to_go[0] ];
26615 my $rpatterns = [ $types_to_go[0] ];
26616 my $rfield_lengths =
26617 [ $summed_lengths_to_go[1] - $summed_lengths_to_go[0] ];
26618 return [ [ $rtokens, $rfields, $rpatterns, $rfield_lengths ] ];
26621 # Strange line packing, not fatal but should not happen
26622 elsif (DEVEL_MODE) {
26623 my $max_line = @{$ri_first} - 1;
26624 my $ibeg = $ri_first->[0];
26625 my $iend = $ri_last->[0];
26626 my $tok_b = $tokens_to_go[$ibeg];
26627 my $tok_e = $tokens_to_go[$iend];
26628 my $type_b = $types_to_go[$ibeg];
26629 my $type_e = $types_to_go[$iend];
26631 "Strange..max_index=0 but nlines=$max_line ibeg=$ibeg tok=$tok_b type=$type_b iend=$iend tok=$tok_e type=$type_e; please check\n"
26636 #---------------------------------------------------------
26637 # Step 1: Define the alignment tokens for the entire batch
26638 #---------------------------------------------------------
26639 my ( $ralignment_type_to_go, $ralignment_counts, $ralignment_hash_by_line );
26641 # We only need to make this call if vertical alignment of code is
26642 # requested or if a line might have a side comment.
26643 if ( $rOpts_valign_code
26644 || $types_to_go[$max_index_to_go] eq '#' )
26646 ( $ralignment_type_to_go, $ralignment_counts, $ralignment_hash_by_line )
26647 = $self->set_vertical_alignment_markers( $ri_first, $ri_last );
26650 #----------------------------------------------
26651 # Step 2: Break each line into alignment fields
26652 #----------------------------------------------
26653 my $rline_alignments = [];
26654 my $max_line = @{$ri_first} - 1;
26655 foreach my $line ( 0 .. $max_line ) {
26657 my $ibeg = $ri_first->[$line];
26658 my $iend = $ri_last->[$line];
26660 my $rtok_fld_pat_len = $self->make_alignment_patterns(
26661 $ibeg, $iend, $ralignment_type_to_go,
26662 $ralignment_counts->[$line],
26663 $ralignment_hash_by_line->[$line]
26665 push @{$rline_alignments}, $rtok_fld_pat_len;
26667 return $rline_alignments;
26668 } ## end sub make_vertical_alignments
26672 # get opening and closing sequence numbers of a token for the vertical
26673 # aligner. Assign qw quotes a value to allow qw opening and closing tokens
26674 # to be treated somewhat like opening and closing tokens for stacking
26675 # tokens by the vertical aligner.
26676 my ( $self, $ii, $ending_in_quote ) = @_;
26678 my $rLL = $self->[_rLL_];
26680 my $KK = $K_to_go[$ii];
26681 my $seqno = $rLL->[$KK]->[_TYPE_SEQUENCE_];
26683 if ( $rLL->[$KK]->[_TYPE_] eq 'q' ) {
26685 my $token = $rLL->[$KK]->[_TOKEN_];
26687 $seqno = $SEQ_QW if ( $token =~ /^qw\s*[\(\{\[]/ );
26690 if ( !$ending_in_quote ) {
26691 $seqno = $SEQ_QW if ( $token =~ /[\)\}\]]$/ );
26696 } ## end sub get_seqno
26699 my %undo_extended_ci;
26701 sub initialize_undo_ci {
26702 %undo_extended_ci = ();
26708 # Undo continuation indentation in certain sequences
26709 my ( $self, $ri_first, $ri_last, $rix_seqno_controlling_ci ) = @_;
26710 my ( $line_1, $line_2, $lev_last );
26711 my $max_line = @{$ri_first} - 1;
26713 my $rseqno_controlling_my_ci = $self->[_rseqno_controlling_my_ci_];
26715 # Prepare a list of controlling indexes for each line if required.
26716 # This is used for efficient processing below. Note: this is
26717 # critical for speed. In the initial implementation I just looped
26718 # through the @$rix_seqno_controlling_ci list below. Using NYT_prof, I
26719 # found that this routine was causing a huge run time in large lists.
26720 # On a very large list test case, this new coding dropped the run time
26721 # of this routine from 30 seconds to 169 milliseconds.
26722 my @i_controlling_ci;
26723 if ( $rix_seqno_controlling_ci && @{$rix_seqno_controlling_ci} ) {
26724 my @tmp = reverse @{$rix_seqno_controlling_ci};
26725 my $ix_next = pop @tmp;
26726 foreach my $line ( 0 .. $max_line ) {
26727 my $iend = $ri_last->[$line];
26728 while ( defined($ix_next) && $ix_next <= $iend ) {
26729 push @{ $i_controlling_ci[$line] }, $ix_next;
26730 $ix_next = pop @tmp;
26735 # Loop over all lines of the batch ...
26737 # Workaround originally created for problem c007, in which the
26738 # combination -lp -xci could produce a "Program bug" message in unusual
26740 my $skip_SECTION_1;
26741 if ( $rOpts_line_up_parentheses
26742 && $rOpts_extended_continuation_indentation )
26745 # Only set this flag if -lp is actually used here
26746 foreach my $line ( 0 .. $max_line ) {
26747 my $ibeg = $ri_first->[$line];
26748 if ( ref( $leading_spaces_to_go[$ibeg] ) ) {
26749 $skip_SECTION_1 = 1;
26755 foreach my $line ( 0 .. $max_line ) {
26757 my $ibeg = $ri_first->[$line];
26758 my $iend = $ri_last->[$line];
26759 my $lev = $levels_to_go[$ibeg];
26761 #-----------------------------------
26762 # SECTION 1: Undo needless common CI
26763 #-----------------------------------
26765 # We are looking at leading tokens and looking for a sequence all
26766 # at the same level and all at a higher level than enclosing lines.
26768 # For example, we can undo continuation indentation in sort/map/grep
26771 # my $dat1 = pack( "n*",
26772 # map { $_, $lookup->{$_} }
26773 # sort { $a <=> $b }
26774 # grep { $lookup->{$_} ne $default } keys %$lookup );
26778 # my $dat1 = pack( "n*",
26779 # map { $_, $lookup->{$_} }
26780 # sort { $a <=> $b }
26781 # grep { $lookup->{$_} ne $default } keys %$lookup );
26783 if ( $line > 0 && !$skip_SECTION_1 ) {
26785 # if we have started a chain..
26788 # see if it continues..
26789 if ( $lev == $lev_last ) {
26790 if ( $types_to_go[$ibeg] eq 'k'
26791 && $is_sort_map_grep{ $tokens_to_go[$ibeg] } )
26794 # chain continues...
26795 # check for chain ending at end of a statement
26796 my $is_semicolon_terminated = (
26799 $types_to_go[$iend] eq ';'
26801 # with possible side comment
26802 || ( $types_to_go[$iend] eq '#'
26803 && $iend - $ibeg >= 2
26804 && $types_to_go[ $iend - 2 ] eq ';'
26805 && $types_to_go[ $iend - 1 ] eq 'b' )
26810 if ($is_semicolon_terminated);
26818 elsif ( $lev < $lev_last ) {
26820 # chain ends with previous line
26821 $line_2 = $line - 1;
26823 elsif ( $lev > $lev_last ) {
26829 # undo the continuation indentation if a chain ends
26830 if ( defined($line_2) && defined($line_1) ) {
26831 my $continuation_line_count = $line_2 - $line_1 + 1;
26832 @ci_levels_to_go[ @{$ri_first}[ $line_1 .. $line_2 ] ]
26833 = (0) x ($continuation_line_count)
26834 if ( $continuation_line_count >= 0 );
26835 @leading_spaces_to_go[ @{$ri_first}
26836 [ $line_1 .. $line_2 ] ] =
26837 @reduced_spaces_to_go[ @{$ri_first}
26838 [ $line_1 .. $line_2 ] ];
26843 # not in a chain yet..
26846 # look for start of a new sort/map/grep chain
26847 if ( $lev > $lev_last ) {
26848 if ( $types_to_go[$ibeg] eq 'k'
26849 && $is_sort_map_grep{ $tokens_to_go[$ibeg] } )
26857 #-------------------------------------
26858 # SECTION 2: Undo ci at cuddled blocks
26859 #-------------------------------------
26861 # Note that sub get_final_indentation will be called later to
26862 # actually do this, but for now we will tentatively mark cuddled
26863 # lines with ci=0 so that the the -xci loop which follows will be
26864 # correct at cuddles.
26866 $types_to_go[$ibeg] eq '}'
26867 && ( $nesting_depth_to_go[$iend] + 1 ==
26868 $nesting_depth_to_go[$ibeg] )
26871 my $terminal_type = $types_to_go[$iend];
26872 if ( $terminal_type eq '#' && $iend > $ibeg ) {
26873 $terminal_type = $types_to_go[ $iend - 1 ];
26874 if ( $terminal_type eq '#' && $iend - 1 > $ibeg ) {
26875 $terminal_type = $types_to_go[ $iend - 2 ];
26879 # Patch for rt144979, part 2. Coordinated with part 1.
26880 # Skip cuddled braces.
26881 my $seqno_beg = $type_sequence_to_go[$ibeg];
26882 my $is_cuddled_closing_brace = $seqno_beg
26883 && $self->[_ris_cuddled_closing_brace_]->{$seqno_beg};
26885 if ( $terminal_type eq '{' && !$is_cuddled_closing_brace ) {
26886 $ci_levels_to_go[$ibeg] = 0;
26890 #--------------------------------------------------------
26891 # SECTION 3: Undo ci set by sub extended_ci if not needed
26892 #--------------------------------------------------------
26894 # Undo the ci of the leading token if its controlling token
26895 # went out on a previous line without ci
26896 if ( $ci_levels_to_go[$ibeg] ) {
26897 my $Kbeg = $K_to_go[$ibeg];
26898 my $seqno = $rseqno_controlling_my_ci->{$Kbeg};
26899 if ( $seqno && $undo_extended_ci{$seqno} ) {
26901 # but do not undo ci set by the -lp flag
26902 if ( !ref( $reduced_spaces_to_go[$ibeg] ) ) {
26903 $ci_levels_to_go[$ibeg] = 0;
26904 $leading_spaces_to_go[$ibeg] =
26905 $reduced_spaces_to_go[$ibeg];
26910 # Flag any controlling opening tokens in lines without ci. This
26911 # will be used later in the above if statement to undo the ci which
26912 # they added. The array i_controlling_ci[$line] was prepared at
26913 # the top of this routine.
26914 if ( !$ci_levels_to_go[$ibeg]
26915 && defined( $i_controlling_ci[$line] ) )
26917 foreach my $i ( @{ $i_controlling_ci[$line] } ) {
26918 my $seqno = $type_sequence_to_go[$i];
26919 $undo_extended_ci{$seqno} = 1;
26927 } ## end sub undo_ci
26930 { ## begin closure set_logical_padding
26935 my @q = qw( + - * / );
26936 @is_math_op{@q} = (1) x scalar(@q);
26939 sub set_logical_padding {
26941 # Look at a batch of lines and see if extra padding can improve the
26942 # alignment when there are certain leading operators. Here is an
26943 # example, in which some extra space is introduced before
26944 # '( $year' to make it line up with the subsequent lines:
26946 # if ( ( $Year < 1601 )
26947 # || ( $Year > 2899 )
26948 # || ( $EndYear < 1601 )
26949 # || ( $EndYear > 2899 ) )
26951 # &Error_OutOfRange;
26954 my ( $self, $ri_first, $ri_last, $starting_in_quote ) = @_;
26955 my $max_line = @{$ri_first} - 1;
26957 my ( $ibeg, $ibeg_next, $ibegm, $iend, $iendm, $ipad, $pad_spaces,
26958 $tok_next, $type_next, $has_leading_op_next, $has_leading_op );
26960 # Patch to produce padding in the first line of short code blocks.
26961 # This is part of an update to fix cases b562 .. b983.
26962 # This is needed to compensate for a change which was made in 'sub
26963 # starting_one_line_block' to prevent blinkers. Previously, that sub
26964 # would not look at the total block size and rely on sub
26965 # break_long_lines to break up long blocks. Consequently, the
26966 # first line of those batches would end in the opening block brace of a
26967 # sort/map/grep/eval block. When this was changed to immediately check
26968 # for blocks which were too long, the opening block brace would go out
26969 # in a single batch, and the block contents would go out as the next
26970 # batch. This caused the logic in this routine which decides if the
26971 # first line should be padded to be incorrect. To fix this, we set a
26972 # flag if the previous batch ended in an opening sort/map/grep/eval
26973 # block brace, and use it to adjust the logic to compensate.
26975 # For example, the following would have previously been a single batch
26976 # but now is two batches. We want to pad the line starting in '$dir':
26977 # my (@indices) = # batch n-1 (prev batch n)
26978 # sort { # batch n-1 (prev batch n)
26979 # $dir eq 'left' # batch n
26980 # ? $cells[$a] <=> $cells[$b] # batch n
26981 # : $cells[$b] <=> $cells[$a]; # batch n
26982 # } ( 0 .. $#cells ); # batch n
26984 my $rLL = $self->[_rLL_];
26985 my $rblock_type_of_seqno = $self->[_rblock_type_of_seqno_];
26987 my $is_short_block;
26988 if ( $K_to_go[0] > 0 ) {
26989 my $Kp = $K_to_go[0] - 1;
26990 if ( $Kp > 0 && $rLL->[$Kp]->[_TYPE_] eq 'b' ) {
26993 if ( $Kp > 0 && $rLL->[$Kp]->[_TYPE_] eq '#' ) {
26995 if ( $Kp > 0 && $rLL->[$Kp]->[_TYPE_] eq 'b' ) {
26999 my $seqno = $rLL->[$Kp]->[_TYPE_SEQUENCE_];
27001 my $block_type = $rblock_type_of_seqno->{$seqno};
27003 $is_short_block = $is_sort_map_grep_eval{$block_type};
27004 $is_short_block ||= $want_one_line_block{$block_type};
27009 # looking at each line of this batch..
27010 foreach my $line ( 0 .. $max_line - 1 ) {
27012 # see if the next line begins with a logical operator
27013 $ibeg = $ri_first->[$line];
27014 $iend = $ri_last->[$line];
27015 $ibeg_next = $ri_first->[ $line + 1 ];
27016 $tok_next = $tokens_to_go[$ibeg_next];
27017 $type_next = $types_to_go[$ibeg_next];
27019 $has_leading_op_next = ( $tok_next =~ /^\w/ )
27020 ? $is_chain_operator{$tok_next} # + - * / : ? && ||
27021 : $is_chain_operator{$type_next}; # and, or
27023 next unless ($has_leading_op_next);
27025 # next line must not be at lesser depth
27027 if ( $nesting_depth_to_go[$ibeg] >
27028 $nesting_depth_to_go[$ibeg_next] );
27030 # identify the token in this line to be padded on the left
27033 # handle lines at same depth...
27034 if ( $nesting_depth_to_go[$ibeg] ==
27035 $nesting_depth_to_go[$ibeg_next] )
27038 # if this is not first line of the batch ...
27041 # and we have leading operator..
27042 next if $has_leading_op;
27044 # Introduce padding if..
27045 # 1. the previous line is at lesser depth, or
27046 # 2. the previous line ends in an assignment
27047 # 3. the previous line ends in a 'return'
27048 # 4. the previous line ends in a comma
27049 # Example 1: previous line at lesser depth
27050 # if ( ( $Year < 1601 ) # <- we are here but
27051 # || ( $Year > 2899 ) # list has not yet
27052 # || ( $EndYear < 1601 ) # collapsed vertically
27053 # || ( $EndYear > 2899 ) )
27056 # Example 2: previous line ending in assignment:
27058 # $year % 4 ? 0 # <- We are here
27059 # : $year % 100 ? 1
27060 # : $year % 400 ? 0
27063 # Example 3: previous line ending in comma:
27070 # be sure levels agree (never indent after an indented 'if')
27072 if ( $levels_to_go[$ibeg] ne $levels_to_go[$ibeg_next] );
27074 # allow padding on first line after a comma but only if:
27075 # (1) this is line 2 and
27076 # (2) there are at more than three lines and
27077 # (3) lines 3 and 4 have the same leading operator
27078 # These rules try to prevent padding within a long
27079 # comma-separated list.
27081 if ( $types_to_go[$iendm] eq ','
27085 my $ibeg_next_next = $ri_first->[ $line + 2 ];
27086 my $tok_next_next = $tokens_to_go[$ibeg_next_next];
27087 $ok_comma = $tok_next_next eq $tok_next;
27092 $is_assignment{ $types_to_go[$iendm] }
27094 || ( $nesting_depth_to_go[$ibegm] <
27095 $nesting_depth_to_go[$ibeg] )
27096 || ( $types_to_go[$iendm] eq 'k'
27097 && $tokens_to_go[$iendm] eq 'return' )
27100 # we will add padding before the first token
27104 # for first line of the batch..
27107 # WARNING: Never indent if first line is starting in a
27108 # continued quote, which would change the quote.
27109 next if $starting_in_quote;
27111 # if this is text after closing '}'
27112 # then look for an interior token to pad
27113 if ( $types_to_go[$ibeg] eq '}' ) {
27117 # otherwise, we might pad if it looks really good
27118 elsif ($is_short_block) {
27123 # we might pad token $ibeg, so be sure that it
27124 # is at the same depth as the next line.
27126 if ( $nesting_depth_to_go[$ibeg] !=
27127 $nesting_depth_to_go[$ibeg_next] );
27129 # We can pad on line 1 of a statement if at least 3
27130 # lines will be aligned. Otherwise, it
27131 # can look very confusing.
27133 # We have to be careful not to pad if there are too few
27134 # lines. The current rule is:
27135 # (1) in general we require at least 3 consecutive lines
27136 # with the same leading chain operator token,
27137 # (2) but an exception is that we only require two lines
27138 # with leading colons if there are no more lines. For example,
27139 # the first $i in the following snippet would get padding
27140 # by the second rule:
27142 # $i == 1 ? ( "First", "Color" )
27143 # : $i == 2 ? ( "Then", "Rarity" )
27144 # : ( "Then", "Name" );
27146 next if ( $max_line <= 1 );
27148 my $leading_token = $tokens_to_go[$ibeg_next];
27151 # never indent line 1 of a '.' series because
27152 # previous line is most likely at same level.
27153 # TODO: we should also look at the leading_spaces
27154 # of the last output line and skip if it is same
27156 next if ( $leading_token eq '.' );
27159 foreach my $l ( 2 .. 3 ) {
27160 last if ( $line + $l > $max_line );
27162 my $ibeg_next_next = $ri_first->[ $line + $l ];
27164 if ( $tokens_to_go[$ibeg_next_next] eq
27166 $tokens_differ = 1;
27169 next if ($tokens_differ);
27170 next if ( $count < 3 && $leading_token ne ':' );
27176 # find interior token to pad if necessary
27177 if ( !defined($ipad) ) {
27179 foreach my $i ( $ibeg .. $iend - 1 ) {
27181 # find any unclosed container
27183 unless ( $type_sequence_to_go[$i]
27184 && defined( $mate_index_to_go[$i] )
27185 && $mate_index_to_go[$i] > $iend );
27187 # find next nonblank token to pad
27188 $ipad = $inext_to_go[$i];
27191 last if ( !$ipad || $ipad > $iend );
27194 # We cannot pad the first leading token of a file because
27195 # it could cause a bug in which the starting indentation
27196 # level is guessed incorrectly each time the code is run
27197 # though perltidy, thus causing the code to march off to
27198 # the right. For example, the following snippet would have
27201 ## ov_method mycan( $package, '(""' ), $package
27202 ## or ov_method mycan( $package, '(0+' ), $package
27203 ## or ov_method mycan( $package, '(bool' ), $package
27204 ## or ov_method mycan( $package, '(nomethod' ), $package;
27206 # If this snippet is within a block this won't happen
27207 # unless the user just processes the snippet alone within
27208 # an editor. In that case either the user will see and
27209 # fix the problem or it will be corrected next time the
27210 # entire file is processed with perltidy.
27211 my $this_batch = $self->[_this_batch_];
27212 my $peak_batch_size = $this_batch->[_peak_batch_size_];
27213 next if ( $ipad == 0 && $peak_batch_size <= 1 );
27215 # next line must not be at greater depth
27216 my $iend_next = $ri_last->[ $line + 1 ];
27218 if ( $nesting_depth_to_go[ $iend_next + 1 ] >
27219 $nesting_depth_to_go[$ipad] );
27221 # lines must be somewhat similar to be padded..
27222 my $inext_next = $inext_to_go[$ibeg_next];
27223 my $type = $types_to_go[$ipad];
27225 # see if there are multiple continuation lines
27226 my $logical_continuation_lines = 1;
27227 if ( $line + 2 <= $max_line ) {
27228 my $leading_token = $tokens_to_go[$ibeg_next];
27229 my $ibeg_next_next = $ri_first->[ $line + 2 ];
27230 if ( $tokens_to_go[$ibeg_next_next] eq $leading_token
27231 && $nesting_depth_to_go[$ibeg_next] eq
27232 $nesting_depth_to_go[$ibeg_next_next] )
27234 $logical_continuation_lines++;
27238 # see if leading types match
27239 my $types_match = $types_to_go[$inext_next] eq $type;
27240 my $matches_without_bang;
27242 # if first line has leading ! then compare the following token
27243 if ( !$types_match && $type eq '!' ) {
27244 $types_match = $matches_without_bang =
27245 $types_to_go[$inext_next] eq $types_to_go[ $ipad + 1 ];
27249 # either we have multiple continuation lines to follow
27250 # and we are not padding the first token
27252 $logical_continuation_lines > 1
27253 && ( $ipad > 0 || $is_short_block )
27262 # and keywords must match if keyword
27265 && $tokens_to_go[$ipad] ne $tokens_to_go[$inext_next]
27271 #----------------------begin special checks--------------
27274 # A check is needed before we can make the pad.
27275 # If we are in a list with some long items, we want each
27276 # item to stand out. So in the following example, the
27277 # first line beginning with '$casefold->' would look good
27278 # padded to align with the next line, but then it
27279 # would be indented more than the last line, so we
27283 # $casefold->{code} eq '0041'
27284 # && $casefold->{status} eq 'C'
27285 # && $casefold->{mapping} eq '0061',
27290 # It would be faster, and almost as good, to use a comma
27291 # count, and not pad if comma_count > 1 and the previous
27292 # line did not end with a comma.
27296 my $ibg = $ri_first->[ $line + 1 ];
27297 my $depth = $nesting_depth_to_go[ $ibg + 1 ];
27299 # just use simplified formula for leading spaces to avoid
27300 # needless sub calls
27301 my $lsp = $levels_to_go[$ibg] + $ci_levels_to_go[$ibg];
27303 # look at each line beyond the next ..
27305 foreach my $ltest ( $line + 2 .. $max_line ) {
27307 my $ibeg_t = $ri_first->[$l];
27309 # quit looking at the end of this container
27311 if ( $nesting_depth_to_go[ $ibeg_t + 1 ] < $depth )
27312 || ( $nesting_depth_to_go[$ibeg_t] < $depth );
27314 # cannot do the pad if a later line would be
27316 if ( $levels_to_go[$ibeg_t] + $ci_levels_to_go[$ibeg_t] <
27324 # don't pad if we end in a broken list
27325 if ( $l == $max_line ) {
27326 my $i2 = $ri_last->[$l];
27327 if ( $types_to_go[$i2] eq '#' ) {
27328 my $i1 = $ri_first->[$l];
27329 next if terminal_type_i( $i1, $i2 ) eq ',';
27334 # a minus may introduce a quoted variable, and we will
27335 # add the pad only if this line begins with a bare word,
27336 # such as for the word 'Button' here:
27338 # Button => "Print letter \"~$_\"",
27339 # -command => [ sub { print "$_[0]\n" }, $_ ],
27340 # -accelerator => "Meta+$_"
27343 # On the other hand, if 'Button' is quoted, it looks best
27346 # 'Button' => "Print letter \"~$_\"",
27347 # -command => [ sub { print "$_[0]\n" }, $_ ],
27348 # -accelerator => "Meta+$_"
27350 if ( $types_to_go[$ibeg_next] eq 'm' ) {
27351 $ok_to_pad = 0 if $types_to_go[$ibeg] eq 'Q';
27354 next unless $ok_to_pad;
27356 #----------------------end special check---------------
27358 my $length_1 = total_line_length( $ibeg, $ipad - 1 );
27359 my $length_2 = total_line_length( $ibeg_next, $inext_next - 1 );
27360 $pad_spaces = $length_2 - $length_1;
27362 # If the first line has a leading ! and the second does
27363 # not, then remove one space to try to align the next
27364 # leading characters, which are often the same. For example:
27366 # || $ts == $self->Holder
27367 # || $self->Holder->Type eq "Arena" )
27369 # This usually helps readability, but if there are subsequent
27370 # ! operators things will still get messed up. For example:
27372 # if ( !exists $Net::DNS::typesbyname{$qtype}
27373 # && exists $Net::DNS::classesbyname{$qtype}
27374 # && !exists $Net::DNS::classesbyname{$qclass}
27375 # && exists $Net::DNS::typesbyname{$qclass} )
27376 # We can't fix that.
27377 if ($matches_without_bang) { $pad_spaces-- }
27379 # make sure this won't change if -lp is used
27380 my $indentation_1 = $leading_spaces_to_go[$ibeg];
27381 if ( ref($indentation_1)
27382 && $indentation_1->get_recoverable_spaces() == 0 )
27384 my $indentation_2 = $leading_spaces_to_go[$ibeg_next];
27385 if ( ref($indentation_2)
27386 && $indentation_2->get_recoverable_spaces() != 0 )
27392 # we might be able to handle a pad of -1 by removing a blank
27394 if ( $pad_spaces < 0 ) {
27396 # Deactivated for -kpit due to conflict. This block deletes
27397 # a space in an attempt to improve alignment in some cases,
27398 # but it may conflict with user spacing requests. For now
27399 # it is just deactivated if the -kpit option is used.
27400 if ( $pad_spaces == -1 ) {
27402 && $types_to_go[ $ipad - 1 ] eq 'b'
27403 && !%keyword_paren_inner_tightness )
27405 $self->pad_token( $ipad - 1, $pad_spaces );
27411 # now apply any padding for alignment
27412 if ( $ipad >= 0 && $pad_spaces ) {
27414 my $length_t = total_line_length( $ibeg, $iend );
27415 if ( $pad_spaces + $length_t <=
27416 $maximum_line_length_at_level[ $levels_to_go[$ibeg] ] )
27418 $self->pad_token( $ipad, $pad_spaces );
27426 $has_leading_op = $has_leading_op_next;
27427 } ## end of loop over lines
27429 } ## end sub set_logical_padding
27430 } ## end closure set_logical_padding
27434 # insert $pad_spaces before token number $ipad
27435 my ( $self, $ipad, $pad_spaces ) = @_;
27436 my $rLL = $self->[_rLL_];
27437 my $KK = $K_to_go[$ipad];
27438 my $tok = $rLL->[$KK]->[_TOKEN_];
27439 my $tok_len = $rLL->[$KK]->[_TOKEN_LENGTH_];
27441 if ( $pad_spaces > 0 ) {
27442 $tok = SPACE x $pad_spaces . $tok;
27443 $tok_len += $pad_spaces;
27445 elsif ( $pad_spaces == 0 ) {
27448 elsif ( $pad_spaces == -1 && $tokens_to_go[$ipad] eq SPACE ) {
27449 $tok = EMPTY_STRING;
27456 && Fault("unexpected request for pad spaces = $pad_spaces\n");
27460 $tok = $rLL->[$KK]->[_TOKEN_] = $tok;
27461 $tok_len = $rLL->[$KK]->[_TOKEN_LENGTH_] = $tok_len;
27463 $token_lengths_to_go[$ipad] += $pad_spaces;
27464 $tokens_to_go[$ipad] = $tok;
27466 foreach my $i ( $ipad .. $max_index_to_go ) {
27467 $summed_lengths_to_go[ $i + 1 ] += $pad_spaces;
27470 } ## end sub pad_token
27474 # Remove one indentation space from unbroken containers marked with
27475 # 'K_extra_space'. These are mostly two-line lists with short names
27476 # formatted with -xlp -pt=2.
27478 # Before this fix (extra space in line 2):
27479 # is($module->VERSION, $expected,
27480 # "$main_module->VERSION matches $module->VERSION ($expected)");
27483 # is($module->VERSION, $expected,
27484 # "$main_module->VERSION matches $module->VERSION ($expected)");
27487 # - This fixes issue git #106
27488 # - This must be called after 'set_logical_padding'.
27489 # - This is currently only applied to -xlp. It would also work for -lp
27490 # but that style is essentially frozen.
27492 my ( $self, $ri_first, $ri_last ) = @_;
27494 # Must be 2 or more lines
27495 return unless ( @{$ri_first} > 1 );
27497 # Pull indentation object from start of second line
27498 my $ibeg_1 = $ri_first->[1];
27499 my $lp_object = $leading_spaces_to_go[$ibeg_1];
27500 return if ( !ref($lp_object) );
27502 # This only applies to an indentation object with a marked token
27503 my $K_extra_space = $lp_object->get_K_extra_space();
27504 return unless ($K_extra_space);
27506 # Look for the marked token within the first line of this batch
27507 my $ibeg_0 = $ri_first->[0];
27508 my $iend_0 = $ri_last->[0];
27509 my $ii = $ibeg_0 + $K_extra_space - $K_to_go[$ibeg_0];
27510 return if ( $ii <= $ibeg_0 || $ii > $iend_0 );
27512 # Skip padded tokens, they have already been aligned
27513 my $tok = $tokens_to_go[$ii];
27514 return if ( substr( $tok, 0, 1 ) eq SPACE );
27516 # Skip 'if'-like statements, this does not improve them
27518 if ( $types_to_go[$ibeg_0] eq 'k'
27519 && $is_if_unless_elsif{ $tokens_to_go[$ibeg_0] } );
27521 # Looks okay, reduce indentation by 1 space if possible
27522 my $spaces = $lp_object->get_spaces();
27523 if ( $spaces > 0 ) {
27524 $lp_object->decrease_SPACES(1);
27528 } ## end sub xlp_tweak
27530 { ## begin closure make_alignment_patterns
27535 my %is_my_local_our;
27538 my %is_binary_type;
27539 my %is_binary_keyword;
27544 # Note: %block_type_map is now global to enable the -gal=s option
27546 # map certain keywords to the same 'if' class to align
27547 # long if/elsif sequences. [elsif.pl]
27553 'default' => 'given',
27554 'case' => 'switch',
27556 # treat an 'undef' similar to numbers and quotes
27560 # map certain operators to the same class for pattern matching
27575 # leading keywords which to skip for efficiency when making parenless
27577 my @q = qw( my local our return );
27578 @{is_my_local_our}{@q} = (1) x scalar(@q);
27580 # leading keywords where we should just join one token to form
27583 @{is_use_like}{@q} = (1) x scalar(@q);
27585 # leading token types which may be used to make a container name
27587 @{is_kwU}{@q} = (1) x scalar(@q);
27589 # token types which prevent using leading word as a container name
27591 x / : % . | ^ < = > || >= != *= => !~ == && |= .= -= =~ += <= %= ^= x= ~~ ** << /=
27592 &= // >> ~. &. |. ^.
27593 **= <<= >>= &&= ||= //= <=> !~~ &.= |.= ^.= <<~
27596 @{is_binary_type}{@q} = (1) x scalar(@q);
27598 # token keywords which prevent using leading word as a container name
27599 @q = qw(and or err eq ne cmp);
27600 @is_binary_keyword{@q} = (1) x scalar(@q);
27602 # Some common function calls whose args can be aligned. These do not
27603 # give good alignments if the lengths differ significantly.
27605 'unlike' => 'like',
27607 ##'is_deeply' => 'is', # poor; names lengths too different
27612 sub make_alignment_patterns {
27614 my ( $self, $ibeg, $iend, $ralignment_type_to_go, $alignment_count,
27618 #------------------------------------------------------------------
27619 # This sub creates arrays of vertical alignment info for one output
27621 #------------------------------------------------------------------
27623 # Input parameters:
27624 # $ibeg, $iend - index range of this line in the _to_go arrays
27625 # $ralignment_type_to_go - alignment type of tokens, like '=', if any
27626 # $alignment_count - number of alignment tokens in the line
27627 # $ralignment_hash - this contains all of the alignments for this
27628 # line. It is not yet used but is available for future coding in
27629 # case there is a need to do a preliminary scan of alignment tokens.
27631 # The arrays which are created contain strings that can be tested by
27632 # the vertical aligner to see if consecutive lines can be aligned
27635 # The four arrays are indexed on the vertical
27636 # alignment fields and are:
27637 # @tokens - a list of any vertical alignment tokens for this line.
27638 # These are tokens, such as '=' '&&' '#' etc which
27639 # we want to might align vertically. These are
27640 # decorated with various information such as
27641 # nesting depth to prevent unwanted vertical
27642 # alignment matches.
27643 # @fields - the actual text of the line between the vertical alignment
27645 # @patterns - a modified list of token types, one for each alignment
27646 # field. These should normally each match before alignment is
27647 # allowed, even when the alignment tokens match.
27648 # @field_lengths - the display width of each field
27652 if ( defined($ralignment_hash) ) {
27653 $new_count = keys %{$ralignment_hash};
27655 my $old_count = $alignment_count;
27656 $old_count = 0 unless ($old_count);
27657 if ( $new_count != $old_count ) {
27658 my $K = $K_to_go[$ibeg];
27659 my $rLL = $self->[_rLL_];
27660 my $lnl = $rLL->[$K]->[_LINE_INDEX_];
27662 "alignment hash token count gives count=$new_count but old count is $old_count near line=$lnl\n"
27667 # -------------------------------------
27668 # Shortcut for lines without alignments
27669 # -------------------------------------
27670 if ( !$alignment_count ) {
27672 my $rfield_lengths =
27673 [ $summed_lengths_to_go[ $iend + 1 ] -
27674 $summed_lengths_to_go[$ibeg] ];
27677 if ( $ibeg == $iend ) {
27678 $rfields = [ $tokens_to_go[$ibeg] ];
27679 $rpatterns = [ $types_to_go[$ibeg] ];
27683 [ join( EMPTY_STRING, @tokens_to_go[ $ibeg .. $iend ] ) ];
27685 [ join( EMPTY_STRING, @types_to_go[ $ibeg .. $iend ] ) ];
27687 return [ $rtokens, $rfields, $rpatterns, $rfield_lengths ];
27690 my $i_start = $ibeg;
27692 my $i_depth_prev = $i_start;
27693 my $depth_prev = $depth;
27694 my %container_name = ( 0 => EMPTY_STRING );
27699 my @field_lengths = ();
27701 #-------------------------------------------------------------
27702 # Make a container name for any uncontained commas, issue c089
27703 #-------------------------------------------------------------
27704 # This is a generalization of the fix for rt136416 which was a
27705 # specialized patch just for 'use Module' statements.
27706 # We restrict this to semicolon-terminated statements; that way
27707 # we know that the top level commas are not in a list container.
27708 if ( $ibeg == 0 && $iend == $max_index_to_go ) {
27709 my $iterm = $max_index_to_go;
27710 if ( $types_to_go[$iterm] eq '#' ) {
27711 $iterm = iprev_to_go($iterm);
27714 # Alignment lines ending like '=> sub {'; fixes issue c093
27715 my $term_type_ok = $types_to_go[$iterm] eq ';';
27717 $tokens_to_go[$iterm] eq '{' && $block_type_to_go[$iterm];
27719 if ( $iterm > $ibeg
27721 && !$is_my_local_our{ $tokens_to_go[$ibeg] }
27722 && $levels_to_go[$ibeg] eq $levels_to_go[$iterm] )
27724 $container_name{'0'} =
27725 make_uncontained_comma_name( $iterm, $ibeg, $iend );
27729 #--------------------------------
27730 # Begin main loop over all tokens
27731 #--------------------------------
27732 my $j = 0; # field index
27734 $patterns[0] = EMPTY_STRING;
27736 for my $i ( $ibeg .. $iend ) {
27738 #-------------------------------------------------------------
27739 # Part 1: keep track of containers balanced on this line only.
27740 #-------------------------------------------------------------
27741 # These are used below to prevent unwanted cross-line alignments.
27742 # Unbalanced containers already avoid aligning across
27743 # container boundaries.
27744 my $type = $types_to_go[$i];
27745 if ( $type_sequence_to_go[$i] ) {
27746 my $token = $tokens_to_go[$i];
27747 if ( $is_opening_token{$token} ) {
27749 # if container is balanced on this line...
27750 my $i_mate = $mate_index_to_go[$i];
27751 if ( !defined($i_mate) ) { $i_mate = -1 }
27752 if ( $i_mate > $i && $i_mate <= $iend ) {
27753 $i_depth_prev = $i;
27754 $depth_prev = $depth;
27757 # Append the previous token name to make the container name
27758 # more unique. This name will also be given to any commas
27759 # within this container, and it helps avoid undesirable
27760 # alignments of different types of containers.
27762 # Containers beginning with { and [ are given those names
27763 # for uniqueness. That way commas in different containers
27764 # will not match. Here is an example of what this prevents:
27765 # a => [ 1, 2, 3 ],
27766 # b => { b1 => 4, b2 => 5 },
27767 # Here is another example of what we avoid by labeling the
27770 # is_d( [ $a, $a ], [ $b, $c ] );
27771 # is_d( { foo => $a, bar => $a }, { foo => $b, bar => $c } );
27772 # is_d( [ \$a, \$a ], [ \$b, \$c ] );
27775 $token eq '(' ? $self->make_paren_name($i) : $token;
27777 # name cannot be '.', so change to something else if so
27778 if ( $name eq '.' ) { $name = 'dot' }
27780 $container_name{$depth} = "+" . $name;
27782 # Make the container name even more unique if necessary.
27783 # If we are not vertically aligning this opening paren,
27784 # append a character count to avoid bad alignment since
27785 # it usually looks bad to align commas within containers
27786 # for which the opening parens do not align. Here
27787 # is an example very BAD alignment of commas (because
27788 # the atan2 functions are not all aligned):
27790 # $X * $RTYSQP1 * atan2( $X, $RTYSQP1 ) +
27791 # $Y * $RTXSQP1 * atan2( $Y, $RTXSQP1 ) -
27792 # $X * atan2( $X, 1 ) -
27793 # $Y * atan2( $Y, 1 );
27795 # On the other hand, it is usually okay to align commas
27796 # if opening parens align, such as:
27797 # glVertex3d( $cx + $s * $xs, $cy, $z );
27798 # glVertex3d( $cx, $cy + $s * $ys, $z );
27799 # glVertex3d( $cx - $s * $xs, $cy, $z );
27800 # glVertex3d( $cx, $cy - $s * $ys, $z );
27802 # To distinguish between these situations, we append
27803 # the length of the line from the previous matching
27804 # token, or beginning of line, to the function name.
27805 # This will allow the vertical aligner to reject
27806 # undesirable matches.
27808 # if we are not aligning on this paren...
27809 if ( !$ralignment_type_to_go->[$i] ) {
27811 my $len = length_tag( $i, $ibeg, $i_start );
27813 # tack this length onto the container name to try
27814 # to make a unique token name
27815 $container_name{$depth} .= "-" . $len;
27816 } ## end if ( !$ralignment_type_to_go...)
27817 } ## end if ( $i_mate > $i && $i_mate...)
27818 } ## end if ( $is_opening_token...)
27820 elsif ( $is_closing_type{$token} ) {
27821 $i_depth_prev = $i;
27822 $depth_prev = $depth;
27823 $depth-- if $depth > 0;
27825 } ## end if ( $type_sequence_to_go...)
27827 #------------------------------------------------------------
27828 # Part 2: if we find a new synchronization token, we are done
27830 #------------------------------------------------------------
27831 if ( $i > $i_start && $ralignment_type_to_go->[$i] ) {
27833 my $tok = my $raw_tok = $ralignment_type_to_go->[$i];
27835 # map similar items
27836 my $tok_map = $operator_map{$tok};
27837 $tok = $tok_map if ($tok_map);
27839 # make separators in different nesting depths unique
27840 # by appending the nesting depth digit.
27841 if ( $raw_tok ne '#' ) {
27842 $tok .= "$nesting_depth_to_go[$i]";
27845 # also decorate commas with any container name to avoid
27846 # unwanted cross-line alignments.
27847 if ( $raw_tok eq ',' || $raw_tok eq '=>' ) {
27849 # If we are at an opening token which increased depth, we have
27850 # to use the name from the previous depth.
27851 my $depth_last = $i == $i_depth_prev ? $depth_prev : $depth;
27853 ( $depth_last < $depth ? $depth_last : $depth );
27854 if ( $container_name{$depth_p} ) {
27855 $tok .= $container_name{$depth_p};
27859 # Patch to avoid aligning leading and trailing if, unless.
27860 # Mark trailing if, unless statements with container names.
27861 # This makes them different from leading if, unless which
27862 # are not so marked at present. If we ever need to name
27863 # them too, we could use ci to distinguish them.
27864 # Example problem to avoid:
27865 # return ( 2, "DBERROR" )
27866 # if ( $retval == 2 );
27867 # if ( scalar @_ ) {
27868 # my ( $a, $b, $c, $d, $e, $f ) = @_;
27870 if ( $raw_tok eq '(' ) {
27871 if ( $ci_levels_to_go[$ibeg]
27872 && $container_name{$depth} =~ /^\+(if|unless)/ )
27874 $tok .= $container_name{$depth};
27878 # Decorate block braces with block types to avoid
27879 # unwanted alignments such as the following:
27880 # foreach ( @{$routput_array} ) { $fh->print($_) }
27881 # eval { $fh->close() };
27882 if ( $raw_tok eq '{' && $block_type_to_go[$i] ) {
27883 my $block_type = $block_type_to_go[$i];
27885 # map certain related block types to allow
27886 # else blocks to align
27887 $block_type = $block_type_map{$block_type}
27888 if ( defined( $block_type_map{$block_type} ) );
27890 # remove sub names to allow one-line sub braces to align
27891 # regardless of name
27892 if ( $block_type =~ /$SUB_PATTERN/ ) { $block_type = 'sub' }
27894 # allow all control-type blocks to align
27895 if ( $block_type =~ /^[A-Z]+$/ ) { $block_type = 'BEGIN' }
27897 $tok .= $block_type;
27900 # Mark multiple copies of certain tokens with the copy number
27901 # This will allow the aligner to decide if they are matched.
27902 # For now, only do this for equals. For example, the two
27903 # equals on the next line will be labeled '=0' and '=0.2'.
27904 # Later, the '=0.2' will be ignored in alignment because it
27907 # $| = $debug = 1 if $opt_d;
27908 # $full_index = 1 if $opt_i;
27910 if ( $raw_tok eq '=' || $raw_tok eq '=>' ) {
27911 $token_count{$tok}++;
27912 if ( $token_count{$tok} > 1 ) {
27913 $tok .= '.' . $token_count{$tok};
27917 # concatenate the text of the consecutive tokens to form
27920 join( EMPTY_STRING, @tokens_to_go[ $i_start .. $i - 1 ] ) );
27922 push @field_lengths,
27923 $summed_lengths_to_go[$i] - $summed_lengths_to_go[$i_start];
27925 # store the alignment token for this field
27926 push( @tokens, $tok );
27928 # get ready for the next batch
27931 $patterns[$j] = EMPTY_STRING;
27932 } ## end if ( new synchronization token
27934 #-----------------------------------------------
27935 # Part 3: continue accumulating the next pattern
27936 #-----------------------------------------------
27938 # for keywords we have to use the actual text
27939 if ( $type eq 'k' ) {
27941 my $tok_fix = $tokens_to_go[$i];
27943 # but map certain keywords to a common string to allow
27945 $tok_fix = $keyword_map{$tok_fix}
27946 if ( defined( $keyword_map{$tok_fix} ) );
27947 $patterns[$j] .= $tok_fix;
27950 elsif ( $type eq 'b' ) {
27951 $patterns[$j] .= $type;
27954 # Mark most things before arrows as a quote to
27955 # get them to line up. Testfile: mixed.pl.
27957 # handle $type =~ /^[wnC]$/
27958 elsif ( $is_w_n_C{$type} ) {
27960 my $type_fix = $type;
27962 if ( $i < $iend - 1 ) {
27963 my $next_type = $types_to_go[ $i + 1 ];
27964 my $i_next_nonblank =
27965 ( ( $next_type eq 'b' ) ? $i + 2 : $i + 1 );
27967 if ( $types_to_go[$i_next_nonblank] eq '=>' ) {
27970 # Patch to ignore leading minus before words,
27971 # by changing pattern 'mQ' into just 'Q',
27972 # so that we can align things like this:
27973 # Button => "Print letter \"~$_\"",
27974 # -command => [ sub { print "$_[0]\n" }, $_ ],
27975 if ( $patterns[$j] eq 'm' ) {
27976 $patterns[$j] = EMPTY_STRING;
27981 # Convert a bareword within braces into a quote for
27982 # matching. This will allow alignment of expressions like
27984 # local ( $SIG{'INT'} ) = IGNORE;
27985 # local ( $SIG{ALRM} ) = 'POSTMAN';
27989 && $types_to_go[ $i - 1 ] eq 'L'
27990 && $types_to_go[ $i + 1 ] eq 'R' )
27995 # patch to make numbers and quotes align
27996 if ( $type eq 'n' ) { $type_fix = 'Q' }
27998 $patterns[$j] .= $type_fix;
27999 } ## end elsif ( $is_w_n_C{$type} )
28001 # ignore any ! in patterns
28002 elsif ( $type eq '!' ) { }
28006 $patterns[$j] .= $type;
28008 # remove any zero-level name at first fat comma
28009 if ( $depth == 0 && $type eq '=>' ) {
28010 $container_name{$depth} = EMPTY_STRING;
28014 } ## end for my $i ( $ibeg .. $iend)
28016 #---------------------------------------------------------------
28017 # End of main loop .. join text of tokens to make the last field
28018 #---------------------------------------------------------------
28020 join( EMPTY_STRING, @tokens_to_go[ $i_start .. $iend ] ) );
28021 push @field_lengths,
28022 $summed_lengths_to_go[ $iend + 1 ] - $summed_lengths_to_go[$i_start];
28024 return [ \@tokens, \@fields, \@patterns, \@field_lengths ];
28025 } ## end sub make_alignment_patterns
28027 sub make_uncontained_comma_name {
28028 my ( $iterm, $ibeg, $iend ) = @_;
28030 # Make a container name by combining all leading barewords,
28031 # keywords and functions.
28032 my $name = EMPTY_STRING;
28037 for ( $ibeg .. $iterm ) {
28038 my $type = $types_to_go[$_];
28040 if ( $type eq 'b' ) {
28045 my $token = $tokens_to_go[$_];
28047 # Give up if we find an opening paren, binary operator or
28048 # comma within or after the proposed container name.
28050 || $is_binary_type{$type}
28051 || $type eq 'k' && $is_binary_keyword{$token} )
28053 $name = EMPTY_STRING;
28057 # The container name is only built of certain types:
28058 last if ( !$is_kwU{$type} );
28060 # Normally it is made of one word, but two words for 'use'
28061 if ( $count == 0 ) {
28063 && $is_use_like{ $tokens_to_go[$_] } )
28071 elsif ( defined($count_max) && $count >= $count_max ) {
28075 if ( defined( $name_map{$token} ) ) {
28076 $token = $name_map{$token};
28079 $name .= SPACE . $token;
28084 # Require a space after the container name token(s)
28086 && defined($ilast_blank)
28087 && $ilast_blank > $iname_end )
28089 $name = substr( $name, 1 );
28092 } ## end sub make_uncontained_comma_name
28096 my ( $i, $ibeg, $i_start ) = @_;
28098 # Generate a line length to be used as a tag for rejecting bad
28099 # alignments. The tag is the length of the line from the previous
28100 # matching token, or beginning of line, to the function name. This
28101 # will allow the vertical aligner to reject undesirable matches.
28103 # The basic method: sum length from previous alignment
28104 my $len = token_sequence_length( $i_start, $i - 1 );
28106 # Minor patch: do not include the length of any '!'.
28107 # Otherwise, commas in the following line will not
28109 # ok( 20, tapprox( ( pdl 2, 3 ), ( pdl 2, 3 ) ) );
28110 # ok( 21, !tapprox( ( pdl 2, 3 ), ( pdl 2, 4 ) ) );
28111 if ( grep { $_ eq '!' } @types_to_go[ $i_start .. $i - 1 ] ) {
28115 if ( $i_start == $ibeg ) {
28117 # For first token, use distance from start of
28118 # line but subtract off the indentation due to
28119 # level. Otherwise, results could vary with
28122 leading_spaces_to_go($ibeg) -
28123 $levels_to_go[$i_start] * $rOpts_indent_columns;
28125 if ( $len < 0 ) { $len = 0 }
28127 } ## end sub length_tag
28129 } ## end closure make_alignment_patterns
28131 sub make_paren_name {
28132 my ( $self, $i ) = @_;
28134 # The token at index $i is a '('.
28135 # Create an alignment name for it to avoid incorrect alignments.
28137 # Start with the name of the previous nonblank token...
28138 my $name = EMPTY_STRING;
28140 return EMPTY_STRING if ( $im < 0 );
28141 if ( $types_to_go[$im] eq 'b' ) { $im--; }
28142 return EMPTY_STRING if ( $im < 0 );
28143 $name = $tokens_to_go[$im];
28145 # Prepend any sub name to an isolated -> to avoid unwanted alignments
28146 # [test case is test8/penco.pl]
28147 if ( $name eq '->' ) {
28149 if ( $im >= 0 && $types_to_go[$im] ne 'b' ) {
28150 $name = $tokens_to_go[$im] . $name;
28154 # Finally, remove any leading arrows
28155 if ( substr( $name, 0, 2 ) eq '->' ) {
28156 $name = substr( $name, 2 );
28159 } ## end sub make_paren_name
28161 { ## begin closure get_final_indentation
28163 my ( $last_indentation_written, $last_unadjusted_indentation,
28164 $last_leading_token );
28166 sub initialize_get_final_indentation {
28167 $last_indentation_written = 0;
28168 $last_unadjusted_indentation = 0;
28169 $last_leading_token = EMPTY_STRING;
28171 } ## end sub initialize_get_final_indentation
28173 sub get_final_indentation {
28184 $rindentation_list,
28186 $starting_in_quote,
28187 $is_static_block_comment,
28191 #--------------------------------------------------------------
28192 # This routine makes any necessary adjustments to get the final
28193 # indentation of a line in the Formatter.
28194 #--------------------------------------------------------------
28196 # It starts with the basic indentation which has been defined for the
28197 # leading token, and then takes into account any options that the user
28198 # has set regarding special indenting and outdenting.
28200 # This routine has to resolve a number of complex interacting issues,
28202 # 1. The various -cti=n type flags, which contain the desired change in
28203 # indentation for lines ending in commas and semicolons, should be
28205 # 2. qw quotes require special processing and do not fit perfectly
28206 # with normal containers,
28207 # 3. formatting with -wn can complicate things, especially with qw
28209 # 4. formatting with the -lp option is complicated, and does not
28210 # work well with qw quotes and with -wn formatting.
28211 # 5. a number of special situations, such as 'cuddled' formatting.
28212 # 6. This routine is mainly concerned with outdenting closing tokens
28213 # but note that there is some overlap with the functions of sub
28214 # undo_ci, which was processed earlier, so care has to be taken to
28215 # keep them coordinated.
28217 # Find the last code token of this line
28218 my $i_terminal = $iend;
28219 my $terminal_type = $types_to_go[$iend];
28220 if ( $terminal_type eq '#' && $i_terminal > $ibeg ) {
28222 $terminal_type = $types_to_go[$i_terminal];
28223 if ( $terminal_type eq 'b' && $i_terminal > $ibeg ) {
28225 $terminal_type = $types_to_go[$i_terminal];
28229 my $is_outdented_line;
28231 my $type_beg = $types_to_go[$ibeg];
28232 my $token_beg = $tokens_to_go[$ibeg];
28233 my $level_beg = $levels_to_go[$ibeg];
28234 my $block_type_beg = $block_type_to_go[$ibeg];
28235 my $leading_spaces_beg = $leading_spaces_to_go[$ibeg];
28236 my $seqno_beg = $type_sequence_to_go[$ibeg];
28237 my $is_closing_type_beg = $is_closing_type{$type_beg};
28239 # QW INDENTATION PATCH 3:
28240 my $seqno_qw_closing;
28241 if ( $type_beg eq 'q' && $ibeg == 0 ) {
28242 my $KK = $K_to_go[$ibeg];
28243 $seqno_qw_closing =
28244 $self->[_rending_multiline_qw_seqno_by_K_]->{$KK};
28247 my $is_semicolon_terminated = $terminal_type eq ';'
28248 && ( $nesting_depth_to_go[$iend] < $nesting_depth_to_go[$ibeg]
28249 || $seqno_qw_closing );
28251 # NOTE: A future improvement would be to make it semicolon terminated
28252 # even if it does not have a semicolon but is followed by a closing
28253 # block brace. This would undo ci even for something like the
28254 # following, in which the final paren does not have a semicolon because
28255 # it is a possible weld location:
28257 # if ($BOLD_MATH) {
28259 # $labels, $comment,
28260 # join( '', '<B>', &make_math( $mode, '', '', $_ ), '</B>' )
28265 # MOJO patch: Set a flag if this lines begins with ')->'
28266 my $leading_paren_arrow = (
28267 $is_closing_type_beg
28268 && $token_beg eq ')'
28270 ( $ibeg < $i_terminal && $types_to_go[ $ibeg + 1 ] eq '->' )
28271 || ( $ibeg < $i_terminal - 1
28272 && $types_to_go[ $ibeg + 1 ] eq 'b'
28273 && $types_to_go[ $ibeg + 2 ] eq '->' )
28277 #---------------------------------------------------------
28278 # Section 1: set a flag and a default indentation
28280 # Most lines are indented according to the initial token.
28281 # But it is common to outdent to the level just after the
28282 # terminal token in certain cases...
28283 # adjust_indentation flag:
28284 # 0 - do not adjust
28286 # 2 - vertically align with opening token
28288 #---------------------------------------------------------
28290 my $adjust_indentation = 0;
28291 my $default_adjust_indentation = 0;
28293 # Parameters needed for option 2, aligning with opening token:
28295 $opening_indentation, $opening_offset,
28296 $is_leading, $opening_exists
28299 #-------------------------------------
28301 # if line starts with a sequenced item
28302 #-------------------------------------
28303 if ( $seqno_beg || $seqno_qw_closing ) {
28305 # This can be tedious so we let a sub do it
28307 $adjust_indentation,
28308 $default_adjust_indentation,
28309 $opening_indentation,
28314 ) = $self->get_closing_token_indentation(
28320 $rindentation_list,
28323 $is_semicolon_terminated,
28329 #--------------------------------------------------------
28331 # if at ');', '};', '>;', and '];' of a terminal qw quote
28332 #--------------------------------------------------------
28334 substr( $rpatterns->[0], 0, 2 ) eq 'qb'
28335 && substr( $rfields->[0], -1, 1 ) eq ';'
28336 ## $rpatterns->[0] =~ /^qb*;$/
28337 && $rfields->[0] =~ /^([\)\}\]\>]);$/
28340 if ( $closing_token_indentation{$1} == 0 ) {
28341 $adjust_indentation = 1;
28344 $adjust_indentation = 3;
28348 #---------------------------------------------------------
28349 # Section 2: set indentation according to flag set above
28351 # Select the indentation object to define leading
28352 # whitespace. If we are outdenting something like '} } );'
28353 # then we want to use one level below the last token
28354 # ($i_terminal) in order to get it to fully outdent through
28356 #---------------------------------------------------------
28359 my $level_end = $levels_to_go[$iend];
28361 #------------------------------------
28362 # Section 2A: adjust_indentation == 0
28363 # No change in indentation
28364 #------------------------------------
28365 if ( $adjust_indentation == 0 ) {
28366 $indentation = $leading_spaces_beg;
28370 #-------------------------------------------------------------------
28371 # Secton 2B: adjust_indentation == 1
28372 # Change the indentation to be that of a different token on the line
28373 #-------------------------------------------------------------------
28374 elsif ( $adjust_indentation == 1 ) {
28376 # Previously, the indentation of the terminal token was used:
28378 # $indentation = $reduced_spaces_to_go[$i_terminal];
28379 # $lev = $levels_to_go[$i_terminal];
28381 # Generalization for MOJO patch:
28382 # Use the lowest level indentation of the tokens on the line.
28383 # For example, here we can use the indentation of the ending ';':
28384 # } until ($selection > 0 and $selection < 10); # ok to use ';'
28385 # But this will not outdent if we use the terminal indentation:
28386 # )->then( sub { # use indentation of the ->, not the {
28387 # Warning: reduced_spaces_to_go[] may be a reference, do not
28388 # do numerical checks with it
28391 $indentation = $reduced_spaces_to_go[$i_ind];
28392 $lev = $levels_to_go[$i_ind];
28393 while ( $i_ind < $i_terminal ) {
28395 if ( $levels_to_go[$i_ind] < $lev ) {
28396 $indentation = $reduced_spaces_to_go[$i_ind];
28397 $lev = $levels_to_go[$i_ind];
28402 #--------------------------------------------------------------
28403 # Secton 2C: adjust_indentation == 2
28404 # Handle indented closing token which aligns with opening token
28405 #--------------------------------------------------------------
28406 elsif ( $adjust_indentation == 2 ) {
28408 # handle option to align closing token with opening token
28411 # calculate spaces needed to align with opening token
28413 get_spaces($opening_indentation) + $opening_offset;
28415 # Indent less than the previous line.
28417 # Problem: For -lp we don't exactly know what it was if there
28418 # were recoverable spaces sent to the aligner. A good solution
28419 # would be to force a flush of the vertical alignment buffer, so
28420 # that we would know. For now, this rule is used for -lp:
28422 # When the last line did not start with a closing token we will
28423 # be optimistic that the aligner will recover everything wanted.
28425 # This rule will prevent us from breaking a hierarchy of closing
28426 # tokens, and in a worst case will leave a closing paren too far
28427 # indented, but this is better than frequently leaving it not
28429 my $last_spaces = get_spaces($last_indentation_written);
28431 if ( ref($last_indentation_written)
28432 && !$is_closing_token{$last_leading_token} )
28435 get_recoverable_spaces($last_indentation_written);
28438 # reset the indentation to the new space count if it works
28439 # only options are all or none: nothing in-between looks good
28442 my $diff = $last_spaces - $space_count;
28444 $indentation = $space_count;
28448 # We need to fix things ... but there is no good way to do it.
28449 # The best solution is for the user to use a longer maximum
28450 # line length. We could get a smooth variation if we just move
28451 # the paren in using
28452 # $space_count -= ( 1 - $diff );
28453 # But unfortunately this can give a rather unbalanced look.
28455 # For -xlp we currently allow a tolerance of one indentation
28456 # level and then revert to a simpler default. This will jump
28457 # suddenly but keeps a balanced look.
28458 if ( $rOpts_extended_line_up_parentheses
28459 && $diff >= -$rOpts_indent_columns
28460 && $space_count > $leading_spaces_beg )
28462 $indentation = $space_count;
28465 # Otherwise revert to defaults
28466 elsif ( $default_adjust_indentation == 0 ) {
28467 $indentation = $leading_spaces_beg;
28469 elsif ( $default_adjust_indentation == 1 ) {
28470 $indentation = $reduced_spaces_to_go[$i_terminal];
28471 $lev = $levels_to_go[$i_terminal];
28476 #-------------------------------------------------------------
28477 # Secton 2D: adjust_indentation == 3
28478 # Full indentation of closing tokens (-icb and -icp or -cti=2)
28479 #-------------------------------------------------------------
28482 # handle -icb (indented closing code block braces)
28483 # Updated method for indented block braces: indent one full level if
28484 # there is no continuation indentation. This will occur for major
28485 # structures such as sub, if, else, but not for things like map
28488 # Note: only code blocks without continuation indentation are
28489 # handled here (if, else, unless, ..). In the following snippet,
28490 # the terminal brace of the sort block will have continuation
28491 # indentation as shown so it will not be handled by the coding
28492 # here. We would have to undo the continuation indentation to do
28493 # this, but it probably looks ok as is. This is a possible future
28494 # update for semicolon terminated lines.
28496 # if ($sortby eq 'date' or $sortby eq 'size') {
28498 # $file_data{$a}{$sortby} <=> $file_data{$b}{$sortby}
28503 if ( $block_type_beg
28504 && $ci_levels_to_go[$i_terminal] == 0 )
28506 my $spaces = get_spaces( $leading_spaces_to_go[$i_terminal] );
28507 $indentation = $spaces + $rOpts_indent_columns;
28509 # NOTE: for -lp we could create a new indentation object, but
28510 # there is probably no need to do it
28513 # handle -icp and any -icb block braces which fall through above
28514 # test such as the 'sort' block mentioned above.
28517 # There are currently two ways to handle -icp...
28518 # One way is to use the indentation of the previous line:
28519 # $indentation = $last_indentation_written;
28521 # The other way is to use the indentation that the previous line
28522 # would have had if it hadn't been adjusted:
28523 $indentation = $last_unadjusted_indentation;
28525 # Current method: use the minimum of the two. This avoids
28526 # inconsistent indentation.
28527 if ( get_spaces($last_indentation_written) <
28528 get_spaces($indentation) )
28530 $indentation = $last_indentation_written;
28534 # use previous indentation but use own level
28535 # to cause list to be flushed properly
28539 #-------------------------------------------------------------
28540 # Remember indentation except for multi-line quotes, which get
28542 #-------------------------------------------------------------
28543 if ( !( $ibeg == 0 && $starting_in_quote ) ) {
28544 $last_indentation_written = $indentation;
28545 $last_unadjusted_indentation = $leading_spaces_beg;
28546 $last_leading_token = $token_beg;
28548 # Patch to make a line which is the end of a qw quote work with the
28549 # -lp option. Make $token_beg look like a closing token as some
28550 # type even if it is not. This variable will become
28551 # $last_leading_token at the end of this loop. Then, if the -lp
28552 # style is selected, and the next line is also a
28553 # closing token, it will not get more indentation than this line.
28554 # We need to do this because qw quotes (at present) only get
28555 # continuation indentation, not one level of indentation, so we
28556 # need to turn off the -lp indentation.
28558 # ... a picture is worth a thousand words:
28560 # perltidy -wn -gnu (Without this patch):
28562 # $seqio = $gb->get_Stream_by_batch([qw(J00522 AF303112
28566 # perltidy -wn -gnu (With this patch):
28568 # $seqio = $gb->get_Stream_by_batch([qw(J00522 AF303112
28571 if ( $seqno_qw_closing
28572 && ( length($token_beg) > 1 || $token_beg eq '>' ) )
28574 $last_leading_token = ')';
28578 #---------------------------------------------------------------------
28579 # Rule: lines with leading closing tokens should not be outdented more
28580 # than the line which contained the corresponding opening token.
28581 #---------------------------------------------------------------------
28583 # Updated per bug report in alex_bug.pl: we must not
28584 # mess with the indentation of closing logical braces, so
28585 # we must treat something like '} else {' as if it were
28586 # an isolated brace
28587 my $is_isolated_block_brace = $block_type_beg
28588 && ( $i_terminal == $ibeg
28589 || $is_if_elsif_else_unless_while_until_for_foreach{$block_type_beg}
28592 # only do this for a ':; which is aligned with its leading '?'
28593 my $is_unaligned_colon = $type_beg eq ':' && !$is_leading;
28596 defined($opening_indentation)
28597 && !$leading_paren_arrow # MOJO patch
28598 && !$is_isolated_block_brace
28599 && !$is_unaligned_colon
28602 if ( get_spaces($opening_indentation) > get_spaces($indentation) ) {
28603 $indentation = $opening_indentation;
28607 #----------------------------------------------------
28608 # remember the indentation of each line of this batch
28609 #----------------------------------------------------
28610 push @{$rindentation_list}, $indentation;
28612 #---------------------------------------------
28613 # outdent lines with certain leading tokens...
28614 #---------------------------------------------
28617 # must be first word of this batch
28623 # certain leading keywords if requested
28624 $rOpts_outdent_keywords
28625 && $type_beg eq 'k'
28626 && $outdent_keyword{$token_beg}
28628 # or labels if requested
28629 || $rOpts_outdent_labels && $type_beg eq 'J'
28631 # or static block comments if requested
28632 || $is_static_block_comment
28633 && $rOpts_outdent_static_block_comments
28637 my $space_count = leading_spaces_to_go($ibeg);
28638 if ( $space_count > 0 ) {
28639 $space_count -= $rOpts_continuation_indentation;
28640 $is_outdented_line = 1;
28641 if ( $space_count < 0 ) { $space_count = 0 }
28643 # do not promote a spaced static block comment to non-spaced;
28644 # this is not normally necessary but could be for some
28645 # unusual user inputs (such as -ci = -i)
28646 if ( $type_beg eq '#' && $space_count == 0 ) {
28650 $indentation = $space_count;
28660 $is_outdented_line,
28663 } ## end sub get_final_indentation
28665 sub get_closing_token_indentation {
28667 # Determine indentation adjustment for a line with a leading closing
28668 # token - i.e. one of these: ) ] } :
28677 $rindentation_list,
28680 $is_semicolon_terminated,
28685 my $adjust_indentation = 0;
28686 my $default_adjust_indentation = $adjust_indentation;
28687 my $terminal_type = $types_to_go[$i_terminal];
28689 my $type_beg = $types_to_go[$ibeg];
28690 my $token_beg = $tokens_to_go[$ibeg];
28691 my $level_beg = $levels_to_go[$ibeg];
28692 my $block_type_beg = $block_type_to_go[$ibeg];
28693 my $leading_spaces_beg = $leading_spaces_to_go[$ibeg];
28694 my $seqno_beg = $type_sequence_to_go[$ibeg];
28695 my $is_closing_type_beg = $is_closing_type{$type_beg};
28698 $opening_indentation, $opening_offset,
28699 $is_leading, $opening_exists
28702 # Honor any flag to reduce -ci set by the -bbxi=n option
28703 if ( $seqno_beg && $self->[_rwant_reduced_ci_]->{$seqno_beg} ) {
28705 # if this is an opening, it must be alone on the line ...
28706 if ( $is_closing_type{$type_beg} || $ibeg == $i_terminal ) {
28707 $adjust_indentation = 1;
28710 # ... or a single welded unit (fix for b1173)
28711 elsif ($total_weld_count) {
28712 my $K_beg = $K_to_go[$ibeg];
28713 my $Kterm = $K_to_go[$i_terminal];
28714 my $Kterm_test = $self->[_rK_weld_left_]->{$Kterm};
28715 if ( defined($Kterm_test) && $Kterm_test >= $K_beg ) {
28716 $Kterm = $Kterm_test;
28718 if ( $Kterm == $K_beg ) { $adjust_indentation = 1 }
28722 my $ris_bli_container = $self->[_ris_bli_container_];
28723 my $is_bli_beg = $seqno_beg ? $ris_bli_container->{$seqno_beg} : 0;
28725 # Update the $is_bli flag as we go. It is initially 1.
28726 # We note seeing a leading opening brace by setting it to 2.
28727 # If we get to the closing brace without seeing the opening then we
28728 # turn it off. This occurs if the opening brace did not get output
28729 # at the start of a line, so we will then indent the closing brace
28730 # in the default way.
28731 if ( $is_bli_beg && $is_bli_beg == 1 ) {
28732 my $K_opening_container = $self->[_K_opening_container_];
28733 my $K_opening = $K_opening_container->{$seqno_beg};
28734 my $K_beg = $K_to_go[$ibeg];
28735 if ( $K_beg eq $K_opening ) {
28736 $ris_bli_container->{$seqno_beg} = $is_bli_beg = 2;
28738 else { $is_bli_beg = 0 }
28741 # QW PATCH for the combination -lp -wn
28742 # For -lp formatting use $ibeg_weld_fix to get around the problem
28743 # that with -lp type formatting the opening and closing tokens to not
28744 # have sequence numbers.
28745 my $ibeg_weld_fix = $ibeg;
28746 if ( $seqno_qw_closing && $total_weld_count ) {
28747 my $i_plus = $inext_to_go[$ibeg];
28748 if ( $i_plus <= $max_index_to_go ) {
28749 my $K_plus = $K_to_go[$i_plus];
28750 if ( defined( $self->[_rK_weld_left_]->{$K_plus} ) ) {
28751 $ibeg_weld_fix = $i_plus;
28756 # if we are at a closing token of some type..
28757 if ( $is_closing_type_beg || $seqno_qw_closing ) {
28759 my $K_beg = $K_to_go[$ibeg];
28761 # get the indentation of the line containing the corresponding
28764 $opening_indentation, $opening_offset,
28765 $is_leading, $opening_exists
28767 = $self->get_opening_indentation( $ibeg_weld_fix, $ri_first,
28768 $ri_last, $rindentation_list, $seqno_qw_closing );
28770 # Patch for rt144979, part 1. Coordinated with part 2.
28771 # Do not undo ci for a cuddled closing brace control; it
28772 # needs to be treated exactly the same ci as an isolated
28774 my $is_cuddled_closing_brace = $seqno_beg
28775 && $self->[_ris_cuddled_closing_brace_]->{$seqno_beg};
28777 # First set the default behavior:
28780 # default behavior is to outdent closing lines
28781 # of the form: "); }; ]; )->xxx;"
28782 $is_semicolon_terminated
28784 # and 'cuddled parens' of the form: ")->pack(". Bug fix for RT
28785 # #123749]: the TYPES here were incorrectly ')' and '('. The
28786 # corrected TYPES are '}' and '{'. But skip a cuddled block.
28788 $terminal_type eq '{'
28789 && $type_beg eq '}'
28790 && ( $nesting_depth_to_go[$iend] + 1 ==
28791 $nesting_depth_to_go[$ibeg] )
28792 && !$is_cuddled_closing_brace
28795 # remove continuation indentation for any line like
28797 # or without ending '{' and unbalanced, such as
28798 # such as '}->{$operator}'
28802 && ( $types_to_go[$iend] eq '{'
28803 || $levels_to_go[$iend] < $level_beg )
28805 # but not if a cuddled block
28806 && !$is_cuddled_closing_brace
28809 # and when the next line is at a lower indentation level...
28811 # PATCH #1: and only if the style allows undoing continuation
28812 # for all closing token types. We should really wait until
28813 # the indentation of the next line is known and then make
28814 # a decision, but that would require another pass.
28816 # PATCH #2: and not if this token is under -xci control
28817 || ( $level_jump < 0
28818 && !$some_closing_token_indentation
28819 && !$self->[_rseqno_controlling_my_ci_]->{$K_beg} )
28821 # Patch for -wn=2, multiple welded closing tokens
28822 || ( $i_terminal > $ibeg
28823 && $is_closing_type{ $types_to_go[$iend] } )
28825 # Alternate Patch for git #51, isolated closing qw token not
28826 # outdented if no-delete-old-newlines is set. This works, but
28827 # a more general patch elsewhere fixes the real problem: ljump.
28828 # || ( $seqno_qw_closing && $ibeg == $i_terminal )
28832 $adjust_indentation = 1;
28835 # outdent something like '),'
28837 $terminal_type eq ','
28839 # Removed this constraint for -wn
28840 # OLD: allow just one character before the comma
28841 # && $i_terminal == $ibeg + 1
28843 # require LIST environment; otherwise, we may outdent too much -
28844 # this can happen in calls without parentheses (overload.t);
28845 && $self->is_in_list_by_i($i_terminal)
28848 $adjust_indentation = 1;
28851 # undo continuation indentation of a terminal closing token if
28852 # it is the last token before a level decrease. This will allow
28853 # a closing token to line up with its opening counterpart, and
28854 # avoids an indentation jump larger than 1 level.
28855 my $rLL = $self->[_rLL_];
28856 my $Klimit = $self->[_Klimit_];
28857 if ( $i_terminal == $ibeg
28858 && $is_closing_type_beg
28860 && $K_beg < $Klimit )
28862 my $K_plus = $K_beg + 1;
28863 my $type_plus = $rLL->[$K_plus]->[_TYPE_];
28865 if ( $type_plus eq 'b' && $K_plus < $Klimit ) {
28866 $type_plus = $rLL->[ ++$K_plus ]->[_TYPE_];
28869 if ( $type_plus eq '#' && $K_plus < $Klimit ) {
28870 $type_plus = $rLL->[ ++$K_plus ]->[_TYPE_];
28871 if ( $type_plus eq 'b' && $K_plus < $Klimit ) {
28872 $type_plus = $rLL->[ ++$K_plus ]->[_TYPE_];
28875 # Note: we have skipped past just one comment (perhaps a
28876 # side comment). There could be more, and we could easily
28877 # skip past all the rest with the following code, or with a
28878 # while loop. It would be rare to have to do this, and
28879 # those block comments would still be indented, so it would
28880 # to leave them indented. So it seems best to just stop at
28881 # a maximum of one comment.
28882 ##if ($type_plus eq '#') {
28883 ## $K_plus = $self->K_next_code($K_plus);
28887 if ( !$is_bli_beg && defined($K_plus) ) {
28888 my $lev = $level_beg;
28889 my $level_next = $rLL->[$K_plus]->[_LEVEL_];
28891 # and do not undo ci if it was set by the -xci option
28892 $adjust_indentation = 1
28893 if ( $level_next < $lev
28894 && !$self->[_rseqno_controlling_my_ci_]->{$K_beg} );
28897 # Patch for RT #96101, in which closing brace of anonymous subs
28898 # was not outdented. We should look ahead and see if there is
28899 # a level decrease at the next token (i.e., a closing token),
28900 # but right now we do not have that information. For now
28901 # we see if we are in a list, and this works well.
28902 # See test files 'sub*.t' for good test cases.
28903 if ( !$rOpts_indent_closing_brace
28905 && $self->[_ris_asub_block_]->{$seqno_beg}
28906 && $self->is_in_list_by_i($i_terminal) )
28909 $opening_indentation, $opening_offset,
28910 $is_leading, $opening_exists
28912 = $self->get_opening_indentation( $ibeg, $ri_first,
28913 $ri_last, $rindentation_list );
28914 my $indentation = $leading_spaces_beg;
28915 if ( defined($opening_indentation)
28916 && get_spaces($indentation) >
28917 get_spaces($opening_indentation) )
28919 $adjust_indentation = 1;
28924 # YVES patch 1 of 2:
28925 # Undo ci of line with leading closing eval brace,
28926 # but not beyond the indentation of the line with
28927 # the opening brace.
28928 if ( $block_type_beg
28929 && $block_type_beg eq 'eval'
28930 && !ref($leading_spaces_beg)
28931 && !$rOpts_indent_closing_brace )
28934 $opening_indentation, $opening_offset,
28935 $is_leading, $opening_exists
28937 = $self->get_opening_indentation( $ibeg, $ri_first, $ri_last,
28938 $rindentation_list );
28939 my $indentation = $leading_spaces_beg;
28940 if ( defined($opening_indentation)
28941 && get_spaces($indentation) >
28942 get_spaces($opening_indentation) )
28944 $adjust_indentation = 1;
28948 # patch for issue git #40: -bli setting has priority
28949 $adjust_indentation = 0 if ($is_bli_beg);
28951 $default_adjust_indentation = $adjust_indentation;
28953 # Now modify default behavior according to user request:
28954 # handle option to indent non-blocks of the form ); }; ];
28955 # But don't do special indentation to something like ')->pack('
28956 if ( !$block_type_beg ) {
28958 # Note that logical padding has already been applied, so we may
28959 # need to remove some spaces to get a valid hash key.
28960 my $tok = $token_beg;
28961 my $cti = $closing_token_indentation{$tok};
28963 # Fix the value of 'cti' for an isolated non-welded closing qw
28965 if ( $seqno_qw_closing && $ibeg_weld_fix == $ibeg ) {
28967 # A quote delimiter which is not a container will not have
28968 # a cti value defined. In this case use the style of a
28969 # paren. For example
28977 if ( !defined($cti) && length($tok) == 1 ) {
28979 # something other than ')', '}', ']' ; use flag for ')'
28980 $cti = $closing_token_indentation{')'};
28982 # But for now, do not outdent non-container qw
28983 # delimiters because it would would change existing
28985 if ( $tok ne '>' ) { $cti = 3 }
28988 # A non-welded closing qw cannot currently use -cti=1
28989 # because that option requires a sequence number to find
28990 # the opening indentation, and qw quote delimiters are not
28992 if ( defined($cti) && $cti == 1 ) { $cti = 0 }
28995 if ( !defined($cti) ) {
28997 # $cti may not be defined for several reasons.
28998 # -padding may have been applied so the character
29000 # - we may have welded to a closing quote token.
29001 # Here is an example (perltidy -wn):
29002 # __PACKAGE__->load_components( qw(
29006 $adjust_indentation = 0;
29009 elsif ( $cti == 1 ) {
29010 if ( $i_terminal <= $ibeg + 1
29011 || $is_semicolon_terminated )
29013 $adjust_indentation = 2;
29016 $adjust_indentation = 0;
29019 elsif ( $cti == 2 ) {
29020 if ($is_semicolon_terminated) {
29021 $adjust_indentation = 3;
29024 $adjust_indentation = 0;
29027 elsif ( $cti == 3 ) {
29028 $adjust_indentation = 3;
29032 # handle option to indent blocks
29035 $rOpts_indent_closing_brace
29037 $i_terminal == $ibeg # isolated terminal '}'
29038 || $is_semicolon_terminated
29042 $adjust_indentation = 3;
29045 } ## end if ( $is_closing_type_beg || $seqno_qw_closing )
29047 # if line begins with a ':', align it with any
29048 # previous line leading with corresponding ?
29049 elsif ( $type_beg eq ':' ) {
29051 $opening_indentation, $opening_offset,
29052 $is_leading, $opening_exists
29054 = $self->get_opening_indentation( $ibeg, $ri_first, $ri_last,
29055 $rindentation_list );
29056 if ($is_leading) { $adjust_indentation = 2; }
29061 $adjust_indentation,
29062 $default_adjust_indentation,
29063 $opening_indentation,
29069 } ## end sub get_closing_token_indentation
29070 } ## end closure get_final_indentation
29072 sub get_opening_indentation {
29074 # get the indentation of the line which output the opening token
29075 # corresponding to a given closing token in the current output batch.
29078 # $i_closing - index in this line of a closing token ')' '}' or ']'
29080 # $ri_first - reference to list of the first index $i for each output
29081 # line in this batch
29082 # $ri_last - reference to list of the last index $i for each output line
29084 # $rindentation_list - reference to a list containing the indentation
29085 # used for each line.
29086 # $qw_seqno - optional sequence number to use if normal seqno not defined
29087 # (NOTE: would be more general to just look this up from index i)
29090 # -the indentation of the line which contained the opening token
29091 # which matches the token at index $i_opening
29092 # -and its offset (number of columns) from the start of the line
29094 my ( $self, $i_closing, $ri_first, $ri_last, $rindentation_list, $qw_seqno )
29097 # first, see if the opening token is in the current batch
29098 my $i_opening = $mate_index_to_go[$i_closing];
29099 my ( $indent, $offset, $is_leading, $exists );
29101 if ( defined($i_opening) && $i_opening >= 0 ) {
29103 # it is..look up the indentation
29104 ( $indent, $offset, $is_leading ) =
29105 lookup_opening_indentation( $i_opening, $ri_first, $ri_last,
29106 $rindentation_list );
29109 # if not, it should have been stored in the hash by a previous batch
29111 my $seqno = $type_sequence_to_go[$i_closing];
29112 $seqno = $qw_seqno unless ($seqno);
29113 ( $indent, $offset, $is_leading, $exists ) =
29114 get_saved_opening_indentation($seqno);
29116 return ( $indent, $offset, $is_leading, $exists );
29117 } ## end sub get_opening_indentation
29119 sub examine_vertical_tightness_flags {
29122 # For efficiency, we will set a flag to skip all calls to sub
29123 # 'set_vertical_tightness_flags' if vertical tightness is not possible with
29124 # the user input parameters. If vertical tightness is possible, we will
29125 # simply leave the flag undefined and return.
29127 # Vertical tightness is never possible with --freeze-whitespace
29128 if ($rOpts_freeze_whitespace) {
29129 $self->[_no_vertical_tightness_flags_] = 1;
29133 # This sub is coordinated with sub set_vertical_tightness_flags.
29134 # The Section numbers in the following comments are the sections
29135 # in sub set_vertical_tightness_flags:
29137 # Examine controls for Section 1a:
29138 return if ($rOpts_line_up_parentheses);
29140 foreach my $key ( keys %opening_vertical_tightness ) {
29141 return if ( $opening_vertical_tightness{$key} );
29144 # Examine controls for Section 1b:
29145 foreach my $key ( keys %closing_vertical_tightness ) {
29146 return if ( $closing_vertical_tightness{$key} );
29149 # Examine controls for Section 1c:
29150 foreach my $key ( keys %opening_token_right ) {
29151 return if ( $opening_token_right{$key} );
29154 # Examine controls for Section 1d:
29155 foreach my $key ( keys %stack_opening_token ) {
29156 return if ( $stack_opening_token{$key} );
29158 foreach my $key ( keys %stack_closing_token ) {
29159 return if ( $stack_closing_token{$key} );
29162 # Examine controls for Section 2:
29163 return if ($rOpts_block_brace_vertical_tightness);
29165 # Examine controls for Section 3:
29166 return if ($rOpts_stack_closing_block_brace);
29168 # None of the controls used for vertical tightness are set, so
29169 # we can skip all calls to sub set_vertical_tightness_flags
29170 $self->[_no_vertical_tightness_flags_] = 1;
29172 } ## end sub examine_vertical_tightness_flags
29174 sub set_vertical_tightness_flags {
29176 my ( $self, $n, $n_last_line, $ibeg, $iend, $ri_first, $ri_last,
29177 $ending_in_quote, $closing_side_comment )
29180 # Define vertical tightness controls for the nth line of a batch.
29181 # Note: do not call this sub for a block comment or if
29182 # $rOpts_freeze_whitespace is set.
29184 # These parameters are passed to the vertical aligner to indicated
29185 # if we should combine this line with the next line to achieve the
29186 # desired vertical tightness. This was previously an array but
29187 # has been converted to a hash:
29192 # 0 _vt_type: 1=opening non-block 2=closing non-block
29193 # 3=opening block brace 4=closing block brace
29195 # 1a _vt_opening_flag: 1=no multiple steps, 2=multiple steps ok
29196 # 1b _vt_closing_flag: spaces of padding to use if closing
29197 # 2 _vt_seqno: sequence number of container
29198 # 3 _vt_valid flag: do not append if this flag is false. Will be
29199 # true if appropriate -vt flag is set. Otherwise, Will be
29200 # made true only for 2 line container in parens with -lp
29201 # 4 _vt_seqno_beg: sequence number of first token of line
29202 # 5 _vt_seqno_end: sequence number of last token of line
29203 # 6 _vt_min_lines: min number of lines for joining opening cache,
29205 # 7 _vt_max_lines: max number of lines for joining opening cache,
29208 # The vertical tightness mechanism can add whitespace, so whitespace can
29209 # continually increase if we allowed it when the -fws flag is set.
29210 # See case b499 for an example.
29212 # Define these values...
29214 my $vt_opening_flag = 0;
29215 my $vt_closing_flag = 0;
29217 my $vt_valid_flag = 0;
29218 my $vt_seqno_beg = 0;
29219 my $vt_seqno_end = 0;
29220 my $vt_min_lines = 0;
29221 my $vt_max_lines = 0;
29223 # Uses these global parameters:
29224 # $rOpts_block_brace_tightness
29225 # $rOpts_block_brace_vertical_tightness
29226 # $rOpts_stack_closing_block_brace
29227 # $rOpts_line_up_parentheses
29228 # %opening_vertical_tightness
29229 # %closing_vertical_tightness
29230 # %opening_token_right
29231 # %stack_closing_token
29232 # %stack_opening_token
29234 #--------------------------------------------------------------
29235 # Vertical Tightness Flags Section 1:
29236 # Handle Lines 1 .. n-1 but not the last line
29237 # For non-BLOCK tokens, we will need to examine the next line
29238 # too, so we won't consider the last line.
29239 #--------------------------------------------------------------
29240 if ( $n < $n_last_line ) {
29242 #--------------------------------------------------------------
29243 # Vertical Tightness Flags Section 1a:
29244 # Look for Type 1, last token of this line is a non-block opening token
29245 #--------------------------------------------------------------
29246 my $ibeg_next = $ri_first->[ $n + 1 ];
29247 my $token_end = $tokens_to_go[$iend];
29248 my $iend_next = $ri_last->[ $n + 1 ];
29251 $type_sequence_to_go[$iend]
29252 && !$block_type_to_go[$iend]
29253 && $is_opening_token{$token_end}
29255 $opening_vertical_tightness{$token_end} > 0
29257 # allow 2-line method call to be closed up
29258 || ( $rOpts_line_up_parentheses
29259 && $token_end eq '('
29260 && $self->[_rlp_object_by_seqno_]
29261 ->{ $type_sequence_to_go[$iend] }
29263 && $types_to_go[ $iend - 1 ] ne 'b' )
29267 # avoid multiple jumps in nesting depth in one line if
29269 my $ovt = $opening_vertical_tightness{$token_end};
29271 # Turn off the -vt flag if the next line ends in a weld.
29272 # This avoids an instability with one-line welds (fixes b1183).
29273 my $type_end_next = $types_to_go[$iend_next];
29275 if ( $self->[_rK_weld_left_]->{ $K_to_go[$iend_next] }
29276 && $is_closing_type{$type_end_next} );
29278 # The flag '_rbreak_container_' avoids conflict of -bom and -pt=1
29279 # or -pt=2; fixes b1270. See similar patch above for $cvt.
29280 my $seqno = $type_sequence_to_go[$iend];
29283 && $self->[_rbreak_container_]->{$seqno} )
29288 # The flag '_rmax_vertical_tightness_' avoids welding conflicts.
29289 if ( defined( $self->[_rmax_vertical_tightness_]->{$seqno} ) ) {
29291 min( $ovt, $self->[_rmax_vertical_tightness_]->{$seqno} );
29296 && ( $nesting_depth_to_go[ $iend_next + 1 ] !=
29297 $nesting_depth_to_go[$ibeg_next] )
29301 # If -vt flag has not been set, mark this as invalid
29302 # and aligner will validate it if it sees the closing paren
29304 my $valid_flag = $ovt;
29307 $vt_opening_flag = $ovt;
29308 $vt_seqno = $type_sequence_to_go[$iend];
29309 $vt_valid_flag = $valid_flag;
29313 #--------------------------------------------------------------
29314 # Vertical Tightness Flags Section 1b:
29315 # Look for Type 2, first token of next line is a non-block closing
29316 # token .. and be sure this line does not have a side comment
29317 #--------------------------------------------------------------
29318 my $token_next = $tokens_to_go[$ibeg_next];
29319 if ( $type_sequence_to_go[$ibeg_next]
29320 && !$block_type_to_go[$ibeg_next]
29321 && $is_closing_token{$token_next}
29322 && $types_to_go[$iend] ne '#' ) # for safety, shouldn't happen!
29324 my $cvt = $closing_vertical_tightness{$token_next};
29326 # Avoid conflict of -bom and -pvt=1 or -pvt=2, fixes b977, b1303
29327 # See similar patch above for $ovt.
29328 my $seqno = $type_sequence_to_go[$ibeg_next];
29329 if ( $cvt && $self->[_rbreak_container_]->{$seqno} ) {
29333 # Implement cvt=3: like cvt=0 for assigned structures, like cvt=1
29334 # otherwise. Added for rt136417.
29336 $cvt = $self->[_ris_assigned_structure_]->{$seqno} ? 0 : 1;
29339 # The unusual combination -pvtc=2 -dws -naws can be unstable.
29340 # This fixes b1282, b1283. This can be moved to set_options.
29342 && $rOpts_delete_old_whitespace
29343 && !$rOpts_add_whitespace )
29348 # Fix for b1379, b1380, b1381, b1382, b1384 part 2,
29349 # instablility with adding and deleting trailing commas:
29350 # Reducing -cvt=2 to =1 fixes stability for -wtc=b in b1379,1380.
29351 # Reducing -cvt>0 to =0 fixes stability for -wtc=b in b1381,1382.
29352 # Reducing -cvt>0 to =0 fixes stability for -wtc=m in b1384
29354 && $self->[_ris_bare_trailing_comma_by_seqno_]->{$seqno} )
29361 # Never append a trailing line like ')->pack(' because it
29362 # will throw off later alignment. So this line must start at a
29363 # deeper level than the next line (fix1 for welding, git #45).
29365 $nesting_depth_to_go[$ibeg_next] >=
29366 $nesting_depth_to_go[ $iend_next + 1 ] + 1
29371 !$self->is_in_list_by_i($ibeg_next)
29375 # allow closing up 2-line method calls
29376 || ( $rOpts_line_up_parentheses
29377 && $token_next eq ')'
29378 && $type_sequence_to_go[$ibeg_next]
29379 && $self->[_rlp_object_by_seqno_]
29380 ->{ $type_sequence_to_go[$ibeg_next] } )
29387 # decide which trailing closing tokens to append..
29389 if ( $cvt == 2 || $iend_next == $ibeg_next ) { $ok = 1 }
29391 my $str = join( EMPTY_STRING,
29392 @types_to_go[ $ibeg_next + 1 .. $ibeg_next + 2 ] );
29394 # append closing token if followed by comment or ';'
29395 # or another closing token (fix2 for welding, git #45)
29396 if ( $str =~ /^b?[\)\]\}R#;]/ ) { $ok = 1 }
29400 my $valid_flag = $cvt;
29404 # Fix for b1187 and b1188: Blinking can occur if we allow
29405 # welded tokens to re-form into one-line blocks during
29406 # vertical alignment when -lp used. So for this case we
29407 # set the minimum number of lines to be 1 instead of 0.
29408 # The maximum should be 1 if -vtc is not used. If -vtc is
29409 # used, we turn the valid
29410 # flag off and set the maximum to 0. This is equivalent to
29411 # using a large number.
29412 my $seqno_ibeg_next = $type_sequence_to_go[$ibeg_next];
29413 if ( $rOpts_line_up_parentheses
29414 && $total_weld_count
29415 && $seqno_ibeg_next
29416 && $self->[_rlp_object_by_seqno_]->{$seqno_ibeg_next}
29417 && $self->is_welded_at_seqno($seqno_ibeg_next) )
29420 $max_lines = $cvt ? 0 : 1;
29425 $vt_closing_flag = $tightness{$token_next} == 2 ? 0 : 1;
29426 $vt_seqno = $type_sequence_to_go[$ibeg_next];
29427 $vt_valid_flag = $valid_flag;
29428 $vt_min_lines = $min_lines;
29429 $vt_max_lines = $max_lines;
29434 #--------------------------------------------------------------
29435 # Vertical Tightness Flags Section 1c:
29436 # Implement the Opening Token Right flag (Type 2)..
29437 # If requested, move an isolated trailing opening token to the end of
29438 # the previous line which ended in a comma. We could do this
29439 # in sub recombine_breakpoints but that would cause problems
29440 # with -lp formatting. The problem is that indentation will
29441 # quickly move far to the right in nested expressions. By
29442 # doing it after indentation has been set, we avoid changes
29443 # to the indentation. Actual movement of the token takes place
29444 # in sub valign_output_step_B.
29446 # Note added 4 May 2021: the man page suggests that the -otr flags
29447 # are mainly for opening tokens following commas. But this seems
29448 # to have been generalized long ago to include other situations.
29449 # I checked the coding back to 2012 and it is essentially the same
29450 # as here, so it is best to leave this unchanged for now.
29451 #--------------------------------------------------------------
29453 $opening_token_right{ $tokens_to_go[$ibeg_next] }
29455 # previous line is not opening
29456 # (use -sot to combine with it)
29457 && !$is_opening_token{$token_end}
29459 # previous line ended in one of these
29460 # (add other cases if necessary; '=>' and '.' are not necessary
29461 && !$block_type_to_go[$ibeg_next]
29463 # this is a line with just an opening token
29464 && ( $iend_next == $ibeg_next
29465 || $iend_next == $ibeg_next + 2
29466 && $types_to_go[$iend_next] eq '#' )
29468 # Fix for case b1060 when both -baoo and -otr are set:
29469 # to avoid blinking, honor the -baoo flag over the -otr flag.
29470 && $token_end ne '||' && $token_end ne '&&'
29472 # Keep break after '=' if -lp. Fixes b964 b1040 b1062 b1083 b1089.
29473 # Generalized from '=' to $is_assignment to fix b1375.
29475 $is_assignment{ $types_to_go[$iend] }
29476 && $rOpts_line_up_parentheses
29477 && $type_sequence_to_go[$ibeg_next]
29478 && $self->[_rlp_object_by_seqno_]
29479 ->{ $type_sequence_to_go[$ibeg_next] }
29482 # looks bad if we align vertically with the wrong container
29483 && $tokens_to_go[$ibeg] ne $tokens_to_go[$ibeg_next]
29485 # give -kba priority over -otr (b1445)
29486 && !$self->[_rbreak_after_Klast_]->{ $K_to_go[$iend] }
29489 my $spaces = ( $types_to_go[ $ibeg_next - 1 ] eq 'b' ) ? 1 : 0;
29492 $vt_closing_flag = $spaces;
29493 $vt_seqno = $type_sequence_to_go[$ibeg_next];
29494 $vt_valid_flag = 1;
29497 #--------------------------------------------------------------
29498 # Vertical Tightness Flags Section 1d:
29499 # Stacking of opening and closing tokens (Type 2)
29500 #--------------------------------------------------------------
29502 my $token_beg_next = $tokens_to_go[$ibeg_next];
29504 # patch to make something like 'qw(' behave like an opening paren
29506 if ( $types_to_go[$ibeg_next] eq 'q' ) {
29507 if ( $token_beg_next =~ /^qw\s*([\[\(\{])$/ ) {
29508 $token_beg_next = $1;
29512 if ( $is_closing_token{$token_end}
29513 && $is_closing_token{$token_beg_next} )
29516 # avoid instability of combo -bom and -sct; b1179
29517 my $seq_next = $type_sequence_to_go[$ibeg_next];
29518 $stackable = $stack_closing_token{$token_beg_next}
29519 unless ( $block_type_to_go[$ibeg_next]
29520 || $seq_next && $self->[_rbreak_container_]->{$seq_next} );
29522 elsif ($is_opening_token{$token_end}
29523 && $is_opening_token{$token_beg_next} )
29525 $stackable = $stack_opening_token{$token_beg_next}
29526 unless ( $block_type_to_go[$ibeg_next] )
29527 ; # shouldn't happen; just checking
29532 my $is_semicolon_terminated;
29533 if ( $n + 1 == $n_last_line ) {
29534 my ( $terminal_type, $i_terminal ) =
29535 terminal_type_i( $ibeg_next, $iend_next );
29536 $is_semicolon_terminated = $terminal_type eq ';'
29537 && $nesting_depth_to_go[$iend_next] <
29538 $nesting_depth_to_go[$ibeg_next];
29541 # this must be a line with just an opening token
29542 # or end in a semicolon
29544 $is_semicolon_terminated
29545 || ( $iend_next == $ibeg_next
29546 || $iend_next == $ibeg_next + 2
29547 && $types_to_go[$iend_next] eq '#' )
29550 my $spaces = ( $types_to_go[ $ibeg_next - 1 ] eq 'b' ) ? 1 : 0;
29553 $vt_closing_flag = $spaces;
29554 $vt_seqno = $type_sequence_to_go[$ibeg_next];
29555 $vt_valid_flag = 1;
29561 #--------------------------------------------------------------
29562 # Vertical Tightness Flags Section 2:
29563 # Handle type 3, opening block braces on last line of the batch
29564 # Check for a last line with isolated opening BLOCK curly
29565 #--------------------------------------------------------------
29566 elsif ($rOpts_block_brace_vertical_tightness
29568 && $types_to_go[$iend] eq '{'
29569 && $block_type_to_go[$iend]
29570 && $block_type_to_go[$iend] =~
29571 /$block_brace_vertical_tightness_pattern/ )
29574 $vt_opening_flag = $rOpts_block_brace_vertical_tightness;
29576 $vt_valid_flag = 1;
29579 #--------------------------------------------------------------
29580 # Vertical Tightness Flags Section 3:
29581 # Handle type 4, a closing block brace on the last line of the batch Check
29582 # for a last line with isolated closing BLOCK curly
29583 # Patch: added a check for any new closing side comment which the
29584 # -csc option may generate. If it exists, there will be a side comment
29585 # so we cannot combine with a brace on the next line. This issue
29586 # occurs for the combination -scbb and -csc is used.
29587 #--------------------------------------------------------------
29588 elsif ($rOpts_stack_closing_block_brace
29590 && $block_type_to_go[$iend]
29591 && $types_to_go[$iend] eq '}'
29592 && ( !$closing_side_comment || $n < $n_last_line ) )
29594 my $spaces = $rOpts_block_brace_tightness == 2 ? 0 : 1;
29597 $vt_closing_flag = $spaces;
29598 $vt_seqno = $type_sequence_to_go[$iend];
29599 $vt_valid_flag = 1;
29603 # get the sequence numbers of the ends of this line
29604 $vt_seqno_beg = $type_sequence_to_go[$ibeg];
29605 if ( !$vt_seqno_beg ) {
29606 if ( $types_to_go[$ibeg] eq 'q' ) {
29607 $vt_seqno_beg = $self->get_seqno( $ibeg, $ending_in_quote );
29609 else { $vt_seqno_beg = EMPTY_STRING }
29612 $vt_seqno_end = $type_sequence_to_go[$iend];
29613 if ( !$vt_seqno_end ) {
29614 if ( $types_to_go[$iend] eq 'q' ) {
29615 $vt_seqno_end = $self->get_seqno( $iend, $ending_in_quote );
29617 else { $vt_seqno_end = EMPTY_STRING }
29620 if ( !defined($vt_seqno) ) { $vt_seqno = EMPTY_STRING }
29622 my $rvertical_tightness_flags = {
29623 _vt_type => $vt_type,
29624 _vt_opening_flag => $vt_opening_flag,
29625 _vt_closing_flag => $vt_closing_flag,
29626 _vt_seqno => $vt_seqno,
29627 _vt_valid_flag => $vt_valid_flag,
29628 _vt_seqno_beg => $vt_seqno_beg,
29629 _vt_seqno_end => $vt_seqno_end,
29630 _vt_min_lines => $vt_min_lines,
29631 _vt_max_lines => $vt_max_lines,
29634 return ($rvertical_tightness_flags);
29635 } ## end sub set_vertical_tightness_flags
29637 ##########################################################
29638 # CODE SECTION 14: Code for creating closing side comments
29639 ##########################################################
29641 { ## begin closure accumulate_csc_text
29643 # These routines are called once per batch when the --closing-side-comments flag
29646 my %block_leading_text;
29647 my %block_opening_line_number;
29648 my $csc_new_statement_ok;
29649 my $csc_last_label;
29650 my %csc_block_label;
29651 my $accumulating_text_for_block;
29652 my $leading_block_text;
29653 my $rleading_block_if_elsif_text;
29654 my $leading_block_text_level;
29655 my $leading_block_text_length_exceeded;
29656 my $leading_block_text_line_length;
29657 my $leading_block_text_line_number;
29659 sub initialize_csc_vars {
29660 %block_leading_text = ();
29661 %block_opening_line_number = ();
29662 $csc_new_statement_ok = 1;
29663 $csc_last_label = EMPTY_STRING;
29664 %csc_block_label = ();
29665 $rleading_block_if_elsif_text = [];
29666 $accumulating_text_for_block = EMPTY_STRING;
29667 reset_block_text_accumulator();
29669 } ## end sub initialize_csc_vars
29671 sub reset_block_text_accumulator {
29673 # save text after 'if' and 'elsif' to append after 'else'
29674 if ($accumulating_text_for_block) {
29676 ## ( $accumulating_text_for_block =~ /^(if|elsif)$/ ) {
29677 if ( $is_if_elsif{$accumulating_text_for_block} ) {
29678 push @{$rleading_block_if_elsif_text}, $leading_block_text;
29681 $accumulating_text_for_block = EMPTY_STRING;
29682 $leading_block_text = EMPTY_STRING;
29683 $leading_block_text_level = 0;
29684 $leading_block_text_length_exceeded = 0;
29685 $leading_block_text_line_number = 0;
29686 $leading_block_text_line_length = 0;
29688 } ## end sub reset_block_text_accumulator
29690 sub set_block_text_accumulator {
29691 my ( $self, $i ) = @_;
29692 $accumulating_text_for_block = $tokens_to_go[$i];
29693 if ( $accumulating_text_for_block !~ /^els/ ) {
29694 $rleading_block_if_elsif_text = [];
29696 $leading_block_text = EMPTY_STRING;
29697 $leading_block_text_level = $levels_to_go[$i];
29698 $leading_block_text_line_number = $self->get_output_line_number();
29699 $leading_block_text_length_exceeded = 0;
29701 # this will contain the column number of the last character
29702 # of the closing side comment
29703 $leading_block_text_line_length =
29704 length($csc_last_label) +
29705 length($accumulating_text_for_block) +
29706 length( $rOpts->{'closing-side-comment-prefix'} ) +
29707 $leading_block_text_level * $rOpts_indent_columns + 3;
29709 } ## end sub set_block_text_accumulator
29711 sub accumulate_block_text {
29712 my ( $self, $i ) = @_;
29714 # accumulate leading text for -csc, ignoring any side comments
29715 if ( $accumulating_text_for_block
29716 && !$leading_block_text_length_exceeded
29717 && $types_to_go[$i] ne '#' )
29720 my $added_length = $token_lengths_to_go[$i];
29721 $added_length += 1 if $i == 0;
29722 my $new_line_length =
29723 $leading_block_text_line_length + $added_length;
29725 # we can add this text if we don't exceed some limits..
29728 # we must not have already exceeded the text length limit
29729 length($leading_block_text) <
29730 $rOpts_closing_side_comment_maximum_text
29733 # the new total line length must be below the line length limit
29734 # or the new length must be below the text length limit
29735 # (ie, we may allow one token to exceed the text length limit)
29738 $maximum_line_length_at_level[$leading_block_text_level]
29740 || length($leading_block_text) + $added_length <
29741 $rOpts_closing_side_comment_maximum_text
29744 # UNLESS: we are adding a closing paren before the brace we seek.
29745 # This is an attempt to avoid situations where the ... to be
29746 # added are longer than the omitted right paren, as in:
29748 # foreach my $item (@a_rather_long_variable_name_here) {
29750 # } ## end foreach my $item (@a_rather_long_variable_name_here...
29753 $tokens_to_go[$i] eq ')'
29756 $i + 1 <= $max_index_to_go
29757 && $block_type_to_go[ $i + 1 ]
29758 && $block_type_to_go[ $i + 1 ] eq
29759 $accumulating_text_for_block
29761 || ( $i + 2 <= $max_index_to_go
29762 && $block_type_to_go[ $i + 2 ]
29763 && $block_type_to_go[ $i + 2 ] eq
29764 $accumulating_text_for_block )
29770 # add an extra space at each newline
29771 if ( $i == 0 && $types_to_go[$i] ne 'b' ) {
29772 $leading_block_text .= SPACE;
29775 # add the token text
29776 $leading_block_text .= $tokens_to_go[$i];
29777 $leading_block_text_line_length = $new_line_length;
29780 # show that text was truncated if necessary
29781 elsif ( $types_to_go[$i] ne 'b' ) {
29782 $leading_block_text_length_exceeded = 1;
29783 $leading_block_text .= '...';
29787 } ## end sub accumulate_block_text
29789 sub accumulate_csc_text {
29793 # called once per output buffer when -csc is used. Accumulates
29794 # the text placed after certain closing block braces.
29795 # Defines and returns the following for this buffer:
29797 my $block_leading_text =
29798 EMPTY_STRING; # the leading text of the last '}'
29799 my $rblock_leading_if_elsif_text;
29800 my $i_block_leading_text =
29801 -1; # index of token owning block_leading_text
29802 my $block_line_count = 100; # how many lines the block spans
29803 my $terminal_type = 'b'; # type of last nonblank token
29804 my $i_terminal = 0; # index of last nonblank token
29805 my $terminal_block_type = EMPTY_STRING;
29807 # update most recent statement label
29808 $csc_last_label = EMPTY_STRING unless ($csc_last_label);
29809 if ( $types_to_go[0] eq 'J' ) { $csc_last_label = $tokens_to_go[0] }
29810 my $block_label = $csc_last_label;
29812 # Loop over all tokens of this batch
29813 for my $i ( 0 .. $max_index_to_go ) {
29814 my $type = $types_to_go[$i];
29815 my $block_type = $block_type_to_go[$i];
29816 my $token = $tokens_to_go[$i];
29817 $block_type = EMPTY_STRING unless ($block_type);
29819 # remember last nonblank token type
29820 if ( $type ne '#' && $type ne 'b' ) {
29821 $terminal_type = $type;
29822 $terminal_block_type = $block_type;
29826 my $type_sequence = $type_sequence_to_go[$i];
29827 if ( $block_type && $type_sequence ) {
29829 if ( $token eq '}' ) {
29831 # restore any leading text saved when we entered this block
29832 if ( defined( $block_leading_text{$type_sequence} ) ) {
29833 ( $block_leading_text, $rblock_leading_if_elsif_text )
29834 = @{ $block_leading_text{$type_sequence} };
29835 $i_block_leading_text = $i;
29836 delete $block_leading_text{$type_sequence};
29837 $rleading_block_if_elsif_text =
29838 $rblock_leading_if_elsif_text;
29841 if ( defined( $csc_block_label{$type_sequence} ) ) {
29842 $block_label = $csc_block_label{$type_sequence};
29843 delete $csc_block_label{$type_sequence};
29846 # if we run into a '}' then we probably started accumulating
29847 # at something like a trailing 'if' clause..no harm done.
29848 if ( $accumulating_text_for_block
29849 && $levels_to_go[$i] <= $leading_block_text_level )
29851 my $lev = $levels_to_go[$i];
29852 reset_block_text_accumulator();
29855 if ( defined( $block_opening_line_number{$type_sequence} ) )
29857 my $output_line_number =
29858 $self->get_output_line_number();
29859 $block_line_count =
29860 $output_line_number -
29861 $block_opening_line_number{$type_sequence} + 1;
29862 delete $block_opening_line_number{$type_sequence};
29866 # Error: block opening line undefined for this line..
29867 # This shouldn't be possible, but it is not a
29868 # significant problem.
29872 elsif ( $token eq '{' ) {
29874 my $line_number = $self->get_output_line_number();
29875 $block_opening_line_number{$type_sequence} = $line_number;
29877 # set a label for this block, except for
29878 # a bare block which already has the label
29879 # A label can only be used on the next {
29880 if ( $block_type =~ /:$/ ) {
29881 $csc_last_label = EMPTY_STRING;
29883 $csc_block_label{$type_sequence} = $csc_last_label;
29884 $csc_last_label = EMPTY_STRING;
29886 if ( $accumulating_text_for_block
29887 && $levels_to_go[$i] == $leading_block_text_level )
29890 if ( $accumulating_text_for_block eq $block_type ) {
29892 # save any leading text before we enter this block
29893 $block_leading_text{$type_sequence} = [
29894 $leading_block_text,
29895 $rleading_block_if_elsif_text
29897 $block_opening_line_number{$type_sequence} =
29898 $leading_block_text_line_number;
29899 reset_block_text_accumulator();
29903 # shouldn't happen, but not a serious error.
29904 # We were accumulating -csc text for block type
29905 # $accumulating_text_for_block and unexpectedly
29906 # encountered a '{' for block type $block_type.
29913 && $csc_new_statement_ok
29914 && $is_if_elsif_else_unless_while_until_for_foreach{$token}
29915 && $token =~ /$closing_side_comment_list_pattern/ )
29917 $self->set_block_text_accumulator($i);
29921 # note: ignoring type 'q' because of tricks being played
29922 # with 'q' for hanging side comments
29923 if ( $type ne 'b' && $type ne '#' && $type ne 'q' ) {
29924 $csc_new_statement_ok =
29925 ( $block_type || $type eq 'J' || $type eq ';' );
29928 && $accumulating_text_for_block
29929 && $levels_to_go[$i] == $leading_block_text_level )
29931 reset_block_text_accumulator();
29934 $self->accumulate_block_text($i);
29939 # Treat an 'else' block specially by adding preceding 'if' and
29940 # 'elsif' text. Otherwise, the 'end else' is not helpful,
29941 # especially for cuddled-else formatting.
29942 if ( $terminal_block_type =~ /^els/ && $rblock_leading_if_elsif_text ) {
29943 $block_leading_text =
29944 $self->make_else_csc_text( $i_terminal, $terminal_block_type,
29945 $block_leading_text, $rblock_leading_if_elsif_text );
29948 # if this line ends in a label then remember it for the next pass
29949 $csc_last_label = EMPTY_STRING;
29950 if ( $terminal_type eq 'J' ) {
29951 $csc_last_label = $tokens_to_go[$i_terminal];
29954 return ( $terminal_type, $i_terminal, $i_block_leading_text,
29955 $block_leading_text, $block_line_count, $block_label );
29956 } ## end sub accumulate_csc_text
29958 sub make_else_csc_text {
29960 # create additional -csc text for an 'else' and optionally 'elsif',
29961 # depending on the value of switch
29963 # = 0 add 'if' text to trailing else
29964 # = 1 same as 0 plus:
29965 # add 'if' to 'elsif's if can fit in line length
29966 # add last 'elsif' to trailing else if can fit in one line
29967 # = 2 same as 1 but do not check if exceed line length
29969 # $rif_elsif_text = a reference to a list of all previous closing
29970 # side comments created for this if block
29972 my ( $self, $i_terminal, $block_type, $block_leading_text,
29975 my $csc_text = $block_leading_text;
29977 if ( $block_type eq 'elsif'
29978 && $rOpts_closing_side_comment_else_flag == 0 )
29983 my $count = @{$rif_elsif_text};
29984 return $csc_text unless ($count);
29986 my $if_text = '[ if' . $rif_elsif_text->[0];
29988 # always show the leading 'if' text on 'else'
29989 if ( $block_type eq 'else' ) {
29990 $csc_text .= $if_text;
29993 # see if that's all
29994 if ( $rOpts_closing_side_comment_else_flag == 0 ) {
29998 my $last_elsif_text = EMPTY_STRING;
29999 if ( $count > 1 ) {
30000 $last_elsif_text = ' [elsif' . $rif_elsif_text->[ $count - 1 ];
30001 if ( $count > 2 ) { $last_elsif_text = ' [...' . $last_elsif_text; }
30004 # tentatively append one more item
30005 my $saved_text = $csc_text;
30006 if ( $block_type eq 'else' ) {
30007 $csc_text .= $last_elsif_text;
30010 $csc_text .= SPACE . $if_text;
30013 # all done if no length checks requested
30014 if ( $rOpts_closing_side_comment_else_flag == 2 ) {
30018 # undo it if line length exceeded
30020 length($csc_text) +
30021 length($block_type) +
30022 length( $rOpts->{'closing-side-comment-prefix'} ) +
30023 $levels_to_go[$i_terminal] * $rOpts_indent_columns + 3;
30025 $length > $maximum_line_length_at_level[$leading_block_text_level] )
30027 $csc_text = $saved_text;
30030 } ## end sub make_else_csc_text
30031 } ## end closure accumulate_csc_text
30033 { ## begin closure balance_csc_text
30035 # Some additional routines for handling the --closing-side-comments option
30050 sub balance_csc_text {
30052 # Append characters to balance a closing side comment so that editors
30053 # such as vim can correctly jump through code.
30055 # input = ## end foreach my $foo ( sort { $b ...
30056 # output = ## end foreach my $foo ( sort { $b ...})
30058 # NOTE: This routine does not currently filter out structures within
30059 # quoted text because the bounce algorithms in text editors do not
30060 # necessarily do this either (a version of vim was checked and
30061 # did not do this).
30063 # Some complex examples which will cause trouble for some editors:
30064 # while ( $mask_string =~ /\{[^{]*?\}/g ) {
30065 # if ( $mask_str =~ /\}\s*els[^\{\}]+\{$/ ) {
30066 # if ( $1 eq '{' ) {
30067 # test file test1/braces.pl has many such examples.
30071 # loop to examine characters one-by-one, RIGHT to LEFT and
30072 # build a balancing ending, LEFT to RIGHT.
30073 foreach my $pos ( reverse( 0 .. length($csc) - 1 ) ) {
30075 my $char = substr( $csc, $pos, 1 );
30077 # ignore everything except structural characters
30078 next unless ( $matching_char{$char} );
30080 # pop most recently appended character
30081 my $top = chop($csc);
30083 # push it back plus the mate to the newest character
30084 # unless they balance each other.
30085 $csc = $csc . $top . $matching_char{$char} unless $top eq $char;
30088 # return the balanced string
30090 } ## end sub balance_csc_text
30091 } ## end closure balance_csc_text
30093 sub add_closing_side_comment {
30095 my ( $self, $ri_first, $ri_last ) = @_;
30096 my $rLL = $self->[_rLL_];
30098 # add closing side comments after closing block braces if -csc used
30099 my ( $closing_side_comment, $cscw_block_comment );
30101 #---------------------------------------------------------------
30102 # Step 1: loop through all tokens of this line to accumulate
30103 # the text needed to create the closing side comments. Also see
30104 # how the line ends.
30105 #---------------------------------------------------------------
30107 my ( $terminal_type, $i_terminal, $i_block_leading_text,
30108 $block_leading_text, $block_line_count, $block_label )
30109 = $self->accumulate_csc_text();
30111 #---------------------------------------------------------------
30112 # Step 2: make the closing side comment if this ends a block
30113 #---------------------------------------------------------------
30114 my $have_side_comment = $types_to_go[$max_index_to_go] eq '#';
30116 # if this line might end in a block closure..
30118 $terminal_type eq '}'
30120 # Fix 1 for c091, this is only for blocks
30121 && $block_type_to_go[$i_terminal]
30126 # the block is long enough
30127 ( $block_line_count >= $rOpts->{'closing-side-comment-interval'} )
30129 # or there is an existing comment to check
30130 || ( $have_side_comment
30131 && $rOpts->{'closing-side-comment-warnings'} )
30134 # .. and if this is one of the types of interest
30135 && $block_type_to_go[$i_terminal] =~
30136 /$closing_side_comment_list_pattern/
30138 # .. but not an anonymous sub
30139 # These are not normally of interest, and their closing braces are
30140 # often followed by commas or semicolons anyway. This also avoids
30141 # possible erratic output due to line numbering inconsistencies
30142 # in the cases where their closing braces terminate a line.
30143 && $block_type_to_go[$i_terminal] ne 'sub'
30145 # ..and the corresponding opening brace must is not in this batch
30146 # (because we do not need to tag one-line blocks, although this
30147 # should also be caught with a positive -csci value)
30148 && !defined( $mate_index_to_go[$i_terminal] )
30153 # this is the last token (line doesn't have a side comment)
30154 !$have_side_comment
30156 # or the old side comment is a closing side comment
30157 || $tokens_to_go[$max_index_to_go] =~
30158 /$closing_side_comment_prefix_pattern/
30163 # then make the closing side comment text
30164 if ($block_label) { $block_label .= SPACE }
30166 "$rOpts->{'closing-side-comment-prefix'} $block_label$block_type_to_go[$i_terminal]";
30168 # append any extra descriptive text collected above
30169 if ( $i_block_leading_text == $i_terminal ) {
30170 $token .= $block_leading_text;
30173 $token = balance_csc_text($token)
30174 if $rOpts->{'closing-side-comments-balanced'};
30176 $token =~ s/\s*$//; # trim any trailing whitespace
30178 # handle case of existing closing side comment
30179 if ($have_side_comment) {
30181 # warn if requested and tokens differ significantly
30182 if ( $rOpts->{'closing-side-comment-warnings'} ) {
30183 my $old_csc = $tokens_to_go[$max_index_to_go];
30184 my $new_csc = $token;
30185 $new_csc =~ s/\s+//g; # trim all whitespace
30186 $old_csc =~ s/\s+//g; # trim all whitespace
30187 $new_csc =~ s/[\]\)\}\s]*$//; # trim trailing structures
30188 $old_csc =~ s/[\]\)\}\s]*$//; # trim trailing structures
30189 $new_csc =~ s/(\.\.\.)$//; # trim trailing '...'
30190 my $new_trailing_dots = $1;
30191 $old_csc =~ s/(\.\.\.)\s*$//; # trim trailing '...'
30193 # Patch to handle multiple closing side comments at
30194 # else and elsif's. These have become too complicated
30195 # to check, so if we see an indication of
30196 # '[ if' or '[ # elsif', then assume they were made
30198 if ( $block_type_to_go[$i_terminal] eq 'else' ) {
30199 if ( $old_csc =~ /\[\s*elsif/ ) { $old_csc = $new_csc }
30201 elsif ( $block_type_to_go[$i_terminal] eq 'elsif' ) {
30202 if ( $old_csc =~ /\[\s*if/ ) { $old_csc = $new_csc }
30205 # if old comment is contained in new comment,
30206 # only compare the common part.
30207 if ( length($new_csc) > length($old_csc) ) {
30208 $new_csc = substr( $new_csc, 0, length($old_csc) );
30211 # if the new comment is shorter and has been limited,
30212 # only compare the common part.
30213 if ( length($new_csc) < length($old_csc)
30214 && $new_trailing_dots )
30216 $old_csc = substr( $old_csc, 0, length($new_csc) );
30219 # any remaining difference?
30220 if ( $new_csc ne $old_csc ) {
30222 # just leave the old comment if we are below the threshold
30223 # for creating side comments
30224 if ( $block_line_count <
30225 $rOpts->{'closing-side-comment-interval'} )
30230 # otherwise we'll make a note of it
30234 "perltidy -cscw replaced: $tokens_to_go[$max_index_to_go]\n"
30237 # save the old side comment in a new trailing block
30239 my $timestamp = EMPTY_STRING;
30240 if ( $rOpts->{'timestamp'} ) {
30241 my ( $day, $month, $year ) = (localtime)[ 3, 4, 5 ];
30244 $timestamp = "$year-$month-$day";
30246 $cscw_block_comment =
30247 "## perltidy -cscw $timestamp: $tokens_to_go[$max_index_to_go]";
30251 # No differences.. we can safely delete old comment if we
30252 # are below the threshold
30253 elsif ( $block_line_count <
30254 $rOpts->{'closing-side-comment-interval'} )
30256 # Since the line breaks have already been set, we have
30257 # to remove the token from the _to_go array and also
30258 # from the line range (this fixes issue c081).
30259 # Note that we can only get here if -cscw has been set
30260 # because otherwise the old comment is already deleted.
30262 my $ibeg = $ri_first->[-1];
30263 my $iend = $ri_last->[-1];
30265 && $iend == $max_index_to_go
30266 && $types_to_go[$max_index_to_go] eq '#' )
30269 $max_index_to_go--;
30271 && $types_to_go[$max_index_to_go] eq 'b' )
30274 $max_index_to_go--;
30276 $ri_last->[-1] = $iend;
30281 # switch to the new csc (unless we deleted it!)
30284 my $len_tok = length($token); # NOTE: length no longer important
30286 $len_tok - $token_lengths_to_go[$max_index_to_go];
30288 $tokens_to_go[$max_index_to_go] = $token;
30289 $token_lengths_to_go[$max_index_to_go] = $len_tok;
30290 my $K = $K_to_go[$max_index_to_go];
30291 $rLL->[$K]->[_TOKEN_] = $token;
30292 $rLL->[$K]->[_TOKEN_LENGTH_] = $len_tok;
30293 $summed_lengths_to_go[ $max_index_to_go + 1 ] += $added_len;
30297 # handle case of NO existing closing side comment
30300 # To avoid inserting a new token in the token arrays, we
30301 # will just return the new side comment so that it can be
30302 # inserted just before it is needed in the call to the
30303 # vertical aligner.
30304 $closing_side_comment = $token;
30307 return ( $closing_side_comment, $cscw_block_comment );
30308 } ## end sub add_closing_side_comment
30310 ############################
30311 # CODE SECTION 15: Summarize
30312 ############################
30316 # This is the last routine called when a file is formatted.
30317 # Flush buffer and write any informative messages
30318 my ( $self, $severe_error ) = @_;
30321 my $file_writer_object = $self->[_file_writer_object_];
30322 $file_writer_object->decrement_output_line_number()
30323 ; # fix up line number since it was incremented
30324 we_are_at_the_last_line();
30326 my $max_depth = $self->[_maximum_BLOCK_level_];
30327 my $at_line = $self->[_maximum_BLOCK_level_at_line_];
30328 write_logfile_entry(
30329 "Maximum leading structural depth is $max_depth in input at line $at_line\n"
30332 my $added_semicolon_count = $self->[_added_semicolon_count_];
30333 my $first_added_semicolon_at = $self->[_first_added_semicolon_at_];
30334 my $last_added_semicolon_at = $self->[_last_added_semicolon_at_];
30336 if ( $added_semicolon_count > 0 ) {
30337 my $first = ( $added_semicolon_count > 1 ) ? "First" : EMPTY_STRING;
30339 ( $added_semicolon_count > 1 ) ? "semicolons were" : "semicolon was";
30340 write_logfile_entry("$added_semicolon_count $what added:\n");
30341 write_logfile_entry(
30342 " $first at input line $first_added_semicolon_at\n");
30344 if ( $added_semicolon_count > 1 ) {
30345 write_logfile_entry(
30346 " Last at input line $last_added_semicolon_at\n");
30348 write_logfile_entry(" (Use -nasc to prevent semicolon addition)\n");
30349 write_logfile_entry("\n");
30352 my $deleted_semicolon_count = $self->[_deleted_semicolon_count_];
30353 my $first_deleted_semicolon_at = $self->[_first_deleted_semicolon_at_];
30354 my $last_deleted_semicolon_at = $self->[_last_deleted_semicolon_at_];
30355 if ( $deleted_semicolon_count > 0 ) {
30356 my $first = ( $deleted_semicolon_count > 1 ) ? "First" : EMPTY_STRING;
30358 ( $deleted_semicolon_count > 1 )
30359 ? "semicolons were"
30361 write_logfile_entry(
30362 "$deleted_semicolon_count unnecessary $what deleted:\n");
30363 write_logfile_entry(
30364 " $first at input line $first_deleted_semicolon_at\n");
30366 if ( $deleted_semicolon_count > 1 ) {
30367 write_logfile_entry(
30368 " Last at input line $last_deleted_semicolon_at\n");
30370 write_logfile_entry(" (Use -ndsm to prevent semicolon deletion)\n");
30371 write_logfile_entry("\n");
30374 my $embedded_tab_count = $self->[_embedded_tab_count_];
30375 my $first_embedded_tab_at = $self->[_first_embedded_tab_at_];
30376 my $last_embedded_tab_at = $self->[_last_embedded_tab_at_];
30377 if ( $embedded_tab_count > 0 ) {
30378 my $first = ( $embedded_tab_count > 1 ) ? "First" : EMPTY_STRING;
30380 ( $embedded_tab_count > 1 )
30381 ? "quotes or patterns"
30382 : "quote or pattern";
30383 write_logfile_entry("$embedded_tab_count $what had embedded tabs:\n");
30384 write_logfile_entry(
30385 "This means the display of this script could vary with device or software\n"
30387 write_logfile_entry(" $first at input line $first_embedded_tab_at\n");
30389 if ( $embedded_tab_count > 1 ) {
30390 write_logfile_entry(
30391 " Last at input line $last_embedded_tab_at\n");
30393 write_logfile_entry("\n");
30396 my $first_tabbing_disagreement = $self->[_first_tabbing_disagreement_];
30397 my $last_tabbing_disagreement = $self->[_last_tabbing_disagreement_];
30398 my $tabbing_disagreement_count = $self->[_tabbing_disagreement_count_];
30399 my $in_tabbing_disagreement = $self->[_in_tabbing_disagreement_];
30401 if ($first_tabbing_disagreement) {
30402 write_logfile_entry(
30403 "First indentation disagreement seen at input line $first_tabbing_disagreement\n"
30407 my $first_btd = $self->[_first_brace_tabbing_disagreement_];
30410 "First closing brace indentation disagreement started at input line $first_btd\n";
30411 write_logfile_entry($msg);
30413 # leave a hint in the .ERR file if there was a brace error
30414 if ( get_saw_brace_error() ) { warning("NOTE: $msg") }
30417 my $in_btd = $self->[_in_brace_tabbing_disagreement_];
30420 "Ending with brace indentation disagreement which started at input line $in_btd\n";
30421 write_logfile_entry($msg);
30423 # leave a hint in the .ERR file if there was a brace error
30424 if ( get_saw_brace_error() ) { warning("NOTE: $msg") }
30427 if ($in_tabbing_disagreement) {
30429 "Ending with indentation disagreement which started at input line $in_tabbing_disagreement\n";
30430 write_logfile_entry($msg);
30434 if ($last_tabbing_disagreement) {
30436 write_logfile_entry(
30437 "Last indentation disagreement seen at input line $last_tabbing_disagreement\n"
30441 write_logfile_entry("No indentation disagreement seen\n");
30445 if ($first_tabbing_disagreement) {
30446 write_logfile_entry(
30447 "Note: Indentation disagreement detection is not accurate for outdenting and -lp.\n"
30450 write_logfile_entry("\n");
30452 my $vao = $self->[_vertical_aligner_object_];
30453 $vao->report_anything_unusual();
30455 $file_writer_object->report_line_length_errors();
30457 # Define the formatter self-check for convergence.
30458 $self->[_converged_] =
30460 || $file_writer_object->get_convergence_check()
30461 || $rOpts->{'indent-only'};
30464 } ## end sub wrapup
30466 } ## end package Perl::Tidy::Formatter