1 #####################################################################
3 # The Perl::Tidy::Formatter package adds indentation, whitespace, and
4 # line breaks to the token stream
6 #####################################################################
9 # CODE SECTION 1: Preliminary code, global definitions and sub new
11 # CODE SECTION 2: Some Basic Utilities
12 # CODE SECTION 3: Check and process options
14 # CODE SECTION 4: Receive lines from the tokenizer
16 # CODE SECTION 5: Pre-process the entire file
17 # sub finish_formatting
18 # CODE SECTION 6: Process line-by-line
19 # sub process_all_lines
20 # CODE SECTION 7: Process lines of code
21 # process_line_of_CODE
22 # CODE SECTION 8: Utilities for setting breakpoints
23 # sub set_forced_breakpoint
24 # CODE SECTION 9: Process batches of code
25 # sub grind_batch_of_CODE
26 # CODE SECTION 10: Code to break long statments
27 # sub break_long_lines
28 # CODE SECTION 11: Code to break long lists
30 # CODE SECTION 12: Code for setting indentation
31 # CODE SECTION 13: Preparing batch of lines for vertical alignment
32 # sub convey_batch_to_vertical_aligner
33 # CODE SECTION 14: Code for creating closing side comments
34 # sub add_closing_side_comment
35 # CODE SECTION 15: Summarize
38 #######################################################################
39 # CODE SECTION 1: Preliminary code and global definitions up to sub new
40 #######################################################################
42 package Perl::Tidy::Formatter;
46 # This flag gets switched on during automated testing for extra checking
47 use constant DEVEL_MODE => 0;
49 { #<<< A non-indenting brace to contain all lexical variables
52 our $VERSION = '20220217';
54 # The Tokenizer will be loaded with the Formatter
55 ##use Perl::Tidy::Tokenizer; # for is_keyword()
59 # Catch any undefined sub calls so that we are sure to get
60 # some diagnostic information. This sub should never be called
61 # except for a programming error.
63 return if ( $AUTOLOAD =~ /\bDESTROY$/ );
64 my ( $pkg, $fname, $lno ) = caller();
65 my $my_package = __PACKAGE__;
67 ======================================================================
68 Error detected in package '$my_package', version $VERSION
69 Received unexpected AUTOLOAD call for sub '$AUTOLOAD'
70 Called from package: '$pkg'
71 Called from File '$fname' at line '$lno'
72 This error is probably due to a recent programming change
73 ======================================================================
80 $self->_decrement_count();
86 Perl::Tidy::Die($msg);
87 croak "unexpected return from Perl::Tidy::Die";
92 Perl::Tidy::Warn($msg);
99 # This routine is called for errors that really should not occur
100 # except if there has been a bug introduced by a recent program change.
101 # Please add comments at calls to Fault to explain why the call
102 # should not occur, and where to look to fix it.
103 my ( $package0, $filename0, $line0, $subroutine0 ) = caller(0);
104 my ( $package1, $filename1, $line1, $subroutine1 ) = caller(1);
105 my ( $package2, $filename2, $line2, $subroutine2 ) = caller(2);
106 my $input_stream_name = get_input_stream_name();
109 ==============================================================================
110 While operating on input stream with name: '$input_stream_name'
111 A fault was detected at line $line0 of sub '$subroutine1'
113 which was called from line $line1 of sub '$subroutine2'
115 This is probably an error introduced by a recent programming change.
116 Perl::Tidy::Formatter.pm reports VERSION='$VERSION'.
117 ==============================================================================
120 # We shouldn't get here, but this return is to keep Perl-Critic from
127 Perl::Tidy::Exit($msg);
128 croak "unexpected return from Perl::Tidy::Exit";
131 # Global variables ...
134 #-----------------------------------------------------------------
135 # Section 1: Global variables which are either always constant or
136 # are constant after being configured by user-supplied
137 # parameters. They remain constant as a file is being processed.
138 #-----------------------------------------------------------------
140 # user parameters and shortcuts
143 $rOpts_add_whitespace,
144 $rOpts_blank_lines_after_opening_block,
145 $rOpts_block_brace_tightness,
146 $rOpts_block_brace_vertical_tightness,
147 $rOpts_break_after_labels,
148 $rOpts_break_at_old_attribute_breakpoints,
149 $rOpts_break_at_old_comma_breakpoints,
150 $rOpts_break_at_old_keyword_breakpoints,
151 $rOpts_break_at_old_logical_breakpoints,
152 $rOpts_break_at_old_semicolon_breakpoints,
153 $rOpts_break_at_old_ternary_breakpoints,
154 $rOpts_break_open_paren_list,
155 $rOpts_closing_side_comments,
156 $rOpts_closing_side_comment_else_flag,
157 $rOpts_closing_side_comment_maximum_text,
158 $rOpts_comma_arrow_breakpoints,
159 $rOpts_continuation_indentation,
160 $rOpts_delete_closing_side_comments,
161 $rOpts_delete_old_whitespace,
162 $rOpts_delete_side_comments,
163 $rOpts_extended_continuation_indentation,
164 $rOpts_format_skipping,
165 $rOpts_freeze_whitespace,
166 $rOpts_function_paren_vertical_alignment,
167 $rOpts_fuzzy_line_length,
168 $rOpts_ignore_old_breakpoints,
169 $rOpts_ignore_side_comment_lengths,
170 $rOpts_indent_closing_brace,
171 $rOpts_indent_columns,
173 $rOpts_keep_interior_semicolons,
174 $rOpts_line_up_parentheses,
175 $rOpts_logical_padding,
176 $rOpts_maximum_consecutive_blank_lines,
177 $rOpts_maximum_fields_per_table,
178 $rOpts_maximum_line_length,
179 $rOpts_one_line_block_semicolons,
180 $rOpts_opening_brace_always_on_right,
181 $rOpts_outdent_keywords,
182 $rOpts_outdent_labels,
183 $rOpts_outdent_long_comments,
184 $rOpts_outdent_long_quotes,
185 $rOpts_outdent_static_block_comments,
187 $rOpts_short_concatenation_item_length,
188 $rOpts_stack_closing_block_brace,
189 $rOpts_static_block_comments,
190 $rOpts_sub_alias_list,
191 $rOpts_tee_block_comments,
193 $rOpts_tee_side_comments,
194 $rOpts_variable_maximum_line_length,
197 $rOpts_valign_side_comments,
198 $rOpts_whitespace_cycle,
199 $rOpts_extended_line_up_parentheses,
201 # Static hashes initialized in a BEGIN block
203 %is_if_unless_and_or_last_next_redo_return,
204 %is_if_elsif_else_unless_while_until_for_foreach,
205 %is_if_unless_while_until_for_foreach,
206 %is_last_next_redo_return,
210 %is_block_without_semicolon,
211 %ok_to_add_semicolon_for_block_type,
216 %is_equal_or_fat_comma,
218 %is_opening_sequence_token,
219 %is_closing_sequence_token,
220 %is_container_label_type,
224 # Initialized in check_options. These are constants and could
225 # just as well be initialized in a BEGIN block.
227 %is_if_brace_follower,
228 %is_else_brace_follower,
229 %is_anon_sub_brace_follower,
230 %is_anon_sub_1_brace_follower,
231 %is_other_brace_follower,
233 # Initialized and re-initialized in sub initialize_grep_and_friends;
234 # These can be modified by grep-alias-list
236 %is_sort_map_grep_eval,
237 %is_sort_map_grep_eval_do,
239 %is_keyword_returning_list,
242 # Initialized in sub initialize_whitespace_hashes;
243 # Some can be modified according to user parameters.
248 # Configured in sub initialize_bond_strength_hashes
249 %right_bond_strength,
252 # Hashes for -kbb=s and -kba=s
253 %keep_break_before_type,
254 %keep_break_after_type,
256 # Initialized in check_options, modified by prepare_cuddled_block_types:
257 %want_one_line_block,
259 # Initialized in sub prepare_cuddled_block_types
260 $rcuddled_block_types,
262 # Initialized and configured in check_optioms
264 %keyword_paren_inner_tightness,
268 %break_before_container_types,
269 %container_indentation_options,
271 %space_after_keyword,
276 %opening_vertical_tightness,
277 %closing_vertical_tightness,
278 %closing_token_indentation,
279 $some_closing_token_indentation,
281 %opening_token_right,
282 %stack_opening_token,
283 %stack_closing_token,
285 %weld_nested_exclusion_rules,
286 %line_up_parentheses_control_hash,
287 $line_up_parentheses_control_is_lxpl,
289 # regex patterns for text identification.
290 # Most are initialized in a sub make_**_pattern during configuration.
291 # Most can be configured by user parameters.
294 $static_block_comment_pattern,
295 $static_side_comment_pattern,
296 $format_skipping_pattern_begin,
297 $format_skipping_pattern_end,
298 $non_indenting_brace_pattern,
299 $bl_exclusion_pattern,
301 $bli_exclusion_pattern,
303 $block_brace_vertical_tightness_pattern,
304 $blank_lines_after_opening_block_pattern,
305 $blank_lines_before_closing_block_pattern,
306 $keyword_group_list_pattern,
307 $keyword_group_list_comment_pattern,
308 $closing_side_comment_prefix_pattern,
309 $closing_side_comment_list_pattern,
311 # Table to efficiently find indentation and max line length
313 @maximum_line_length_at_level,
314 @maximum_text_length_at_level,
318 # Total number of sequence items in a weld, for quick checks
321 #--------------------------------------------------------
322 # Section 2: Work arrays for the current batch of tokens.
323 #--------------------------------------------------------
325 # These are re-initialized for each batch of code
326 # in sub initialize_batch_variables.
329 @type_sequence_to_go,
330 @bond_strength_to_go,
331 @forced_breakpoint_to_go,
332 @token_lengths_to_go,
333 @summed_lengths_to_go,
335 @leading_spaces_to_go,
336 @reduced_spaces_to_go,
337 @standard_spaces_to_go,
340 @nesting_depth_to_go,
342 @old_breakpoint_to_go,
354 # Index names for token variables.
355 # Do not combine with other BEGIN blocks (c101).
359 _CUMULATIVE_LENGTH_ => $i++,
360 _LINE_INDEX_ => $i++,
361 _KNEXT_SEQ_ITEM_ => $i++,
364 _TOKEN_LENGTH_ => $i++,
366 _TYPE_SEQUENCE_ => $i++,
368 # Number of token variables; must be last in list:
375 # Index names for $self variables.
376 # Do not combine with other BEGIN blocks (c101).
380 _rlines_new_ => $i++,
383 _rdepth_of_opening_seqno_ => $i++,
385 _Iss_opening_ => $i++,
386 _Iss_closing_ => $i++,
387 _rblock_type_of_seqno_ => $i++,
388 _ris_asub_block_ => $i++,
389 _ris_sub_block_ => $i++,
390 _K_opening_container_ => $i++,
391 _K_closing_container_ => $i++,
392 _K_opening_ternary_ => $i++,
393 _K_closing_ternary_ => $i++,
394 _K_first_seq_item_ => $i++,
395 _rK_phantom_semicolons_ => $i++,
396 _rtype_count_by_seqno_ => $i++,
397 _ris_function_call_paren_ => $i++,
398 _rlec_count_by_seqno_ => $i++,
399 _ris_broken_container_ => $i++,
400 _ris_permanently_broken_ => $i++,
402 _rhas_broken_list_ => $i++,
403 _rhas_broken_list_with_lec_ => $i++,
404 _rhas_code_block_ => $i++,
405 _rhas_broken_code_block_ => $i++,
406 _rhas_ternary_ => $i++,
407 _ris_excluded_lp_container_ => $i++,
408 _rlp_object_by_seqno_ => $i++,
409 _rwant_reduced_ci_ => $i++,
410 _rno_xci_by_seqno_ => $i++,
411 _rbrace_left_ => $i++,
412 _ris_bli_container_ => $i++,
413 _rparent_of_seqno_ => $i++,
414 _rchildren_of_seqno_ => $i++,
415 _ris_list_by_seqno_ => $i++,
416 _rbreak_container_ => $i++,
417 _rshort_nested_ => $i++,
418 _length_function_ => $i++,
419 _is_encoded_data_ => $i++,
421 _sink_object_ => $i++,
422 _file_writer_object_ => $i++,
423 _vertical_aligner_object_ => $i++,
424 _logger_object_ => $i++,
425 _radjusted_levels_ => $i++,
426 _this_batch_ => $i++,
428 _last_output_short_opening_token_ => $i++,
430 _last_line_leading_type_ => $i++,
431 _last_line_leading_level_ => $i++,
432 _last_last_line_leading_level_ => $i++,
434 _added_semicolon_count_ => $i++,
435 _first_added_semicolon_at_ => $i++,
436 _last_added_semicolon_at_ => $i++,
438 _deleted_semicolon_count_ => $i++,
439 _first_deleted_semicolon_at_ => $i++,
440 _last_deleted_semicolon_at_ => $i++,
442 _embedded_tab_count_ => $i++,
443 _first_embedded_tab_at_ => $i++,
444 _last_embedded_tab_at_ => $i++,
446 _first_tabbing_disagreement_ => $i++,
447 _last_tabbing_disagreement_ => $i++,
448 _tabbing_disagreement_count_ => $i++,
449 _in_tabbing_disagreement_ => $i++,
450 _first_brace_tabbing_disagreement_ => $i++,
451 _in_brace_tabbing_disagreement_ => $i++,
453 _saw_VERSION_in_this_file_ => $i++,
454 _saw_END_or_DATA_ => $i++,
456 _rK_weld_left_ => $i++,
457 _rK_weld_right_ => $i++,
458 _rweld_len_right_at_K_ => $i++,
460 _rspecial_side_comment_type_ => $i++,
462 _rseqno_controlling_my_ci_ => $i++,
463 _ris_seqno_controlling_ci_ => $i++,
464 _save_logfile_ => $i++,
465 _maximum_level_ => $i++,
466 _maximum_level_at_line_ => $i++,
467 _maximum_BLOCK_level_ => $i++,
468 _maximum_BLOCK_level_at_line_ => $i++,
470 _rKrange_code_without_comments_ => $i++,
471 _rbreak_before_Kfirst_ => $i++,
472 _rbreak_after_Klast_ => $i++,
473 _rwant_container_open_ => $i++,
476 _rstarting_multiline_qw_seqno_by_K_ => $i++,
477 _rending_multiline_qw_seqno_by_K_ => $i++,
478 _rKrange_multiline_qw_by_seqno_ => $i++,
479 _rmultiline_qw_has_extra_level_ => $i++,
481 _rcollapsed_length_by_seqno_ => $i++,
482 _rbreak_before_container_by_seqno_ => $i++,
483 _ris_essential_old_breakpoint_ => $i++,
484 _roverride_cab3_ => $i++,
485 _ris_assigned_structure_ => $i++,
487 _LAST_SELF_INDEX_ => $i - 1,
493 # Index names for batch variables.
494 # Do not combine with other BEGIN blocks (c101).
495 # These are stored in _this_batch_, which is a sub-array of $self.
498 _starting_in_quote_ => $i++,
499 _ending_in_quote_ => $i++,
500 _is_static_block_comment_ => $i++,
503 _do_not_pad_ => $i++,
504 _peak_batch_size_ => $i++,
505 _max_index_to_go_ => $i++,
506 _batch_count_ => $i++,
507 _rix_seqno_controlling_ci_ => $i++,
508 _batch_CODE_type_ => $i++,
509 _ri_starting_one_line_block_ => $i++,
515 # Sequence number assigned to the root of sequence tree.
516 # The minimum of the actual sequences numbers is 4, so we can use 1
517 use constant SEQ_ROOT => 1;
519 # Codes for insertion and deletion of blanks
520 use constant DELETE => 0;
521 use constant STABLE => 1;
522 use constant INSERT => 2;
525 use constant WS_YES => 1;
526 use constant WS_OPTIONAL => 0;
527 use constant WS_NO => -1;
529 # Token bond strengths.
530 use constant NO_BREAK => 10000;
531 use constant VERY_STRONG => 100;
532 use constant STRONG => 2.1;
533 use constant NOMINAL => 1.1;
534 use constant WEAK => 0.8;
535 use constant VERY_WEAK => 0.55;
537 # values for testing indexes in output array
538 use constant UNDEFINED_INDEX => -1;
540 # Maximum number of little messages; probably need not be changed.
541 use constant MAX_NAG_MESSAGES => 6;
543 # This is the decimal range of printable characters in ASCII. It is used to
544 # make quick preliminary checks before resorting to using a regex.
545 use constant ORD_PRINTABLE_MIN => 33;
546 use constant ORD_PRINTABLE_MAX => 126;
548 # Initialize constant hashes ...
552 = **= += *= &= <<= &&=
557 @is_assignment{@q} = (1) x scalar(@q);
559 @q = qw(is if unless and or err last next redo return);
560 @is_if_unless_and_or_last_next_redo_return{@q} = (1) x scalar(@q);
562 # These block types may have text between the keyword and opening
563 # curly. Note: 'else' does not, but must be included to allow trailing
564 # if/elsif text to be appended.
565 # patch for SWITCH/CASE: added 'case' and 'when'
566 @q = qw(if elsif else unless while until for foreach case when catch);
567 @is_if_elsif_else_unless_while_until_for_foreach{@q} =
570 @q = qw(if unless while until for foreach);
571 @is_if_unless_while_until_for_foreach{@q} =
574 @q = qw(last next redo return);
575 @is_last_next_redo_return{@q} = (1) x scalar(@q);
577 # Map related block names into a common name to allow vertical alignment
578 # used by sub make_alignment_patterns. Note: this is normally unchanged,
579 # but it contains 'grep' and can be re-initized in
580 # sub initialize_grep_and_friends in a testing mode.
593 @is_if_unless{@q} = (1) x scalar(@q);
596 @is_and_or{@q} = (1) x scalar(@q);
598 # Identify certain operators which often occur in chains.
599 # Note: the minus (-) causes a side effect of padding of the first line in
600 # something like this (by sub set_logical_padding):
601 # Checkbutton => 'Transmission checked',
602 # -variable => \$TRANS
603 # This usually improves appearance so it seems ok.
604 @q = qw(&& || and or : ? . + - * /);
605 @is_chain_operator{@q} = (1) x scalar(@q);
607 # Operators that the user can request break before or after.
608 # Note that some are keywords
609 @all_operators = qw(% + - * / x != == >= <= =~ !~ < > | &
610 = **= += *= &= <<= &&= -= /= |= >>= ||= //= .= %= ^= x=
611 . : ? && || and or err xor
614 # We can remove semicolons after blocks preceded by these keywords
616 qw(BEGIN END CHECK INIT AUTOLOAD DESTROY UNITCHECK continue if elsif else
617 unless while until for foreach given when default);
618 @is_block_without_semicolon{@q} = (1) x scalar(@q);
620 # We will allow semicolons to be added within these block types
621 # as well as sub and package blocks.
623 # 1. Note that these keywords are omitted:
624 # switch case given when default sort map grep
625 # 2. It is also ok to add for sub and package blocks and a labeled block
626 # 3. But not okay for other perltidy types including:
628 # 4. Test files: blktype.t, blktype1.t, semicolon.t
630 qw( BEGIN END CHECK INIT AUTOLOAD DESTROY UNITCHECK continue if elsif else
631 unless do while until eval for foreach );
632 @ok_to_add_semicolon_for_block_type{@q} = (1) x scalar(@q);
634 # 'L' is token for opening { at hash key
636 @is_opening_type{@q} = (1) x scalar(@q);
638 # 'R' is token for closing } at hash key
640 @is_closing_type{@q} = (1) x scalar(@q);
643 @is_opening_token{@q} = (1) x scalar(@q);
646 @is_closing_token{@q} = (1) x scalar(@q);
649 @is_opening_sequence_token{@q} = (1) x scalar(@q);
652 @is_closing_sequence_token{@q} = (1) x scalar(@q);
654 # a hash needed by sub break_lists for labeling containers
655 @q = qw( k => && || ? : . );
656 @is_container_label_type{@q} = (1) x scalar(@q);
658 # Braces -bbht etc must follow these. Note: experimentation with
659 # including a simple comma shows that it adds little and can lead
660 # to poor formatting in complex lists.
662 @is_equal_or_fat_comma{@q} = (1) x scalar(@q);
666 @is_counted_type{@q} = (1) x scalar(@q);
670 { ## begin closure to count instances
672 # methods to count instances
674 sub get_count { return $_count; }
675 sub _increment_count { return ++$_count }
676 sub _decrement_count { return --$_count }
677 } ## end closure to count instances
681 my ( $class, @args ) = @_;
683 # we are given an object with a write_line() method to take lines
685 sink_object => undef,
686 diagnostics_object => undef,
687 logger_object => undef,
688 length_function => sub { return length( $_[0] ) },
689 is_encoded_data => "",
692 my %args = ( %defaults, @args );
694 my $length_function = $args{length_function};
695 my $is_encoded_data = $args{is_encoded_data};
696 my $fh_tee = $args{fh_tee};
697 my $logger_object = $args{logger_object};
698 my $diagnostics_object = $args{diagnostics_object};
700 # we create another object with a get_line() and peek_ahead() method
701 my $sink_object = $args{sink_object};
702 my $file_writer_object =
703 Perl::Tidy::FileWriter->new( $sink_object, $rOpts, $logger_object );
705 # initialize closure variables...
706 set_logger_object($logger_object);
707 set_diagnostics_object($diagnostics_object);
708 initialize_lp_vars();
709 initialize_csc_vars();
710 initialize_break_lists();
711 initialize_undo_ci();
712 initialize_process_line_of_CODE();
713 initialize_grind_batch_of_CODE();
714 initialize_final_indentation_adjustment();
715 initialize_postponed_breakpoint();
716 initialize_batch_variables();
717 initialize_forced_breakpoint_vars();
718 initialize_write_line();
720 my $vertical_aligner_object = Perl::Tidy::VerticalAligner->new(
722 file_writer_object => $file_writer_object,
723 logger_object => $logger_object,
724 diagnostics_object => $diagnostics_object,
725 length_function => $length_function
728 write_logfile_entry("\nStarting tokenization pass...\n");
730 if ( $rOpts->{'entab-leading-whitespace'} ) {
732 "Leading whitespace will be entabbed with $rOpts->{'entab-leading-whitespace'} spaces per tab\n"
735 elsif ( $rOpts->{'tabs'} ) {
736 write_logfile_entry("Indentation will be with a tab character\n");
740 "Indentation will be with $rOpts->{'indent-columns'} spaces\n");
743 # Initialize the $self array reference.
744 # To add an item, first add a constant index in the BEGIN block above.
747 # Basic data structures...
748 $self->[_rlines_] = []; # = ref to array of lines of the file
749 $self->[_rlines_new_] = []; # = ref to array of output lines
751 # 'rLL' = reference to the continuous liner array of all tokens in a file.
752 # 'LL' stands for 'Linked List'. Using a linked list was a disaster, but
753 # 'LL' stuck because it is easy to type. The 'rLL' array is updated
754 # by sub 'respace_tokens' during reformatting. The indexes in 'rLL' begin
755 # with '$K' by convention.
757 $self->[_Klimit_] = undef; # = maximum K index for rLL.
759 # Indexes into the rLL list
760 $self->[_K_opening_container_] = {};
761 $self->[_K_closing_container_] = {};
762 $self->[_K_opening_ternary_] = {};
763 $self->[_K_closing_ternary_] = {};
764 $self->[_K_first_seq_item_] = undef; # K of first token with a sequence #
766 # Array of phantom semicolons, in case we ever need to undo them
767 $self->[_rK_phantom_semicolons_] = undef;
769 # 'rSS' is the 'Signed Sequence' list, a continuous list of all sequence
770 # numbers with + or - indicating opening or closing. This list represents
771 # the entire container tree and is invariant under reformatting. It can be
772 # used to quickly travel through the tree. Indexes in the rSS array begin
773 # with '$I' by convention. The 'Iss' arrays give the indexes in this list
774 # of opening and closing sequence numbers.
776 $self->[_Iss_opening_] = [];
777 $self->[_Iss_closing_] = [];
779 # Arrays to help traverse the tree
780 $self->[_rdepth_of_opening_seqno_] = [];
781 $self->[_rblock_type_of_seqno_] = {};
782 $self->[_ris_asub_block_] = {};
783 $self->[_ris_sub_block_] = {};
785 # Mostly list characteristics and processing flags
786 $self->[_rtype_count_by_seqno_] = {};
787 $self->[_ris_function_call_paren_] = {};
788 $self->[_rlec_count_by_seqno_] = {};
789 $self->[_ris_broken_container_] = {};
790 $self->[_ris_permanently_broken_] = {};
791 $self->[_rhas_list_] = {};
792 $self->[_rhas_broken_list_] = {};
793 $self->[_rhas_broken_list_with_lec_] = {};
794 $self->[_rhas_code_block_] = {};
795 $self->[_rhas_broken_code_block_] = {};
796 $self->[_rhas_ternary_] = {};
797 $self->[_ris_excluded_lp_container_] = {};
798 $self->[_rlp_object_by_seqno_] = {};
799 $self->[_rwant_reduced_ci_] = {};
800 $self->[_rno_xci_by_seqno_] = {};
801 $self->[_rbrace_left_] = {};
802 $self->[_ris_bli_container_] = {};
803 $self->[_rparent_of_seqno_] = {};
804 $self->[_rchildren_of_seqno_] = {};
805 $self->[_ris_list_by_seqno_] = {};
807 $self->[_rbreak_container_] = {}; # prevent one-line blocks
808 $self->[_rshort_nested_] = {}; # blocks not forced open
809 $self->[_length_function_] = $length_function;
810 $self->[_is_encoded_data_] = $is_encoded_data;
813 $self->[_fh_tee_] = $fh_tee;
814 $self->[_sink_object_] = $sink_object;
815 $self->[_file_writer_object_] = $file_writer_object;
816 $self->[_vertical_aligner_object_] = $vertical_aligner_object;
817 $self->[_logger_object_] = $logger_object;
819 # Reference to the batch being processed
820 $self->[_this_batch_] = [];
822 # Memory of processed text...
823 $self->[_last_last_line_leading_level_] = 0;
824 $self->[_last_line_leading_level_] = 0;
825 $self->[_last_line_leading_type_] = '#';
826 $self->[_last_output_short_opening_token_] = 0;
827 $self->[_added_semicolon_count_] = 0;
828 $self->[_first_added_semicolon_at_] = 0;
829 $self->[_last_added_semicolon_at_] = 0;
830 $self->[_deleted_semicolon_count_] = 0;
831 $self->[_first_deleted_semicolon_at_] = 0;
832 $self->[_last_deleted_semicolon_at_] = 0;
833 $self->[_embedded_tab_count_] = 0;
834 $self->[_first_embedded_tab_at_] = 0;
835 $self->[_last_embedded_tab_at_] = 0;
836 $self->[_first_tabbing_disagreement_] = 0;
837 $self->[_last_tabbing_disagreement_] = 0;
838 $self->[_tabbing_disagreement_count_] = 0;
839 $self->[_in_tabbing_disagreement_] = 0;
840 $self->[_saw_VERSION_in_this_file_] = !$rOpts->{'pass-version-line'};
841 $self->[_saw_END_or_DATA_] = 0;
842 $self->[_first_brace_tabbing_disagreement_] = undef;
843 $self->[_in_brace_tabbing_disagreement_] = undef;
845 # Hashes related to container welding...
846 $self->[_radjusted_levels_] = [];
848 # Weld data structures
849 $self->[_rK_weld_left_] = {};
850 $self->[_rK_weld_right_] = {};
851 $self->[_rweld_len_right_at_K_] = {};
854 $self->[_rseqno_controlling_my_ci_] = {};
855 $self->[_ris_seqno_controlling_ci_] = {};
857 $self->[_rspecial_side_comment_type_] = {};
858 $self->[_maximum_level_] = 0;
859 $self->[_maximum_level_at_line_] = 0;
860 $self->[_maximum_BLOCK_level_] = 0;
861 $self->[_maximum_BLOCK_level_at_line_] = 0;
863 $self->[_rKrange_code_without_comments_] = [];
864 $self->[_rbreak_before_Kfirst_] = {};
865 $self->[_rbreak_after_Klast_] = {};
866 $self->[_rwant_container_open_] = {};
867 $self->[_converged_] = 0;
870 $self->[_rstarting_multiline_qw_seqno_by_K_] = {};
871 $self->[_rending_multiline_qw_seqno_by_K_] = {};
872 $self->[_rKrange_multiline_qw_by_seqno_] = {};
873 $self->[_rmultiline_qw_has_extra_level_] = {};
875 $self->[_rcollapsed_length_by_seqno_] = {};
876 $self->[_rbreak_before_container_by_seqno_] = {};
877 $self->[_ris_essential_old_breakpoint_] = {};
878 $self->[_roverride_cab3_] = {};
879 $self->[_ris_assigned_structure_] = {};
881 # This flag will be updated later by a call to get_save_logfile()
882 $self->[_save_logfile_] = defined($logger_object);
884 # Be sure all variables in $self have been initialized above. To find the
885 # correspondence of index numbers and array names, copy a list to a file
886 # and use the unix 'nl' command to number lines 1..
889 foreach ( 0 .. _LAST_SELF_INDEX_ ) {
890 if ( !exists( $self->[$_] ) ) {
891 push @non_existant, $_;
895 Fault("These indexes in self not initialized: (@non_existant)\n");
901 # Safety check..this is not a class yet
902 if ( _increment_count() > 1 ) {
904 "Attempt to create more than 1 object in $class, which is not a true class yet\n";
909 ######################################
910 # CODE SECTION 2: Some Basic Utilities
911 ######################################
915 # Verify that the rLL array has not been auto-vivified
916 my ( $self, $msg ) = @_;
917 my $rLL = $self->[_rLL_];
918 my $Klimit = $self->[_Klimit_];
920 if ( ( defined($Klimit) && $Klimit != $num - 1 )
921 || ( !defined($Klimit) && $num > 0 ) )
924 # This fault can occur if the array has been accessed for an index
925 # greater than $Klimit, which is the last token index. Just accessing
926 # the array above index $Klimit, not setting a value, can cause @rLL to
927 # increase beyond $Klimit. If this occurs, the problem can be located
928 # by making calls to this routine at different locations in
929 # sub 'finish_formatting'.
930 $Klimit = 'undef' if ( !defined($Klimit) );
931 $msg = "" unless $msg;
932 Fault("$msg ERROR: rLL has num=$num but Klimit='$Klimit'\n");
938 my ( $rtest, $rvalid, $msg, $exact_match ) = @_;
940 # Check the keys of a hash:
941 # $rtest = ref to hash to test
942 # $rvalid = ref to hash with valid keys
944 # $msg = a message to write in case of error
945 # $exact_match defines the type of check:
946 # = false: test hash must not have unknown key
947 # = true: test hash must have exactly same keys as known hash
949 grep { !exists $rvalid->{$_} } keys %{$rtest};
951 grep { !exists $rtest->{$_} } keys %{$rvalid};
952 my $error = @unknown_keys;
953 if ($exact_match) { $error ||= @missing_keys }
956 my @expected_keys = sort keys %{$rvalid};
957 @unknown_keys = sort @unknown_keys;
959 ------------------------------------------------------------------------
960 Program error detected checking hash keys
962 Expected keys: (@expected_keys)
963 Unknown key(s): (@unknown_keys)
964 Missing key(s): (@missing_keys)
965 ------------------------------------------------------------------------
971 sub check_token_array {
974 # Check for errors in the array of tokens. This is only called
975 # when the DEVEL_MODE flag is set, so this Fault will only occur
976 # during code development.
977 my $rLL = $self->[_rLL_];
978 for ( my $KK = 0 ; $KK < @{$rLL} ; $KK++ ) {
979 my $nvars = @{ $rLL->[$KK] };
980 if ( $nvars != _NVARS ) {
982 my $type = $rLL->[$KK]->[_TYPE_];
983 $type = '*' unless defined($type);
985 # The number of variables per token node is _NVARS and was set when
986 # the array indexes were generated. So if the number of variables
987 # is different we have done something wrong, like not store all of
988 # them in sub 'write_line' when they were received from the
991 "number of vars for node $KK, type '$type', is $nvars but should be $NVARS"
994 foreach my $var ( _TOKEN_, _TYPE_ ) {
995 if ( !defined( $rLL->[$KK]->[$var] ) ) {
996 my $iline = $rLL->[$KK]->[_LINE_INDEX_];
998 # This is a simple check that each token has some basic
999 # variables. In other words, that there are no holes in the
1000 # array of tokens. Sub 'write_line' pushes tokens into the
1001 # $rLL array, so this should guarantee no gaps.
1002 Fault("Undefined variable $var for K=$KK, line=$iline\n");
1009 { ## begin closure check_line_hashes
1011 # This code checks that no autovivification occurs in the 'line' hash
1013 my %valid_line_hash;
1017 # These keys are defined for each line in the formatter
1018 # Each line must have exactly these quantities
1019 my @valid_line_keys = qw(
1022 _guessed_indentation_level
1029 _square_bracket_depth
1031 _ended_in_blank_token
1040 @valid_line_hash{@valid_line_keys} = (1) x scalar(@valid_line_keys);
1043 sub check_line_hashes {
1045 my $rlines = $self->[_rlines_];
1046 foreach my $rline ( @{$rlines} ) {
1047 my $iline = $rline->{_line_number};
1048 my $line_type = $rline->{_line_type};
1049 check_keys( $rline, \%valid_line_hash,
1050 "Checkpoint: line number =$iline, line_type=$line_type", 1 );
1054 } ## end closure check_line_hashes
1056 { ## begin closure for logger routines
1059 # Called once per file to initialize the logger object
1060 sub set_logger_object {
1061 $logger_object = shift;
1065 sub get_logger_object {
1066 return $logger_object;
1069 sub get_input_stream_name {
1070 my $input_stream_name = "";
1071 if ($logger_object) {
1072 $input_stream_name = $logger_object->get_input_stream_name();
1074 return $input_stream_name;
1077 # interface to Perl::Tidy::Logger routines
1080 if ($logger_object) { $logger_object->warning($msg); }
1086 if ($logger_object) {
1087 $logger_object->complain($msg);
1092 sub write_logfile_entry {
1094 if ($logger_object) {
1095 $logger_object->write_logfile_entry(@msg);
1100 sub get_saw_brace_error {
1101 if ($logger_object) {
1102 return $logger_object->get_saw_brace_error();
1107 sub we_are_at_the_last_line {
1108 if ($logger_object) {
1109 $logger_object->we_are_at_the_last_line();
1114 } ## end closure for logger routines
1116 { ## begin closure for diagnostics routines
1117 my $diagnostics_object;
1119 # Called once per file to initialize the diagnostics object
1120 sub set_diagnostics_object {
1121 $diagnostics_object = shift;
1125 sub write_diagnostics {
1127 if ($diagnostics_object) {
1128 $diagnostics_object->write_diagnostics($msg);
1132 } ## end closure for diagnostics routines
1134 sub get_convergence_check {
1136 return $self->[_converged_];
1139 sub get_added_semicolon_count {
1141 return $self->[_added_semicolon_count_];
1144 sub get_output_line_number {
1146 my $vao = $self->[_vertical_aligner_object_];
1147 return $vao->get_output_line_number();
1150 sub want_blank_line {
1153 my $file_writer_object = $self->[_file_writer_object_];
1154 $file_writer_object->want_blank_line();
1158 sub write_unindented_line {
1159 my ( $self, $line ) = @_;
1161 my $file_writer_object = $self->[_file_writer_object_];
1162 $file_writer_object->write_line($line);
1166 sub consecutive_nonblank_lines {
1168 my $file_writer_object = $self->[_file_writer_object_];
1169 my $vao = $self->[_vertical_aligner_object_];
1170 return $file_writer_object->get_consecutive_nonblank_lines() +
1171 $vao->get_cached_line_count();
1176 my $max = shift @vals;
1177 for (@vals) { $max = $_ > $max ? $_ : $max }
1183 my $min = shift @vals;
1184 for (@vals) { $min = $_ < $min ? $_ : $min }
1190 # given a string containing words separated by whitespace,
1191 # return the list of words
1196 return split( /\s+/, $str );
1199 ###########################################
1200 # CODE SECTION 3: Check and process options
1201 ###########################################
1205 # This routine is called to check the user-supplied run parameters
1206 # and to configure the control hashes to them.
1209 initialize_whitespace_hashes();
1210 initialize_bond_strength_hashes();
1212 # This function must be called early to get hashes with grep initialized
1213 initialize_grep_and_friends( $rOpts->{'grep-alias-list'} );
1215 # Make needed regex patterns for matching text.
1216 # NOTE: sub_matching_patterns must be made first because later patterns use
1217 # them; see RT #133130.
1218 make_sub_matching_pattern();
1219 make_static_block_comment_pattern();
1220 make_static_side_comment_pattern();
1221 make_closing_side_comment_prefix();
1222 make_closing_side_comment_list_pattern();
1223 $format_skipping_pattern_begin =
1224 make_format_skipping_pattern( 'format-skipping-begin', '#<<<' );
1225 $format_skipping_pattern_end =
1226 make_format_skipping_pattern( 'format-skipping-end', '#>>>' );
1227 make_non_indenting_brace_pattern();
1229 # If closing side comments ARE selected, then we can safely
1230 # delete old closing side comments unless closing side comment
1231 # warnings are requested. This is a good idea because it will
1232 # eliminate any old csc's which fall below the line count threshold.
1233 # We cannot do this if warnings are turned on, though, because we
1234 # might delete some text which has been added. So that must
1235 # be handled when comments are created. And we cannot do this
1236 # with -io because -csc will be skipped altogether.
1237 if ( $rOpts->{'closing-side-comments'} ) {
1238 if ( !$rOpts->{'closing-side-comment-warnings'}
1239 && !$rOpts->{'indent-only'} )
1241 $rOpts->{'delete-closing-side-comments'} = 1;
1245 # If closing side comments ARE NOT selected, but warnings ARE
1246 # selected and we ARE DELETING csc's, then we will pretend to be
1247 # adding with a huge interval. This will force the comments to be
1248 # generated for comparison with the old comments, but not added.
1249 elsif ( $rOpts->{'closing-side-comment-warnings'} ) {
1250 if ( $rOpts->{'delete-closing-side-comments'} ) {
1251 $rOpts->{'delete-closing-side-comments'} = 0;
1252 $rOpts->{'closing-side-comments'} = 1;
1253 $rOpts->{'closing-side-comment-interval'} = 100000000;
1259 make_block_brace_vertical_tightness_pattern();
1260 make_blank_line_pattern();
1261 make_keyword_group_list_pattern();
1263 # Make initial list of desired one line block types
1264 # They will be modified by 'prepare_cuddled_block_types'
1265 # NOTE: this line must come after is_sort_map_grep_eval is
1266 # initialized in sub 'initialize_grep_and_friends'
1267 %want_one_line_block = %is_sort_map_grep_eval;
1269 prepare_cuddled_block_types();
1270 if ( $rOpts->{'dump-cuddled-block-list'} ) {
1271 dump_cuddled_block_list(*STDOUT);
1276 if ( $rOpts->{'extended-line-up-parentheses'} ) {
1277 $rOpts->{'line-up-parentheses'} ||= 1;
1280 if ( $rOpts->{'line-up-parentheses'} ) {
1282 if ( $rOpts->{'indent-only'}
1283 || !$rOpts->{'add-newlines'}
1284 || !$rOpts->{'delete-old-newlines'} )
1287 -----------------------------------------------------------------------
1288 Conflict: -lp conflicts with -io, -fnl, -nanl, or -ndnl; ignoring -lp
1290 The -lp indentation logic requires that perltidy be able to coordinate
1291 arbitrarily large numbers of line breakpoints. This isn't possible
1293 -----------------------------------------------------------------------
1295 $rOpts->{'line-up-parentheses'} = 0;
1296 $rOpts->{'extended-line-up-parentheses'} = 0;
1299 if ( $rOpts->{'whitespace-cycle'} ) {
1301 Conflict: -wc cannot currently be used with the -lp option; ignoring -wc
1303 $rOpts->{'whitespace-cycle'} = 0;
1307 # At present, tabs are not compatible with the line-up-parentheses style
1308 # (it would be possible to entab the total leading whitespace
1309 # just prior to writing the line, if desired).
1310 if ( $rOpts->{'line-up-parentheses'} && $rOpts->{'tabs'} ) {
1312 Conflict: -t (tabs) cannot be used with the -lp option; ignoring -t; see -et.
1314 $rOpts->{'tabs'} = 0;
1317 # Likewise, tabs are not compatible with outdenting..
1318 if ( $rOpts->{'outdent-keywords'} && $rOpts->{'tabs'} ) {
1320 Conflict: -t (tabs) cannot be used with the -okw options; ignoring -t; see -et.
1322 $rOpts->{'tabs'} = 0;
1325 if ( $rOpts->{'outdent-labels'} && $rOpts->{'tabs'} ) {
1327 Conflict: -t (tabs) cannot be used with the -ola option; ignoring -t; see -et.
1329 $rOpts->{'tabs'} = 0;
1332 if ( !$rOpts->{'space-for-semicolon'} ) {
1333 $want_left_space{'f'} = -1;
1336 if ( $rOpts->{'space-terminal-semicolon'} ) {
1337 $want_left_space{';'} = 1;
1340 # We should put an upper bound on any -sil=n value. Otherwise enormous
1341 # files could be created by mistake.
1342 for ( $rOpts->{'starting-indentation-level'} ) {
1343 if ( $_ && $_ > 100 ) {
1345 The value --starting-indentation-level=$_ is very large; a mistake? resetting to 0;
1351 # implement outdenting preferences for keywords
1352 %outdent_keyword = ();
1353 my @okw = split_words( $rOpts->{'outdent-keyword-list'} );
1355 @okw = qw(next last redo goto return); # defaults
1358 # FUTURE: if not a keyword, assume that it is an identifier
1360 if ( $Perl::Tidy::Tokenizer::is_keyword{$_} ) {
1361 $outdent_keyword{$_} = 1;
1364 Warn("ignoring '$_' in -okwl list; not a perl keyword");
1368 # setup hash for -kpit option
1369 %keyword_paren_inner_tightness = ();
1370 my $kpit_value = $rOpts->{'keyword-paren-inner-tightness'};
1371 if ( defined($kpit_value) && $kpit_value != 1 ) {
1373 split_words( $rOpts->{'keyword-paren-inner-tightness-list'} );
1375 @kpit = qw(if elsif unless while until for foreach); # defaults
1378 # we will allow keywords and user-defined identifiers
1380 $keyword_paren_inner_tightness{$_} = $kpit_value;
1384 # implement user whitespace preferences
1385 if ( my @q = split_words( $rOpts->{'want-left-space'} ) ) {
1386 @want_left_space{@q} = (1) x scalar(@q);
1389 if ( my @q = split_words( $rOpts->{'want-right-space'} ) ) {
1390 @want_right_space{@q} = (1) x scalar(@q);
1393 if ( my @q = split_words( $rOpts->{'nowant-left-space'} ) ) {
1394 @want_left_space{@q} = (-1) x scalar(@q);
1397 if ( my @q = split_words( $rOpts->{'nowant-right-space'} ) ) {
1398 @want_right_space{@q} = (-1) x scalar(@q);
1400 if ( $rOpts->{'dump-want-left-space'} ) {
1401 dump_want_left_space(*STDOUT);
1405 if ( $rOpts->{'dump-want-right-space'} ) {
1406 dump_want_right_space(*STDOUT);
1410 # default keywords for which space is introduced before an opening paren
1411 # (at present, including them messes up vertical alignment)
1412 my @sak = qw(my local our and or xor err eq ne if else elsif until
1413 unless while for foreach return switch case given when catch);
1414 %space_after_keyword = map { $_ => 1 } @sak;
1416 # first remove any or all of these if desired
1417 if ( my @q = split_words( $rOpts->{'nospace-after-keyword'} ) ) {
1419 # -nsak='*' selects all the above keywords
1420 if ( @q == 1 && $q[0] eq '*' ) { @q = keys(%space_after_keyword) }
1421 @space_after_keyword{@q} = (0) x scalar(@q);
1424 # then allow user to add to these defaults
1425 if ( my @q = split_words( $rOpts->{'space-after-keyword'} ) ) {
1426 @space_after_keyword{@q} = (1) x scalar(@q);
1429 # implement user break preferences
1430 my $break_after = sub {
1432 foreach my $tok (@toks) {
1433 if ( $tok eq '?' ) { $tok = ':' } # patch to coordinate ?/:
1434 my $lbs = $left_bond_strength{$tok};
1435 my $rbs = $right_bond_strength{$tok};
1436 if ( defined($lbs) && defined($rbs) && $lbs < $rbs ) {
1437 ( $right_bond_strength{$tok}, $left_bond_strength{$tok} ) =
1444 my $break_before = sub {
1446 foreach my $tok (@toks) {
1447 my $lbs = $left_bond_strength{$tok};
1448 my $rbs = $right_bond_strength{$tok};
1449 if ( defined($lbs) && defined($rbs) && $rbs < $lbs ) {
1450 ( $right_bond_strength{$tok}, $left_bond_strength{$tok} ) =
1457 $break_after->(@all_operators) if ( $rOpts->{'break-after-all-operators'} );
1458 $break_before->(@all_operators)
1459 if ( $rOpts->{'break-before-all-operators'} );
1461 $break_after->( split_words( $rOpts->{'want-break-after'} ) );
1462 $break_before->( split_words( $rOpts->{'want-break-before'} ) );
1464 # make note if breaks are before certain key types
1465 %want_break_before = ();
1466 foreach my $tok ( @all_operators, ',' ) {
1467 $want_break_before{$tok} =
1468 $left_bond_strength{$tok} < $right_bond_strength{$tok};
1471 # Coordinate ?/: breaks, which must be similar
1472 if ( !$want_break_before{':'} ) {
1473 $want_break_before{'?'} = $want_break_before{':'};
1474 $right_bond_strength{'?'} = $right_bond_strength{':'} + 0.01;
1475 $left_bond_strength{'?'} = NO_BREAK;
1478 # Only make a hash entry for the next parameters if values are defined.
1479 # That allows a quick check to be made later.
1480 %break_before_container_types = ();
1481 for ( $rOpts->{'break-before-hash-brace'} ) {
1482 $break_before_container_types{'{'} = $_ if $_ && $_ > 0;
1484 for ( $rOpts->{'break-before-square-bracket'} ) {
1485 $break_before_container_types{'['} = $_ if $_ && $_ > 0;
1487 for ( $rOpts->{'break-before-paren'} ) {
1488 $break_before_container_types{'('} = $_ if $_ && $_ > 0;
1491 #--------------------------------------------------------------
1492 # The combination -lp -iob -vmll -bbx=2 can be unstable (b1266)
1493 #--------------------------------------------------------------
1494 # The -vmll and -lp parameters do not really work well together.
1495 # To avoid instabilities, we will change any -bbx=2 to -bbx=1 (stable).
1496 # NOTE: we could make this more precise by looking at any exclusion
1497 # flags for -lp, and allowing -bbx=2 for excluded types.
1498 if ( $rOpts->{'variable-maximum-line-length'}
1499 && $rOpts->{'ignore-old-breakpoints'}
1500 && $rOpts->{'line-up-parentheses'} )
1503 foreach my $key ( keys %break_before_container_types ) {
1504 if ( $break_before_container_types{$key} == 2 ) {
1505 $break_before_container_types{$key} = 1;
1506 push @changed, $key;
1511 # we could write a warning here
1515 #-------------------------------------------------------------------
1516 # The combination -xlp and -vmll can be unstable unless -iscl is set
1517 #-------------------------------------------------------------------
1518 # This is a temporary fix for issue b1302. See also b1306, b1310.
1519 # FIXME: look for a better fix.
1520 if ( $rOpts->{'variable-maximum-line-length'}
1521 && $rOpts->{'extended-line-up-parentheses'}
1522 && !$rOpts->{'ignore-side-comment-lengths'} )
1524 $rOpts->{'ignore-side-comment-lengths'} = 1;
1526 # we could write a warning here
1529 #-----------------------------------------------------------
1530 # The combination -lp -vmll can be unstable if -ci<2 (b1267)
1531 #-----------------------------------------------------------
1532 # The -vmll and -lp parameters do not really work well together.
1533 # This is a very crude fix for an unusual parameter combination.
1534 if ( $rOpts->{'variable-maximum-line-length'}
1535 && $rOpts->{'line-up-parentheses'}
1536 && $rOpts->{'continuation-indentation'} < 2 )
1538 $rOpts->{'continuation-indentation'} = 2;
1539 ##Warn("Increased -ci=n to n=2 for stability with -lp and -vmll\n");
1542 %container_indentation_options = ();
1544 [ 'break-before-hash-brace-and-indent', '{' ],
1545 [ 'break-before-square-bracket-and-indent', '[' ],
1546 [ 'break-before-paren-and-indent', '(' ],
1549 my ( $key, $tok ) = @{$pair};
1550 my $opt = $rOpts->{$key};
1551 if ( defined($opt) && $opt > 0 && $break_before_container_types{$tok} )
1554 # (1) -lp is not compatable with opt=2, silently set to opt=0
1555 # (2) opt=0 and 2 give same result if -i=-ci; but opt=0 is faster
1557 if ( $rOpts->{'line-up-parentheses'}
1558 || $rOpts->{'indent-columns'} ==
1559 $rOpts->{'continuation-indentation'} )
1564 $container_indentation_options{$tok} = $opt;
1568 # Define here tokens which may follow the closing brace of a do statement
1569 # on the same line, as in:
1570 # } while ( $something);
1571 my @dof = qw(until while unless if ; : );
1573 @is_do_follower{@dof} = (1) x scalar(@dof);
1575 # What tokens may follow the closing brace of an if or elsif block?
1576 # Not used. Previously used for cuddled else, but no longer needed.
1577 %is_if_brace_follower = ();
1579 # nothing can follow the closing curly of an else { } block:
1580 %is_else_brace_follower = ();
1582 # what can follow a multi-line anonymous sub definition closing curly:
1583 my @asf = qw# ; : => or and && || ~~ !~~ ) #;
1585 @is_anon_sub_brace_follower{@asf} = (1) x scalar(@asf);
1587 # what can follow a one-line anonymous sub closing curly:
1588 # one-line anonymous subs also have ']' here...
1589 # see tk3.t and PP.pm
1590 my @asf1 = qw# ; : => or and && || ) ] ~~ !~~ #;
1592 @is_anon_sub_1_brace_follower{@asf1} = (1) x scalar(@asf1);
1594 # What can follow a closing curly of a block
1595 # which is not an if/elsif/else/do/sort/map/grep/eval/sub
1596 # Testfiles: 'Toolbar.pm', 'Menubar.pm', bless.t, '3rules.pl'
1597 my @obf = qw# ; : => or and && || ) #;
1599 @is_other_brace_follower{@obf} = (1) x scalar(@obf);
1601 $right_bond_strength{'{'} = WEAK;
1602 $left_bond_strength{'{'} = VERY_STRONG;
1604 # make -l=0 equal to -l=infinite
1605 if ( !$rOpts->{'maximum-line-length'} ) {
1606 $rOpts->{'maximum-line-length'} = 1000000;
1609 # make -lbl=0 equal to -lbl=infinite
1610 if ( !$rOpts->{'long-block-line-count'} ) {
1611 $rOpts->{'long-block-line-count'} = 1000000;
1614 my $ole = $rOpts->{'output-line-ending'};
1623 # Patch for RT #99514, a memoization issue.
1624 # Normally, the user enters one of 'dos', 'win', etc, and we change the
1625 # value in the options parameter to be the corresponding line ending
1626 # character. But, if we are using memoization, on later passes through
1627 # here the option parameter will already have the desired ending
1628 # character rather than the keyword 'dos', 'win', etc. So
1629 # we must check to see if conversion has already been done and, if so,
1630 # bypass the conversion step.
1631 my %endings_inverted = (
1632 "\015\012" => 'dos',
1633 "\015\012" => 'win',
1638 if ( defined( $endings_inverted{$ole} ) ) {
1640 # we already have valid line ending, nothing more to do
1644 unless ( $rOpts->{'output-line-ending'} = $endings{$ole} ) {
1645 my $str = join " ", keys %endings;
1647 Unrecognized line ending '$ole'; expecting one of: $str
1650 if ( $rOpts->{'preserve-line-endings'} ) {
1651 Warn("Ignoring -ple; conflicts with -ole\n");
1652 $rOpts->{'preserve-line-endings'} = undef;
1657 # hashes used to simplify setting whitespace
1659 '{' => $rOpts->{'brace-tightness'},
1660 '}' => $rOpts->{'brace-tightness'},
1661 '(' => $rOpts->{'paren-tightness'},
1662 ')' => $rOpts->{'paren-tightness'},
1663 '[' => $rOpts->{'square-bracket-tightness'},
1664 ']' => $rOpts->{'square-bracket-tightness'},
1673 if ( $rOpts->{'ignore-old-breakpoints'} ) {
1676 if ( $rOpts->{'break-at-old-method-breakpoints'} ) {
1677 $rOpts->{'break-at-old-method-breakpoints'} = 0;
1678 push @conflicts, '--break-at-old-method-breakpoints (-bom)';
1680 if ( $rOpts->{'break-at-old-comma-breakpoints'} ) {
1681 $rOpts->{'break-at-old-comma-breakpoints'} = 0;
1682 push @conflicts, '--break-at-old-comma-breakpoints (-boc)';
1684 if ( $rOpts->{'break-at-old-semicolon-breakpoints'} ) {
1685 $rOpts->{'break-at-old-semicolon-breakpoints'} = 0;
1686 push @conflicts, '--break-at-old-semicolon-breakpoints (-bos)';
1688 if ( $rOpts->{'keep-old-breakpoints-before'} ) {
1689 $rOpts->{'keep-old-breakpoints-before'} = "";
1690 push @conflicts, '--keep-old-breakpoints-before (-kbb)';
1692 if ( $rOpts->{'keep-old-breakpoints-after'} ) {
1693 $rOpts->{'keep-old-breakpoints-after'} = "";
1694 push @conflicts, '--keep-old-breakpoints-after (-kba)';
1698 my $msg = join( "\n ",
1699 " Conflict: These conflicts with --ignore-old-breakponts (-iob) will be turned off:",
1705 # Note: These additional parameters are made inactive by -iob.
1706 # They are silently turned off here because they are on by default.
1707 # We would generate unexpected warnings if we issued a warning.
1708 $rOpts->{'break-at-old-keyword-breakpoints'} = 0;
1709 $rOpts->{'break-at-old-logical-breakpoints'} = 0;
1710 $rOpts->{'break-at-old-ternary-breakpoints'} = 0;
1711 $rOpts->{'break-at-old-attribute-breakpoints'} = 0;
1714 %keep_break_before_type = ();
1715 initialize_keep_old_breakpoints( $rOpts->{'keep-old-breakpoints-before'},
1716 'kbb', \%keep_break_before_type );
1718 %keep_break_after_type = ();
1719 initialize_keep_old_breakpoints( $rOpts->{'keep-old-breakpoints-after'},
1720 'kba', \%keep_break_after_type );
1722 #------------------------------------------------------------
1723 # Make global vars for frequently used options for efficiency
1724 #------------------------------------------------------------
1726 $rOpts_add_newlines = $rOpts->{'add-newlines'};
1727 $rOpts_add_whitespace = $rOpts->{'add-whitespace'};
1728 $rOpts_blank_lines_after_opening_block =
1729 $rOpts->{'blank-lines-after-opening-block'};
1730 $rOpts_block_brace_tightness = $rOpts->{'block-brace-tightness'};
1731 $rOpts_block_brace_vertical_tightness =
1732 $rOpts->{'block-brace-vertical-tightness'};
1733 $rOpts_break_after_labels = $rOpts->{'break-after-labels'};
1734 $rOpts_break_at_old_attribute_breakpoints =
1735 $rOpts->{'break-at-old-attribute-breakpoints'};
1736 $rOpts_break_at_old_comma_breakpoints =
1737 $rOpts->{'break-at-old-comma-breakpoints'};
1738 $rOpts_break_at_old_keyword_breakpoints =
1739 $rOpts->{'break-at-old-keyword-breakpoints'};
1740 $rOpts_break_at_old_logical_breakpoints =
1741 $rOpts->{'break-at-old-logical-breakpoints'};
1742 $rOpts_break_at_old_semicolon_breakpoints =
1743 $rOpts->{'break-at-old-semicolon-breakpoints'};
1744 $rOpts_break_at_old_ternary_breakpoints =
1745 $rOpts->{'break-at-old-ternary-breakpoints'};
1746 $rOpts_break_open_paren_list = $rOpts->{'break-open-paren-list'};
1747 $rOpts_closing_side_comments = $rOpts->{'closing-side-comments'};
1748 $rOpts_closing_side_comment_else_flag =
1749 $rOpts->{'closing-side-comment-else-flag'};
1750 $rOpts_closing_side_comment_maximum_text =
1751 $rOpts->{'closing-side-comment-maximum-text'};
1752 $rOpts_comma_arrow_breakpoints = $rOpts->{'comma-arrow-breakpoints'};
1753 $rOpts_continuation_indentation = $rOpts->{'continuation-indentation'};
1754 $rOpts_delete_closing_side_comments =
1755 $rOpts->{'delete-closing-side-comments'};
1756 $rOpts_delete_old_whitespace = $rOpts->{'delete-old-whitespace'};
1757 $rOpts_extended_continuation_indentation =
1758 $rOpts->{'extended-continuation-indentation'};
1759 $rOpts_delete_side_comments = $rOpts->{'delete-side-comments'};
1760 $rOpts_format_skipping = $rOpts->{'format-skipping'};
1761 $rOpts_freeze_whitespace = $rOpts->{'freeze-whitespace'};
1762 $rOpts_function_paren_vertical_alignment =
1763 $rOpts->{'function-paren-vertical-alignment'};
1764 $rOpts_fuzzy_line_length = $rOpts->{'fuzzy-line-length'};
1765 $rOpts_ignore_old_breakpoints = $rOpts->{'ignore-old-breakpoints'};
1766 $rOpts_ignore_side_comment_lengths =
1767 $rOpts->{'ignore-side-comment-lengths'};
1768 $rOpts_indent_closing_brace = $rOpts->{'indent-closing-brace'};
1769 $rOpts_indent_columns = $rOpts->{'indent-columns'};
1770 $rOpts_indent_only = $rOpts->{'indent-only'};
1771 $rOpts_keep_interior_semicolons = $rOpts->{'keep-interior-semicolons'};
1772 $rOpts_line_up_parentheses = $rOpts->{'line-up-parentheses'};
1773 $rOpts_extended_line_up_parentheses =
1774 $rOpts->{'extended-line-up-parentheses'};
1775 $rOpts_logical_padding = $rOpts->{'logical-padding'};
1776 $rOpts_maximum_consecutive_blank_lines =
1777 $rOpts->{'maximum-consecutive-blank-lines'};
1778 $rOpts_maximum_fields_per_table = $rOpts->{'maximum-fields-per-table'};
1779 $rOpts_maximum_line_length = $rOpts->{'maximum-line-length'};
1780 $rOpts_one_line_block_semicolons = $rOpts->{'one-line-block-semicolons'};
1781 $rOpts_opening_brace_always_on_right =
1782 $rOpts->{'opening-brace-always-on-right'};
1783 $rOpts_outdent_keywords = $rOpts->{'outdent-keywords'};
1784 $rOpts_outdent_labels = $rOpts->{'outdent-labels'};
1785 $rOpts_outdent_long_comments = $rOpts->{'outdent-long-comments'};
1786 $rOpts_outdent_long_quotes = $rOpts->{'outdent-long-quotes'};
1787 $rOpts_outdent_static_block_comments =
1788 $rOpts->{'outdent-static-block-comments'};
1789 $rOpts_recombine = $rOpts->{'recombine'};
1790 $rOpts_short_concatenation_item_length =
1791 $rOpts->{'short-concatenation-item-length'};
1792 $rOpts_stack_closing_block_brace = $rOpts->{'stack-closing-block-brace'};
1793 $rOpts_static_block_comments = $rOpts->{'static-block-comments'};
1794 $rOpts_sub_alias_list = $rOpts->{'sub-alias-list'};
1795 $rOpts_tee_block_comments = $rOpts->{'tee-block-comments'};
1796 $rOpts_tee_pod = $rOpts->{'tee-pod'};
1797 $rOpts_tee_side_comments = $rOpts->{'tee-side-comments'};
1798 $rOpts_valign = $rOpts->{'valign'};
1799 $rOpts_valign_code = $rOpts->{'valign-code'};
1800 $rOpts_valign_side_comments = $rOpts->{'valign-side-comments'};
1801 $rOpts_variable_maximum_line_length =
1802 $rOpts->{'variable-maximum-line-length'};
1804 # Note that both opening and closing tokens can access the opening
1805 # and closing flags of their container types.
1806 %opening_vertical_tightness = (
1807 '(' => $rOpts->{'paren-vertical-tightness'},
1808 '{' => $rOpts->{'brace-vertical-tightness'},
1809 '[' => $rOpts->{'square-bracket-vertical-tightness'},
1810 ')' => $rOpts->{'paren-vertical-tightness'},
1811 '}' => $rOpts->{'brace-vertical-tightness'},
1812 ']' => $rOpts->{'square-bracket-vertical-tightness'},
1815 %closing_vertical_tightness = (
1816 '(' => $rOpts->{'paren-vertical-tightness-closing'},
1817 '{' => $rOpts->{'brace-vertical-tightness-closing'},
1818 '[' => $rOpts->{'square-bracket-vertical-tightness-closing'},
1819 ')' => $rOpts->{'paren-vertical-tightness-closing'},
1820 '}' => $rOpts->{'brace-vertical-tightness-closing'},
1821 ']' => $rOpts->{'square-bracket-vertical-tightness-closing'},
1824 # assume flag for '>' same as ')' for closing qw quotes
1825 %closing_token_indentation = (
1826 ')' => $rOpts->{'closing-paren-indentation'},
1827 '}' => $rOpts->{'closing-brace-indentation'},
1828 ']' => $rOpts->{'closing-square-bracket-indentation'},
1829 '>' => $rOpts->{'closing-paren-indentation'},
1832 # flag indicating if any closing tokens are indented
1833 $some_closing_token_indentation =
1834 $rOpts->{'closing-paren-indentation'}
1835 || $rOpts->{'closing-brace-indentation'}
1836 || $rOpts->{'closing-square-bracket-indentation'}
1837 || $rOpts->{'indent-closing-brace'};
1839 %opening_token_right = (
1840 '(' => $rOpts->{'opening-paren-right'},
1841 '{' => $rOpts->{'opening-hash-brace-right'},
1842 '[' => $rOpts->{'opening-square-bracket-right'},
1845 %stack_opening_token = (
1846 '(' => $rOpts->{'stack-opening-paren'},
1847 '{' => $rOpts->{'stack-opening-hash-brace'},
1848 '[' => $rOpts->{'stack-opening-square-bracket'},
1851 %stack_closing_token = (
1852 ')' => $rOpts->{'stack-closing-paren'},
1853 '}' => $rOpts->{'stack-closing-hash-brace'},
1854 ']' => $rOpts->{'stack-closing-square-bracket'},
1857 # Create a table of maximum line length vs level for later efficient use.
1858 # We will make the tables very long to be sure it will not be exceeded.
1859 # But we have to choose a fixed length. A check will be made at the start
1860 # of sub 'finish_formatting' to be sure it is not exceeded. Note, some of
1861 # my standard test problems have indentation levels of about 150, so this
1862 # should be fairly large. If the choice of a maximum level ever becomes
1863 # an issue then these table values could be returned in a sub with a simple
1864 # memoization scheme.
1866 # Also create a table of the maximum spaces available for text due to the
1867 # level only. If a line has continuation indentation, then that space must
1868 # be subtracted from the table value. This table is used for preliminary
1869 # estimates in welding, extended_ci, BBX, and marking short blocks.
1870 my $level_max = 1000;
1873 foreach my $level ( 0 .. $level_max ) {
1874 my $indent = $level * $rOpts_indent_columns;
1875 $maximum_line_length_at_level[$level] = $rOpts_maximum_line_length;
1876 $maximum_text_length_at_level[$level] =
1877 $rOpts_maximum_line_length - $indent;
1880 # Correct the maximum_text_length table if the -wc=n flag is used
1881 $rOpts_whitespace_cycle = $rOpts->{'whitespace-cycle'};
1882 if ($rOpts_whitespace_cycle) {
1883 if ( $rOpts_whitespace_cycle > 0 ) {
1884 foreach my $level ( 0 .. $level_max ) {
1885 my $level_mod = $level % $rOpts_whitespace_cycle;
1886 my $indent = $level_mod * $rOpts_indent_columns;
1887 $maximum_text_length_at_level[$level] =
1888 $rOpts_maximum_line_length - $indent;
1892 $rOpts_whitespace_cycle = $rOpts->{'whitespace-cycle'} = 0;
1896 # Correct the tables if the -vmll flag is used. These values override the
1898 if ($rOpts_variable_maximum_line_length) {
1899 foreach my $level ( 0 .. $level_max ) {
1900 $maximum_text_length_at_level[$level] = $rOpts_maximum_line_length;
1901 $maximum_line_length_at_level[$level] =
1902 $rOpts_maximum_line_length + $level * $rOpts_indent_columns;
1906 # Define two measures of indentation level, alpha and beta, at which some
1907 # formatting features come under stress and need to start shutting down.
1908 # Some combination of the two will be used to shut down different
1909 # formatting features.
1910 # Put a reasonable upper limit on stress level (say 100) in case the
1911 # whitespace-cycle variable is used.
1912 my $stress_level_limit = min( 100, $level_max );
1914 # Find stress_level_alpha, targeted at very short maximum line lengths.
1915 $stress_level_alpha = $stress_level_limit + 1;
1916 foreach my $level_test ( 0 .. $stress_level_limit ) {
1917 my $max_len = $maximum_text_length_at_level[ $level_test + 1 ];
1918 my $excess_inside_space =
1920 $rOpts_continuation_indentation -
1921 $rOpts_indent_columns - 8;
1922 if ( $excess_inside_space <= 0 ) {
1923 $stress_level_alpha = $level_test;
1928 # Find stress level beta, a stress level targeted at formatting
1929 # at deep levels near the maximum line length. We start increasing
1930 # from zero and stop at the first level which shows no more space.
1932 # 'const' is a fixed number of spaces for a typical variable.
1933 # Cases b1197-b1204 work ok with const=12 but not with const=8
1935 my $denom = max( 1, $rOpts_indent_columns );
1936 $stress_level_beta = 0;
1937 foreach my $level ( 0 .. $stress_level_limit ) {
1938 my $remaining_cycles = max(
1941 $maximum_text_length_at_level[$level] -
1942 $rOpts_continuation_indentation - $const
1945 last if ( $remaining_cycles <= 3 ); # 2 does not work
1946 $stress_level_beta = $level;
1949 initialize_weld_nested_exclusion_rules($rOpts);
1951 %line_up_parentheses_control_hash = ();
1952 $line_up_parentheses_control_is_lxpl = 1;
1953 my $lpxl = $rOpts->{'line-up-parentheses-exclusion-list'};
1954 my $lpil = $rOpts->{'line-up-parentheses-inclusion-list'};
1955 if ( $lpxl && $lpil ) {
1957 You entered values for both -lpxl=s and -lpil=s; the -lpil list will be ignored
1961 $line_up_parentheses_control_is_lxpl = 1;
1962 initialize_line_up_parentheses_control_hash(
1963 $rOpts->{'line-up-parentheses-exclusion-list'}, 'lpxl' );
1966 $line_up_parentheses_control_is_lxpl = 0;
1967 initialize_line_up_parentheses_control_hash(
1968 $rOpts->{'line-up-parentheses-inclusion-list'}, 'lpil' );
1974 use constant ALIGN_GREP_ALIASES => 0;
1976 sub initialize_grep_and_friends {
1979 # Initialize or re-initialize hashes with 'grep' and grep aliases. This
1980 # must be done after each set of options because new grep aliases may be
1983 # re-initialize the hash ... this is critical!
1984 %is_sort_map_grep = ();
1986 my @q = qw(sort map grep);
1987 @is_sort_map_grep{@q} = (1) x scalar(@q);
1989 # Note that any 'grep-alias-list' string has been preprocessed to be a
1990 # trimmed, space-separated list.
1991 my @grep_aliases = split /\s+/, $str;
1992 @{is_sort_map_grep}{@grep_aliases} = (1) x scalar(@grep_aliases);
1994 ##@q = qw(sort map grep eval);
1995 %is_sort_map_grep_eval = %is_sort_map_grep;
1996 $is_sort_map_grep_eval{'eval'} = 1;
1998 ##@q = qw(sort map grep eval do);
1999 %is_sort_map_grep_eval_do = %is_sort_map_grep_eval;
2000 $is_sort_map_grep_eval_do{'do'} = 1;
2002 # These block types can take ci. This is used by the -xci option.
2003 # Note that the 'sub' in this list is an anonymous sub. To be more correct
2004 # we could remove sub and use ASUB pattern to also handle a
2005 # prototype/signature. But that would slow things down and would probably
2007 ##@q = qw( do sub eval sort map grep );
2008 %is_block_with_ci = %is_sort_map_grep_eval_do;
2009 $is_block_with_ci{'sub'} = 1;
2011 %is_keyword_returning_list = ();
2020 push @q, @grep_aliases;
2021 @is_keyword_returning_list{@q} = (1) x scalar(@q);
2023 # This code enables vertical alignment of grep aliases for testing. It has
2024 # not been found to be beneficial, so it is off by default. But it is
2025 # useful for precise testing of the grep alias coding.
2026 if (ALIGN_GREP_ALIASES) {
2038 $block_type_map{$_} = 'map' unless ( $_ eq 'map' );
2044 sub initialize_weld_nested_exclusion_rules {
2046 %weld_nested_exclusion_rules = ();
2048 my $opt_name = 'weld-nested-exclusion-list';
2049 my $str = $rOpts->{$opt_name};
2050 return unless ($str);
2053 return unless ($str);
2055 # There are four container tokens.
2063 # We are parsing an exclusion list for nested welds. The list is a string
2064 # with spaces separating any number of items. Each item consists of three
2065 # pieces of information:
2066 # <optional position> <optional type> <type of container>
2067 # < ^ or . > < k or K > < ( [ { >
2069 # The last character is the required container type and must be one of:
2071 # [ = square bracket
2074 # An optional leading position indicator:
2075 # ^ means the leading token position in the weld
2076 # . means a secondary token position in the weld
2077 # no position indicator means all positions match
2079 # An optional alphanumeric character between the position and container
2080 # token selects to which the rule applies:
2082 # K = any non-keyword
2084 # F = not a function call
2085 # w = function or keyword
2086 # W = not a function or keyword
2087 # no letter means any preceding type matches
2090 # ^( - the weld must not start with a paren
2091 # .( - the second and later tokens may not be parens
2092 # ( - no parens in weld
2093 # ^K( - exclude a leading paren not preceded by a keyword
2094 # .k( - exclude a secondary paren preceded by a keyword
2095 # [ { - exclude all brackets and braces
2097 my @items = split /\s+/, $str;
2100 foreach my $item (@items) {
2101 my $item_save = $item;
2102 my $tok = chop($item);
2103 my $key = $token_keys{$tok};
2104 if ( !defined($key) ) {
2105 $msg1 .= " '$item_save'";
2108 if ( !defined( $weld_nested_exclusion_rules{$key} ) ) {
2109 $weld_nested_exclusion_rules{$key} = [];
2111 my $rflags = $weld_nested_exclusion_rules{$key};
2113 # A 'q' means do not weld quotes
2114 if ( $tok eq 'q' ) {
2123 if ( $item =~ /^([\^\.])?([kKfFwW])?$/ ) {
2125 $select = $2 if ($2);
2128 $msg1 .= " '$item_save'";
2134 if ( $pos eq '^' || $pos eq '*' ) {
2135 if ( defined( $rflags->[0] ) && $rflags->[0] ne $select ) {
2138 $rflags->[0] = $select;
2140 if ( $pos eq '.' || $pos eq '*' ) {
2141 if ( defined( $rflags->[1] ) && $rflags->[1] ne $select ) {
2144 $rflags->[1] = $select;
2146 if ($err) { $msg2 .= " '$item_save'"; }
2150 Unexpecting symbol(s) encountered in --$opt_name will be ignored:
2156 Multiple specifications were encountered in the --weld-nested-exclusion-list for:
2158 Only the last will be used.
2164 sub initialize_line_up_parentheses_control_hash {
2165 my ( $str, $opt_name ) = @_;
2166 return unless ($str);
2169 return unless ($str);
2171 # The format is space separated items, where each item must consist of a
2172 # string with a token type preceded by an optional text token and followed
2176 # = (flag1)(key)(flag2), where
2181 my @items = split /\s+/, $str;
2184 foreach my $item (@items) {
2185 my $item_save = $item;
2186 my ( $flag1, $key, $flag2 );
2187 if ( $item =~ /^([^\(\]\{]*)?([\(\{\[])(\d)?$/ ) {
2193 $msg1 .= " '$item_save'";
2197 if ( !defined($key) ) {
2198 $msg1 .= " '$item_save'";
2202 # Check for valid flag1
2203 if ( !defined($flag1) ) { $flag1 = '*' }
2204 elsif ( $flag1 !~ /^[kKfFwW\*]$/ ) {
2205 $msg1 .= " '$item_save'";
2209 # Check for valid flag2
2210 # 0 or blank: ignore container contents
2211 # 1 all containers with sublists match
2212 # 2 all containers with sublists, code blocks or ternary operators match
2213 # ... this could be extended in the future
2214 if ( !defined($flag2) ) { $flag2 = 0 }
2215 elsif ( $flag2 !~ /^[012]$/ ) {
2216 $msg1 .= " '$item_save'";
2220 if ( !defined( $line_up_parentheses_control_hash{$key} ) ) {
2221 $line_up_parentheses_control_hash{$key} = [ $flag1, $flag2 ];
2225 # check for multiple conflicting specifications
2226 my $rflags = $line_up_parentheses_control_hash{$key};
2228 if ( defined( $rflags->[0] ) && $rflags->[0] ne $flag1 ) {
2230 $rflags->[0] = $flag1;
2232 if ( defined( $rflags->[1] ) && $rflags->[1] ne $flag2 ) {
2234 $rflags->[1] = $flag2;
2236 $msg2 .= " '$item_save'" if ($err);
2241 Unexpecting symbol(s) encountered in --$opt_name will be ignored:
2247 Multiple specifications were encountered in the $opt_name at:
2249 Only the last will be used.
2253 # Speedup: we can turn off -lp if it is not actually used
2254 if ($line_up_parentheses_control_is_lxpl) {
2256 foreach my $key (qw# ( { [ #) {
2257 my $rflags = $line_up_parentheses_control_hash{$key};
2258 if ( defined($rflags) ) {
2259 my ( $flag1, $flag2 ) = @{$rflags};
2260 if ( $flag1 && $flag1 ne '*' ) { $all_off = 0; last }
2261 if ($flag2) { $all_off = 0; last }
2265 $rOpts->{'line-up-parentheses'} = "";
2272 use constant DEBUG_KB => 0;
2274 sub initialize_keep_old_breakpoints {
2275 my ( $str, $short_name, $rkeep_break_hash ) = @_;
2279 my @list = split_words($str);
2280 if ( DEBUG_KB && @list ) {
2283 DEBUG_KB entering for '$short_name' with str=$str\n";
2288 # - pull out any any leading container code, like f( or *{
2290 if ( $_ =~ /^( [ \w\* ] )( [ \{\(\[\}\)\] ] )$/x ) {
2297 foreach my $type (@list) {
2298 if ( !Perl::Tidy::Tokenizer::is_valid_token_type($type) ) {
2299 push @unknown_types, $type;
2303 if (@unknown_types) {
2304 my $num = @unknown_types;
2307 $num unrecognized token types were input with --$short_name :
2312 @{$rkeep_break_hash}{@list} = (1) x scalar(@list);
2314 foreach my $key ( keys %flags ) {
2315 my $flag = $flags{$key};
2317 if ( length($flag) != 1 ) {
2319 Multiple entries given for '$key' in '$short_name'
2322 elsif ( ( $key eq '(' || $key eq ')' ) && $flag !~ /^[kKfFwW\*]$/ ) {
2324 Unknown flag '$flag' given for '$key' in '$short_name'
2327 elsif ( ( $key eq '}' || $key eq '}' ) && $flag !~ /^[bB\*]$/ ) {
2329 Unknown flag '$flag' given for '$key' in '$short_name'
2333 $rkeep_break_hash->{$key} = $flag;
2336 # Temporary patch and warning during changeover from using type to token for
2337 # containers . This can be eliminated after one or two future releases.
2338 if ( $rkeep_break_hash->{'{'}
2339 && $rkeep_break_hash->{'{'} eq '1'
2340 && !$rkeep_break_hash->{'('}
2341 && !$rkeep_break_hash->{'['} )
2343 $rkeep_break_hash->{'('} = 1;
2344 $rkeep_break_hash->{'['} = 1;
2346 Sorry, but the format for the -kbb and -kba flags is changing a little.
2347 You entered '{' which currently matches '{' '(' and '[',
2348 but in the future it will only match '{'.
2349 To prevent this message please do one of the following:
2350 use '{ ( [' if you want to match all opening containers, or
2351 use '(' or '[' to match just those containers, or
2352 use '*{' to match only opening braces
2356 if ( $rkeep_break_hash->{'}'}
2357 && $rkeep_break_hash->{'}'} eq '1'
2358 && !$rkeep_break_hash->{')'}
2359 && !$rkeep_break_hash->{']'} )
2361 $rkeep_break_hash->{'('} = 1;
2362 $rkeep_break_hash->{'['} = 1;
2364 Sorry, but the format for the -kbb and -kba flags is changing a little.
2365 You entered '}' which currently matches each of '}' ')' and ']',
2366 but in the future it will only match '}'.
2367 To prevent this message please do one of the following:
2368 use '} ) ]' if you want to match all closing containers, or
2369 use ')' or ']' to match just those containers, or
2370 use '*}' to match only closing braces
2374 if ( DEBUG_KB && @list ) {
2379 DEBUG_KB -$short_name flag: $str
2390 sub initialize_whitespace_hashes {
2392 # This is called once before formatting begins to initialize these global
2393 # hashes, which control the use of whitespace around tokens:
2398 # %space_after_keyword
2400 # Many token types are identical to the tokens themselves.
2401 # See the tokenizer for a complete list. Here are some special types:
2403 # f = semicolon in for statement
2406 # Note that :: is excluded since it should be contained in an identifier
2407 # Note that '->' is excluded because it never gets space
2408 # parentheses and brackets are excluded since they are handled specially
2409 # curly braces are included but may be overridden by logic, such as
2412 # NEW_TOKENS: create a whitespace rule here. This can be as
2413 # simple as adding your new letter to @spaces_both_sides, for
2416 my @opening_type = qw< L { ( [ >;
2417 @is_opening_type{@opening_type} = (1) x scalar(@opening_type);
2419 my @closing_type = qw< R } ) ] >;
2420 @is_closing_type{@closing_type} = (1) x scalar(@closing_type);
2422 my @spaces_both_sides = qw#
2423 + - * / % ? = . : x < > | & ^ .. << >> ** && .. || // => += -=
2424 .= %= x= &= |= ^= *= <> <= >= == =~ !~ /= != ... <<= >>= ~~ !~~
2425 &&= ||= //= <=> A k f w F n C Y U G v
2428 my @spaces_left_side = qw<
2429 t ! ~ m p { \ h pp mm Z j
2431 push( @spaces_left_side, '#' ); # avoids warning message
2433 my @spaces_right_side = qw<
2434 ; } ) ] R J ++ -- **=
2436 push( @spaces_right_side, ',' ); # avoids warning message
2438 %want_left_space = ();
2439 %want_right_space = ();
2440 %binary_ws_rules = ();
2442 # Note that we setting defaults here. Later in processing
2443 # the values of %want_left_space and %want_right_space
2444 # may be overridden by any user settings specified by the
2445 # -wls and -wrs parameters. However the binary_whitespace_rules
2446 # are hardwired and have priority.
2447 @want_left_space{@spaces_both_sides} =
2448 (1) x scalar(@spaces_both_sides);
2449 @want_right_space{@spaces_both_sides} =
2450 (1) x scalar(@spaces_both_sides);
2451 @want_left_space{@spaces_left_side} =
2452 (1) x scalar(@spaces_left_side);
2453 @want_right_space{@spaces_left_side} =
2454 (-1) x scalar(@spaces_left_side);
2455 @want_left_space{@spaces_right_side} =
2456 (-1) x scalar(@spaces_right_side);
2457 @want_right_space{@spaces_right_side} =
2458 (1) x scalar(@spaces_right_side);
2459 $want_left_space{'->'} = WS_NO;
2460 $want_right_space{'->'} = WS_NO;
2461 $want_left_space{'**'} = WS_NO;
2462 $want_right_space{'**'} = WS_NO;
2463 $want_right_space{'CORE::'} = WS_NO;
2465 # These binary_ws_rules are hardwired and have priority over the above
2466 # settings. It would be nice to allow adjustment by the user,
2467 # but it would be complicated to specify.
2469 # hash type information must stay tightly bound
2471 $binary_ws_rules{'i'}{'L'} = WS_NO;
2472 $binary_ws_rules{'i'}{'{'} = WS_YES;
2473 $binary_ws_rules{'k'}{'{'} = WS_YES;
2474 $binary_ws_rules{'U'}{'{'} = WS_YES;
2475 $binary_ws_rules{'i'}{'['} = WS_NO;
2476 $binary_ws_rules{'R'}{'L'} = WS_NO;
2477 $binary_ws_rules{'R'}{'{'} = WS_NO;
2478 $binary_ws_rules{'t'}{'L'} = WS_NO;
2479 $binary_ws_rules{'t'}{'{'} = WS_NO;
2480 $binary_ws_rules{'t'}{'='} = WS_OPTIONAL; # for signatures; fixes b1123
2481 $binary_ws_rules{'}'}{'L'} = WS_NO;
2482 $binary_ws_rules{'}'}{'{'} = WS_OPTIONAL; # RT#129850; was WS_NO
2483 $binary_ws_rules{'$'}{'L'} = WS_NO;
2484 $binary_ws_rules{'$'}{'{'} = WS_NO;
2485 $binary_ws_rules{'@'}{'L'} = WS_NO;
2486 $binary_ws_rules{'@'}{'{'} = WS_NO;
2487 $binary_ws_rules{'='}{'L'} = WS_YES;
2488 $binary_ws_rules{'J'}{'J'} = WS_YES;
2490 # the following includes ') {'
2491 # as in : if ( xxx ) { yyy }
2492 $binary_ws_rules{']'}{'L'} = WS_NO;
2493 $binary_ws_rules{']'}{'{'} = WS_NO;
2494 $binary_ws_rules{')'}{'{'} = WS_YES;
2495 $binary_ws_rules{')'}{'['} = WS_NO;
2496 $binary_ws_rules{']'}{'['} = WS_NO;
2497 $binary_ws_rules{']'}{'{'} = WS_NO;
2498 $binary_ws_rules{'}'}{'['} = WS_NO;
2499 $binary_ws_rules{'R'}{'['} = WS_NO;
2501 $binary_ws_rules{']'}{'++'} = WS_NO;
2502 $binary_ws_rules{']'}{'--'} = WS_NO;
2503 $binary_ws_rules{')'}{'++'} = WS_NO;
2504 $binary_ws_rules{')'}{'--'} = WS_NO;
2506 $binary_ws_rules{'R'}{'++'} = WS_NO;
2507 $binary_ws_rules{'R'}{'--'} = WS_NO;
2509 $binary_ws_rules{'i'}{'Q'} = WS_YES;
2510 $binary_ws_rules{'n'}{'('} = WS_YES; # occurs in 'use package n ()'
2512 $binary_ws_rules{'i'}{'('} = WS_NO;
2514 $binary_ws_rules{'w'}{'('} = WS_NO;
2515 $binary_ws_rules{'w'}{'{'} = WS_YES;
2518 } ## end initialize_whitespace_hashes
2520 # The following hash is used to skip over needless if tests.
2521 # Be sure to update it when adding new checks in its block.
2522 my %is_special_ws_type;
2525 my @q = qw(k w i C m - Q);
2527 @is_special_ws_type{@q} = (1) x scalar(@q);
2530 use constant DEBUG_WHITE => 0;
2532 sub set_whitespace_flags {
2534 # This routine is called once per file to set whitespace flags for that
2535 # file. This routine examines each pair of nonblank tokens and sets a flag
2536 # indicating if white space is needed.
2538 # $rwhitespace_flags->[$j] is a flag indicating whether a white space
2539 # BEFORE token $j is needed, with the following values:
2541 # WS_NO = -1 do not want a space BEFORE token $j
2542 # WS_OPTIONAL= 0 optional space or $j is a whitespace
2543 # WS_YES = 1 want a space BEFORE token $j
2548 my $rLL = $self->[_rLL_];
2549 my $rblock_type_of_seqno = $self->[_rblock_type_of_seqno_];
2550 my $jmax = @{$rLL} - 1;
2552 my $rOpts_space_keyword_paren = $rOpts->{'space-keyword-paren'};
2553 my $rOpts_space_backslash_quote = $rOpts->{'space-backslash-quote'};
2554 my $rOpts_space_function_paren = $rOpts->{'space-function-paren'};
2556 my $rwhitespace_flags = [];
2557 my $ris_function_call_paren = {};
2559 return $rwhitespace_flags if ( $jmax < 0 );
2561 my %is_for_foreach = ( 'for' => 1, 'foreach' => 1 );
2563 my ( $rtokh, $token, $type );
2564 my ( $rtokh_last, $last_token, $last_type );
2566 my $j_tight_closing_paren = -1;
2568 $rtokh = [ @{ $rLL->[0] } ];
2572 $rtokh->[_TOKEN_] = $token;
2573 $rtokh->[_TYPE_] = $type;
2574 $rtokh->[_TYPE_SEQUENCE_] = '';
2575 $rtokh->[_LINE_INDEX_] = 0;
2579 # This is some logic moved to a sub to avoid deep nesting of if stmts
2580 my $ws_in_container = sub {
2584 if ( $j + 1 > $jmax ) { return (WS_NO) }
2586 # Patch to count '-foo' as single token so that
2587 # each of $a{-foo} and $a{foo} and $a{'foo'} do
2588 # not get spaces with default formatting.
2592 && $last_token eq '{'
2593 && $rLL->[ $j + 1 ]->[_TYPE_] eq 'w' );
2595 # Patch to count a sign separated from a number as a single token, as
2596 # in the following line. Otherwise, it takes two steps to converge:
2598 if ( ( $type eq 'm' || $type eq 'p' )
2600 && $rLL->[ $j + 1 ]->[_TYPE_] eq 'b'
2601 && $rLL->[ $j + 2 ]->[_TYPE_] eq 'n'
2602 && $rLL->[ $j + 2 ]->[_TOKEN_] =~ /^\d/ )
2607 # $j_next is where a closing token should be if
2608 # the container has a single token
2609 if ( $j_here + 1 > $jmax ) { return (WS_NO) }
2611 ( $rLL->[ $j_here + 1 ]->[_TYPE_] eq 'b' )
2615 if ( $j_next > $jmax ) { return WS_NO }
2616 my $tok_next = $rLL->[$j_next]->[_TOKEN_];
2617 my $type_next = $rLL->[$j_next]->[_TYPE_];
2619 # for tightness = 1, if there is just one token
2620 # within the matching pair, we will keep it tight
2622 $tok_next eq $matching_token{$last_token}
2624 # but watch out for this: [ [ ] (misc.t)
2625 && $last_token ne $token
2627 # double diamond is usually spaced
2633 # remember where to put the space for the closing paren
2634 $j_tight_closing_paren = $j_next;
2640 # Local hashes to set spaces around container tokens according to their
2641 # sequence numbers. These are set as keywords are examined.
2642 # They are controlled by the -kpit and -kpitl flags.
2643 my %opening_container_inside_ws;
2644 my %closing_container_inside_ws;
2645 my $set_container_ws_by_keyword = sub {
2647 return unless (%keyword_paren_inner_tightness);
2649 my ( $word, $sequence_number ) = @_;
2651 # We just saw a keyword (or other function name) followed by an opening
2652 # paren. Now check to see if the following paren should have special
2653 # treatment for its inside space. If so we set a hash value using the
2654 # sequence number as key.
2655 if ( $word && $sequence_number ) {
2656 my $tightness = $keyword_paren_inner_tightness{$word};
2657 if ( defined($tightness) && $tightness != 1 ) {
2658 my $ws_flag = $tightness == 0 ? WS_YES : WS_NO;
2659 $opening_container_inside_ws{$sequence_number} = $ws_flag;
2660 $closing_container_inside_ws{$sequence_number} = $ws_flag;
2666 my ( $ws_1, $ws_2, $ws_3, $ws_4 );
2668 # main loop over all tokens to define the whitespace flags
2669 for ( my $j = 0 ; $j <= $jmax ; $j++ ) {
2671 if ( $rLL->[$j]->[_TYPE_] eq 'b' ) {
2672 $rwhitespace_flags->[$j] = WS_OPTIONAL;
2676 $rtokh_last = $rtokh;
2677 $last_token = $token;
2680 $rtokh = $rLL->[$j];
2681 $token = $rtokh->[_TOKEN_];
2682 $type = $rtokh->[_TYPE_];
2686 #---------------------------------------------------------------
2687 # Whitespace Rules Section 1:
2688 # Handle space on the inside of opening braces.
2689 #---------------------------------------------------------------
2692 if ( $is_opening_type{$last_type} ) {
2694 my $seqno = $rtokh->[_TYPE_SEQUENCE_];
2695 my $block_type = $rblock_type_of_seqno->{$seqno};
2696 my $last_seqno = $rtokh_last->[_TYPE_SEQUENCE_];
2697 my $last_block_type = $rblock_type_of_seqno->{$last_seqno};
2699 $j_tight_closing_paren = -1;
2701 # let us keep empty matched braces together: () {} []
2703 if ( $token eq $matching_token{$last_token} ) {
2713 # we're considering the right of an opening brace
2714 # tightness = 0 means always pad inside with space
2715 # tightness = 1 means pad inside if "complex"
2716 # tightness = 2 means never pad inside with space
2719 if ( $last_type eq '{'
2720 && $last_token eq '{'
2721 && $last_block_type )
2723 $tightness = $rOpts_block_brace_tightness;
2725 else { $tightness = $tightness{$last_token} }
2727 #=============================================================
2728 # Patch for test problem <<snippets/fabrice_bug.in>>
2729 # We must always avoid spaces around a bare word beginning
2731 # my $before = ${^PREMATCH};
2732 # Because all of the following cause an error in perl:
2733 # my $before = ${ ^PREMATCH };
2734 # my $before = ${ ^PREMATCH};
2735 # my $before = ${^PREMATCH };
2736 # So if brace tightness flag is -bt=0 we must temporarily reset
2737 # to bt=1. Note that here we must set tightness=1 and not 2 so
2738 # that the closing space
2739 # is also avoided (via the $j_tight_closing_paren flag in coding)
2740 if ( $type eq 'w' && $token =~ /^\^/ ) { $tightness = 1 }
2742 #=============================================================
2744 if ( $tightness <= 0 ) {
2747 elsif ( $tightness > 1 ) {
2751 $ws = $ws_in_container->($j);
2755 # check for special cases which override the above rules
2756 if ( %opening_container_inside_ws && $last_seqno ) {
2757 my $ws_override = $opening_container_inside_ws{$last_seqno};
2758 if ($ws_override) { $ws = $ws_override }
2761 $ws_4 = $ws_3 = $ws_2 = $ws_1 = $ws
2764 } ## end setting space flag inside opening tokens
2766 #---------------------------------------------------------------
2767 # Whitespace Rules Section 2:
2768 # Handle space on inside of closing brace pairs.
2769 #---------------------------------------------------------------
2772 if ( $is_closing_type{$type} ) {
2774 my $seqno = $rtokh->[_TYPE_SEQUENCE_];
2775 if ( $j == $j_tight_closing_paren ) {
2777 $j_tight_closing_paren = -1;
2782 if ( !defined($ws) ) {
2785 my $block_type = $rblock_type_of_seqno->{$seqno};
2786 if ( $type eq '}' && $token eq '}' && $block_type ) {
2787 $tightness = $rOpts_block_brace_tightness;
2789 else { $tightness = $tightness{$token} }
2791 $ws = ( $tightness > 1 ) ? WS_NO : WS_YES;
2795 # check for special cases which override the above rules
2796 if ( %closing_container_inside_ws && $seqno ) {
2797 my $ws_override = $closing_container_inside_ws{$seqno};
2798 if ($ws_override) { $ws = $ws_override }
2801 $ws_4 = $ws_3 = $ws_2 = $ws
2803 } ## end setting space flag inside closing tokens
2805 #---------------------------------------------------------------
2806 # Whitespace Rules Section 3:
2807 # Handle some special cases.
2808 #---------------------------------------------------------------
2811 elsif ( $is_opening_type{$type} ) {
2813 if ( $token eq '(' ) {
2815 my $seqno = $rtokh->[_TYPE_SEQUENCE_];
2817 # This will have to be tweaked as tokenization changes.
2818 # We usually want a space at '} (', for example:
2819 # <<snippets/space1.in>>
2820 # map { 1 * $_; } ( $y, $M, $w, $d, $h, $m, $s );
2823 # &{ $_->[1] }( delete $_[$#_]{ $_->[0] } );
2824 # At present, the above & block is marked as type L/R so this case
2825 # won't go through here.
2826 if ( $last_type eq '}' && $last_token ne ')' ) { $ws = WS_YES }
2828 # NOTE: some older versions of Perl had occasional problems if
2829 # spaces are introduced between keywords or functions and opening
2830 # parens. So the default is not to do this except is certain
2831 # cases. The current Perl seems to tolerate spaces.
2833 # Space between keyword and '('
2834 elsif ( $last_type eq 'k' ) {
2836 unless ( $rOpts_space_keyword_paren
2837 || $space_after_keyword{$last_token} );
2839 # Set inside space flag if requested
2840 $set_container_ws_by_keyword->( $last_token, $seqno );
2843 # Space between function and '('
2844 # -----------------------------------------------------
2845 # 'w' and 'i' checks for something like:
2846 # myfun( &myfun( ->myfun(
2847 # -----------------------------------------------------
2849 # Note that at this point an identifier may still have a leading
2850 # arrow, but the arrow will be split off during token respacing.
2851 # After that, the token may become a bare word without leading
2852 # arrow. The point is, it is best to mark function call parens
2853 # right here before that happens.
2854 # Patch: added 'C' to prevent blinker, case b934, i.e. 'pi()'
2855 # NOTE: this would be the place to allow spaces between repeated
2856 # parens, like () () (), as in case c017, but I decided that would
2857 # not be a good idea.
2859 ( $last_type =~ /^[wCUG]$/ )
2860 || ( $last_type =~ /^[wi]$/ && $last_token =~ /^([\&]|->)/ )
2863 $ws = $rOpts_space_function_paren ? WS_YES : WS_NO;
2864 $set_container_ws_by_keyword->( $last_token, $seqno );
2865 $ris_function_call_paren->{$seqno} = 1;
2868 # space between something like $i and ( in <<snippets/space2.in>>
2869 # for $i ( 0 .. 20 ) {
2870 # FIXME: eventually, type 'i' could be split into multiple
2871 # token types so this can be a hardwired rule.
2872 elsif ( $last_type eq 'i' && $last_token =~ /^[\$\%\@]/ ) {
2876 # allow constant function followed by '()' to retain no space
2877 elsif ($last_type eq 'C'
2878 && $rLL->[ $j + 1 ]->[_TOKEN_] eq ')' )
2884 # patch for SWITCH/CASE: make space at ']{' optional
2885 # since the '{' might begin a case or when block
2886 elsif ( ( $token eq '{' && $type ne 'L' ) && $last_token eq ']' ) {
2890 # keep space between 'sub' and '{' for anonymous sub definition
2891 if ( $type eq '{' ) {
2892 if ( $last_token eq 'sub' ) {
2896 # this is needed to avoid no space in '){'
2897 if ( $last_token eq ')' && $token eq '{' ) { $ws = WS_YES }
2899 # avoid any space before the brace or bracket in something like
2900 # @opts{'a','b',...}
2901 if ( $last_type eq 'i' && $last_token =~ /^\@/ ) {
2905 } ## end if ( $is_opening_type{$type} ) {
2907 # Special checks for certain other types ...
2908 # the hash '%is_special_ws_type' significantly speeds up this routine,
2909 # but be sure to update it if a new check is added.
2910 # Currently has types: qw(k w i C m - Q #)
2911 elsif ( $is_special_ws_type{$type} ) {
2912 if ( $type eq 'i' ) {
2914 # never a space before ->
2915 if ( substr( $token, 0, 2 ) eq '->' ) {
2920 elsif ( $type eq 'k' ) {
2922 # Keywords 'for', 'foreach' are special cases for -kpit since
2923 # the opening paren does not always immediately follow the
2924 # keyword. So we have to search forward for the paren in this
2925 # case. I have limited the search to 10 tokens ahead, just in
2926 # case somebody has a big file and no opening paren. This
2927 # should be enough for all normal code. Added the level check
2929 if ( $is_for_foreach{$token}
2930 && %keyword_paren_inner_tightness
2931 && defined( $keyword_paren_inner_tightness{$token} )
2934 my $level = $rLL->[$j]->[_LEVEL_];
2936 for ( my $inc = 1 ; $inc < 10 ; $inc++ ) {
2938 last if ( $jp > $jmax );
2939 last if ( $rLL->[$jp]->[_LEVEL_] != $level ); # b1236
2940 next unless ( $rLL->[$jp]->[_TOKEN_] eq '(' );
2941 my $seqno_p = $rLL->[$jp]->[_TYPE_SEQUENCE_];
2942 $set_container_ws_by_keyword->( $token, $seqno_p );
2948 # retain any space between '-' and bare word
2949 elsif ( $type eq 'w' || $type eq 'C' ) {
2950 $ws = WS_OPTIONAL if $last_type eq '-';
2952 # never a space before ->
2953 if ( substr( $token, 0, 2 ) eq '->' ) {
2958 # retain any space between '-' and bare word; for example
2959 # avoid space between 'USER' and '-' here: <<snippets/space2.in>>
2960 # $myhash{USER-NAME}='steve';
2961 elsif ( $type eq 'm' || $type eq '-' ) {
2962 $ws = WS_OPTIONAL if ( $last_type eq 'w' );
2965 # always space before side comment
2966 elsif ( $type eq '#' ) { $ws = WS_YES if $j > 0 }
2968 # space_backslash_quote; RT #123774 <<snippets/rt123774.in>>
2969 # allow a space between a backslash and single or double quote
2970 # to avoid fooling html formatters
2971 elsif ( $last_type eq '\\' && $type eq 'Q' && $token =~ /^[\"\']/ )
2973 if ($rOpts_space_backslash_quote) {
2974 if ( $rOpts_space_backslash_quote == 1 ) {
2977 elsif ( $rOpts_space_backslash_quote == 2 ) { $ws = WS_YES }
2978 else { } # shouldnt happen
2984 } ## end elsif ( $is_special_ws_type{$type} ...
2986 # always preserver whatever space was used after a possible
2987 # filehandle (except _) or here doc operator
2990 && ( ( $last_type eq 'Z' && $last_token ne '_' )
2991 || $last_type eq 'h' )
3000 if ( !defined($ws) ) {
3002 #---------------------------------------------------------------
3003 # Whitespace Rules Section 4:
3004 # Use the binary rule table.
3005 #---------------------------------------------------------------
3006 $ws = $binary_ws_rules{$last_type}{$type};
3007 $ws_4 = $ws if DEBUG_WHITE;
3009 #---------------------------------------------------------------
3010 # Whitespace Rules Section 5:
3011 # Apply default rules not covered above.
3012 #---------------------------------------------------------------
3014 # If we fall through to here, look at the pre-defined hash tables for
3015 # the two tokens, and:
3016 # if (they are equal) use the common value
3017 # if (either is zero or undef) use the other
3018 # if (either is -1) use it
3032 if ( !defined($ws) ) {
3033 my $wl = $want_left_space{$type};
3034 my $wr = $want_right_space{$last_type};
3035 if ( !defined($wl) ) {
3036 $ws = defined($wr) ? $wr : 0;
3038 elsif ( !defined($wr) ) {
3043 ( ( $wl == $wr ) || ( $wl == -1 ) || !$wr ) ? $wl : $wr;
3048 # Treat newline as a whitespace. Otherwise, we might combine
3049 # 'Send' and '-recipients' here according to the above rules:
3050 # <<snippets/space3.in>>
3051 # my $msg = new Fax::Send
3052 # -recipients => $to,
3055 && $rtokh->[_LINE_INDEX_] != $rtokh_last->[_LINE_INDEX_] )
3060 $rwhitespace_flags->[$j] = $ws;
3063 my $str = substr( $last_token, 0, 15 );
3064 $str .= ' ' x ( 16 - length($str) );
3065 if ( !defined($ws_1) ) { $ws_1 = "*" }
3066 if ( !defined($ws_2) ) { $ws_2 = "*" }
3067 if ( !defined($ws_3) ) { $ws_3 = "*" }
3068 if ( !defined($ws_4) ) { $ws_4 = "*" }
3070 "NEW WHITE: i=$j $str $last_type $type $ws_1 : $ws_2 : $ws_3 : $ws_4 : $ws \n";
3072 # reset for next pass
3073 $ws_1 = $ws_2 = $ws_3 = $ws_4 = undef;
3077 if ( $rOpts->{'tight-secret-operators'} ) {
3078 new_secret_operator_whitespace( $rLL, $rwhitespace_flags );
3080 $self->[_ris_function_call_paren_] = $ris_function_call_paren;
3081 return $rwhitespace_flags;
3083 } ## end sub set_whitespace_flags
3085 sub dump_want_left_space {
3089 These values are the main control of whitespace to the left of a token type;
3090 They may be altered with the -wls parameter.
3091 For a list of token types, use perltidy --dump-token-types (-dtt)
3092 1 means the token wants a space to its left
3093 -1 means the token does not want a space to its left
3094 ------------------------------------------------------------------------
3096 foreach my $key ( sort keys %want_left_space ) {
3097 $fh->print("$key\t$want_left_space{$key}\n");
3102 sub dump_want_right_space {
3106 These values are the main control of whitespace to the right of a token type;
3107 They may be altered with the -wrs parameter.
3108 For a list of token types, use perltidy --dump-token-types (-dtt)
3109 1 means the token wants a space to its right
3110 -1 means the token does not want a space to its right
3111 ------------------------------------------------------------------------
3113 foreach my $key ( sort keys %want_right_space ) {
3114 $fh->print("$key\t$want_right_space{$key}\n");
3119 { ## begin closure is_essential_whitespace
3121 my %is_sort_grep_map;
3125 my %essential_whitespace_filter_l1;
3126 my %essential_whitespace_filter_r1;
3127 my %essential_whitespace_filter_l2;
3128 my %essential_whitespace_filter_r2;
3129 my %is_type_with_space_before_bareword;
3130 my %is_special_variable_char;
3136 # NOTE: This hash is like the global %is_sort_map_grep, but it ignores
3137 # grep aliases on purpose, since here we are looking parens, not braces
3138 @q = qw(sort grep map);
3139 @is_sort_grep_map{@q} = (1) x scalar(@q);
3141 @q = qw(for foreach);
3142 @is_for_foreach{@q} = (1) x scalar(@q);
3145 .. :: << >> ** && .. || // -> => += -= .= %= &= |= ^= *= <>
3146 <= >= == =~ !~ != ++ -- /= x= ~~ ~. |. &. ^.
3148 @is_digraph{@q} = (1) x scalar(@q);
3150 @q = qw( ... **= <<= >>= &&= ||= //= <=> !~~ &.= |.= ^.= <<~);
3151 @is_trigraph{@q} = (1) x scalar(@q);
3153 # These are used as a speedup filters for sub is_essential_whitespace.
3156 # These left side token types USUALLY do not require a space:
3157 @q = qw( ; { } [ ] L R );
3161 @essential_whitespace_filter_l1{@q} = (1) x scalar(@q);
3163 # BUT some might if followed by these right token types
3164 @q = qw( pp mm << <<= h );
3165 @essential_whitespace_filter_r1{@q} = (1) x scalar(@q);
3168 # These right side filters usually do not require a space
3172 @essential_whitespace_filter_r2{@q} = (1) x scalar(@q);
3174 # BUT some might if followed by these left token types
3176 @essential_whitespace_filter_l2{@q} = (1) x scalar(@q);
3178 # Keep a space between certain types and any bareword:
3179 # Q: keep a space between a quote and a bareword to prevent the
3180 # bareword from becoming a quote modifier.
3181 # &: do not remove space between an '&' and a bare word because
3182 # it may turn into a function evaluation, like here
3183 # between '&' and 'O_ACCMODE', producing a syntax error [File.pm]
3184 # $opts{rdonly} = (($opts{mode} & O_ACCMODE) == O_RDONLY);
3186 @is_type_with_space_before_bareword{@q} = (1) x scalar(@q);
3188 # These are the only characters which can (currently) form special
3189 # variables, like $^W: (issue c066, c068).
3191 qw{ ? A B C D E F G H I J K L M N O P Q R S T U V W X Y Z [ \ ] ^ _ };
3192 @{is_special_variable_char}{@q} = (1) x scalar(@q);
3196 sub is_essential_whitespace {
3198 # Essential whitespace means whitespace which cannot be safely deleted
3199 # without risking the introduction of a syntax error.
3200 # We are given three tokens and their types:
3201 # ($tokenl, $typel) is the token to the left of the space in question
3202 # ($tokenr, $typer) is the token to the right of the space in question
3203 # ($tokenll, $typell) is previous nonblank token to the left of $tokenl
3205 # Note1: This routine should almost never need to be changed. It is
3206 # for avoiding syntax problems rather than for formatting.
3208 # Note2: The -mangle option causes large numbers of calls to this
3209 # routine and therefore is a good test. So if a change is made, be sure
3210 # to use nytprof to profile with both old and reviesed coding using the
3211 # -mangle option and check differences.
3213 my ( $tokenll, $typell, $tokenl, $typel, $tokenr, $typer ) = @_;
3215 # This is potentially a very slow routine but the following quick
3216 # filters typically catch and handle over 90% of the calls.
3218 # Filter 1: usually no space required after common types ; , [ ] { } ( )
3220 if ( $essential_whitespace_filter_l1{$typel}
3221 && !$essential_whitespace_filter_r1{$typer} );
3223 # Filter 2: usually no space before common types ; ,
3225 if ( $essential_whitespace_filter_r2{$typer}
3226 && !$essential_whitespace_filter_l2{$typel} );
3228 # Filter 3: Handle side comments: a space is only essential if the left
3229 # token ends in '$' For example, we do not want to create $#foo below:
3238 # Also, I prefer not to put a ? and # together because ? used to be
3239 # a pattern delmiter and spacing was used if guessing was needed.
3241 if ( $typer eq '#' ) {
3245 && ( $typel eq '?' || substr( $tokenl, -1 ) eq '$' ) );
3249 my $tokenr_is_bareword = $tokenr =~ /^\w/ && $tokenr !~ /^\d/;
3250 my $tokenr_is_open_paren = $tokenr eq '(';
3251 my $token_joined = $tokenl . $tokenr;
3252 my $tokenl_is_dash = $tokenl eq '-';
3256 # never combine two bare words or numbers
3257 # examples: and ::ok(1)
3259 # for bla::bla:: abc
3260 # example is "%overload:: and" in files Dumpvalue.pm or colonbug.pl
3261 # $input eq"quit" to make $inputeq"quit"
3262 # my $size=-s::SINK if $file; <==OK but we won't do it
3263 # don't join something like: for bla::bla:: abc
3264 # example is "%overload:: and" in files Dumpvalue.pm or colonbug.pl
3265 ( ( $tokenl =~ /([\'\w]|\:\:)$/ && $typel ne 'CORE::' )
3266 && ( $tokenr =~ /^([\'\w]|\:\:)/ ) )
3268 # do not combine a number with a concatenation dot
3269 # example: pom.caputo:
3270 # $vt100_compatible ? "\e[0;0H" : ('-' x 78 . "\n");
3271 || $typel eq 'n' && $tokenr eq '.'
3272 || $typer eq 'n' && $tokenl eq '.'
3274 # cases of a space before a bareword...
3276 $tokenr_is_bareword && (
3278 # do not join a minus with a bare word, because you might form
3279 # a file test operator. Example from Complex.pm:
3280 # if (CORE::abs($z - i) < $eps);
3281 # "z-i" would be taken as a file test.
3282 $tokenl_is_dash && length($tokenr) == 1
3284 # and something like this could become ambiguous without space
3286 # use constant III=>1;
3290 || $tokenl_is_dash && $typer =~ /^[wC]$/
3292 # keep space between types Q & and a bareword
3293 || $is_type_with_space_before_bareword{$typel}
3295 # +-: binary plus and minus before a bareword could get
3296 # converted into unary plus and minus on next pass through the
3297 # tokenizer. This can lead to blinkers: cases b660 b670 b780
3298 # b781 b787 b788 b790 So we keep a space unless the +/- clearly
3299 # follows an operator
3300 || ( ( $typel eq '+' || $typel eq '-' )
3301 && $typell !~ /^[niC\)\}\]R]$/ )
3303 # keep a space between a token ending in '$' and any word;
3304 # this caused trouble: "die @$ if $@"
3305 ##|| $typel eq 'i' && $tokenl =~ /\$$/
3306 || $typel eq 'i' && substr( $tokenl, -1, 1 ) eq '$'
3308 # don't combine $$ or $# with any alphanumeric
3309 # (testfile mangle.t with --mangle)
3310 ##|| $tokenl =~ /^\$[\$\#]$/
3315 ) ## end $tokenr_is_bareword
3318 # '= -' should not become =- or you will get a warning
3320 # || ($tokenr eq '-')
3322 # do not join a bare word with a minus, like between 'Send' and
3323 # '-recipients' here <<snippets/space3.in>>
3324 # my $msg = new Fax::Send
3325 # -recipients => $to,
3327 # This is the safest thing to do. If we had the token to the right of
3328 # the minus we could do a better check.
3330 # And do not combine a bareword and a quote, like this:
3331 # oops "Your login, $Bad_Login, is not valid";
3332 # It can cause a syntax error if oops is a sub
3333 || $typel eq 'w' && ( $tokenr eq '-' || $typer eq 'Q' )
3335 # perl is very fussy about spaces before <<
3336 || substr( $tokenr, 0, 2 ) eq '<<'
3337 ##|| $tokenr =~ /^\<\</
3339 # avoid combining tokens to create new meanings. Example:
3340 # $a+ +$b must not become $a++$b
3341 || ( $is_digraph{$token_joined} )
3342 || $is_trigraph{$token_joined}
3344 # another example: do not combine these two &'s:
3345 # allow_options & &OPT_EXECCGI
3346 || $is_digraph{ $tokenl . substr( $tokenr, 0, 1 ) }
3348 # retain any space after possible filehandle
3349 # (testfiles prnterr1.t with --extrude and mangle.t with --mangle)
3352 # Added 'Y' here 16 Jan 2021 to prevent -mangle option from removing
3353 # space after type Y. Otherwise, it will get parsed as type 'Z' later
3354 # and any space would have to be added back manually if desired.
3357 # Perl is sensitive to whitespace after the + here:
3358 # $b = xvals $a + 0.1 * yvals $a;
3359 || $typell eq 'Z' && $typel =~ /^[\/\?\+\-\*]$/
3362 $tokenr_is_open_paren && (
3364 # keep paren separate in 'use Foo::Bar ()'
3365 ( $typel eq 'w' && $typell eq 'k' && $tokenll eq 'use' )
3367 # OLD: keep any space between filehandle and paren:
3368 # file mangle.t with --mangle:
3369 # NEW: this test is no longer necessary here (moved above)
3372 # must have space between grep and left paren; "grep(" will fail
3373 || $is_sort_grep_map{$tokenl}
3375 # don't stick numbers next to left parens, as in:
3376 #use Mail::Internet 1.28 (); (see Entity.pm, Head.pm, Test.pm)
3379 ) ## end $tokenr_is_open_paren
3381 # retain any space after here doc operator ( hereerr.t)
3384 # be careful with a space around ++ and --, to avoid ambiguity as to
3385 # which token it applies
3386 ##|| $typer =~ /^(pp|mm)$/ && $tokenl !~ /^[\;\{\(\[]/
3387 || ( $typer eq 'pp' || $typer eq 'mm' ) && $tokenl !~ /^[\;\{\(\[]/
3388 || ( $typel eq '++' || $typel eq '--' )
3389 && $tokenr !~ /^[\;\}\)\]]/
3390 ##|| $typel =~ /^(\+\+|\-\-)$/ && $tokenr !~ /^[\;\}\)\]]/
3392 # need space after foreach my; for example, this will fail in
3393 # older versions of Perl:
3394 # foreach my$ft(@filetypes)...
3398 && substr( $tokenr, 0, 1 ) eq '$'
3399 ##&& $tokenr =~ /^\$/
3402 && $is_for_foreach{$tokenll}
3405 # Keep space after like $^ if needed to avoid forming a different
3406 # special variable (issue c068). For example:
3407 # my $aa = $^ ? "none" : "ok";
3409 && length($tokenl) == 2
3410 && substr( $tokenl, 1, 1 ) eq '^'
3411 && $is_special_variable_char{ substr( $tokenr, 0, 1 ) } )
3413 # We must be sure that a space between a ? and a quoted string
3414 # remains if the space before the ? remains. [Loca.pm, lockarea]
3416 # $b=join $comma ? ',' : ':', @_; # ok
3417 # $b=join $comma?',' : ':', @_; # ok!
3418 # $b=join $comma ?',' : ':', @_; # error!
3419 # Not really required:
3420 ## || ( ( $typel eq '?' ) && ( $typer eq 'Q' ) )
3422 # Space stacked labels...
3423 # Not really required: Perl seems to accept non-spaced labels.
3424 ## || $typel eq 'J' && $typer eq 'J'
3426 ; # the value of this long logic sequence is the result we want
3429 } ## end closure is_essential_whitespace
3431 { ## begin closure new_secret_operator_whitespace
3433 my %secret_operators;
3434 my %is_leading_secret_token;
3438 # token lists for perl secret operators as compiled by Philippe Bruhat
3439 # at: https://metacpan.org/module/perlsecret
3440 %secret_operators = (
3441 'Goatse' => [qw#= ( ) =#], #=( )=
3442 'Venus1' => [qw#0 +#], # 0+
3443 'Venus2' => [qw#+ 0#], # +0
3444 'Enterprise' => [qw#) x ! !#], # ()x!!
3445 'Kite1' => [qw#~ ~ <>#], # ~~<>
3446 'Kite2' => [qw#~~ <>#], # ~~<>
3447 'Winking Fat Comma' => [ ( ',', '=>' ) ], # ,=>
3448 'Bang bang ' => [qw#! !#], # !!
3451 # The following operators and constants are not included because they
3452 # are normally kept tight by perltidy:
3456 # Make a lookup table indexed by the first token of each operator:
3457 # first token => [list, list, ...]
3458 foreach my $value ( values(%secret_operators) ) {
3459 my $tok = $value->[0];
3460 push @{ $is_leading_secret_token{$tok} }, $value;
3464 sub new_secret_operator_whitespace {
3466 my ( $rlong_array, $rwhitespace_flags ) = @_;
3468 # Loop over all tokens in this line
3469 my ( $token, $type );
3470 my $jmax = @{$rlong_array} - 1;
3471 foreach my $j ( 0 .. $jmax ) {
3473 $token = $rlong_array->[$j]->[_TOKEN_];
3474 $type = $rlong_array->[$j]->[_TYPE_];
3476 # Skip unless this token might start a secret operator
3477 next if ( $type eq 'b' );
3478 next unless ( $is_leading_secret_token{$token} );
3480 # Loop over all secret operators with this leading token
3481 foreach my $rpattern ( @{ $is_leading_secret_token{$token} } ) {
3483 foreach my $tok ( @{$rpattern} ) {
3488 && $rlong_array->[$jend]->[_TYPE_] eq 'b' );
3490 || $tok ne $rlong_array->[$jend]->[_TOKEN_] )
3499 # set flags to prevent spaces within this operator
3500 foreach my $jj ( $j + 1 .. $jend ) {
3501 $rwhitespace_flags->[$jj] = WS_NO;
3506 } ## End Loop over all operators
3507 } ## End loop over all tokens
3510 } ## end closure new_secret_operator_whitespace
3512 { ## begin closure set_bond_strengths
3514 # These routines and variables are involved in deciding where to break very
3517 my %is_good_keyword_breakpoint;
3519 my %is_container_token;
3521 my %binary_bond_strength_nospace;
3522 my %binary_bond_strength;
3531 sub initialize_bond_strength_hashes {
3534 @q = qw(if unless while until for foreach);
3535 @is_good_keyword_breakpoint{@q} = (1) x scalar(@q);
3537 @q = qw(lt gt le ge);
3538 @is_lt_gt_le_ge{@q} = (1) x scalar(@q);
3540 @q = qw/ ( [ { } ] ) /;
3541 @is_container_token{@q} = (1) x scalar(@q);
3543 # The decision about where to break a line depends upon a "bond
3544 # strength" between tokens. The LOWER the bond strength, the MORE
3545 # likely a break. A bond strength may be any value but to simplify
3546 # things there are several pre-defined strength levels:
3548 # NO_BREAK => 10000;
3549 # VERY_STRONG => 100;
3553 # VERY_WEAK => 0.55;
3555 # The strength values are based on trial-and-error, and need to be
3556 # tweaked occasionally to get desired results. Some comments:
3558 # 1. Only relative strengths are important. small differences
3559 # in strengths can make big formatting differences.
3560 # 2. Each indentation level adds one unit of bond strength.
3561 # 3. A value of NO_BREAK makes an unbreakable bond
3562 # 4. A value of VERY_WEAK is the strength of a ','
3563 # 5. Values below NOMINAL are considered ok break points.
3564 # 6. Values above NOMINAL are considered poor break points.
3566 # The bond strengths should roughly follow precedence order where
3567 # possible. If you make changes, please check the results very
3568 # carefully on a variety of scripts. Testing with the -extrude
3569 # options is particularly helpful in exercising all of the rules.
3571 # Wherever possible, bond strengths are defined in the following
3572 # tables. There are two main stages to setting bond strengths and
3573 # two types of tables:
3575 # The first stage involves looking at each token individually and
3576 # defining left and right bond strengths, according to if we want
3577 # to break to the left or right side, and how good a break point it
3578 # is. For example tokens like =, ||, && make good break points and
3579 # will have low strengths, but one might want to break on either
3580 # side to put them at the end of one line or beginning of the next.
3582 # The second stage involves looking at certain pairs of tokens and
3583 # defining a bond strength for that particular pair. This second
3584 # stage has priority.
3586 #---------------------------------------------------------------
3587 # Bond Strength BEGIN Section 1.
3588 # Set left and right bond strengths of individual tokens.
3589 #---------------------------------------------------------------
3591 # NOTE: NO_BREAK's set in this section first are HINTS which will
3592 # probably not be honored. Essential NO_BREAKS's should be set in
3593 # BEGIN Section 2 or hardwired in the NO_BREAK coding near the end
3594 # of this subroutine.
3596 # Note that we are setting defaults in this section. The user
3597 # cannot change bond strengths but can cause the left and right
3598 # bond strengths of any token type to be swapped through the use of
3599 # the -wba and -wbb flags. In this way the user can determine if a
3600 # breakpoint token should appear at the end of one line or the
3601 # beginning of the next line.
3603 %right_bond_strength = ();
3604 %left_bond_strength = ();
3605 %binary_bond_strength_nospace = ();
3606 %binary_bond_strength = ();
3610 # The hash keys in this section are token types, plus the text of
3611 # certain keywords like 'or', 'and'.
3613 # no break around possible filehandle
3614 $left_bond_strength{'Z'} = NO_BREAK;
3615 $right_bond_strength{'Z'} = NO_BREAK;
3617 # never put a bare word on a new line:
3618 # example print (STDERR, "bla"); will fail with break after (
3619 $left_bond_strength{'w'} = NO_BREAK;
3621 # blanks always have infinite strength to force breaks after
3623 $right_bond_strength{'b'} = NO_BREAK;
3625 # try not to break on exponentation
3626 @q = qw# ** .. ... <=> #;
3627 @left_bond_strength{@q} = (STRONG) x scalar(@q);
3628 @right_bond_strength{@q} = (STRONG) x scalar(@q);
3630 # The comma-arrow has very low precedence but not a good break point
3631 $left_bond_strength{'=>'} = NO_BREAK;
3632 $right_bond_strength{'=>'} = NOMINAL;
3634 # ok to break after label
3635 $left_bond_strength{'J'} = NO_BREAK;
3636 $right_bond_strength{'J'} = NOMINAL;
3637 $left_bond_strength{'j'} = STRONG;
3638 $right_bond_strength{'j'} = STRONG;
3639 $left_bond_strength{'A'} = STRONG;
3640 $right_bond_strength{'A'} = STRONG;
3642 $left_bond_strength{'->'} = STRONG;
3643 $right_bond_strength{'->'} = VERY_STRONG;
3645 $left_bond_strength{'CORE::'} = NOMINAL;
3646 $right_bond_strength{'CORE::'} = NO_BREAK;
3648 # breaking AFTER modulus operator is ok:
3650 @left_bond_strength{@q} = (STRONG) x scalar(@q);
3651 @right_bond_strength{@q} =
3652 ( 0.1 * NOMINAL + 0.9 * STRONG ) x scalar(@q);
3654 # Break AFTER math operators * and /
3656 @left_bond_strength{@q} = (STRONG) x scalar(@q);
3657 @right_bond_strength{@q} = (NOMINAL) x scalar(@q);
3659 # Break AFTER weakest math operators + and -
3660 # Make them weaker than * but a bit stronger than '.'
3662 @left_bond_strength{@q} = (STRONG) x scalar(@q);
3663 @right_bond_strength{@q} =
3664 ( 0.91 * NOMINAL + 0.09 * WEAK ) x scalar(@q);
3666 # Define left strength of unary plus and minus (fixes case b511)
3667 $left_bond_strength{p} = $left_bond_strength{'+'};
3668 $left_bond_strength{m} = $left_bond_strength{'-'};
3670 # And make right strength of unary plus and minus very high.
3671 # Fixes cases b670 b790
3672 $right_bond_strength{p} = NO_BREAK;
3673 $right_bond_strength{m} = NO_BREAK;
3675 # breaking BEFORE these is just ok:
3677 @right_bond_strength{@q} = (STRONG) x scalar(@q);
3678 @left_bond_strength{@q} = (NOMINAL) x scalar(@q);
3680 # breaking before the string concatenation operator seems best
3681 # because it can be hard to see at the end of a line
3682 $right_bond_strength{'.'} = STRONG;
3683 $left_bond_strength{'.'} = 0.9 * NOMINAL + 0.1 * WEAK;
3686 @left_bond_strength{@q} = (STRONG) x scalar(@q);
3687 @right_bond_strength{@q} = (NOMINAL) x scalar(@q);
3689 # make these a little weaker than nominal so that they get
3690 # favored for end-of-line characters
3691 @q = qw< != == =~ !~ ~~ !~~ >;
3692 @left_bond_strength{@q} = (STRONG) x scalar(@q);
3693 @right_bond_strength{@q} =
3694 ( 0.9 * NOMINAL + 0.1 * WEAK ) x scalar(@q);
3697 @q = qw# < > | & >= <= #;
3698 @left_bond_strength{@q} = (VERY_STRONG) x scalar(@q);
3699 @right_bond_strength{@q} =
3700 ( 0.8 * NOMINAL + 0.2 * WEAK ) x scalar(@q);
3702 # breaking either before or after a quote is ok
3703 # but bias for breaking before a quote
3704 $left_bond_strength{'Q'} = NOMINAL;
3705 $right_bond_strength{'Q'} = NOMINAL + 0.02;
3706 $left_bond_strength{'q'} = NOMINAL;
3707 $right_bond_strength{'q'} = NOMINAL;
3709 # starting a line with a keyword is usually ok
3710 $left_bond_strength{'k'} = NOMINAL;
3712 # we usually want to bond a keyword strongly to what immediately
3713 # follows, rather than leaving it stranded at the end of a line
3714 $right_bond_strength{'k'} = STRONG;
3716 $left_bond_strength{'G'} = NOMINAL;
3717 $right_bond_strength{'G'} = STRONG;
3719 # assignment operators
3721 = **= += *= &= <<= &&=
3722 -= /= |= >>= ||= //=
3727 # Default is to break AFTER various assignment operators
3728 @left_bond_strength{@q} = (STRONG) x scalar(@q);
3729 @right_bond_strength{@q} =
3730 ( 0.4 * WEAK + 0.6 * VERY_WEAK ) x scalar(@q);
3732 # Default is to break BEFORE '&&' and '||' and '//'
3733 # set strength of '||' to same as '=' so that chains like
3734 # $a = $b || $c || $d will break before the first '||'
3735 $right_bond_strength{'||'} = NOMINAL;
3736 $left_bond_strength{'||'} = $right_bond_strength{'='};
3738 # same thing for '//'
3739 $right_bond_strength{'//'} = NOMINAL;
3740 $left_bond_strength{'//'} = $right_bond_strength{'='};
3742 # set strength of && a little higher than ||
3743 $right_bond_strength{'&&'} = NOMINAL;
3744 $left_bond_strength{'&&'} = $left_bond_strength{'||'} + 0.1;
3746 $left_bond_strength{';'} = VERY_STRONG;
3747 $right_bond_strength{';'} = VERY_WEAK;
3748 $left_bond_strength{'f'} = VERY_STRONG;
3750 # make right strength of for ';' a little less than '='
3751 # to make for contents break after the ';' to avoid this:
3752 # for ( $j = $number_of_fields - 1 ; $j < $item_count ; $j +=
3753 # $number_of_fields )
3754 # and make it weaker than ',' and 'and' too
3755 $right_bond_strength{'f'} = VERY_WEAK - 0.03;
3757 # The strengths of ?/: should be somewhere between
3758 # an '=' and a quote (NOMINAL),
3759 # make strength of ':' slightly less than '?' to help
3760 # break long chains of ? : after the colons
3761 $left_bond_strength{':'} = 0.4 * WEAK + 0.6 * NOMINAL;
3762 $right_bond_strength{':'} = NO_BREAK;
3763 $left_bond_strength{'?'} = $left_bond_strength{':'} + 0.01;
3764 $right_bond_strength{'?'} = NO_BREAK;
3766 $left_bond_strength{','} = VERY_STRONG;
3767 $right_bond_strength{','} = VERY_WEAK;
3769 # remaining digraphs and trigraphs not defined above
3770 @q = qw( :: <> ++ --);
3771 @left_bond_strength{@q} = (WEAK) x scalar(@q);
3772 @right_bond_strength{@q} = (STRONG) x scalar(@q);
3774 # Set bond strengths of certain keywords
3775 # make 'or', 'err', 'and' slightly weaker than a ','
3776 $left_bond_strength{'and'} = VERY_WEAK - 0.01;
3777 $left_bond_strength{'or'} = VERY_WEAK - 0.02;
3778 $left_bond_strength{'err'} = VERY_WEAK - 0.02;
3779 $left_bond_strength{'xor'} = VERY_WEAK - 0.01;
3780 $right_bond_strength{'and'} = NOMINAL;
3781 $right_bond_strength{'or'} = NOMINAL;
3782 $right_bond_strength{'err'} = NOMINAL;
3783 $right_bond_strength{'xor'} = NOMINAL;
3785 #---------------------------------------------------------------
3786 # Bond Strength BEGIN Section 2.
3787 # Set binary rules for bond strengths between certain token types.
3788 #---------------------------------------------------------------
3790 # We have a little problem making tables which apply to the
3791 # container tokens. Here is a list of container tokens and
3794 # type tokens // meaning
3795 # { {, [, ( // indent
3796 # } }, ], ) // outdent
3797 # [ [ // left non-structural [ (enclosing an array index)
3798 # ] ] // right non-structural square bracket
3799 # ( ( // left non-structural paren
3800 # ) ) // right non-structural paren
3801 # L { // left non-structural curly brace (enclosing a key)
3802 # R } // right non-structural curly brace
3804 # Some rules apply to token types and some to just the token
3805 # itself. We solve the problem by combining type and token into a
3806 # new hash key for the container types.
3808 # If a rule applies to a token 'type' then we need to make rules
3809 # for each of these 'type.token' combinations:
3820 # If a rule applies to a token then we need to make rules for
3821 # these 'type.token' combinations:
3830 # allow long lines before final { in an if statement, as in:
3835 # Otherwise, the line before the { tends to be too short.
3837 $binary_bond_strength{'))'}{'{{'} = VERY_WEAK + 0.03;
3838 $binary_bond_strength{'(('}{'{{'} = NOMINAL;
3840 # break on something like '} (', but keep this stronger than a ','
3841 # example is in 'howe.pl'
3842 $binary_bond_strength{'R}'}{'(('} = 0.8 * VERY_WEAK + 0.2 * WEAK;
3843 $binary_bond_strength{'}}'}{'(('} = 0.8 * VERY_WEAK + 0.2 * WEAK;
3845 # keep matrix and hash indices together
3846 # but make them a little below STRONG to allow breaking open
3847 # something like {'some-word'}{'some-very-long-word'} at the }{
3849 $binary_bond_strength{']]'}{'[['} = 0.9 * STRONG + 0.1 * NOMINAL;
3850 $binary_bond_strength{']]'}{'L{'} = 0.9 * STRONG + 0.1 * NOMINAL;
3851 $binary_bond_strength{'R}'}{'[['} = 0.9 * STRONG + 0.1 * NOMINAL;
3852 $binary_bond_strength{'R}'}{'L{'} = 0.9 * STRONG + 0.1 * NOMINAL;
3854 # increase strength to the point where a break in the following
3855 # will be after the opening paren rather than at the arrow:
3857 $binary_bond_strength{'i'}{'->'} = 1.45 * STRONG;
3859 # Note that the following alternative strength would make the break at the
3860 # '->' rather than opening the '('. Both have advantages and disadvantages.
3861 # $binary_bond_strength{'i'}{'->'} = 0.5*STRONG + 0.5 * NOMINAL; #
3863 $binary_bond_strength{'))'}{'->'} = 0.1 * STRONG + 0.9 * NOMINAL;
3864 $binary_bond_strength{']]'}{'->'} = 0.1 * STRONG + 0.9 * NOMINAL;
3865 $binary_bond_strength{'})'}{'->'} = 0.1 * STRONG + 0.9 * NOMINAL;
3866 $binary_bond_strength{'}]'}{'->'} = 0.1 * STRONG + 0.9 * NOMINAL;
3867 $binary_bond_strength{'}}'}{'->'} = 0.1 * STRONG + 0.9 * NOMINAL;
3868 $binary_bond_strength{'R}'}{'->'} = 0.1 * STRONG + 0.9 * NOMINAL;
3870 $binary_bond_strength{'))'}{'[['} = 0.2 * STRONG + 0.8 * NOMINAL;
3871 $binary_bond_strength{'})'}{'[['} = 0.2 * STRONG + 0.8 * NOMINAL;
3872 $binary_bond_strength{'))'}{'{['} = 0.2 * STRONG + 0.8 * NOMINAL;
3873 $binary_bond_strength{'})'}{'{['} = 0.2 * STRONG + 0.8 * NOMINAL;
3875 #---------------------------------------------------------------
3876 # Binary NO_BREAK rules
3877 #---------------------------------------------------------------
3879 # use strict requires that bare word and => not be separated
3880 $binary_bond_strength{'C'}{'=>'} = NO_BREAK;
3881 $binary_bond_strength{'U'}{'=>'} = NO_BREAK;
3883 # Never break between a bareword and a following paren because
3884 # perl may give an error. For example, if a break is placed
3885 # between 'to_filehandle' and its '(' the following line will
3886 # give a syntax error [Carp.pm]: my( $no) =fileno(
3887 # to_filehandle( $in)) ;
3888 $binary_bond_strength{'C'}{'(('} = NO_BREAK;
3889 $binary_bond_strength{'C'}{'{('} = NO_BREAK;
3890 $binary_bond_strength{'U'}{'(('} = NO_BREAK;
3891 $binary_bond_strength{'U'}{'{('} = NO_BREAK;
3893 # use strict requires that bare word within braces not start new
3895 $binary_bond_strength{'L{'}{'w'} = NO_BREAK;
3897 $binary_bond_strength{'w'}{'R}'} = NO_BREAK;
3899 # The following two rules prevent a syntax error caused by breaking up
3900 # a construction like '{-y}'. The '-' quotes the 'y' and prevents
3901 # it from being taken as a transliteration. We have to keep
3902 # token types 'L m w' together to prevent this error.
3903 $binary_bond_strength{'L{'}{'m'} = NO_BREAK;
3904 $binary_bond_strength_nospace{'m'}{'w'} = NO_BREAK;
3906 # keep 'bareword-' together, but only if there is no space between
3907 # the word and dash. Do not keep together if there is a space.
3908 # example 'use perl6-alpha'
3909 $binary_bond_strength_nospace{'w'}{'m'} = NO_BREAK;
3911 # use strict requires that bare word and => not be separated
3912 $binary_bond_strength{'w'}{'=>'} = NO_BREAK;
3914 # use strict does not allow separating type info from trailing { }
3915 # testfile is readmail.pl
3916 $binary_bond_strength{'t'}{'L{'} = NO_BREAK;
3917 $binary_bond_strength{'i'}{'L{'} = NO_BREAK;
3919 # As a defensive measure, do not break between a '(' and a
3920 # filehandle. In some cases, this can cause an error. For
3921 # example, the following program works:
3928 # But this program fails:
3936 # This is normally only a problem with the 'extrude' option
3937 $binary_bond_strength{'(('}{'Y'} = NO_BREAK;
3938 $binary_bond_strength{'{('}{'Y'} = NO_BREAK;
3940 # never break between sub name and opening paren
3941 $binary_bond_strength{'w'}{'(('} = NO_BREAK;
3942 $binary_bond_strength{'w'}{'{('} = NO_BREAK;
3944 # keep '}' together with ';'
3945 $binary_bond_strength{'}}'}{';'} = NO_BREAK;
3947 # Breaking before a ++ can cause perl to guess wrong. For
3948 # example the following line will cause a syntax error
3949 # with -extrude if we break between '$i' and '++' [fixstyle2]
3950 # print( ( $i++ & 1 ) ? $_ : ( $change{$_} || $_ ) );
3951 $nobreak_lhs{'++'} = NO_BREAK;
3953 # Do not break before a possible file handle
3954 $nobreak_lhs{'Z'} = NO_BREAK;
3956 # use strict hates bare words on any new line. For
3957 # example, a break before the underscore here provokes the
3958 # wrath of use strict:
3959 # if ( -r $fn && ( -s _ || $AllowZeroFilesize)) {
3960 $nobreak_rhs{'F'} = NO_BREAK;
3961 $nobreak_rhs{'CORE::'} = NO_BREAK;
3963 # To prevent the tokenizer from switching between types 'w' and 'G' we
3964 # need to avoid breaking between type 'G' and the following code block
3965 # brace. Fixes case b929.
3966 $nobreak_rhs{G} = NO_BREAK;
3968 #---------------------------------------------------------------
3969 # Bond Strength BEGIN Section 3.
3970 # Define tables and values for applying a small bias to the above
3972 #---------------------------------------------------------------
3973 # Adding a small 'bias' to strengths is a simple way to make a line
3974 # break at the first of a sequence of identical terms. For
3975 # example, to force long string of conditional operators to break
3976 # with each line ending in a ':', we can add a small number to the
3977 # bond strength of each ':' (colon.t)
3978 @bias_tokens = qw( : && || f and or . ); # tokens which get bias
3979 %bias_hash = map { $_ => 0 } @bias_tokens;
3980 $delta_bias = 0.0001; # a very small strength level
3983 } ## end sub initialize_bond_strength_hashes
3985 use constant DEBUG_BOND => 0;
3987 sub set_bond_strengths {
3991 my $rK_weld_right = $self->[_rK_weld_right_];
3992 my $rK_weld_left = $self->[_rK_weld_left_];
3994 # patch-its always ok to break at end of line
3995 $nobreak_to_go[$max_index_to_go] = 0;
3997 # we start a new set of bias values for each line
4000 my $code_bias = -.01; # bias for closing block braces
4004 my $token_length = 1;
4006 my $last_nonblank_type = $type;
4007 my $last_nonblank_token = $token;
4008 my $list_str = $left_bond_strength{'?'};
4010 my ( $bond_str_1, $bond_str_2, $bond_str_3, $bond_str_4 );
4012 my ( $block_type, $i_next, $i_next_nonblank, $next_nonblank_token,
4013 $next_nonblank_type, $next_token, $next_type,
4014 $total_nesting_depth, );
4016 # main loop to compute bond strengths between each pair of tokens
4017 foreach my $i ( 0 .. $max_index_to_go ) {
4019 if ( $type ne 'b' ) {
4020 $last_nonblank_type = $type;
4021 $last_nonblank_token = $token;
4023 $type = $types_to_go[$i];
4025 # strength on both sides of a blank is the same
4026 if ( $type eq 'b' && $last_type ne 'b' ) {
4027 $bond_strength_to_go[$i] = $bond_strength_to_go[ $i - 1 ];
4028 $nobreak_to_go[$i] ||= $nobreak_to_go[ $i - 1 ]; # fix for b1257
4032 $token = $tokens_to_go[$i];
4033 $token_length = $token_lengths_to_go[$i];
4034 $block_type = $block_type_to_go[$i];
4036 $next_type = $types_to_go[$i_next];
4037 $next_token = $tokens_to_go[$i_next];
4038 $total_nesting_depth = $nesting_depth_to_go[$i_next];
4039 $i_next_nonblank = ( ( $next_type eq 'b' ) ? $i + 2 : $i + 1 );
4040 $next_nonblank_type = $types_to_go[$i_next_nonblank];
4041 $next_nonblank_token = $tokens_to_go[$i_next_nonblank];
4043 my $seqno = $type_sequence_to_go[$i];
4044 my $next_nonblank_seqno = $type_sequence_to_go[$i_next_nonblank];
4046 # We are computing the strength of the bond between the current
4047 # token and the NEXT token.
4049 #---------------------------------------------------------------
4050 # Bond Strength Section 1:
4051 # First Approximation.
4052 # Use minimum of individual left and right tabulated bond
4054 #---------------------------------------------------------------
4055 my $bsr = $right_bond_strength{$type};
4056 my $bsl = $left_bond_strength{$next_nonblank_type};
4058 # define right bond strengths of certain keywords
4059 if ( $type eq 'k' && defined( $right_bond_strength{$token} ) ) {
4060 $bsr = $right_bond_strength{$token};
4062 elsif ( $token eq 'ne' or $token eq 'eq' ) {
4066 # set terminal bond strength to the nominal value
4067 # this will cause good preceding breaks to be retained
4068 if ( $i_next_nonblank > $max_index_to_go ) {
4072 # define right bond strengths of certain keywords
4073 if ( $next_nonblank_type eq 'k'
4074 && defined( $left_bond_strength{$next_nonblank_token} ) )
4076 $bsl = $left_bond_strength{$next_nonblank_token};
4078 elsif ($next_nonblank_token eq 'ne'
4079 or $next_nonblank_token eq 'eq' )
4083 elsif ( $is_lt_gt_le_ge{$next_nonblank_token} ) {
4084 $bsl = 0.9 * NOMINAL + 0.1 * STRONG;
4087 # Use the minimum of the left and right strengths. Note: it might
4088 # seem that we would want to keep a NO_BREAK if either token has
4089 # this value. This didn't work, for example because in an arrow
4090 # list, it prevents the comma from separating from the following
4091 # bare word (which is probably quoted by its arrow). So necessary
4092 # NO_BREAK's have to be handled as special cases in the final
4094 if ( !defined($bsr) ) { $bsr = VERY_STRONG }
4095 if ( !defined($bsl) ) { $bsl = VERY_STRONG }
4096 my $bond_str = ( $bsr < $bsl ) ? $bsr : $bsl;
4097 $bond_str_1 = $bond_str if (DEBUG_BOND);
4099 #---------------------------------------------------------------
4100 # Bond Strength Section 2:
4101 # Apply hardwired rules..
4102 #---------------------------------------------------------------
4104 # Patch to put terminal or clauses on a new line: Weaken the bond
4105 # at an || followed by die or similar keyword to make the terminal
4106 # or clause fall on a new line, like this:
4109 # || die "Cannot add broadcast: No class identifier found";
4111 # Otherwise the break will be at the previous '=' since the || and
4112 # = have the same starting strength and the or is biased, like
4116 # shift || die "Cannot add broadcast: No class identifier found";
4118 # In any case if the user places a break at either the = or the ||
4119 # it should remain there.
4120 if ( $type eq '||' || $type eq 'k' && $token eq 'or' ) {
4121 if ( $next_nonblank_token =~ /^(die|confess|croak|warn)$/ ) {
4122 if ( $want_break_before{$token} && $i > 0 ) {
4123 $bond_strength_to_go[ $i - 1 ] -= $delta_bias;
4125 # keep bond strength of a token and its following blank
4127 if ( $types_to_go[ $i - 1 ] eq 'b' && $i > 2 ) {
4128 $bond_strength_to_go[ $i - 2 ] -= $delta_bias;
4132 $bond_str -= $delta_bias;
4137 # good to break after end of code blocks
4138 if ( $type eq '}' && $block_type && $next_nonblank_type ne ';' ) {
4140 $bond_str = 0.5 * WEAK + 0.5 * VERY_WEAK + $code_bias;
4141 $code_bias += $delta_bias;
4144 if ( $type eq 'k' ) {
4146 # allow certain control keywords to stand out
4147 if ( $next_nonblank_type eq 'k'
4148 && $is_last_next_redo_return{$token} )
4150 $bond_str = 0.45 * WEAK + 0.55 * VERY_WEAK;
4153 # Don't break after keyword my. This is a quick fix for a
4154 # rare problem with perl. An example is this line from file
4157 # foreach my $question( Debian::DebConf::ConfigDb::gettree(
4158 # $this->{'question'} ) )
4160 if ( $token eq 'my' ) {
4161 $bond_str = NO_BREAK;
4166 # good to break before 'if', 'unless', etc
4167 if ( $is_if_brace_follower{$next_nonblank_token} ) {
4168 $bond_str = VERY_WEAK;
4171 if ( $next_nonblank_type eq 'k' && $type ne 'CORE::' ) {
4173 if ( $is_keyword_returning_list{$next_nonblank_token} ) {
4174 $bond_str = $list_str if ( $bond_str > $list_str );
4177 # keywords like 'unless', 'if', etc, within statements
4179 if ( $is_good_keyword_breakpoint{$next_nonblank_token} ) {
4180 $bond_str = VERY_WEAK / 1.05;
4184 # try not to break before a comma-arrow
4185 elsif ( $next_nonblank_type eq '=>' ) {
4186 if ( $bond_str < STRONG ) { $bond_str = STRONG }
4189 #---------------------------------------------------------------
4190 # Additional hardwired NOBREAK rules
4191 #---------------------------------------------------------------
4193 # map1.t -- correct for a quirk in perl
4195 && $next_nonblank_type eq 'i'
4196 && $last_nonblank_type eq 'k'
4197 && $is_sort_map_grep{$last_nonblank_token} )
4199 # /^(sort|map|grep)$/ )
4201 $bond_str = NO_BREAK;
4204 # extrude.t: do not break before paren at:
4206 if ( $last_nonblank_type eq 'F' && $next_nonblank_token eq '(' ) {
4207 $bond_str = NO_BREAK;
4210 # OLD COMMENT: In older version of perl, use strict can cause
4211 # problems with breaks before bare words following opening parens.
4212 # For example, this will fail under older versions if a break is
4213 # made between '(' and 'MAIL':
4215 # use strict; open( MAIL, "a long filename or command"); close MAIL;
4217 # NEW COMMENT: Third fix for b1213:
4218 # This option does not seem to be needed any longer, and it can
4219 # cause instabilities. It can be turned off, but to minimize
4220 # changes to existing formatting it is retained only in the case
4221 # where the previous token was 'open' and there was no line break.
4222 # Even this could eventually be removed if it causes instability.
4223 if ( $type eq '{' ) {
4226 && $next_nonblank_type eq 'w'
4227 && $last_nonblank_type eq 'k'
4228 && $last_nonblank_token eq 'open'
4229 && !$old_breakpoint_to_go[$i] )
4231 $bond_str = NO_BREAK;
4235 # Do not break between a possible filehandle and a ? or / and do
4236 # not introduce a break after it if there is no blank
4238 elsif ( $type eq 'Z' ) {
4243 # if there is no blank and we do not want one. Examples:
4244 # print $x++ # do not break after $x
4245 # print HTML"HELLO" # break ok after HTML
4248 && defined( $want_left_space{$next_type} )
4249 && $want_left_space{$next_type} == WS_NO
4252 # or we might be followed by the start of a quote,
4253 # and this is not an existing breakpoint; fixes c039.
4254 || !$old_breakpoint_to_go[$i]
4255 && substr( $next_nonblank_token, 0, 1 ) eq '/'
4259 $bond_str = NO_BREAK;
4263 # Breaking before a ? before a quote can cause trouble if
4264 # they are not separated by a blank.
4265 # Example: a syntax error occurs if you break before the ? here
4266 # my$logic=join$all?' && ':' || ',@regexps;
4267 # From: Professional_Perl_Programming_Code/multifind.pl
4268 if ( $next_nonblank_type eq '?' ) {
4269 $bond_str = NO_BREAK
4270 if ( $types_to_go[ $i_next_nonblank + 1 ] eq 'Q' );
4273 # Breaking before a . followed by a number
4274 # can cause trouble if there is no intervening space
4275 # Example: a syntax error occurs if you break before the .2 here
4276 # $str .= pack($endian.2, ensurrogate($ord));
4277 # From: perl58/Unicode.pm
4278 elsif ( $next_nonblank_type eq '.' ) {
4279 $bond_str = NO_BREAK
4280 if ( $types_to_go[ $i_next_nonblank + 1 ] eq 'n' );
4284 elsif ( $type eq 'w' ) {
4285 $bond_str = NO_BREAK
4286 if ( !$old_breakpoint_to_go[$i]
4287 && substr( $next_nonblank_token, 0, 1 ) eq '/' );
4290 $bond_str_2 = $bond_str if (DEBUG_BOND);
4292 #---------------------------------------------------------------
4293 # End of hardwired rules
4294 #---------------------------------------------------------------
4296 #---------------------------------------------------------------
4297 # Bond Strength Section 3:
4298 # Apply table rules. These have priority over the above
4300 #---------------------------------------------------------------
4302 my $tabulated_bond_str;
4304 my $rtype = $next_nonblank_type;
4305 if ( $seqno && $is_container_token{$token} ) {
4306 $ltype = $type . $token;
4309 if ( $next_nonblank_seqno
4310 && $is_container_token{$next_nonblank_token} )
4312 $rtype = $next_nonblank_type . $next_nonblank_token;
4314 # Alternate Fix #1 for issue b1299. This version makes the
4315 # decision as soon as possible. See Alternate Fix #2 also.
4316 # Do not separate a bareword identifier from its paren: b1299
4317 # This is currently needed for stability because if the bareword
4318 # gets separated from a preceding '->' and following '(' then
4319 # the tokenizer may switch from type 'i' to type 'w'. This
4320 # patch will prevent this by keeping it adjacent to its '('.
4321 ## if ( $next_nonblank_token eq '('
4323 ## && substr( $token, 0, 1 ) =~ /^\w$/ )
4329 # apply binary rules which apply regardless of space between tokens
4330 if ( $binary_bond_strength{$ltype}{$rtype} ) {
4331 $bond_str = $binary_bond_strength{$ltype}{$rtype};
4332 $tabulated_bond_str = $bond_str;
4335 # apply binary rules which apply only if no space between tokens
4336 if ( $binary_bond_strength_nospace{$ltype}{$next_type} ) {
4337 $bond_str = $binary_bond_strength{$ltype}{$next_type};
4338 $tabulated_bond_str = $bond_str;
4341 if ( $nobreak_rhs{$ltype} || $nobreak_lhs{$rtype} ) {
4342 $bond_str = NO_BREAK;
4343 $tabulated_bond_str = $bond_str;
4346 $bond_str_3 = $bond_str if (DEBUG_BOND);
4348 # If the hardwired rules conflict with the tabulated bond
4349 # strength then there is an inconsistency that should be fixed
4351 && $tabulated_bond_str
4353 && $bond_str_1 != $bond_str_2
4354 && $bond_str_2 != $tabulated_bond_str
4357 "BOND_TABLES: ltype=$ltype rtype=$rtype $bond_str_1->$bond_str_2->$bond_str_3\n";
4360 #-----------------------------------------------------------------
4361 # Bond Strength Section 4:
4362 # Modify strengths of certain tokens which often occur in sequence
4363 # by adding a small bias to each one in turn so that the breaks
4364 # occur from left to right.
4366 # Note that we only changing strengths by small amounts here,
4367 # and usually increasing, so we should not be altering any NO_BREAKs.
4368 # Other routines which check for NO_BREAKs will use a tolerance
4369 # of one to avoid any problem.
4370 #-----------------------------------------------------------------
4372 # The bias tables use special keys:
4373 # $type - if not keyword
4374 # $token - if keyword, but map some keywords together
4376 $type eq 'k' ? $token eq 'err' ? 'or' : $token : $type;
4378 $next_nonblank_type eq 'k'
4379 ? $next_nonblank_token eq 'err'
4381 : $next_nonblank_token
4382 : $next_nonblank_type;
4384 if ( $type eq ',' ) {
4386 # add any bias set by sub break_lists at old comma break points
4387 $bond_str += $bond_strength_to_go[$i];
4392 elsif ( defined( $bias{$left_key} ) ) {
4393 if ( !$want_break_before{$left_key} ) {
4394 $bias{$left_key} += $delta_bias;
4395 $bond_str += $bias{$left_key};
4400 if ( defined( $bias{$right_key} ) ) {
4401 if ( $want_break_before{$right_key} ) {
4403 # for leading '.' align all but 'short' quotes; the idea
4404 # is to not place something like "\n" on a single line.
4405 if ( $right_key eq '.' ) {
4407 $last_nonblank_type eq '.'
4408 && ( $token_length <=
4409 $rOpts_short_concatenation_item_length )
4410 && ( !$is_closing_token{$token} )
4413 $bias{$right_key} += $delta_bias;
4417 $bias{$right_key} += $delta_bias;
4419 $bond_str += $bias{$right_key};
4423 $bond_str_4 = $bond_str if (DEBUG_BOND);
4425 #---------------------------------------------------------------
4426 # Bond Strength Section 5:
4427 # Fifth Approximation.
4428 # Take nesting depth into account by adding the nesting depth
4429 # to the bond strength.
4430 #---------------------------------------------------------------
4433 if ( defined($bond_str) && !$nobreak_to_go[$i] ) {
4434 if ( $total_nesting_depth > 0 ) {
4435 $strength = $bond_str + $total_nesting_depth;
4438 $strength = $bond_str;
4442 $strength = NO_BREAK;
4444 # For critical code such as lines with here targets we must
4445 # be absolutely sure that we do not allow a break. So for
4446 # these the nobreak flag exceeds 1 as a signal. Otherwise we
4447 # can run into trouble when small tolerances are added.
4448 $strength += 1 if ( $nobreak_to_go[$i] > 1 );
4451 #---------------------------------------------------------------
4452 # Bond Strength Section 6:
4453 # Sixth Approximation. Welds.
4454 #---------------------------------------------------------------
4456 # Do not allow a break within welds
4457 if ( $total_weld_count && $seqno ) {
4458 my $KK = $K_to_go[$i];
4459 if ( $rK_weld_right->{$KK} ) {
4460 $strength = NO_BREAK;
4463 # But encourage breaking after opening welded tokens
4464 elsif ($rK_weld_left->{$KK}
4465 && $is_opening_token{$token} )
4471 # always break after side comment
4472 if ( $type eq '#' ) { $strength = 0 }
4474 $bond_strength_to_go[$i] = $strength;
4476 # Fix for case c001: be sure NO_BREAK's are enforced by later
4477 # routines, except at a '?' because '?' as quote delimiter is
4479 if ( $strength >= NO_BREAK && $next_nonblank_type ne '?' ) {
4480 $nobreak_to_go[$i] ||= 1;
4484 my $str = substr( $token, 0, 15 );
4485 $str .= ' ' x ( 16 - length($str) );
4487 "BOND: i=$i $str $type $next_nonblank_type depth=$total_nesting_depth strength=$bond_str_1 -> $bond_str_2 -> $bond_str_3 -> $bond_str_4 $bond_str -> $strength \n";
4489 # reset for next pass
4490 $bond_str_1 = $bond_str_2 = $bond_str_3 = $bond_str_4 = undef;
4495 } ## end sub set_bond_strengths
4496 } ## end closure set_bond_strengths
4500 # See if a pattern will compile. We have to use a string eval here,
4501 # but it should be safe because the pattern has been constructed
4504 eval "'##'=~/$pattern/";
4508 { ## begin closure prepare_cuddled_block_types
4512 # Add keywords here which really should not be cuddled
4514 my @q = qw(if unless for foreach while);
4515 @no_cuddle{@q} = (1) x scalar(@q);
4518 sub prepare_cuddled_block_types {
4520 # the cuddled-else style, if used, is controlled by a hash that
4523 # Include keywords here which should not be cuddled
4525 my $cuddled_string = "";
4526 if ( $rOpts->{'cuddled-else'} ) {
4529 $cuddled_string = 'elsif else continue catch finally'
4530 unless ( $rOpts->{'cuddled-block-list-exclusive'} );
4532 # This is the old equivalent but more complex version
4533 # $cuddled_string = 'if-elsif-else unless-elsif-else -continue ';
4535 # Add users other blocks to be cuddled
4536 my $cuddled_block_list = $rOpts->{'cuddled-block-list'};
4537 if ($cuddled_block_list) {
4538 $cuddled_string .= " " . $cuddled_block_list;
4543 # If we have a cuddled string of the form
4544 # 'try-catch-finally'
4546 # we want to prepare a hash of the form
4548 # $rcuddled_block_types = {
4555 # use -dcbl to dump this hash
4557 # Multiple such strings are input as a space or comma separated list
4559 # If we get two lists with the same leading type, such as
4560 # -cbl = "-try-catch-finally -try-catch-otherwise"
4561 # then they will get merged as follows:
4562 # $rcuddled_block_types = {
4569 # This will allow either type of chain to be followed.
4571 $cuddled_string =~ s/,/ /g; # allow space or comma separated lists
4572 my @cuddled_strings = split /\s+/, $cuddled_string;
4574 $rcuddled_block_types = {};
4576 # process each dash-separated string...
4577 my $string_count = 0;
4578 foreach my $string (@cuddled_strings) {
4579 next unless $string;
4580 my @words = split /-+/, $string; # allow multiple dashes
4582 # we could look for and report possible errors here...
4583 next unless ( @words > 0 );
4585 # allow either '-continue' or *-continue' for arbitrary starting type
4588 # a single word without dashes is a secondary block type
4590 $start = shift @words;
4593 # always make an entry for the leading word. If none follow, this
4594 # will still prevent a wildcard from matching this word.
4595 if ( !defined( $rcuddled_block_types->{$start} ) ) {
4596 $rcuddled_block_types->{$start} = {};
4599 # The count gives the original word order in case we ever want it.
4602 foreach my $word (@words) {
4604 if ( $no_cuddle{$word} ) {
4606 "## Ignoring keyword '$word' in -cbl; does not seem right\n"
4611 $rcuddled_block_types->{$start}->{$word} =
4612 1; #"$string_count.$word_count";
4614 # git#9: Remove this word from the list of desired one-line
4616 $want_one_line_block{$word} = 0;
4621 } ## begin closure prepare_cuddled_block_types
4623 sub dump_cuddled_block_list {
4626 # ORIGINAL METHOD: Here is the format of the cuddled block type hash
4627 # which controls this routine
4628 # my $rcuddled_block_types = {
4639 # SIMPLFIED METHOD: the simplified method uses a wildcard for
4640 # the starting block type and puts all cuddled blocks together:
4641 # my $rcuddled_block_types = {
4650 # Both methods work, but the simplified method has proven to be adequate and
4653 my $cuddled_string = $rOpts->{'cuddled-block-list'};
4654 $cuddled_string = '' unless $cuddled_string;
4657 $flags .= "-ce" if ( $rOpts->{'cuddled-else'} );
4658 $flags .= " -cbl='$cuddled_string'";
4660 unless ( $rOpts->{'cuddled-else'} ) {
4661 $flags .= "\nNote: You must specify -ce to generate a cuddled hash";
4665 ------------------------------------------------------------------------
4666 Hash of cuddled block types prepared for a run with these parameters:
4668 ------------------------------------------------------------------------
4672 $fh->print( Dumper($rcuddled_block_types) );
4675 ------------------------------------------------------------------------
4680 sub make_static_block_comment_pattern {
4682 # create the pattern used to identify static block comments
4683 $static_block_comment_pattern = '^\s*##';
4685 # allow the user to change it
4686 if ( $rOpts->{'static-block-comment-prefix'} ) {
4687 my $prefix = $rOpts->{'static-block-comment-prefix'};
4688 $prefix =~ s/^\s*//;
4689 my $pattern = $prefix;
4691 # user may give leading caret to force matching left comments only
4692 if ( $prefix !~ /^\^#/ ) {
4693 if ( $prefix !~ /^#/ ) {
4695 "ERROR: the -sbcp prefix is '$prefix' but must begin with '#' or '^#'\n"
4698 $pattern = '^\s*' . $prefix;
4700 if ( bad_pattern($pattern) ) {
4702 "ERROR: the -sbc prefix '$prefix' causes the invalid regex '$pattern'\n"
4705 $static_block_comment_pattern = $pattern;
4710 sub make_format_skipping_pattern {
4711 my ( $opt_name, $default ) = @_;
4712 my $param = $rOpts->{$opt_name};
4713 unless ($param) { $param = $default }
4715 if ( $param !~ /^#/ ) {
4716 Die("ERROR: the $opt_name parameter '$param' must begin with '#'\n");
4718 my $pattern = '^' . $param . '\s';
4719 if ( bad_pattern($pattern) ) {
4721 "ERROR: the $opt_name parameter '$param' causes the invalid regex '$pattern'\n"
4727 sub make_non_indenting_brace_pattern {
4729 # Create the pattern used to identify static side comments.
4730 # Note that we are ending the pattern in a \s. This will allow
4731 # the pattern to be followed by a space and some text, or a newline.
4732 # The pattern is used in sub 'non_indenting_braces'
4733 $non_indenting_brace_pattern = '^#<<<\s';
4735 # allow the user to change it
4736 if ( $rOpts->{'non-indenting-brace-prefix'} ) {
4737 my $prefix = $rOpts->{'non-indenting-brace-prefix'};
4738 $prefix =~ s/^\s*//;
4739 if ( $prefix !~ /^#/ ) {
4740 Die("ERROR: the -nibp parameter '$prefix' must begin with '#'\n");
4742 my $pattern = '^' . $prefix . '\s';
4743 if ( bad_pattern($pattern) ) {
4745 "ERROR: the -nibp prefix '$prefix' causes the invalid regex '$pattern'\n"
4748 $non_indenting_brace_pattern = $pattern;
4753 sub make_closing_side_comment_list_pattern {
4755 # turn any input list into a regex for recognizing selected block types
4756 $closing_side_comment_list_pattern = '^\w+';
4757 if ( defined( $rOpts->{'closing-side-comment-list'} )
4758 && $rOpts->{'closing-side-comment-list'} )
4760 $closing_side_comment_list_pattern =
4761 make_block_pattern( '-cscl', $rOpts->{'closing-side-comment-list'} );
4766 sub make_sub_matching_pattern {
4768 # Patterns for standardizing matches to block types for regular subs and
4769 # anonymous subs. Examples
4770 # 'sub process' is a named sub
4771 # 'sub ::m' is a named sub
4772 # 'sub' is an anonymous sub
4773 # 'sub:' is a label, not a sub
4774 # 'sub :' is a label, not a sub ( block type will be <sub:> )
4775 # sub'_ is a named sub ( block type will be <sub '_> )
4776 # 'substr' is a keyword
4777 # So note that named subs always have a space after 'sub'
4778 $SUB_PATTERN = '^sub\s'; # match normal sub
4779 $ASUB_PATTERN = '^sub$'; # match anonymous sub
4781 # Note (see also RT #133130): These patterns are used by
4782 # sub make_block_pattern, which is used for making most patterns.
4783 # So this sub needs to be called before other pattern-making routines.
4785 if ( $rOpts->{'sub-alias-list'} ) {
4787 # Note that any 'sub-alias-list' has been preprocessed to
4788 # be a trimmed, space-separated list which includes 'sub'
4789 # for example, it might be 'sub method fun'
4790 my $sub_alias_list = $rOpts->{'sub-alias-list'};
4791 $sub_alias_list =~ s/\s+/\|/g;
4792 $SUB_PATTERN =~ s/sub/\($sub_alias_list\)/;
4793 $ASUB_PATTERN =~ s/sub/\($sub_alias_list\)/;
4798 sub make_bl_pattern {
4800 # Set defaults lists to retain historical default behavior for -bl:
4801 my $bl_list_string = '*';
4802 my $bl_exclusion_list_string = 'sort map grep eval asub';
4804 if ( defined( $rOpts->{'brace-left-list'} )
4805 && $rOpts->{'brace-left-list'} )
4807 $bl_list_string = $rOpts->{'brace-left-list'};
4809 if ( $bl_list_string =~ /\bsub\b/ ) {
4810 $rOpts->{'opening-sub-brace-on-new-line'} ||=
4811 $rOpts->{'opening-brace-on-new-line'};
4813 if ( $bl_list_string =~ /\basub\b/ ) {
4814 $rOpts->{'opening-anonymous-sub-brace-on-new-line'} ||=
4815 $rOpts->{'opening-brace-on-new-line'};
4818 $bl_pattern = make_block_pattern( '-bll', $bl_list_string );
4820 # for -bl, a list with '*' turns on -sbl and -asbl
4821 if ( $bl_pattern =~ /\.\*/ ) {
4822 $rOpts->{'opening-sub-brace-on-new-line'} ||=
4823 $rOpts->{'opening-brace-on-new-line'};
4824 $rOpts->{'opening-anonymous-sub-brace-on-new-line'} ||=
4825 $rOpts->{'opening-anonymous-brace-on-new-line'};
4828 if ( defined( $rOpts->{'brace-left-exclusion-list'} )
4829 && $rOpts->{'brace-left-exclusion-list'} )
4831 $bl_exclusion_list_string = $rOpts->{'brace-left-exclusion-list'};
4832 if ( $bl_exclusion_list_string =~ /\bsub\b/ ) {
4833 $rOpts->{'opening-sub-brace-on-new-line'} = 0;
4835 if ( $bl_exclusion_list_string =~ /\basub\b/ ) {
4836 $rOpts->{'opening-anonymous-sub-brace-on-new-line'} = 0;
4840 $bl_exclusion_pattern =
4841 make_block_pattern( '-blxl', $bl_exclusion_list_string );
4845 sub make_bli_pattern {
4847 # default list of block types for which -bli would apply
4848 my $bli_list_string = 'if else elsif unless while for foreach do : sub';
4849 my $bli_exclusion_list_string = ' ';
4851 if ( defined( $rOpts->{'brace-left-and-indent-list'} )
4852 && $rOpts->{'brace-left-and-indent-list'} )
4854 $bli_list_string = $rOpts->{'brace-left-and-indent-list'};
4857 $bli_pattern = make_block_pattern( '-blil', $bli_list_string );
4859 if ( defined( $rOpts->{'brace-left-and-indent-exclusion-list'} )
4860 && $rOpts->{'brace-left-and-indent-exclusion-list'} )
4862 $bli_exclusion_list_string =
4863 $rOpts->{'brace-left-and-indent-exclusion-list'};
4865 $bli_exclusion_pattern =
4866 make_block_pattern( '-blixl', $bli_exclusion_list_string );
4870 sub make_keyword_group_list_pattern {
4872 # turn any input list into a regex for recognizing selected block types.
4873 # Here are the defaults:
4874 $keyword_group_list_pattern = '^(our|local|my|use|require|)$';
4875 $keyword_group_list_comment_pattern = '';
4876 if ( defined( $rOpts->{'keyword-group-blanks-list'} )
4877 && $rOpts->{'keyword-group-blanks-list'} )
4879 my @words = split /\s+/, $rOpts->{'keyword-group-blanks-list'};
4882 foreach my $word (@words) {
4883 if ( $word =~ /^(BC|SBC)$/ ) {
4884 push @comment_list, $word;
4885 if ( $word eq 'SBC' ) { push @comment_list, 'SBCX' }
4888 push @keyword_list, $word;
4891 $keyword_group_list_pattern =
4892 make_block_pattern( '-kgbl', $rOpts->{'keyword-group-blanks-list'} );
4893 $keyword_group_list_comment_pattern =
4894 make_block_pattern( '-kgbl', join( ' ', @comment_list ) );
4899 sub make_block_brace_vertical_tightness_pattern {
4901 # turn any input list into a regex for recognizing selected block types
4902 $block_brace_vertical_tightness_pattern =
4903 '^((if|else|elsif|unless|while|for|foreach|do|\w+:)$|sub)';
4904 if ( defined( $rOpts->{'block-brace-vertical-tightness-list'} )
4905 && $rOpts->{'block-brace-vertical-tightness-list'} )
4907 $block_brace_vertical_tightness_pattern =
4908 make_block_pattern( '-bbvtl',
4909 $rOpts->{'block-brace-vertical-tightness-list'} );
4914 sub make_blank_line_pattern {
4916 $blank_lines_before_closing_block_pattern = $SUB_PATTERN;
4917 my $key = 'blank-lines-before-closing-block-list';
4918 if ( defined( $rOpts->{$key} ) && $rOpts->{$key} ) {
4919 $blank_lines_before_closing_block_pattern =
4920 make_block_pattern( '-blbcl', $rOpts->{$key} );
4923 $blank_lines_after_opening_block_pattern = $SUB_PATTERN;
4924 $key = 'blank-lines-after-opening-block-list';
4925 if ( defined( $rOpts->{$key} ) && $rOpts->{$key} ) {
4926 $blank_lines_after_opening_block_pattern =
4927 make_block_pattern( '-blaol', $rOpts->{$key} );
4932 sub make_block_pattern {
4934 # given a string of block-type keywords, return a regex to match them
4935 # The only tricky part is that labels are indicated with a single ':'
4936 # and the 'sub' token text may have additional text after it (name of
4941 # input string: "if else elsif unless while for foreach do : sub";
4942 # pattern: '^((if|else|elsif|unless|while|for|foreach|do|\w+:)$|sub)';
4946 # To distinguish between anonymous subs and named subs, use 'sub' to
4947 # indicate a named sub, and 'asub' to indicate an anonymous sub
4949 my ( $abbrev, $string ) = @_;
4950 my @list = split_words($string);
4954 if ( $i eq '*' ) { my $pattern = '^.*'; return $pattern }
4957 if ( $i eq 'sub' ) {
4959 elsif ( $i eq 'asub' ) {
4961 elsif ( $i eq ';' ) {
4964 elsif ( $i eq '{' ) {
4967 elsif ( $i eq ':' ) {
4968 push @words, '\w+:';
4970 elsif ( $i =~ /^\w/ ) {
4974 Warn("unrecognized block type $i after $abbrev, ignoring\n");
4978 # Fix 2 for c091, prevent the pattern from matching an empty string
4979 # '1 ' is an impossible block name.
4980 if ( !@words ) { push @words, "1 " }
4982 my $pattern = '(' . join( '|', @words ) . ')$';
4983 my $sub_patterns = "";
4984 if ( $seen{'sub'} ) {
4985 $sub_patterns .= '|' . $SUB_PATTERN;
4987 if ( $seen{'asub'} ) {
4988 $sub_patterns .= '|' . $ASUB_PATTERN;
4990 if ($sub_patterns) {
4991 $pattern = '(' . $pattern . $sub_patterns . ')';
4993 $pattern = '^' . $pattern;
4997 sub make_static_side_comment_pattern {
4999 # create the pattern used to identify static side comments
5000 $static_side_comment_pattern = '^##';
5002 # allow the user to change it
5003 if ( $rOpts->{'static-side-comment-prefix'} ) {
5004 my $prefix = $rOpts->{'static-side-comment-prefix'};
5005 $prefix =~ s/^\s*//;
5006 my $pattern = '^' . $prefix;
5007 if ( bad_pattern($pattern) ) {
5009 "ERROR: the -sscp prefix '$prefix' causes the invalid regex '$pattern'\n"
5012 $static_side_comment_pattern = $pattern;
5017 sub make_closing_side_comment_prefix {
5019 # Be sure we have a valid closing side comment prefix
5020 my $csc_prefix = $rOpts->{'closing-side-comment-prefix'};
5021 my $csc_prefix_pattern;
5022 if ( !defined($csc_prefix) ) {
5023 $csc_prefix = '## end';
5024 $csc_prefix_pattern = '^##\s+end';
5027 my $test_csc_prefix = $csc_prefix;
5028 if ( $test_csc_prefix !~ /^#/ ) {
5029 $test_csc_prefix = '#' . $test_csc_prefix;
5032 # make a regex to recognize the prefix
5033 my $test_csc_prefix_pattern = $test_csc_prefix;
5035 # escape any special characters
5036 $test_csc_prefix_pattern =~ s/([^#\s\w])/\\$1/g;
5038 $test_csc_prefix_pattern = '^' . $test_csc_prefix_pattern;
5040 # allow exact number of intermediate spaces to vary
5041 $test_csc_prefix_pattern =~ s/\s+/\\s\+/g;
5043 # make sure we have a good pattern
5044 # if we fail this we probably have an error in escaping
5047 if ( bad_pattern($test_csc_prefix_pattern) ) {
5049 # shouldn't happen..must have screwed up escaping, above
5052 Program Error: the -cscp prefix '$csc_prefix' caused the invalid regex '$csc_prefix_pattern'
5056 # just warn and keep going with defaults
5058 "Error: the -cscp prefix '$csc_prefix' caused the invalid regex '$csc_prefix_pattern'\n"
5060 Warn("Please consider using a simpler -cscp prefix\n");
5061 Warn("Using default -cscp instead; please check output\n");
5064 $csc_prefix = $test_csc_prefix;
5065 $csc_prefix_pattern = $test_csc_prefix_pattern;
5068 $rOpts->{'closing-side-comment-prefix'} = $csc_prefix;
5069 $closing_side_comment_prefix_pattern = $csc_prefix_pattern;
5073 ##################################################
5074 # CODE SECTION 4: receive lines from the tokenizer
5075 ##################################################
5077 { ## begin closure write_line
5081 # Variables used by sub check_sequence_numbers:
5083 my %saw_opening_seqno;
5084 my %saw_closing_seqno;
5087 sub initialize_write_line {
5089 $nesting_depth = undef;
5091 $last_seqno = SEQ_ROOT;
5092 %saw_opening_seqno = ();
5093 %saw_closing_seqno = ();
5098 sub check_sequence_numbers {
5100 # Routine for checking sequence numbers. This only needs to be
5101 # done occasionally in DEVEL_MODE to be sure everything is working
5103 my ( $rtokens, $rtoken_type, $rtype_sequence, $input_line_no ) = @_;
5104 my $jmax = @{$rtokens} - 1;
5105 return unless ( $jmax >= 0 );
5106 foreach my $j ( 0 .. $jmax ) {
5107 my $seqno = $rtype_sequence->[$j];
5108 my $token = $rtokens->[$j];
5109 my $type = $rtoken_type->[$j];
5111 "Error at j=$j, line number $input_line_no, seqno='$seqno', type='$type', tok='$token':\n";
5115 # Sequence numbers are generated for opening tokens, so every opening
5116 # token should be sequenced. Closing tokens will be unsequenced
5117 # if they do not have a matching opening token.
5118 if ( $is_opening_sequence_token{$token}
5124 $err_msg Unexpected opening token without sequence number
5131 # Save starting seqno to identify sequence method:
5132 # New method starts with 2 and has continuous numbering
5133 # Old method starts with >2 and may have gaps
5134 if ( !defined($initial_seqno) ) { $initial_seqno = $seqno }
5136 if ( $is_opening_sequence_token{$token} ) {
5138 # New method should have continuous numbering
5139 if ( $initial_seqno == 2 && $seqno != $last_seqno + 1 ) {
5142 $err_msg Unexpected opening sequence number: previous seqno=$last_seqno, but seqno= $seqno
5146 $last_seqno = $seqno;
5148 # Numbers must be unique
5149 if ( $saw_opening_seqno{$seqno} ) {
5150 my $lno = $saw_opening_seqno{$seqno};
5153 $err_msg Already saw an opening tokens at line $lno with this sequence number
5157 $saw_opening_seqno{$seqno} = $input_line_no;
5160 # only one closing item per seqno
5161 elsif ( $is_closing_sequence_token{$token} ) {
5162 if ( $saw_closing_seqno{$seqno} ) {
5163 my $lno = $saw_closing_seqno{$seqno};
5166 $err_msg Already saw a closing token with this seqno at line $lno
5170 $saw_closing_seqno{$seqno} = $input_line_no;
5172 # Every closing seqno must have an opening seqno
5173 if ( !$saw_opening_seqno{$seqno} ) {
5176 $err_msg Saw a closing token but no opening token with this seqno
5182 # Sequenced items must be opening or closing
5186 $err_msg Unexpected token type with a sequence number
5197 # This routine receives lines one-by-one from the tokenizer and stores
5198 # them in a format suitable for further processing. After the last
5199 # line has been sent, the tokenizer will call sub 'finish_formatting'
5200 # to do the actual formatting.
5202 my ( $self, $line_of_tokens_old ) = @_;
5203 my $rLL = $self->[_rLL_];
5204 my $Klimit = $self->[_Klimit_];
5205 my $rlines_new = $self->[_rlines_];
5207 my $K_opening_container = $self->[_K_opening_container_];
5208 my $K_closing_container = $self->[_K_closing_container_];
5209 my $rdepth_of_opening_seqno = $self->[_rdepth_of_opening_seqno_];
5210 my $rblock_type_of_seqno = $self->[_rblock_type_of_seqno_];
5211 my $rSS = $self->[_rSS_];
5212 my $Iss_opening = $self->[_Iss_opening_];
5213 my $Iss_closing = $self->[_Iss_closing_];
5216 my $line_of_tokens = {};
5221 _guessed_indentation_level
5227 _square_bracket_depth
5232 $line_of_tokens->{$_} = $line_of_tokens_old->{$_};
5235 # Data needed by Logger
5236 $line_of_tokens->{_level_0} = 0;
5237 $line_of_tokens->{_ci_level_0} = 0;
5238 $line_of_tokens->{_nesting_blocks_0} = "";
5239 $line_of_tokens->{_nesting_tokens_0} = "";
5241 # Needed to avoid trimming quotes
5242 $line_of_tokens->{_ended_in_blank_token} = undef;
5244 my $line_type = $line_of_tokens_old->{_line_type};
5245 my $line_number = $line_of_tokens_old->{_line_number};
5249 # Handle line of non-code
5250 if ( $line_type ne 'CODE' ) {
5251 $tee_output ||= $rOpts_tee_pod
5252 && substr( $line_type, 0, 3 ) eq 'POD';
5255 # Handle line of code
5258 my $rtokens = $line_of_tokens_old->{_rtokens};
5259 my $rtoken_type = $line_of_tokens_old->{_rtoken_type};
5260 my $rblock_type = $line_of_tokens_old->{_rblock_type};
5261 my $rcontainer_type = $line_of_tokens_old->{_rcontainer_type};
5262 my $rcontainer_environment =
5263 $line_of_tokens_old->{_rcontainer_environment};
5264 my $rtype_sequence = $line_of_tokens_old->{_rtype_sequence};
5265 my $rlevels = $line_of_tokens_old->{_rlevels};
5266 my $rslevels = $line_of_tokens_old->{_rslevels};
5267 my $rci_levels = $line_of_tokens_old->{_rci_levels};
5268 my $rnesting_blocks = $line_of_tokens_old->{_rnesting_blocks};
5269 my $rnesting_tokens = $line_of_tokens_old->{_rnesting_tokens};
5271 my $jmax = @{$rtokens} - 1;
5273 $Kfirst = defined($Klimit) ? $Klimit + 1 : 0;
5276 && check_sequence_numbers( $rtokens, $rtoken_type,
5277 $rtype_sequence, $line_number );
5279 # Find the starting nesting depth ...
5280 # It must be the value of variable 'level' of the first token
5281 # because the nesting depth is used as a token tag in the
5282 # vertical aligner and is compared to actual levels.
5283 # So vertical alignment problems will occur with any other
5285 if ( !defined($nesting_depth) ) {
5286 $nesting_depth = $rlevels->[0];
5287 $nesting_depth = 0 if ( $nesting_depth < 0 );
5288 $rdepth_of_opening_seqno->[SEQ_ROOT] = $nesting_depth - 1;
5291 foreach my $j ( 0 .. $jmax ) {
5293 # Do not clip the 'level' variable yet. We will do this
5294 # later, in sub 'store_token_to_go'. The reason is that in
5295 # files with level errors, the logic in 'weld_cuddled_else'
5296 # uses a stack logic that will give bad welds if we clip
5298 ## if ( $rlevels->[$j] < 0 ) { $rlevels->[$j] = 0 }
5300 # Handle tokens with sequence numbers ...
5301 my $seqno = $rtype_sequence->[$j];
5303 my $token = $rtokens->[$j];
5305 if ( $is_opening_token{$token} ) {
5306 $K_opening_container->{$seqno} = @{$rLL};
5307 $rdepth_of_opening_seqno->[$seqno] = $nesting_depth;
5310 # Save a sequenced block type at its opening token.
5311 # Note that unsequenced block types can occur in
5312 # unbalanced code with errors but are ignored here.
5313 if ( $rblock_type->[$j] ) {
5314 my $block_type = $rblock_type->[$j];
5315 $rblock_type_of_seqno->{$seqno} = $block_type;
5316 if ( substr( $block_type, 0, 3 ) eq 'sub'
5317 || $rOpts_sub_alias_list )
5319 if ( $block_type =~ /$ASUB_PATTERN/ ) {
5320 $self->[_ris_asub_block_]->{$seqno} = 1;
5322 elsif ( $block_type =~ /$SUB_PATTERN/ ) {
5323 $self->[_ris_sub_block_]->{$seqno} = 1;
5328 elsif ( $is_closing_token{$token} ) {
5330 # The opening depth should always be defined, and
5331 # it should equal $nesting_depth-1. To protect
5332 # against unforseen error conditions, however, we
5333 # will check this and fix things if necessary. For
5334 # a test case see issue c055.
5336 $rdepth_of_opening_seqno->[$seqno];
5337 if ( !defined($opening_depth) ) {
5338 $opening_depth = $nesting_depth - 1;
5339 $opening_depth = 0 if ( $opening_depth < 0 );
5340 $rdepth_of_opening_seqno->[$seqno] =
5343 # This is not fatal but should not happen. The
5344 # tokenizer generates sequence numbers
5345 # incrementally upon encountering each new
5346 # opening token, so every positive sequence
5347 # number should correspond to an opening token.
5350 No opening token seen for closing token = '$token' at seq=$seqno at depth=$opening_depth
5354 $K_closing_container->{$seqno} = @{$rLL};
5355 $nesting_depth = $opening_depth;
5358 elsif ( $token eq '?' ) {
5360 elsif ( $token eq ':' ) {
5364 # The only sequenced types output by the tokenizer are
5365 # the opening & closing containers and the ternary
5366 # types. So we would only get here if the tokenizer has
5367 # been changed to mark some other tokens with sequence
5368 # numbers, or if an error has been introduced in a
5369 # hash such as %is_opening_container
5373 Unexpected sequenced token '$token' of type '$rtoken_type->[$j]', sequence=$seqno arrived from tokenizer.
5374 Expecting only opening or closing container tokens or ternary tokens with sequence numbers.
5380 $Iss_opening->[$seqno] = @{$rSS};
5382 # For efficiency, we find the maximum level of
5383 # opening tokens of any type. The actual maximum
5384 # level will be that of their contents which is 1
5385 # greater. That will be fixed in sub
5386 # 'finish_formatting'.
5387 my $level = $rlevels->[$j];
5388 if ( $level > $self->[_maximum_level_] ) {
5389 $self->[_maximum_level_] = $level;
5390 $self->[_maximum_level_at_line_] = $line_number;
5393 else { $Iss_closing->[$seqno] = @{$rSS} }
5394 push @{$rSS}, $sign * $seqno;
5400 _TOKEN_, _TYPE_, _TYPE_SEQUENCE_,
5401 _LEVEL_, _CI_LEVEL_, _LINE_INDEX_,
5404 $rtokens->[$j], $rtoken_type->[$j],
5405 $seqno, $rlevels->[$j],
5406 $rci_levels->[$j], $line_number - 1,
5408 push @{$rLL}, \@tokary;
5409 } ## end foreach my $j ( 0 .. $jmax )
5411 $Klimit = @{$rLL} - 1;
5413 # Need to remember if we can trim the input line
5414 $line_of_tokens->{_ended_in_blank_token} =
5415 $rtoken_type->[$jmax] eq 'b';
5417 $line_of_tokens->{_level_0} = $rlevels->[0];
5418 $line_of_tokens->{_ci_level_0} = $rci_levels->[0];
5419 $line_of_tokens->{_nesting_blocks_0} = $rnesting_blocks->[0];
5420 $line_of_tokens->{_nesting_tokens_0} = $rnesting_tokens->[0];
5421 } ## end if ( $jmax >= 0 )
5424 $rOpts_tee_block_comments
5426 && $rLL->[$Kfirst]->[_TYPE_] eq '#';
5429 $rOpts_tee_side_comments
5431 && $Klimit > $Kfirst
5432 && $rLL->[$Klimit]->[_TYPE_] eq '#';
5434 } ## end if ( $line_type eq 'CODE')
5436 # Finish storing line variables
5438 my $fh_tee = $self->[_fh_tee_];
5439 my $line_text = $line_of_tokens_old->{_line_text};
5440 $fh_tee->print($line_text) if ($fh_tee);
5443 $line_of_tokens->{_rK_range} = [ $Kfirst, $Klimit ];
5444 $line_of_tokens->{_code_type} = $CODE_type;
5445 $self->[_Klimit_] = $Klimit;
5447 push @{$rlines_new}, $line_of_tokens;
5450 } ## end closure write_line
5452 #############################################
5453 # CODE SECTION 5: Pre-process the entire file
5454 #############################################
5456 sub finish_formatting {
5458 my ( $self, $severe_error ) = @_;
5460 # The file has been tokenized and is ready to be formatted.
5461 # All of the relevant data is stored in $self, ready to go.
5463 # Check the maximum level. If it is extremely large we will give up and
5464 # output the file verbatim. Note that the actual maximum level is 1
5465 # greater than the saved value, so we fix that here.
5466 $self->[_maximum_level_] += 1;
5467 my $maximum_level = $self->[_maximum_level_];
5468 my $maximum_table_index = $#maximum_line_length_at_level;
5469 if ( !$severe_error && $maximum_level >= $maximum_table_index ) {
5470 $severe_error ||= 1;
5472 The maximum indentation level, $maximum_level, exceeds the builtin limit of $maximum_table_index.
5473 Something may be wrong; formatting will be skipped.
5477 # output file verbatim if severe error or no formatting requested
5478 if ( $severe_error || $rOpts->{notidy} ) {
5479 $self->dump_verbatim();
5484 # Update the 'save_logfile' flag based to include any tokenization errors.
5485 # We can save time by skipping logfile calls if it is not going to be saved.
5486 my $logger_object = $self->[_logger_object_];
5487 if ($logger_object) {
5488 $self->[_save_logfile_] = $logger_object->get_save_logfile();
5491 $self->set_CODE_type();
5493 # Verify that the line hash does not have any unknown keys.
5494 $self->check_line_hashes() if (DEVEL_MODE);
5496 # Make a pass through all tokens, adding or deleting any whitespace as
5497 # required. Also make any other changes, such as adding semicolons.
5498 # All token changes must be made here so that the token data structure
5499 # remains fixed for the rest of this iteration.
5500 $self->respace_tokens();
5502 $self->set_excluded_lp_containers();
5504 $self->find_multiline_qw();
5506 $self->keep_old_line_breaks();
5508 # Implement any welding needed for the -wn or -cb options
5509 $self->weld_containers();
5511 $self->collapsed_lengths()
5512 if ( $rOpts_line_up_parentheses && $rOpts_extended_line_up_parentheses );
5514 # Locate small nested blocks which should not be broken
5515 $self->mark_short_nested_blocks();
5517 $self->adjust_indentation_levels();
5519 # Verify that the main token array looks OK. If this ever causes a fault
5520 # then place similar checks before the sub calls above to localize the
5522 $self->check_rLL("Before 'process_all_lines'") if (DEVEL_MODE);
5524 # Finishes formatting and write the result to the line sink.
5525 # Eventually this call should just change the 'rlines' data according to the
5526 # new line breaks and then return so that we can do an internal iteration
5527 # before continuing with the next stages of formatting.
5528 $self->process_all_lines();
5530 # A final routine to tie up any loose ends
5538 # This routine performs two tasks:
5540 # TASK 1: Examine each line of code and set a flag '$CODE_type' to describe
5541 # any special processing that it requires.
5543 # TASK 2: Delete side comments if requested.
5545 my $rLL = $self->[_rLL_];
5546 my $Klimit = $self->[_Klimit_];
5547 my $rlines = $self->[_rlines_];
5548 my $rblock_type_of_seqno = $self->[_rblock_type_of_seqno_];
5550 my $rOpts_format_skipping_begin = $rOpts->{'format-skipping-begin'};
5551 my $rOpts_format_skipping_end = $rOpts->{'format-skipping-end'};
5552 my $rOpts_static_block_comment_prefix =
5553 $rOpts->{'static-block-comment-prefix'};
5555 # Remember indexes of lines with side comments
5556 my @ix_side_comments;
5558 my $In_format_skipping_section = 0;
5559 my $Saw_VERSION_in_this_file = 0;
5560 my $has_side_comment = 0;
5561 my ( $Kfirst, $Klast );
5564 #------------------------------
5565 # TASK 1: Loop to set CODE_type
5566 #------------------------------
5568 # Possible CODE_types
5569 # 'VB' = Verbatim - line goes out verbatim (a quote)
5570 # 'FS' = Format Skipping - line goes out verbatim
5572 # 'HSC' = Hanging Side Comment - fix this hanging side comment
5573 # 'SBCX'= Static Block Comment Without Leading Space
5574 # 'SBC' = Static Block Comment
5575 # 'BC' = Block Comment - an ordinary full line comment
5576 # 'IO' = Indent Only - line goes out unchanged except for indentation
5577 # 'NIN' = No Internal Newlines - line does not get broken
5578 # 'VER' = VERSION statement
5579 # '' = ordinary line of code with no restructions
5582 foreach my $line_of_tokens ( @{$rlines} ) {
5584 my $input_line_no = $line_of_tokens->{_line_number};
5585 my $line_type = $line_of_tokens->{_line_type};
5587 my $Last_line_had_side_comment = $has_side_comment;
5588 if ($has_side_comment) {
5589 push @ix_side_comments, $ix_line - 1;
5591 $has_side_comment = 0;
5593 next unless ( $line_type eq 'CODE' );
5595 my $Klast_prev = $Klast;
5597 my $rK_range = $line_of_tokens->{_rK_range};
5598 ( $Kfirst, $Klast ) = @{$rK_range};
5600 my $last_CODE_type = $CODE_type;
5603 my $input_line = $line_of_tokens->{_line_text};
5604 my $jmax = defined($Kfirst) ? $Klast - $Kfirst : -1;
5606 my $is_block_comment = 0;
5607 if ( $jmax >= 0 && $rLL->[$Klast]->[_TYPE_] eq '#' ) {
5608 if ( $jmax == 0 ) { $is_block_comment = 1; }
5609 else { $has_side_comment = 1 }
5612 # Write line verbatim if we are in a formatting skip section
5613 if ($In_format_skipping_section) {
5615 # Note: extra space appended to comment simplifies pattern matching
5619 # optional fast pre-check
5620 && ( substr( $rLL->[$Kfirst]->[_TOKEN_], 0, 4 ) eq '#>>>'
5621 || $rOpts_format_skipping_end )
5623 && ( $rLL->[$Kfirst]->[_TOKEN_] . " " ) =~
5624 /$format_skipping_pattern_end/
5627 $In_format_skipping_section = 0;
5628 write_logfile_entry(
5629 "Line $input_line_no: Exiting format-skipping section\n");
5635 # Check for a continued quote..
5636 if ( $line_of_tokens->{_starting_in_quote} ) {
5638 # A line which is entirely a quote or pattern must go out
5639 # verbatim. Note: the \n is contained in $input_line.
5641 if ( ( $input_line =~ "\t" ) ) {
5642 my $input_line_number = $line_of_tokens->{_line_number};
5643 $self->note_embedded_tab($input_line_number);
5650 # See if we are entering a formatting skip section
5654 # optional fast pre-check
5655 && ( substr( $rLL->[$Kfirst]->[_TOKEN_], 0, 4 ) eq '#<<<'
5656 || $rOpts_format_skipping_begin )
5658 && $rOpts_format_skipping
5659 && ( $rLL->[$Kfirst]->[_TOKEN_] . " " ) =~
5660 /$format_skipping_pattern_begin/
5663 $In_format_skipping_section = 1;
5664 write_logfile_entry(
5665 "Line $input_line_no: Entering format-skipping section\n");
5670 # ignore trailing blank tokens (they will get deleted later)
5671 if ( $jmax > 0 && $rLL->[$Klast]->[_TYPE_] eq 'b' ) {
5682 if ($is_block_comment) {
5684 # see if this is a static block comment (starts with ## by default)
5685 my $is_static_block_comment = 0;
5686 my $no_leading_space = substr( $input_line, 0, 1 ) eq '#';
5689 # optional fast pre-check
5691 substr( $rLL->[$Kfirst]->[_TOKEN_], 0, 2 ) eq '##'
5692 || $rOpts_static_block_comment_prefix
5695 && $rOpts_static_block_comments
5696 && $input_line =~ /$static_block_comment_pattern/
5699 $is_static_block_comment = 1;
5702 # Check for comments which are line directives
5703 # Treat exactly as static block comments without leading space
5704 # reference: perlsyn, near end, section Plain Old Comments (Not!)
5705 # example: '# line 42 "new_filename.plx"'
5708 && $input_line =~ /^\# \s*
5710 (?:\s("?)([^"]+)\2)? \s*
5714 $is_static_block_comment = 1;
5717 # look for hanging side comment ...
5719 $Last_line_had_side_comment # last line had side comment
5720 && !$no_leading_space # there is some leading space
5722 $is_static_block_comment # do not make static comment hanging
5726 # continuing an existing HSC chain?
5727 if ( $last_CODE_type eq 'HSC' ) {
5728 $has_side_comment = 1;
5733 # starting a new HSC chain?
5736 $rOpts->{'hanging-side-comments'} # user is allowing
5737 # hanging side comments
5740 && ( defined($Klast_prev) && $Klast_prev > 1 )
5742 # and the previous side comment was not static (issue c070)
5744 $rOpts->{'static-side-comments'}
5745 && $rLL->[$Klast_prev]->[_TOKEN_] =~
5746 /$static_side_comment_pattern/
5752 # and it is not a closing side comment (issue c070).
5753 my $K_penult = $Klast_prev - 1;
5754 $K_penult -= 1 if ( $rLL->[$K_penult]->[_TYPE_] eq 'b' );
5756 ( $rLL->[$K_penult]->[_TOKEN_] eq '}'
5757 && $rLL->[$K_penult]->[_TYPE_] eq '}'
5758 && $rLL->[$Klast_prev]->[_TOKEN_] =~
5759 /$closing_side_comment_prefix_pattern/ );
5761 if ( !$follows_csc ) {
5762 $has_side_comment = 1;
5769 if ($is_static_block_comment) {
5770 $CODE_type = $no_leading_space ? 'SBCX' : 'SBC';
5773 elsif ($Last_line_had_side_comment
5774 && !$rOpts_maximum_consecutive_blank_lines
5775 && $rLL->[$Kfirst]->[_LEVEL_] > 0 )
5777 # Emergency fix to keep a block comment from becoming a hanging
5778 # side comment. This fix is for the case that blank lines
5779 # cannot be inserted. There is related code in sub
5780 # 'process_line_of_CODE'
5781 $CODE_type = 'SBCX';
5790 # End of comments. Handle a line of normal code:
5792 if ($rOpts_indent_only) {
5797 if ( !$rOpts_add_newlines ) {
5802 # Patch needed for MakeMaker. Do not break a statement
5803 # in which $VERSION may be calculated. See MakeMaker.pm;
5804 # this is based on the coding in it.
5805 # The first line of a file that matches this will be eval'd:
5806 # /([\$*])(([\w\:\']*)\bVERSION)\b.*\=/
5808 # *VERSION = \'1.01';
5809 # ( $VERSION ) = '$Revision: 1.74 $ ' =~ /\$Revision:\s+([^\s]+)/;
5810 # We will pass such a line straight through without breaking
5811 # it unless -npvl is used.
5813 # Patch for problem reported in RT #81866, where files
5814 # had been flattened into a single line and couldn't be
5815 # tidied without -npvl. There are two parts to this patch:
5816 # First, it is not done for a really long line (80 tokens for now).
5817 # Second, we will only allow up to one semicolon
5818 # before the VERSION. We need to allow at least one semicolon
5819 # for statements like this:
5820 # require Exporter; our $VERSION = $Exporter::VERSION;
5821 # where both statements must be on a single line for MakeMaker
5823 my $is_VERSION_statement = 0;
5824 if ( !$Saw_VERSION_in_this_file
5827 /^[^;]*;?[^;]*([\$*])(([\w\:\']*)\bVERSION)\b.*\=/ )
5829 $Saw_VERSION_in_this_file = 1;
5830 write_logfile_entry("passing VERSION line; -npvl deactivates\n");
5832 # This code type has lower priority than others
5838 $line_of_tokens->{_code_type} = $CODE_type;
5841 if ($has_side_comment) {
5842 push @ix_side_comments, $ix_line;
5846 if ( !$rOpts_delete_side_comments
5847 && !$rOpts_delete_closing_side_comments );
5849 #-------------------------------------
5850 # TASK 2: Loop to delete side comments
5851 #-------------------------------------
5853 # Handle any requested side comment deletions. It is easier to get
5854 # this done here rather than farther down the pipeline because IO
5855 # lines take a different route, and because lines with deleted HSC
5856 # become BL lines. We have already handled any tee requests in sub
5857 # getline, so it is safe to delete side comments now.
5859 # Also, we can get this done efficiently here.
5861 foreach my $ix (@ix_side_comments) {
5862 my $line_of_tokens = $rlines->[$ix];
5863 my $line_type = $line_of_tokens->{_line_type};
5865 # This fault shouldn't happen because we only saved CODE lines with
5866 # side comments in the TASK 1 loop above.
5867 if ( $line_type ne 'CODE' ) {
5870 Hit unexpected line_type = '$line_type' while deleting side comments, should be 'CODE'
5876 my $CODE_type = $line_of_tokens->{_code_type};
5877 my $rK_range = $line_of_tokens->{_rK_range};
5878 my ( $Kfirst, $Klast ) = @{$rK_range};
5879 my $delete_side_comment =
5880 $rOpts_delete_side_comments
5882 && $rLL->[$Klast]->[_TYPE_] eq '#'
5883 && ( $Klast > $Kfirst || $CODE_type eq 'HSC' )
5885 || $CODE_type eq 'HSC'
5886 || $CODE_type eq 'IO'
5887 || $CODE_type eq 'NIN' );
5890 $rOpts_delete_closing_side_comments
5891 && !$delete_side_comment
5894 && $rLL->[$Klast]->[_TYPE_] eq '#'
5896 || $CODE_type eq 'HSC'
5897 || $CODE_type eq 'IO'
5898 || $CODE_type eq 'NIN' )
5901 my $token = $rLL->[$Klast]->[_TOKEN_];
5902 my $K_m = $Klast - 1;
5903 my $type_m = $rLL->[$K_m]->[_TYPE_];
5904 if ( $type_m eq 'b' && $K_m > $Kfirst ) { $K_m-- }
5905 my $seqno_m = $rLL->[$K_m]->[_TYPE_SEQUENCE_];
5907 my $block_type_m = $rblock_type_of_seqno->{$seqno_m};
5909 && $token =~ /$closing_side_comment_prefix_pattern/
5910 && $block_type_m =~ /$closing_side_comment_list_pattern/ )
5912 $delete_side_comment = 1;
5915 } ## end if ( $rOpts_delete_closing_side_comments...)
5917 if ($delete_side_comment) {
5919 # We are actually just changing the side comment to a blank.
5920 # This may produce multiple blanks in a row, but sub respace_tokens
5921 # will check for this and fix it.
5922 $rLL->[$Klast]->[_TYPE_] = 'b';
5923 $rLL->[$Klast]->[_TOKEN_] = ' ';
5925 # The -io option outputs the line text, so we have to update
5926 # the line text so that the comment does not reappear.
5927 if ( $CODE_type eq 'IO' ) {
5929 foreach my $KK ( $Kfirst .. $Klast - 1 ) {
5930 $line .= $rLL->[$KK]->[_TOKEN_];
5933 $line_of_tokens->{_line_text} = $line . "\n";
5936 # If we delete a hanging side comment the line becomes blank.
5937 if ( $CODE_type eq 'HSC' ) { $line_of_tokens->{_code_type} = 'BL' }
5946 my $rlines = $self->[_rlines_];
5947 foreach my $line ( @{$rlines} ) {
5948 my $input_line = $line->{_line_text};
5949 $self->write_unindented_line($input_line);
5958 my %is_nonlist_keyword;
5959 my %is_nonlist_type;
5960 my %is_special_check_type;
5962 my %is_unexpected_equals;
5966 # added 'U' to fix cases b1125 b1126 b1127
5968 @{wU}{@q} = (1) x scalar(@q);
5970 @q = qw(w i q Q G C Z);
5971 @{wiq}{@q} = (1) x scalar(@q);
5974 @{is_wit}{@q} = (1) x scalar(@q);
5977 @{is_sigil}{@q} = (1) x scalar(@q);
5979 # Parens following these keywords will not be marked as lists. Note that
5980 # 'for' is not included and is handled separately, by including 'f' in the
5981 # hash %is_counted_type, since it may or may not be a c-style for loop.
5982 @q = qw( if elsif unless and or );
5983 @is_nonlist_keyword{@q} = (1) x scalar(@q);
5985 # Parens following these types will not be marked as lists
5987 @is_nonlist_type{@q} = (1) x scalar(@q);
5990 @is_s_y_m_slash{@q} = (1) x scalar(@q);
5993 @is_unexpected_equals{@q} = (1) x scalar(@q);
5997 sub respace_tokens {
6000 return if $rOpts->{'indent-only'};
6002 # This routine is called once per file to do as much formatting as possible
6003 # before new line breaks are set.
6005 # This routine makes all necessary and possible changes to the tokenization
6006 # after the initial tokenization of the file. This is a tedious routine,
6007 # but basically it consists of inserting and deleting whitespace between
6008 # nonblank tokens according to the selected parameters. In a few cases
6009 # non-space characters are added, deleted or modified.
6011 # The goal of this routine is to create a new token array which only needs
6012 # the definition of new line breaks and padding to complete formatting. In
6013 # a few cases we have to cheat a little to achieve this goal. In
6014 # particular, we may not know if a semicolon will be needed, because it
6015 # depends on how the line breaks go. To handle this, we include the
6016 # semicolon as a 'phantom' which can be displayed as normal or as an empty
6019 # Method: The old tokens are copied one-by-one, with changes, from the old
6020 # linear storage array $rLL to a new array $rLL_new.
6022 my $rLL = $self->[_rLL_];
6023 my $Klimit_old = $self->[_Klimit_];
6024 my $rlines = $self->[_rlines_];
6025 my $length_function = $self->[_length_function_];
6026 my $is_encoded_data = $self->[_is_encoded_data_];
6028 my $rLL_new = []; # This is the new array
6030 my $Ktoken_vars; # the old K value of $rtoken_vars
6031 my ( $Kfirst_old, $Klast_old ); # Range of old line
6032 my $Klast_old_code; # K of last token if side comment
6033 my $Kmax = @{$rLL} - 1;
6038 # Set the whitespace flags, which indicate the token spacing preference.
6039 my $rwhitespace_flags = $self->set_whitespace_flags();
6041 # we will be setting token lengths as we go
6042 my $cumulative_length = 0;
6045 my %K_old_opening_by_seqno = (); # Note: old K index
6047 my $depth_next_max = 0;
6049 # Note that $K_opening_container and $K_closing_container have values
6050 # defined in sub get_line() for the previous K indexes. They were needed
6051 # in case option 'indent-only' was set, and we didn't get here. We no longer
6052 # need those and will eliminate them now to avoid any possible mixing of
6053 # old and new values.
6054 my $K_opening_container = $self->[_K_opening_container_] = {};
6055 my $K_closing_container = $self->[_K_closing_container_] = {};
6057 my $K_closing_ternary = $self->[_K_closing_ternary_];
6058 my $K_opening_ternary = $self->[_K_opening_ternary_];
6059 my $rK_phantom_semicolons = $self->[_rK_phantom_semicolons_];
6060 my $rchildren_of_seqno = $self->[_rchildren_of_seqno_];
6061 my $rhas_broken_code_block = $self->[_rhas_broken_code_block_];
6062 my $rhas_broken_list = $self->[_rhas_broken_list_];
6063 my $rhas_broken_list_with_lec = $self->[_rhas_broken_list_with_lec_];
6064 my $rhas_code_block = $self->[_rhas_code_block_];
6065 my $rhas_list = $self->[_rhas_list_];
6066 my $rhas_ternary = $self->[_rhas_ternary_];
6067 my $ris_assigned_structure = $self->[_ris_assigned_structure_];
6068 my $ris_broken_container = $self->[_ris_broken_container_];
6069 my $ris_excluded_lp_container = $self->[_ris_excluded_lp_container_];
6070 my $ris_list_by_seqno = $self->[_ris_list_by_seqno_];
6071 my $ris_permanently_broken = $self->[_ris_permanently_broken_];
6072 my $rlec_count_by_seqno = $self->[_rlec_count_by_seqno_];
6073 my $roverride_cab3 = $self->[_roverride_cab3_];
6074 my $rparent_of_seqno = $self->[_rparent_of_seqno_];
6075 my $rtype_count_by_seqno = $self->[_rtype_count_by_seqno_];
6076 my $rblock_type_of_seqno = $self->[_rblock_type_of_seqno_];
6078 my $last_nonblank_code_type = ';';
6079 my $last_nonblank_code_token = ';';
6080 my $last_nonblank_block_type = '';
6081 my $last_last_nonblank_code_type = ';';
6082 my $last_last_nonblank_code_token = ';';
6084 my %K_first_here_doc_by_seqno;
6086 my $set_permanently_broken = sub {
6088 while ( defined($seqno) ) {
6089 $ris_permanently_broken->{$seqno} = 1;
6090 $seqno = $rparent_of_seqno->{$seqno};
6094 my $store_token = sub {
6097 # This will be the index of this item in the new array
6098 my $KK_new = @{$rLL_new};
6100 my $type = $item->[_TYPE_];
6101 my $is_blank = $type eq 'b';
6102 my $block_type = "";
6104 # Do not output consecutive blanks. This situation should have been
6105 # prevented earlier, but it is worth checking because later routines
6106 # make this assumption.
6107 if ( $is_blank && $KK_new && $rLL_new->[-1]->[_TYPE_] eq 'b' ) {
6111 # check for a sequenced item (i.e., container or ?/:)
6112 my $type_sequence = $item->[_TYPE_SEQUENCE_];
6113 my $token = $item->[_TOKEN_];
6114 if ($type_sequence) {
6116 if ( $is_opening_token{$token} ) {
6118 $K_opening_container->{$type_sequence} = $KK_new;
6119 $block_type = $rblock_type_of_seqno->{$type_sequence};
6121 # Fix for case b1100: Count a line ending in ', [' as having
6122 # a line-ending comma. Otherwise, these commas can be hidden
6123 # with something like --opening-square-bracket-right
6124 if ( $last_nonblank_code_type eq ','
6125 && $Ktoken_vars == $Klast_old_code
6126 && $Ktoken_vars > $Kfirst_old )
6128 $rlec_count_by_seqno->{$type_sequence}++;
6131 if ( $last_nonblank_code_type eq '='
6132 || $last_nonblank_code_type eq '=>' )
6134 $ris_assigned_structure->{$type_sequence} =
6135 $last_nonblank_code_type;
6138 my $seqno_parent = $seqno_stack{ $depth_next - 1 };
6139 $seqno_parent = SEQ_ROOT unless defined($seqno_parent);
6140 push @{ $rchildren_of_seqno->{$seqno_parent} }, $type_sequence;
6141 $rparent_of_seqno->{$type_sequence} = $seqno_parent;
6142 $seqno_stack{$depth_next} = $type_sequence;
6143 $K_old_opening_by_seqno{$type_sequence} = $Ktoken_vars;
6146 if ( $depth_next > $depth_next_max ) {
6147 $depth_next_max = $depth_next;
6150 elsif ( $is_closing_token{$token} ) {
6152 $K_closing_container->{$type_sequence} = $KK_new;
6153 $block_type = $rblock_type_of_seqno->{$type_sequence};
6155 # Do not include terminal commas in counts
6156 if ( $last_nonblank_code_type eq ','
6157 || $last_nonblank_code_type eq '=>' )
6159 my $seqno = $seqno_stack{ $depth_next - 1 };
6161 $rtype_count_by_seqno->{$seqno}
6162 ->{$last_nonblank_code_type}--;
6164 if ( $Ktoken_vars == $Kfirst_old
6165 && $last_nonblank_code_type eq ','
6166 && $rlec_count_by_seqno->{$seqno} )
6168 $rlec_count_by_seqno->{$seqno}--;
6173 # Update the stack...
6178 # For ternary, note parent but do not include as child
6179 my $seqno_parent = $seqno_stack{ $depth_next - 1 };
6180 $seqno_parent = SEQ_ROOT unless defined($seqno_parent);
6181 $rparent_of_seqno->{$type_sequence} = $seqno_parent;
6183 # These are not yet used but could be useful
6184 if ( $token eq '?' ) {
6185 $K_opening_ternary->{$type_sequence} = $KK_new;
6187 elsif ( $token eq ':' ) {
6188 $K_closing_ternary->{$type_sequence} = $KK_new;
6192 # We really shouldn't arrive here, just being cautious:
6193 # The only sequenced types output by the tokenizer are the
6194 # opening & closing containers and the ternary types. Each
6195 # of those was checked above. So we would only get here
6196 # if the tokenizer has been changed to mark some other
6197 # tokens with sequence numbers.
6199 my $type = $item->[_TYPE_];
6201 "Unexpected token type with sequence number: type='$type', seqno='$type_sequence'"
6208 # Find the length of this token. Later it may be adjusted if phantom
6209 # or ignoring side comment lengths.
6212 ? $length_function->($token)
6216 my $is_comment = $type eq '#';
6219 # trim comments if necessary
6220 my $ord = ord( substr( $token, -1, 1 ) );
6223 && ( $ord < ORD_PRINTABLE_MIN
6224 || $ord > ORD_PRINTABLE_MAX )
6225 && $token =~ s/\s+$//
6228 $token_length = $length_function->($token);
6229 $item->[_TOKEN_] = $token;
6232 # Mark length of side comments as just 1 if sc lengths are ignored
6233 if ( $rOpts_ignore_side_comment_lengths
6234 && ( !$CODE_type || $CODE_type eq 'HSC' ) )
6238 my $seqno = $seqno_stack{ $depth_next - 1 };
6239 if ( defined($seqno)
6240 && !$ris_permanently_broken->{$seqno} )
6242 $set_permanently_broken->($seqno);
6246 $item->[_TOKEN_LENGTH_] = $token_length;
6248 # and update the cumulative length
6249 $cumulative_length += $token_length;
6251 # Save the length sum to just AFTER this token
6252 $item->[_CUMULATIVE_LENGTH_] = $cumulative_length;
6254 if ( !$is_blank && !$is_comment ) {
6256 # Remember the most recent two non-blank, non-comment tokens.
6257 # NOTE: the phantom semicolon code may change the output stack
6258 # without updating these values. Phantom semicolons are considered
6259 # the same as blanks for now, but future needs might change that.
6260 # See the related note in sub '$add_phantom_semicolon'.
6261 $last_last_nonblank_code_type = $last_nonblank_code_type;
6262 $last_last_nonblank_code_token = $last_nonblank_code_token;
6264 $last_nonblank_code_type = $type;
6265 $last_nonblank_code_token = $token;
6266 $last_nonblank_block_type = $block_type;
6268 # count selected types
6269 if ( $is_counted_type{$type} ) {
6270 my $seqno = $seqno_stack{ $depth_next - 1 };
6271 if ( defined($seqno) ) {
6272 $rtype_count_by_seqno->{$seqno}->{$type}++;
6274 # Count line-ending commas for -bbx
6275 if ( $type eq ',' && $Ktoken_vars == $Klast_old_code ) {
6276 $rlec_count_by_seqno->{$seqno}++;
6279 # Remember index of first here doc target
6280 if ( $type eq 'h' && !$K_first_here_doc_by_seqno{$seqno} ) {
6281 $K_first_here_doc_by_seqno{$seqno} = $KK_new;
6287 # For reference, here is how to get the parent sequence number.
6288 # This is not used because it is slower than finding it on the fly
6289 # in sub parent_seqno_by_K:
6291 # my $seqno_parent =
6292 # $type_sequence && $is_opening_token{$token}
6293 # ? $seqno_stack{ $depth_next - 2 }
6294 # : $seqno_stack{ $depth_next - 1 };
6295 # my $KK = @{$rLL_new};
6296 # $rseqno_of_parent_by_K->{$KK} = $seqno_parent;
6298 # and finally, add this item to the new array
6299 push @{$rLL_new}, $item;
6303 my $store_token_and_space = sub {
6304 my ( $item, $want_space ) = @_;
6306 # store a token with preceding space if requested and needed
6308 # First store the space
6311 && $rLL_new->[-1]->[_TYPE_] ne 'b'
6312 && $rOpts_add_whitespace )
6314 my $rcopy = [ @{$item} ];
6315 $rcopy->[_TYPE_] = 'b';
6316 $rcopy->[_TOKEN_] = ' ';
6317 $rcopy->[_TYPE_SEQUENCE_] = '';
6319 $rcopy->[_LINE_INDEX_] =
6320 $rLL_new->[-1]->[_LINE_INDEX_];
6322 # Patch 23-Jan-2021 to fix -lp blinkers:
6323 # The level and ci_level of newly created spaces should be the same
6324 # as the previous token. Otherwise the coding for the -lp option
6325 # can create a blinking state in some rare cases.
6327 $rLL_new->[-1]->[_LEVEL_];
6328 $rcopy->[_CI_LEVEL_] =
6329 $rLL_new->[-1]->[_CI_LEVEL_];
6331 $store_token->($rcopy);
6335 $store_token->($item);
6339 my $add_phantom_semicolon = sub {
6343 my $Kp = $self->K_previous_nonblank( undef, $rLL_new );
6344 return unless ( defined($Kp) );
6346 # we are only adding semicolons for certain block types
6347 my $type_sequence = $rLL->[$KK]->[_TYPE_SEQUENCE_];
6348 return unless ($type_sequence);
6349 my $block_type = $rblock_type_of_seqno->{$type_sequence};
6350 return unless ($block_type);
6352 unless ( $ok_to_add_semicolon_for_block_type{$block_type}
6353 || $block_type =~ /^(sub|package)/
6354 || $block_type =~ /^\w+\:$/ );
6356 my $type_p = $rLL_new->[$Kp]->[_TYPE_];
6357 my $token_p = $rLL_new->[$Kp]->[_TOKEN_];
6358 my $type_sequence_p = $rLL_new->[$Kp]->[_TYPE_SEQUENCE_];
6360 # Do not add a semicolon if...
6364 # it would follow a comment (and be isolated)
6367 # it follows a code block ( because they are not always wanted
6368 # there and may add clutter)
6369 || $type_sequence_p && $rblock_type_of_seqno->{$type_sequence_p}
6371 # it would follow a label
6374 # it would be inside a 'format' statement (and cause syntax error)
6376 && $token_p =~ /format/ )
6380 # Do not add a semicolon if it would impede a weld with an immediately
6381 # following closing token...like this
6383 # ^--No semicolon can go here
6385 # look at the previous token... note use of the _NEW rLL array here,
6386 # but sequence numbers are invariant.
6387 my $seqno_inner = $rLL_new->[$Kp]->[_TYPE_SEQUENCE_];
6389 # If it is also a CLOSING token we have to look closer...
6392 && $is_closing_token{$token_p}
6394 # we only need to look if there is just one inner container..
6395 && defined( $rchildren_of_seqno->{$type_sequence} )
6396 && @{ $rchildren_of_seqno->{$type_sequence} } == 1
6400 # Go back and see if the corresponding two OPENING tokens are also
6401 # together. Note that we are using the OLD K indexing here:
6402 my $K_outer_opening = $K_old_opening_by_seqno{$type_sequence};
6403 if ( defined($K_outer_opening) ) {
6404 my $K_nxt = $self->K_next_nonblank($K_outer_opening);
6405 if ( defined($K_nxt) ) {
6406 my $seqno_nxt = $rLL->[$K_nxt]->[_TYPE_SEQUENCE_];
6408 # Is the next token after the outer opening the same as
6409 # our inner closing (i.e. same sequence number)?
6410 # If so, do not insert a semicolon here.
6411 return if ( $seqno_nxt && $seqno_nxt == $seqno_inner );
6416 # We will insert an empty semicolon here as a placeholder. Later, if
6417 # it becomes the last token on a line, we will bring it to life. The
6418 # advantage of doing this is that (1) we just have to check line
6419 # endings, and (2) the phantom semicolon has zero width and therefore
6420 # won't cause needless breaks of one-line blocks.
6422 if ( $rLL_new->[$Ktop]->[_TYPE_] eq 'b'
6423 && $want_left_space{';'} == WS_NO )
6426 # convert the blank into a semicolon..
6427 # be careful: we are working on the new stack top
6428 # on a token which has been stored.
6429 my $rcopy = copy_token_as_type( $rLL_new->[$Ktop], 'b', ' ' );
6431 # Convert the existing blank to:
6432 # a phantom semicolon for one_line_block option = 0 or 1
6433 # a real semicolon for one_line_block option = 2
6436 if ( $rOpts_one_line_block_semicolons == 2 ) {
6441 $rLL_new->[$Ktop]->[_TOKEN_] = $tok;
6442 $rLL_new->[$Ktop]->[_TOKEN_LENGTH_] = $len_tok;
6443 $rLL_new->[$Ktop]->[_TYPE_] = ';';
6445 # NOTE: we are changing the output stack without updating variables
6446 # $last_nonblank_code_type, etc. Future needs might require that
6447 # those variables be updated here. For now, it seems ok to skip
6450 # Save list of new K indexes of phantom semicolons.
6451 # This will be needed if we want to undo them for iterations in
6453 push @{$rK_phantom_semicolons}, @{$rLL_new} - 1;
6455 # Then store a new blank
6456 $store_token->($rcopy);
6460 # Patch for issue c078: keep line indexes in order. If the top
6461 # token is a space that we are keeping (due to '-wls=';') then
6462 # we have to check that old line indexes stay in order.
6464 # instances in which side comments have been deleted and converted
6465 # into blanks, we may have filtered down multiple blanks into just
6466 # one. In that case the top blank may have a higher line number
6467 # than the previous nonblank token. Although the line indexes of
6468 # blanks are not really significant, we need to keep them in order
6469 # in order to pass error checks.
6470 if ( $rLL_new->[$Ktop]->[_TYPE_] eq 'b' ) {
6471 my $old_top_ix = $rLL_new->[$Ktop]->[_LINE_INDEX_];
6472 my $new_top_ix = $rLL_new->[$Kp]->[_LINE_INDEX_];
6473 if ( $new_top_ix < $old_top_ix ) {
6474 $rLL_new->[$Ktop]->[_LINE_INDEX_] = $new_top_ix;
6478 my $rcopy = copy_token_as_type( $rLL_new->[$Kp], ';', '' );
6479 $store_token->($rcopy);
6480 push @{$rK_phantom_semicolons}, @{$rLL_new} - 1;
6487 # Check that a quote looks okay
6488 # This sub works but needs to by sync'd with the log file output
6489 # before it can be used.
6490 my ( $KK, $Kfirst, $line_number ) = @_;
6491 my $token = $rLL->[$KK]->[_TOKEN_];
6492 $self->note_embedded_tab($line_number) if ( $token =~ "\t" );
6494 # The remainder of this routine looks for something like
6495 # '$var = s/xxx/yyy/;'
6496 # in case it should have been '$var =~ s/xxx/yyy/;'
6498 # Start by looking for a token begining with one of: s y m / tr
6500 unless ( $is_s_y_m_slash{ substr( $token, 0, 1 ) }
6501 || substr( $token, 0, 2 ) eq 'tr' );
6503 # ... and preceded by one of: = == !=
6504 my $Kp = $self->K_previous_nonblank( undef, $rLL_new );
6505 return unless ( defined($Kp) );
6506 my $previous_nonblank_type = $rLL_new->[$Kp]->[_TYPE_];
6507 return unless ( $is_unexpected_equals{$previous_nonblank_type} );
6508 my $previous_nonblank_token = $rLL_new->[$Kp]->[_TOKEN_];
6510 my $previous_nonblank_type_2 = 'b';
6511 my $previous_nonblank_token_2 = "";
6512 my $Kpp = $self->K_previous_nonblank( $Kp, $rLL_new );
6513 if ( defined($Kpp) ) {
6514 $previous_nonblank_type_2 = $rLL_new->[$Kpp]->[_TYPE_];
6515 $previous_nonblank_token_2 = $rLL_new->[$Kpp]->[_TOKEN_];
6518 my $next_nonblank_token = "";
6520 if ( $Kn <= $Kmax && $rLL->[$Kn]->[_TYPE_] eq 'b' ) { $Kn += 1 }
6521 if ( $Kn <= $Kmax ) {
6522 $next_nonblank_token = $rLL->[$Kn]->[_TOKEN_];
6525 my $token_0 = $rLL->[$Kfirst]->[_TOKEN_];
6526 my $type_0 = $rLL->[$Kfirst]->[_TYPE_];
6529 ##$token =~ /^(s|tr|y|m|\/)/
6530 ##&& $previous_nonblank_token =~ /^(=|==|!=)$/
6533 # preceded by simple scalar
6534 && $previous_nonblank_type_2 eq 'i'
6535 && $previous_nonblank_token_2 =~ /^\$/
6537 # followed by some kind of termination
6538 # (but give complaint if we can not see far enough ahead)
6539 && $next_nonblank_token =~ /^[; \)\}]$/
6541 # scalar is not declared
6542 && !( $type_0 eq 'k' && $token_0 =~ /^(my|our|local)$/ )
6545 my $lno = 1 + $rLL_new->[$Kp]->[_LINE_INDEX_];
6546 my $guess = substr( $previous_nonblank_token, 0, 1 ) . '~';
6548 "Line $lno: Note: be sure you want '$previous_nonblank_token' instead of '$guess' here\n"
6554 #-------------------------------------------
6555 # Main loop to respace all lines of the file
6556 #-------------------------------------------
6559 foreach my $line_of_tokens ( @{$rlines} ) {
6561 my $input_line_number = $line_of_tokens->{_line_number};
6562 my $last_line_type = $line_type;
6563 $line_type = $line_of_tokens->{_line_type};
6564 next unless ( $line_type eq 'CODE' );
6565 my $last_CODE_type = $CODE_type;
6566 $CODE_type = $line_of_tokens->{_code_type};
6567 my $rK_range = $line_of_tokens->{_rK_range};
6568 my ( $Kfirst, $Klast ) = @{$rK_range};
6569 next unless defined($Kfirst);
6570 ( $Kfirst_old, $Klast_old ) = ( $Kfirst, $Klast );
6571 $Klast_old_code = $Klast_old;
6573 # Be sure an old K value is defined for sub $store_token
6574 $Ktoken_vars = $Kfirst;
6576 # Check for correct sequence of token indexes...
6577 # An error here means that sub write_line() did not correctly
6578 # package the tokenized lines as it received them. If we
6579 # get a fault here it has not output a continuous sequence
6580 # of K values. Or a line of CODE may have been mismarked as
6581 # something else. There is no good way to continue after such an
6583 # FIXME: Calling Fault will produce zero output; it would be best to
6584 # find a way to dump the input file.
6585 if ( defined($last_K_out) ) {
6586 if ( $Kfirst != $last_K_out + 1 ) {
6588 "Program Bug: last K out was $last_K_out but Kfirst=$Kfirst"
6594 # The first token should always have been given index 0 by sub
6596 if ( $Kfirst != 0 ) {
6597 Fault("Program Bug: first K is $Kfirst but should be 0");
6600 $last_K_out = $Klast;
6602 # Handle special lines of code
6603 if ( $CODE_type && $CODE_type ne 'NIN' && $CODE_type ne 'VER' ) {
6605 # CODE_types are as follows.
6607 # 'VB' = Verbatim - line goes out verbatim
6608 # 'FS' = Format Skipping - line goes out verbatim, no blanks
6609 # 'IO' = Indent Only - only indentation may be changed
6610 # 'NIN' = No Internal Newlines - line does not get broken
6611 # 'HSC'=Hanging Side Comment - fix this hanging side comment
6612 # 'BC'=Block Comment - an ordinary full line comment
6613 # 'SBC'=Static Block Comment - a block comment which does not get
6615 # 'SBCX'=Static Block Comment Without Leading Space
6616 # 'VER'=VERSION statement
6617 # '' or (undefined) - no restructions
6619 # For a hanging side comment we insert an empty quote before
6620 # the comment so that it becomes a normal side comment and
6621 # will be aligned by the vertical aligner
6622 if ( $CODE_type eq 'HSC' ) {
6624 # Safety Check: This must be a line with one token (a comment)
6625 my $rtoken_vars = $rLL->[$Kfirst];
6626 if ( $Kfirst == $Klast && $rtoken_vars->[_TYPE_] eq '#' ) {
6628 # Note that even if the flag 'noadd-whitespace' is set, we
6629 # will make an exception here and allow a blank to be
6630 # inserted to push the comment to the right. We can think
6631 # of this as an adjustment of indentation rather than
6632 # whitespace between tokens. This will also prevent the
6633 # hanging side comment from getting converted to a block
6634 # comment if whitespace gets deleted, as for example with
6635 # the -extrude and -mangle options.
6636 my $rcopy = copy_token_as_type( $rtoken_vars, 'q', '' );
6637 $store_token->($rcopy);
6638 $rcopy = copy_token_as_type( $rtoken_vars, 'b', ' ' );
6639 $store_token->($rcopy);
6640 $store_token->($rtoken_vars);
6645 # This line was mis-marked by sub scan_comment. Catch in
6646 # DEVEL_MODE, otherwise try to repair and keep going.
6648 "Program bug. A hanging side comment has been mismarked"
6652 $line_of_tokens->{_code_type} = $CODE_type;
6656 if ( $CODE_type eq 'BL' ) {
6657 my $seqno = $seqno_stack{ $depth_next - 1 };
6658 if ( defined($seqno)
6659 && !$ris_permanently_broken->{$seqno}
6660 && $rOpts_maximum_consecutive_blank_lines )
6662 $set_permanently_broken->($seqno);
6666 # Copy tokens unchanged
6667 foreach my $KK ( $Kfirst .. $Klast ) {
6669 $store_token->( $rLL->[$KK] );
6674 # Handle normal line..
6676 # Define index of last token before any side comment for comma counts
6677 my $type_end = $rLL->[$Klast_old_code]->[_TYPE_];
6678 if ( ( $type_end eq '#' || $type_end eq 'b' )
6679 && $Klast_old_code > $Kfirst_old )
6682 if ( $rLL->[$Klast_old_code]->[_TYPE_] eq 'b'
6683 && $Klast_old_code > $Kfirst_old )
6689 # Insert any essential whitespace between lines
6690 # if last line was normal CODE.
6691 # Patch for rt #125012: use K_previous_code rather than '_nonblank'
6692 # because comments may disappear.
6693 if ( $last_line_type eq 'CODE' ) {
6694 my $type_next = $rLL->[$Kfirst]->[_TYPE_];
6695 my $token_next = $rLL->[$Kfirst]->[_TOKEN_];
6697 is_essential_whitespace(
6698 $last_last_nonblank_code_token,
6699 $last_last_nonblank_code_type,
6700 $last_nonblank_code_token,
6701 $last_nonblank_code_type,
6708 # Copy this first token as blank, but use previous line number
6709 my $rcopy = copy_token_as_type( $rLL->[$Kfirst], 'b', ' ' );
6710 $rcopy->[_LINE_INDEX_] =
6711 $rLL_new->[-1]->[_LINE_INDEX_];
6713 # The level and ci_level of newly created spaces should be the
6714 # same as the previous token. Otherwise blinking states can
6715 # be created if the -lp mode is used. See similar coding in
6716 # sub 'store_token_and_space'. Fixes cases b1109 b1110.
6718 $rLL_new->[-1]->[_LEVEL_];
6719 $rcopy->[_CI_LEVEL_] =
6720 $rLL_new->[-1]->[_CI_LEVEL_];
6722 $store_token->($rcopy);
6726 #-------------------------------------------------------
6727 # Loop to copy all tokens on this line, with any changes
6728 #-------------------------------------------------------
6730 for ( my $KK = $Kfirst ; $KK <= $Klast ; $KK++ ) {
6732 $rtoken_vars = $rLL->[$KK];
6733 my $token = $rtoken_vars->[_TOKEN_];
6734 my $type = $rtoken_vars->[_TYPE_];
6735 my $last_type_sequence = $type_sequence;
6736 $type_sequence = $rtoken_vars->[_TYPE_SEQUENCE_];
6738 # Handle a blank space ...
6739 if ( $type eq 'b' ) {
6741 # Delete it if not wanted by whitespace rules
6742 # or we are deleting all whitespace
6743 # Note that whitespace flag is a flag indicating whether a
6744 # white space BEFORE the token is needed
6745 next if ( $KK >= $Klast ); # skip terminal blank
6746 my $Knext = $KK + 1;
6748 if ($rOpts_freeze_whitespace) {
6749 $store_token->($rtoken_vars);
6753 my $ws = $rwhitespace_flags->[$Knext];
6755 || $rOpts_delete_old_whitespace )
6758 my $token_next = $rLL->[$Knext]->[_TOKEN_];
6759 my $type_next = $rLL->[$Knext]->[_TYPE_];
6761 my $do_not_delete = is_essential_whitespace(
6762 $last_last_nonblank_code_token,
6763 $last_last_nonblank_code_type,
6764 $last_nonblank_code_token,
6765 $last_nonblank_code_type,
6770 # Note that repeated blanks will get filtered out here
6771 next unless ($do_not_delete);
6774 # make it just one character
6775 $rtoken_vars->[_TOKEN_] = ' ';
6776 $store_token->($rtoken_vars);
6780 # Handle a nonblank token...
6782 if ($type_sequence) {
6784 # Insert a tentative missing semicolon if the next token is
6785 # a closing block brace
6790 # not preceded by a ';'
6791 && $last_nonblank_code_type ne ';'
6793 # and this is not a VERSION stmt (is all one line, we
6794 # are not inserting semicolons on one-line blocks)
6795 && $CODE_type ne 'VER'
6797 # and we are allowed to add semicolons
6798 && $rOpts->{'add-semicolons'}
6801 $add_phantom_semicolon->($KK);
6805 # Modify certain tokens here for whitespace
6806 # The following is not yet done, but could be:
6808 # ( $type =~ /^[wit]$/ )
6809 elsif ( $is_wit{$type} ) {
6811 # change '$ var' to '$var' etc
6812 # change '@ ' to '@'
6813 # Examples: <<snippets/space1.in>>
6814 my $ord = ord( substr( $token, 1, 1 ) );
6817 # quick test for possible blank at second char
6818 $ord > 0 && ( $ord < ORD_PRINTABLE_MIN
6819 || $ord > ORD_PRINTABLE_MAX )
6822 my ( $sigil, $word ) = split /\s+/, $token, 2;
6824 # $sigil =~ /^[\$\&\%\*\@]$/ )
6825 if ( $is_sigil{$sigil} ) {
6827 $token .= $word if ( defined($word) ); # fix c104
6828 $rtoken_vars->[_TOKEN_] = $token;
6832 # Split identifiers with leading arrows, inserting blanks
6833 # if necessary. It is easier and safer here than in the
6834 # tokenizer. For example '->new' becomes two tokens, '->'
6835 # and 'new' with a possible blank between.
6837 # Note: there is a related patch in sub set_whitespace_flags
6838 elsif (length($token) > 2
6839 && substr( $token, 0, 2 ) eq '->'
6840 && $token =~ /^\-\>(.*)$/
6844 my $token_save = $1;
6845 my $type_save = $type;
6847 # Change '-> new' to '->new'
6848 $token_save =~ s/^\s+//g;
6850 # store a blank to left of arrow if necessary
6851 my $Kprev = $self->K_previous_nonblank($KK);
6852 if ( defined($Kprev)
6853 && $rLL->[$Kprev]->[_TYPE_] ne 'b'
6854 && $rOpts_add_whitespace
6855 && $want_left_space{'->'} == WS_YES )
6858 copy_token_as_type( $rtoken_vars, 'b', ' ' );
6859 $store_token->($rcopy);
6862 # then store the arrow
6863 my $rcopy = copy_token_as_type( $rtoken_vars, '->', '->' );
6864 $store_token->($rcopy);
6866 # store a blank after the arrow if requested
6867 # added for issue git #33
6868 if ( $want_right_space{'->'} == WS_YES ) {
6870 copy_token_as_type( $rtoken_vars, 'b', ' ' );
6871 $store_token->($rcopy);
6874 # then reset the current token to be the remainder,
6875 # and reset the whitespace flag according to the arrow
6876 $token = $rtoken_vars->[_TOKEN_] = $token_save;
6877 $type = $rtoken_vars->[_TYPE_] = $type_save;
6878 $store_token->($rtoken_vars);
6882 # Trim certain spaces in identifiers
6883 if ( $type eq 'i' ) {
6887 substr( $token, 0, 3 ) eq 'sub'
6888 || $rOpts_sub_alias_list
6890 && $token =~ /$SUB_PATTERN/
6894 # -spp = 0 : no space before opening prototype paren
6895 # -spp = 1 : stable (follow input spacing)
6896 # -spp = 2 : always space before opening prototype paren
6897 my $spp = $rOpts->{'space-prototype-paren'};
6898 if ( defined($spp) ) {
6899 if ( $spp == 0 ) { $token =~ s/\s+\(/\(/; }
6900 elsif ( $spp == 2 ) { $token =~ s/\(/ (/; }
6903 # one space max, and no tabs
6904 $token =~ s/\s+/ /g;
6905 $rtoken_vars->[_TOKEN_] = $token;
6908 # clean up spaces in package identifiers, like
6909 # "package Bob::Dog;"
6910 elsif ( substr( $token, 0, 7 ) eq 'package'
6911 && $token =~ /^package\s/ )
6913 $token =~ s/\s+/ /g;
6914 $rtoken_vars->[_TOKEN_] = $token;
6917 # trim identifiers of trailing blanks which can occur
6918 # under some unusual circumstances, such as if the
6919 # identifier 'witch' has trailing blanks on input here:
6923 # () # prototype may be on new line ...
6925 my $ord = ord( substr( $token, -1, 1 ) );
6928 # quick check for possible ending space
6929 $ord > 0 && ( $ord < ORD_PRINTABLE_MIN
6930 || $ord > ORD_PRINTABLE_MAX )
6933 $token =~ s/\s+$//g;
6934 $rtoken_vars->[_TOKEN_] = $token;
6940 elsif ( $type eq ';' ) {
6942 # Remove unnecessary semicolons, but not after bare
6943 # blocks, where it could be unsafe if the brace is
6946 $rOpts->{'delete-semicolons'}
6949 $last_nonblank_block_type
6950 && $last_nonblank_code_type eq '}'
6952 $is_block_without_semicolon{
6953 $last_nonblank_block_type}
6954 || $last_nonblank_block_type =~ /$SUB_PATTERN/
6955 || $last_nonblank_block_type =~ /^\w+:$/
6958 || $last_nonblank_code_type eq ';'
6963 # This looks like a deletable semicolon, but even if a
6964 # semicolon can be deleted it is not necessarily best to do
6965 # so. We apply these additional rules for deletion:
6966 # - Always ok to delete a ';' at the end of a line
6967 # - Never delete a ';' before a '#' because it would
6968 # promote it to a block comment.
6969 # - If a semicolon is not at the end of line, then only
6970 # delete if it is followed by another semicolon or closing
6971 # token. This includes the comment rule. It may take
6972 # two passes to get to a final state, but it is a little
6973 # safer. For example, keep the first semicolon here:
6974 # eval { sub bubba { ok(0) }; ok(0) } || ok(1);
6975 # It is not required but adds some clarity.
6976 my $ok_to_delete = 1;
6977 if ( $KK < $Klast ) {
6978 my $Kn = $self->K_next_nonblank($KK);
6979 if ( defined($Kn) && $Kn <= $Klast ) {
6980 my $next_nonblank_token_type =
6981 $rLL->[$Kn]->[_TYPE_];
6982 $ok_to_delete = $next_nonblank_token_type eq ';'
6983 || $next_nonblank_token_type eq '}';
6987 # do not delete only nonblank token in a file
6989 my $Kp = $self->K_previous_code( undef, $rLL_new );
6990 my $Kn = $self->K_next_nonblank($KK);
6991 $ok_to_delete = defined($Kn) || defined($Kp);
6994 if ($ok_to_delete) {
6995 $self->note_deleted_semicolon($input_line_number);
6999 write_logfile_entry("Extra ';'\n");
7004 # Old patch to add space to something like "x10".
7005 # Note: This is now done in the Tokenizer, but this code remains
7007 elsif ( $type eq 'n' ) {
7008 if ( substr( $token, 0, 1 ) eq 'x' && $token =~ /^x\d+/ ) {
7010 $rtoken_vars->[_TOKEN_] = $token;
7013 Near line $input_line_number, Unexpected need to split a token '$token' - this should now be done by the Tokenizer
7019 # check for a qw quote
7020 elsif ( $type eq 'q' ) {
7022 # trim blanks from right of qw quotes
7023 # (To avoid trimming qw quotes use -ntqw; the tokenizer handles
7026 $rtoken_vars->[_TOKEN_] = $token;
7027 $self->note_embedded_tab($input_line_number)
7028 if ( $token =~ "\t" );
7029 $store_token_and_space->(
7030 $rtoken_vars, $rwhitespace_flags->[$KK] == WS_YES
7033 } ## end if ( $type eq 'q' )
7035 # change 'LABEL :' to 'LABEL:'
7036 elsif ( $type eq 'J' ) {
7038 $rtoken_vars->[_TOKEN_] = $token;
7041 # check a quote for problems
7042 elsif ( $type eq 'Q' ) {
7043 $check_Q->( $KK, $Kfirst, $input_line_number );
7046 # Store this token with possible previous blank
7047 $store_token_and_space->(
7048 $rtoken_vars, $rwhitespace_flags->[$KK] == WS_YES
7054 # Walk backwards through the tokens, making forward links to sequence items.
7055 if ( @{$rLL_new} ) {
7057 for ( my $KK = @{$rLL_new} - 1 ; $KK >= 0 ; $KK-- ) {
7058 $rLL_new->[$KK]->[_KNEXT_SEQ_ITEM_] = $KNEXT;
7059 if ( $rLL_new->[$KK]->[_TYPE_SEQUENCE_] ) { $KNEXT = $KK }
7061 $self->[_K_first_seq_item_] = $KNEXT;
7064 # Find and remember lists by sequence number
7065 foreach my $seqno ( keys %{$K_opening_container} ) {
7066 my $K_opening = $K_opening_container->{$seqno};
7067 next unless defined($K_opening);
7069 # code errors may leave undefined closing tokens
7070 my $K_closing = $K_closing_container->{$seqno};
7071 next unless defined($K_closing);
7073 my $lx_open = $rLL_new->[$K_opening]->[_LINE_INDEX_];
7074 my $lx_close = $rLL_new->[$K_closing]->[_LINE_INDEX_];
7075 my $line_diff = $lx_close - $lx_open;
7076 $ris_broken_container->{$seqno} = $line_diff;
7078 # See if this is a list
7080 my $rtype_count = $rtype_count_by_seqno->{$seqno};
7082 my $comma_count = $rtype_count->{','};
7083 my $fat_comma_count = $rtype_count->{'=>'};
7084 my $semicolon_count = $rtype_count->{';'} || $rtype_count->{'f'};
7086 # We will define a list to be a container with one or more commas
7087 # and no semicolons. Note that we have included the semicolons
7088 # in a 'for' container in the simicolon count to keep c-style for
7089 # statements from being formatted as lists.
7090 if ( ( $comma_count || $fat_comma_count ) && !$semicolon_count ) {
7093 # We need to do one more check for a perenthesized list:
7094 # At an opening paren following certain tokens, such as 'if',
7095 # we do not want to format the contents as a list.
7096 if ( $rLL_new->[$K_opening]->[_TOKEN_] eq '(' ) {
7097 my $Kp = $self->K_previous_code( $K_opening, $rLL_new );
7098 if ( defined($Kp) ) {
7099 my $type_p = $rLL_new->[$Kp]->[_TYPE_];
7100 if ( $type_p eq 'k' ) {
7101 my $token_p = $rLL_new->[$Kp]->[_TOKEN_];
7102 $is_list = 0 if ( $is_nonlist_keyword{$token_p} );
7105 $is_list = 0 if ( $is_nonlist_type{$type_p} );
7112 # Look for a block brace marked as uncertain. If the tokenizer thinks
7113 # its guess is uncertain for the type of a brace following an unknown
7114 # bareword then it adds a trailing space as a signal. We can fix the
7115 # type here now that we have had a better look at the contents of the
7116 # container. This fixes case b1085. To find the corresponding code in
7117 # Tokenizer.pm search for 'b1085' with an editor.
7118 my $block_type = $rblock_type_of_seqno->{$seqno};
7119 if ( $block_type && substr( $block_type, -1, 1 ) eq ' ' ) {
7121 # Always remove the trailing space
7122 $block_type =~ s/\s+$//;
7124 # Try to filter out parenless sub calls
7125 my ( $Knn1, $Knn2 );
7126 my ( $type_nn1, $type_nn2 ) = ( 'b', 'b' );
7127 $Knn1 = $self->K_next_nonblank( $K_opening, $rLL_new );
7128 $Knn2 = $self->K_next_nonblank( $Knn1, $rLL_new ) if defined($Knn1);
7129 $type_nn1 = $rLL_new->[$Knn1]->[_TYPE_] if ( defined($Knn1) );
7130 $type_nn2 = $rLL_new->[$Knn2]->[_TYPE_] if ( defined($Knn2) );
7132 # if ( $type_nn1 =~ /^[wU]$/ && $type_nn2 =~ /^[wiqQGCZ]$/ ) {
7133 if ( $wU{$type_nn1} && $wiq{$type_nn2} ) {
7137 # Convert to a hash brace if it looks like it holds a list
7142 $rLL_new->[$K_opening]->[_CI_LEVEL_] = 1;
7143 $rLL_new->[$K_closing]->[_CI_LEVEL_] = 1;
7146 $rblock_type_of_seqno->{$seqno} = $block_type;
7149 # Handle a list container
7150 if ( $is_list && !$block_type ) {
7151 $ris_list_by_seqno->{$seqno} = $seqno;
7152 my $seqno_parent = $rparent_of_seqno->{$seqno};
7154 while ( defined($seqno_parent) && $seqno_parent ne SEQ_ROOT ) {
7157 # for $rhas_list we need to save the minimum depth
7158 if ( !$rhas_list->{$seqno_parent}
7159 || $rhas_list->{$seqno_parent} > $depth )
7161 $rhas_list->{$seqno_parent} = $depth;
7165 $rhas_broken_list->{$seqno_parent} = 1;
7167 # Patch1: We need to mark broken lists with non-terminal
7168 # line-ending commas for the -bbx=2 parameter. This insures
7169 # that the list will stay broken. Otherwise the flag
7170 # -bbx=2 can be unstable. This fixes case b789 and b938.
7172 # Patch2: Updated to also require either one fat comma or
7173 # one more line-ending comma. Fixes cases b1069 b1070
7176 $rlec_count_by_seqno->{$seqno}
7177 && ( $rlec_count_by_seqno->{$seqno} > 1
7178 || $rtype_count_by_seqno->{$seqno}->{'=>'} )
7181 $rhas_broken_list_with_lec->{$seqno_parent} = 1;
7184 $seqno_parent = $rparent_of_seqno->{$seqno_parent};
7188 # Handle code blocks ...
7189 # The -lp option needs to know if a container holds a code block
7190 elsif ( $block_type && $rOpts_line_up_parentheses ) {
7191 my $seqno_parent = $rparent_of_seqno->{$seqno};
7192 while ( defined($seqno_parent) && $seqno_parent ne SEQ_ROOT ) {
7193 $rhas_code_block->{$seqno_parent} = 1;
7194 $rhas_broken_code_block->{$seqno_parent} = $line_diff;
7195 $seqno_parent = $rparent_of_seqno->{$seqno_parent};
7200 # Find containers with ternaries, needed for -lp formatting.
7201 foreach my $seqno ( keys %{$K_opening_ternary} ) {
7202 my $seqno_parent = $rparent_of_seqno->{$seqno};
7203 while ( defined($seqno_parent) && $seqno_parent ne SEQ_ROOT ) {
7204 $rhas_ternary->{$seqno_parent} = 1;
7205 $seqno_parent = $rparent_of_seqno->{$seqno_parent};
7209 # Turn off -lp for containers with here-docs with text within a container,
7210 # since they have their own fixed indentation. Fixes case b1081.
7211 if ($rOpts_line_up_parentheses) {
7212 foreach my $seqno ( keys %K_first_here_doc_by_seqno ) {
7213 my $Kh = $K_first_here_doc_by_seqno{$seqno};
7214 my $Kc = $K_closing_container->{$seqno};
7215 my $line_Kh = $rLL_new->[$Kh]->[_LINE_INDEX_];
7216 my $line_Kc = $rLL_new->[$Kc]->[_LINE_INDEX_];
7217 next if ( $line_Kh == $line_Kc );
7218 $ris_excluded_lp_container->{$seqno} = 1;
7222 # Set a flag to turn off -cab=3 in complex structures. Otherwise,
7223 # instability can occur. When it is overridden the behavior of the closest
7224 # match, -cab=2, will be used instead. This fixes cases b1096 b1113.
7225 if ( $rOpts_comma_arrow_breakpoints == 3 ) {
7226 foreach my $seqno ( keys %{$K_opening_container} ) {
7228 my $rtype_count = $rtype_count_by_seqno->{$seqno};
7229 next unless ( $rtype_count && $rtype_count->{'=>'} );
7231 # override -cab=3 if this contains a sub-list
7232 if ( $rhas_list->{$seqno} ) {
7233 $roverride_cab3->{$seqno} = 1;
7236 # or if this is a sub-list of its parent container
7238 my $seqno_parent = $rparent_of_seqno->{$seqno};
7239 if ( defined($seqno_parent)
7240 && $ris_list_by_seqno->{$seqno_parent} )
7242 $roverride_cab3->{$seqno} = 1;
7248 # Reset memory to be the new array
7249 $self->[_rLL_] = $rLL_new;
7251 if ( @{$rLL_new} ) { $Klimit = @{$rLL_new} - 1 }
7252 $self->[_Klimit_] = $Klimit;
7254 # During development, verify that the new array still looks okay.
7255 DEVEL_MODE && $self->check_token_array();
7257 # reset the token limits of each line
7258 $self->resync_lines_and_tokens();
7263 sub copy_token_as_type {
7265 # This provides a quick way to create a new token by
7266 # slightly modifying an existing token.
7267 my ( $rold_token, $type, $token ) = @_;
7268 if ( $type eq 'b' ) {
7269 $token = " " unless defined($token);
7271 elsif ( $type eq 'q' ) {
7272 $token = '' unless defined($token);
7274 elsif ( $type eq '->' ) {
7275 $token = '->' unless defined($token);
7277 elsif ( $type eq ';' ) {
7278 $token = ';' unless defined($token);
7282 # Unexpected type ... this sub will work as long as both $token and
7283 # $type are defined, but we should catch any unexpected types during
7287 sub 'copy_token_as_type' received token type '$type' but expects just one of: 'b' 'q' '->' or ';'
7295 my @rnew_token = @{$rold_token};
7296 $rnew_token[_TYPE_] = $type;
7297 $rnew_token[_TOKEN_] = $token;
7298 $rnew_token[_TYPE_SEQUENCE_] = '';
7299 return \@rnew_token;
7302 sub Debug_dump_tokens {
7304 # a debug routine, not normally used
7305 my ( $self, $msg ) = @_;
7306 my $rLL = $self->[_rLL_];
7307 my $nvars = @{$rLL};
7308 print STDERR "$msg\n";
7309 print STDERR "ntokens=$nvars\n";
7310 print STDERR "K\t_TOKEN_\t_TYPE_\n";
7313 foreach my $item ( @{$rLL} ) {
7314 print STDERR "$K\t$item->[_TOKEN_]\t$item->[_TYPE_]\n";
7321 my ( $self, $KK, $rLL ) = @_;
7323 # return the index K of the next nonblank, non-comment token
7324 return unless ( defined($KK) && $KK >= 0 );
7326 # use the standard array unless given otherwise
7327 $rLL = $self->[_rLL_] unless ( defined($rLL) );
7330 while ( $Knnb < $Num ) {
7331 if ( !defined( $rLL->[$Knnb] ) ) {
7333 # We seem to have encountered a gap in our array.
7334 # This shouldn't happen because sub write_line() pushed
7335 # items into the $rLL array.
7336 Fault("Undefined entry for k=$Knnb") if (DEVEL_MODE);
7339 if ( $rLL->[$Knnb]->[_TYPE_] ne 'b'
7340 && $rLL->[$Knnb]->[_TYPE_] ne '#' )
7349 sub K_next_nonblank {
7350 my ( $self, $KK, $rLL ) = @_;
7352 # return the index K of the next nonblank token, or
7353 # return undef if none
7354 return unless ( defined($KK) && $KK >= 0 );
7356 # The third arg allows this routine to be used on any array. This is
7357 # useful in sub respace_tokens when we are copying tokens from an old $rLL
7358 # to a new $rLL array. But usually the third arg will not be given and we
7359 # will just use the $rLL array in $self.
7360 $rLL = $self->[_rLL_] unless ( defined($rLL) );
7363 return unless ( $Knnb < $Num );
7364 return $Knnb if ( $rLL->[$Knnb]->[_TYPE_] ne 'b' );
7365 return unless ( ++$Knnb < $Num );
7366 return $Knnb if ( $rLL->[$Knnb]->[_TYPE_] ne 'b' );
7368 # Backup loop. Very unlikely to get here; it means we have neighboring
7369 # blanks in the token stream.
7371 while ( $Knnb < $Num ) {
7373 # Safety check, this fault shouldn't happen: The $rLL array is the
7374 # main array of tokens, so all entries should be used. It is
7375 # initialized in sub write_line, and then re-initialized by sub
7376 # $store_token() within sub respace_tokens. Tokens are pushed on
7377 # so there shouldn't be any gaps.
7378 if ( !defined( $rLL->[$Knnb] ) ) {
7379 Fault("Undefined entry for k=$Knnb") if (DEVEL_MODE);
7382 if ( $rLL->[$Knnb]->[_TYPE_] ne 'b' ) { return $Knnb }
7388 sub K_previous_code {
7390 # return the index K of the previous nonblank, non-comment token
7391 # Call with $KK=undef to start search at the top of the array
7392 my ( $self, $KK, $rLL ) = @_;
7394 # use the standard array unless given otherwise
7395 $rLL = $self->[_rLL_] unless ( defined($rLL) );
7397 if ( !defined($KK) ) { $KK = $Num }
7398 elsif ( $KK > $Num ) {
7400 # This fault can be caused by a programming error in which a bad $KK is
7401 # given. The caller should make the first call with KK_new=undef to
7404 "Program Bug: K_previous_nonblank_new called with K=$KK which exceeds $Num"
7409 while ( $Kpnb >= 0 ) {
7410 if ( $rLL->[$Kpnb]->[_TYPE_] ne 'b'
7411 && $rLL->[$Kpnb]->[_TYPE_] ne '#' )
7420 sub K_previous_nonblank {
7422 # return index of previous nonblank token before item K;
7423 # Call with $KK=undef to start search at the top of the array
7424 my ( $self, $KK, $rLL ) = @_;
7426 # use the standard array unless given otherwise
7427 $rLL = $self->[_rLL_] unless ( defined($rLL) );
7429 if ( !defined($KK) ) { $KK = $Num }
7430 elsif ( $KK > $Num ) {
7432 # This fault can be caused by a programming error in which a bad $KK is
7433 # given. The caller should make the first call with KK_new=undef to
7436 "Program Bug: K_previous_nonblank_new called with K=$KK which exceeds $Num"
7441 return unless ( $Kpnb >= 0 );
7442 return $Kpnb if ( $rLL->[$Kpnb]->[_TYPE_] ne 'b' );
7443 return unless ( --$Kpnb >= 0 );
7444 return $Kpnb if ( $rLL->[$Kpnb]->[_TYPE_] ne 'b' );
7446 # Backup loop. We should not get here unless some routine
7447 # slipped repeated blanks into the token stream.
7448 return unless ( --$Kpnb >= 0 );
7449 while ( $Kpnb >= 0 ) {
7450 if ( $rLL->[$Kpnb]->[_TYPE_] ne 'b' ) { return $Kpnb }
7456 sub parent_seqno_by_K {
7458 # Return the sequence number of the parent container of token K, if any.
7460 my ( $self, $KK ) = @_;
7461 my $rLL = $self->[_rLL_];
7463 # The task is to jump forward to the next container token
7464 # and use the sequence number of either it or its parent.
7466 # For example, consider the following with seqno=5 of the '[' and ']'
7467 # being called with index K of the first token of each line:
7472 # sub { 99 }, 'do {&{%s} for 1,2}', # 5
7473 # '(&{})(&{})', undef, # 5
7474 # [ 2, 2, 0 ], 0 # 5
7477 # NOTE: The ending parent will be SEQ_ROOT for a balanced file. For
7478 # unbalanced files, last sequence number will either be undefined or it may
7479 # be at a deeper level. In either case we will just return SEQ_ROOT to
7480 # have a defined value and allow formatting to proceed.
7481 my $parent_seqno = SEQ_ROOT;
7482 my $type_sequence = $rLL->[$KK]->[_TYPE_SEQUENCE_];
7483 if ($type_sequence) {
7484 $parent_seqno = $self->[_rparent_of_seqno_]->{$type_sequence};
7487 my $Kt = $rLL->[$KK]->[_KNEXT_SEQ_ITEM_];
7488 if ( defined($Kt) ) {
7489 $type_sequence = $rLL->[$Kt]->[_TYPE_SEQUENCE_];
7490 my $type = $rLL->[$Kt]->[_TYPE_];
7492 # if next container token is closing, it is the parent seqno
7493 if ( $is_closing_type{$type} ) {
7494 $parent_seqno = $type_sequence;
7497 # otherwise we want its parent container
7499 $parent_seqno = $self->[_rparent_of_seqno_]->{$type_sequence};
7503 $parent_seqno = SEQ_ROOT unless ( defined($parent_seqno) );
7504 return $parent_seqno;
7507 sub is_in_block_by_i {
7508 my ( $self, $i ) = @_;
7511 # token at i is contained in a BLOCK
7512 # or is at root level
7513 # or there is some kind of error (i.e. unbalanced file)
7514 # returns false otherwise
7515 return 1 if ( $i < 0 ); # shouldn't happen, bad call
7516 my $seqno = $parent_seqno_to_go[$i];
7517 return 1 if ( !$seqno || $seqno eq SEQ_ROOT );
7518 return 1 if ( $self->[_rblock_type_of_seqno_]->{$seqno} );
7522 sub is_in_list_by_i {
7523 my ( $self, $i ) = @_;
7525 # returns true if token at i is contained in a LIST
7526 # returns false otherwise
7527 my $seqno = $parent_seqno_to_go[$i];
7528 return unless ( $seqno && $seqno ne SEQ_ROOT );
7529 if ( $self->[_ris_list_by_seqno_]->{$seqno} ) {
7537 # Return true if token K is in a list
7538 my ( $self, $KK ) = @_;
7540 my $parent_seqno = $self->parent_seqno_by_K($KK);
7541 return unless defined($parent_seqno);
7542 return $self->[_ris_list_by_seqno_]->{$parent_seqno};
7545 sub is_list_by_seqno {
7547 # Return true if the immediate contents of a container appears to be a
7549 my ( $self, $seqno ) = @_;
7550 return unless defined($seqno);
7551 return $self->[_ris_list_by_seqno_]->{$seqno};
7554 sub resync_lines_and_tokens {
7557 my $rLL = $self->[_rLL_];
7558 my $Klimit = $self->[_Klimit_];
7559 my $rlines = $self->[_rlines_];
7560 my @Krange_code_without_comments;
7561 my @Klast_valign_code;
7563 # Re-construct the arrays of tokens associated with the original input lines
7564 # since they have probably changed due to inserting and deleting blanks
7565 # and a few other tokens.
7567 # This is the next token and its line index:
7569 my $Kmax = defined($Klimit) ? $Klimit : -1;
7571 # Verify that old line indexes are in still order. If this error occurs,
7572 # check locations where sub 'respace_tokens' creates new tokens (like
7573 # blank spaces). It must have set a bad old line index.
7574 if ( DEVEL_MODE && defined($Klimit) ) {
7575 my $iline = $rLL->[0]->[_LINE_INDEX_];
7576 for ( my $KK = 1 ; $KK <= $Klimit ; $KK++ ) {
7577 my $iline_last = $iline;
7578 $iline = $rLL->[$KK]->[_LINE_INDEX_];
7579 if ( $iline < $iline_last ) {
7581 my $token_m = $rLL->[$KK_m]->[_TOKEN_];
7582 my $token = $rLL->[$KK]->[_TOKEN_];
7583 my $type_m = $rLL->[$KK_m]->[_TYPE_];
7584 my $type = $rLL->[$KK]->[_TYPE_];
7586 Line indexes out of order at index K=$KK:
7587 at KK-1 =$KK_m: old line=$iline_last, type='$type_m', token='$token_m'
7588 at KK =$KK: old line=$iline, type='$type', token='$token',
7595 foreach my $line_of_tokens ( @{$rlines} ) {
7597 my $line_type = $line_of_tokens->{_line_type};
7598 if ( $line_type eq 'CODE' ) {
7600 # Get the old number of tokens on this line
7601 my $rK_range_old = $line_of_tokens->{_rK_range};
7602 my ( $Kfirst_old, $Klast_old ) = @{$rK_range_old};
7604 if ( defined($Kfirst_old) ) {
7605 $Kdiff_old = $Klast_old - $Kfirst_old;
7608 # Find the range of NEW K indexes for the line:
7609 # $Kfirst = index of first token on line
7610 # $Klast = index of last token on line
7611 my ( $Kfirst, $Klast );
7613 my $Knext_beg = $Knext; # this will be $Kfirst if we find tokens
7615 # Optimization: Although the actual K indexes may be completely
7616 # changed after respacing, the number of tokens on any given line
7617 # will often be nearly unchanged. So we will see if we can start
7618 # our search by guessing that the new line has the same number
7619 # of tokens as the old line.
7620 my $Knext_guess = $Knext + $Kdiff_old;
7621 if ( $Knext_guess > $Knext
7622 && $Knext_guess < $Kmax
7623 && $rLL->[$Knext_guess]->[_LINE_INDEX_] <= $iline )
7626 # the guess is good, so we can start our search here
7627 $Knext = $Knext_guess + 1;
7630 while ($Knext <= $Kmax
7631 && $rLL->[$Knext]->[_LINE_INDEX_] <= $iline )
7636 if ( $Knext > $Knext_beg ) {
7638 $Klast = $Knext - 1;
7640 # Delete any terminal blank token
7641 if ( $rLL->[$Klast]->[_TYPE_] eq 'b' ) { $Klast -= 1 }
7643 if ( $Klast < $Knext_beg ) {
7648 $Kfirst = $Knext_beg;
7650 # Save ranges of non-comment code. This will be used by
7651 # sub keep_old_line_breaks.
7652 if ( $rLL->[$Kfirst]->[_TYPE_] ne '#' ) {
7653 push @Krange_code_without_comments, [ $Kfirst, $Klast ];
7656 # Only save ending K indexes of code types which are blank
7657 # or 'VER'. These will be used for a convergence check.
7658 # See related code in sub 'convey_batch_to_vertical_aligner'
7659 my $CODE_type = $line_of_tokens->{_code_type};
7661 || $CODE_type eq 'VER' )
7663 push @Klast_valign_code, $Klast;
7668 # It is only safe to trim the actual line text if the input
7669 # line had a terminal blank token. Otherwise, we may be
7671 if ( $line_of_tokens->{_ended_in_blank_token} ) {
7672 $line_of_tokens->{_line_text} =~ s/\s+$//;
7674 $line_of_tokens->{_rK_range} = [ $Kfirst, $Klast ];
7676 # Deleting semicolons can create new empty code lines
7677 # which should be marked as blank
7678 if ( !defined($Kfirst) ) {
7679 my $CODE_type = $line_of_tokens->{_code_type};
7680 if ( !$CODE_type ) {
7681 $line_of_tokens->{_code_type} = 'BL';
7687 # There shouldn't be any nodes beyond the last one. This routine is
7688 # relinking lines and tokens after the tokens have been respaced. A fault
7689 # here indicates some kind of bug has been introduced into the above loops.
7690 # There is not good way to keep going; we better stop here.
7691 # FIXME: This will produce zero output. it would be best to find a way to
7692 # dump the input file.
7693 if ( $Knext <= $Kmax ) {
7695 Fault("unexpected tokens at end of file when reconstructing lines");
7697 $self->[_rKrange_code_without_comments_] = \@Krange_code_without_comments;
7699 # Setup the convergence test in the FileWriter based on line-ending indexes
7700 my $file_writer_object = $self->[_file_writer_object_];
7701 $file_writer_object->setup_convergence_test( \@Klast_valign_code );
7703 # Mark essential old breakpoints if combination -iob -lp is used. These
7704 # two options do not work well together, but we can avoid turning -iob off
7705 # by ignoring -iob at certain essential line breaks.
7706 # Fixes cases b1021 b1023 b1034 b1048 b1049 b1050 b1056 b1058
7707 if ( $rOpts_ignore_old_breakpoints && $rOpts_line_up_parentheses ) {
7708 my %is_assignment_or_fat_comma = %is_assignment;
7709 $is_assignment_or_fat_comma{'=>'} = 1;
7710 my $ris_essential_old_breakpoint =
7711 $self->[_ris_essential_old_breakpoint_];
7713 my ( $Kfirst, $Klast );
7714 foreach my $line_of_tokens ( @{$rlines} ) {
7716 my $line_type = $line_of_tokens->{_line_type};
7717 if ( $line_type ne 'CODE' ) {
7718 ( $Kfirst, $Klast ) = ( undef, undef );
7721 my ( $Kfirst_prev, $Klast_prev ) = ( $Kfirst, $Klast );
7722 ( $Kfirst, $Klast ) = @{ $line_of_tokens->{_rK_range} };
7724 next unless defined($Klast_prev);
7725 next unless defined($Kfirst);
7726 my $type_last = $rLL->[$Klast_prev]->[_TOKEN_];
7727 my $type_first = $rLL->[$Kfirst]->[_TOKEN_];
7729 unless ( $is_assignment_or_fat_comma{$type_last}
7730 || $is_assignment_or_fat_comma{$type_first} );
7731 $ris_essential_old_breakpoint->{$Klast_prev} = 1;
7737 sub keep_old_line_breaks {
7739 # Called once per file to find and mark any old line breaks which
7740 # should be kept. We will be translating the input hashes into
7743 # A flag is set as follows:
7744 # = 1 make a hard break (flush the current batch)
7745 # best for something like leading commas (-kbb=',')
7746 # = 2 make a soft break (keep building current batch)
7747 # best for something like leading ->
7751 my $rLL = $self->[_rLL_];
7752 my $rKrange_code_without_comments =
7753 $self->[_rKrange_code_without_comments_];
7754 my $rbreak_before_Kfirst = $self->[_rbreak_before_Kfirst_];
7755 my $rbreak_after_Klast = $self->[_rbreak_after_Klast_];
7756 my $rwant_container_open = $self->[_rwant_container_open_];
7757 my $K_opening_container = $self->[_K_opening_container_];
7758 my $ris_broken_container = $self->[_ris_broken_container_];
7759 my $ris_list_by_seqno = $self->[_ris_list_by_seqno_];
7761 # This code moved here from sub break_lists to fix b1120
7762 if ( $rOpts->{'break-at-old-method-breakpoints'} ) {
7763 foreach my $item ( @{$rKrange_code_without_comments} ) {
7764 my ( $Kfirst, $Klast ) = @{$item};
7765 my $type = $rLL->[$Kfirst]->[_TYPE_];
7766 my $token = $rLL->[$Kfirst]->[_TOKEN_];
7768 # leading '->' use a value of 2 which causes a soft
7769 # break rather than a hard break
7770 if ( $type eq '->' ) {
7771 $rbreak_before_Kfirst->{$Kfirst} = 2;
7774 # leading ')->' use a special flag to insure that both
7775 # opening and closing parens get opened
7776 # Fix for b1120: only for parens, not braces
7777 elsif ( $token eq ')' ) {
7778 my $Kn = $self->K_next_nonblank($Kfirst);
7780 unless ( defined($Kn)
7782 && $rLL->[$Kn]->[_TYPE_] eq '->' );
7783 my $seqno = $rLL->[$Kfirst]->[_TYPE_SEQUENCE_];
7784 next unless ($seqno);
7786 # Note: in previous versions there was a fix here to avoid
7787 # instability between conflicting -bom and -pvt or -pvtc flags.
7788 # The fix skipped -bom for a small line difference. But this
7789 # was troublesome, and instead the fix has been moved to
7790 # sub set_vertical_tightness_flags where priority is given to
7791 # the -bom flag over -pvt and -pvtc flags. Both opening and
7792 # closing paren flags are involved because even though -bom only
7793 # requests breaking before the closing paren, automated logic
7794 # opens the opening paren when the closing paren opens.
7795 # Relevant cases are b977, b1215, b1270, b1303
7797 $rwant_container_open->{$seqno} = 1;
7802 return unless ( %keep_break_before_type || %keep_break_after_type );
7804 my $check_for_break = sub {
7805 my ( $KK, $rkeep_break_hash, $rbreak_hash ) = @_;
7806 my $seqno = $rLL->[$KK]->[_TYPE_SEQUENCE_];
7808 # non-container tokens use the type as the key
7810 my $type = $rLL->[$KK]->[_TYPE_];
7811 if ( $rkeep_break_hash->{$type} ) {
7812 $rbreak_hash->{$KK} = 1;
7816 # container tokens use the token as the key
7818 my $token = $rLL->[$KK]->[_TOKEN_];
7819 my $flag = $rkeep_break_hash->{$token};
7822 my $match = $flag eq '1' || $flag eq '*';
7824 # check for special matching codes
7826 if ( $token eq '(' || $token eq ')' ) {
7827 $match = $self->match_paren_flag( $KK, $flag );
7829 elsif ( $token eq '{' || $token eq '}' ) {
7831 # These tentative codes 'b' and 'B' for brace types are
7832 # placeholders for possible future brace types. They
7833 # are not documented and may be changed.
7835 $self->[_rblock_type_of_seqno_]->{$seqno};
7836 if ( $flag eq 'b' ) { $match = $block_type }
7837 elsif ( $flag eq 'B' ) { $match = !$block_type }
7839 # unknown code - no match
7843 $rbreak_hash->{$KK} = 1 if ($match);
7848 foreach my $item ( @{$rKrange_code_without_comments} ) {
7849 my ( $Kfirst, $Klast ) = @{$item};
7851 $Kfirst, \%keep_break_before_type, $rbreak_before_Kfirst
7854 $Klast, \%keep_break_after_type, $rbreak_after_Klast
7860 sub weld_containers {
7862 # Called once per file to do any welding operations requested by --weld*
7866 # This count is used to eliminate needless calls for weld checks elsewere
7867 $total_weld_count = 0;
7869 return if ( $rOpts->{'indent-only'} );
7870 return unless ($rOpts_add_newlines);
7872 # Important: sub 'weld_cuddled_blocks' must be called before
7873 # sub 'weld_nested_containers'. This is because the cuddled option needs to
7874 # use the original _LEVEL_ values of containers, but the weld nested
7875 # containers changes _LEVEL_ of welded containers.
7877 # Here is a good test case to be sure that both cuddling and welding
7878 # are working and not interfering with each other: <<snippets/ce_wn1.in>>
7882 # if ($BOLD_MATH) { (
7883 # $labels, $comment,
7884 # join( '', '<B>', &make_math( $mode, '', '', $_ ), '</B>' )
7886 # &process_math_in_latex( $mode, $math_style, $slevel, "\\mbox{$text}" ),
7890 $self->weld_cuddled_blocks() if ( %{$rcuddled_block_types} );
7892 if ( $rOpts->{'weld-nested-containers'} ) {
7894 $self->weld_nested_containers();
7896 $self->weld_nested_quotes();
7899 #-------------------------------------------------------------
7900 # All welding is done. Finish setting up weld data structures.
7901 #-------------------------------------------------------------
7903 my $rLL = $self->[_rLL_];
7904 my $rK_weld_left = $self->[_rK_weld_left_];
7905 my $rK_weld_right = $self->[_rK_weld_right_];
7906 my $rweld_len_right_at_K = $self->[_rweld_len_right_at_K_];
7909 my @keys = keys %{$rK_weld_right};
7910 $total_weld_count = @keys;
7912 # First pass to process binary welds.
7913 # This loop is processed in unsorted order for efficiency.
7914 foreach my $Kstart (@keys) {
7915 my $Kend = $rK_weld_right->{$Kstart};
7917 # An error here would be due to an incorrect initialization introduced
7918 # in one of the above weld routines, like sub weld_nested.
7919 if ( $Kend <= $Kstart ) {
7920 Fault("Bad weld link: Kend=$Kend <= Kstart=$Kstart\n")
7925 # Set weld values for all tokens this welded pair
7926 foreach ( $Kstart + 1 .. $Kend ) {
7927 $rK_weld_left->{$_} = $Kstart;
7929 foreach my $Kx ( $Kstart .. $Kend - 1 ) {
7930 $rK_weld_right->{$Kx} = $Kend;
7931 $rweld_len_right_at_K->{$Kx} =
7932 $rLL->[$Kend]->[_CUMULATIVE_LENGTH_] -
7933 $rLL->[$Kx]->[_CUMULATIVE_LENGTH_];
7936 # Remember the leftmost index of welds which continue to the right
7937 if ( defined( $rK_weld_right->{$Kend} )
7938 && !defined( $rK_weld_left->{$Kstart} ) )
7940 push @K_multi_weld, $Kstart;
7944 # Second pass to process chains of welds (these are rare).
7945 # This has to be processed in sorted order.
7946 if (@K_multi_weld) {
7948 foreach my $Kstart ( sort { $a <=> $b } @K_multi_weld ) {
7950 # Skip any interior K which was originally missing a left link
7951 next if ( $Kstart <= $Kend );
7953 # Find the end of this chain
7954 $Kend = $rK_weld_right->{$Kstart};
7955 my $Knext = $rK_weld_right->{$Kend};
7956 while ( defined($Knext) ) {
7958 $Knext = $rK_weld_right->{$Kend};
7961 # Set weld values this chain
7962 foreach ( $Kstart + 1 .. $Kend ) {
7963 $rK_weld_left->{$_} = $Kstart;
7965 foreach my $Kx ( $Kstart .. $Kend - 1 ) {
7966 $rK_weld_right->{$Kx} = $Kend;
7967 $rweld_len_right_at_K->{$Kx} =
7968 $rLL->[$Kend]->[_CUMULATIVE_LENGTH_] -
7969 $rLL->[$Kx]->[_CUMULATIVE_LENGTH_];
7977 sub cumulative_length_before_K {
7978 my ( $self, $KK ) = @_;
7979 my $rLL = $self->[_rLL_];
7980 return ( $KK <= 0 ) ? 0 : $rLL->[ $KK - 1 ]->[_CUMULATIVE_LENGTH_];
7983 sub weld_cuddled_blocks {
7986 # Called once per file to handle cuddled formatting
7988 my $rK_weld_left = $self->[_rK_weld_left_];
7989 my $rK_weld_right = $self->[_rK_weld_right_];
7990 my $rblock_type_of_seqno = $self->[_rblock_type_of_seqno_];
7992 # This routine implements the -cb flag by finding the appropriate
7993 # closing and opening block braces and welding them together.
7994 return unless ( %{$rcuddled_block_types} );
7996 my $rLL = $self->[_rLL_];
7997 return unless ( defined($rLL) && @{$rLL} );
7998 my $rbreak_container = $self->[_rbreak_container_];
8000 my $K_opening_container = $self->[_K_opening_container_];
8001 my $K_closing_container = $self->[_K_closing_container_];
8003 my $length_to_opening_seqno = sub {
8005 my $KK = $K_opening_container->{$seqno};
8006 my $lentot = $KK <= 0 ? 0 : $rLL->[ $KK - 1 ]->[_CUMULATIVE_LENGTH_];
8009 my $length_to_closing_seqno = sub {
8011 my $KK = $K_closing_container->{$seqno};
8012 my $lentot = $KK <= 0 ? 0 : $rLL->[ $KK - 1 ]->[_CUMULATIVE_LENGTH_];
8016 my $is_broken_block = sub {
8018 # a block is broken if the input line numbers of the braces differ
8019 # we can only cuddle between broken blocks
8021 my $K_opening = $K_opening_container->{$seqno};
8022 return unless ( defined($K_opening) );
8023 my $K_closing = $K_closing_container->{$seqno};
8024 return unless ( defined($K_closing) );
8025 return $rbreak_container->{$seqno}
8026 || $rLL->[$K_closing]->[_LINE_INDEX_] !=
8027 $rLL->[$K_opening]->[_LINE_INDEX_];
8030 # A stack to remember open chains at all levels: This is a hash rather than
8031 # an array for safety because negative levels can occur in files with
8032 # errors. This allows us to keep processing with negative levels.
8033 # $in_chain{$level} = [$chain_type, $type_sequence];
8035 my $CBO = $rOpts->{'cuddled-break-option'};
8037 # loop over structure items to find cuddled pairs
8039 my $KNEXT = $self->[_K_first_seq_item_];
8040 while ( defined($KNEXT) ) {
8042 $KNEXT = $rLL->[$KNEXT]->[_KNEXT_SEQ_ITEM_];
8043 my $rtoken_vars = $rLL->[$KK];
8044 my $type_sequence = $rtoken_vars->[_TYPE_SEQUENCE_];
8045 if ( !$type_sequence ) {
8046 next if ( $KK == 0 ); # first token in file may not be container
8048 # A fault here implies that an error was made in the little loop at
8049 # the bottom of sub 'respace_tokens' which set the values of
8050 # _KNEXT_SEQ_ITEM_. Or an error has been introduced in the
8051 # loop control lines above.
8052 Fault("sequence = $type_sequence not defined at K=$KK")
8057 # NOTE: we must use the original levels here. They can get changed
8058 # by sub 'weld_nested_containers', so this routine must be called
8059 # before sub 'weld_nested_containers'.
8060 my $last_level = $level;
8061 $level = $rtoken_vars->[_LEVEL_];
8063 if ( $level < $last_level ) { $in_chain{$last_level} = undef }
8064 elsif ( $level > $last_level ) { $in_chain{$level} = undef }
8066 # We are only looking at code blocks
8067 my $token = $rtoken_vars->[_TOKEN_];
8068 my $type = $rtoken_vars->[_TYPE_];
8069 next unless ( $type eq $token );
8071 if ( $token eq '{' ) {
8073 my $block_type = $rblock_type_of_seqno->{$type_sequence};
8074 if ( !$block_type ) {
8076 # patch for unrecognized block types which may not be labeled
8077 my $Kp = $self->K_previous_nonblank($KK);
8078 while ( $Kp && $rLL->[$Kp]->[_TYPE_] eq '#' ) {
8079 $Kp = $self->K_previous_nonblank($Kp);
8082 $block_type = $rLL->[$Kp]->[_TOKEN_];
8084 if ( $in_chain{$level} ) {
8086 # we are in a chain and are at an opening block brace.
8087 # See if we are welding this opening brace with the previous
8088 # block brace. Get their identification numbers:
8089 my $closing_seqno = $in_chain{$level}->[1];
8090 my $opening_seqno = $type_sequence;
8092 # The preceding block must be on multiple lines so that its
8093 # closing brace will start a new line.
8094 if ( !$is_broken_block->($closing_seqno) ) {
8095 next unless ( $CBO == 2 );
8096 $rbreak_container->{$closing_seqno} = 1;
8099 # we will let the trailing block be either broken or intact
8100 ## && $is_broken_block->($opening_seqno);
8102 # We can weld the closing brace to its following word ..
8103 my $Ko = $K_closing_container->{$closing_seqno};
8105 if ( defined($Ko) ) {
8106 $Kon = $self->K_next_nonblank($Ko);
8109 # ..unless it is a comment
8110 if ( defined($Kon) && $rLL->[$Kon]->[_TYPE_] ne '#' ) {
8112 # OK to weld these two tokens...
8113 $rK_weld_right->{$Ko} = $Kon;
8114 $rK_weld_left->{$Kon} = $Ko;
8116 # Set flag that we want to break the next container
8117 # so that the cuddled line is balanced.
8118 $rbreak_container->{$opening_seqno} = 1
8125 # We are not in a chain. Start a new chain if we see the
8126 # starting block type.
8127 if ( $rcuddled_block_types->{$block_type} ) {
8128 $in_chain{$level} = [ $block_type, $type_sequence ];
8132 $in_chain{$level} = [ $block_type, $type_sequence ];
8136 elsif ( $token eq '}' ) {
8137 if ( $in_chain{$level} ) {
8139 # We are in a chain at a closing brace. See if this chain
8141 my $Knn = $self->K_next_code($KK);
8144 my $chain_type = $in_chain{$level}->[0];
8145 my $next_nonblank_token = $rLL->[$Knn]->[_TOKEN_];
8147 $rcuddled_block_types->{$chain_type}->{$next_nonblank_token}
8151 # Note that we do not weld yet because we must wait until
8152 # we we are sure that an opening brace for this follows.
8153 $in_chain{$level}->[1] = $type_sequence;
8155 else { $in_chain{$level} = undef }
8162 sub find_nested_pairs {
8165 # This routine is called once per file to do preliminary work needed for
8166 # the --weld-nested option. This information is also needed for adding
8169 my $rLL = $self->[_rLL_];
8170 return unless ( defined($rLL) && @{$rLL} );
8173 my $K_opening_container = $self->[_K_opening_container_];
8174 my $K_closing_container = $self->[_K_closing_container_];
8175 my $rblock_type_of_seqno = $self->[_rblock_type_of_seqno_];
8177 # We define an array of pairs of nested containers
8180 # Names of calling routines can either be marked as 'i' or 'w',
8181 # and they may invoke a sub call with an '->'. We will consider
8182 # any consecutive string of such types as a single unit when making
8183 # weld decisions. We also allow a leading !
8184 my $is_name_type = {
8192 # Loop over all closing container tokens
8193 foreach my $inner_seqno ( keys %{$K_closing_container} ) {
8194 my $K_inner_closing = $K_closing_container->{$inner_seqno};
8196 # See if it is immediately followed by another, outer closing token
8197 my $K_outer_closing = $K_inner_closing + 1;
8198 $K_outer_closing += 1
8199 if ( $K_outer_closing < $Num
8200 && $rLL->[$K_outer_closing]->[_TYPE_] eq 'b' );
8202 next unless ( $K_outer_closing < $Num );
8203 my $outer_seqno = $rLL->[$K_outer_closing]->[_TYPE_SEQUENCE_];
8204 next unless ($outer_seqno);
8205 my $token_outer_closing = $rLL->[$K_outer_closing]->[_TOKEN_];
8206 next unless ( $is_closing_token{$token_outer_closing} );
8208 # Now we have to check the opening tokens.
8209 my $K_outer_opening = $K_opening_container->{$outer_seqno};
8210 my $K_inner_opening = $K_opening_container->{$inner_seqno};
8211 next unless defined($K_outer_opening) && defined($K_inner_opening);
8213 my $inner_blocktype = $rblock_type_of_seqno->{$inner_seqno};
8214 my $outer_blocktype = $rblock_type_of_seqno->{$outer_seqno};
8216 # Verify that the inner opening token is the next container after the
8217 # outer opening token.
8218 my $K_io_check = $rLL->[$K_outer_opening]->[_KNEXT_SEQ_ITEM_];
8219 next unless defined($K_io_check);
8220 if ( $K_io_check != $K_inner_opening ) {
8222 # The inner opening container does not immediately follow the outer
8223 # opening container, but we may still allow a weld if they are
8224 # separated by a sub signature. For example, we may have something
8225 # like this, where $K_io_check may be at the first 'x' instead of
8226 # 'io'. So we need to hop over the signature and see if we arrive
8231 # $obj->then( sub ( $code ) {
8233 # return $c->render(text => '', status => $code);
8238 next if ( !$inner_blocktype || $inner_blocktype ne 'sub' );
8239 next if $rLL->[$K_io_check]->[_TOKEN_] ne '(';
8240 my $seqno_signature = $rLL->[$K_io_check]->[_TYPE_SEQUENCE_];
8241 next unless defined($seqno_signature);
8242 my $K_signature_closing = $K_closing_container->{$seqno_signature};
8243 next unless defined($K_signature_closing);
8244 my $K_test = $rLL->[$K_signature_closing]->[_KNEXT_SEQ_ITEM_];
8246 unless ( defined($K_test) && $K_test == $K_inner_opening );
8248 # OK, we have arrived at 'io' in the above diagram. We should put
8249 # a limit on the length or complexity of the signature here. There
8250 # is no perfect way to do this, one way is to put a limit on token
8251 # count. For consistency with older versions, we should allow a
8252 # signature with a single variable to weld, but not with
8253 # multiple variables. A single variable as in 'sub ($code) {' can
8254 # have a $Kdiff of 2 to 4, depending on spacing.
8256 # But two variables like 'sub ($v1,$v2) {' can have a diff of 4 to
8257 # 7, depending on spacing. So to keep formatting consistent with
8258 # previous versions, we will also avoid welding if there is a comma
8261 my $Kdiff = $K_signature_closing - $K_io_check;
8262 next if ( $Kdiff > 4 );
8265 foreach my $KK ( $K_io_check + 1 .. $K_signature_closing - 1 ) {
8266 if ( $rLL->[$KK]->[_TYPE_] eq ',' ) { $saw_comma = 1; last }
8268 next if ($saw_comma);
8271 # Yes .. this is a possible nesting pair.
8272 # They can be separated by a small amount.
8273 my $K_diff = $K_inner_opening - $K_outer_opening;
8275 # Count nonblank characters separating them.
8276 if ( $K_diff < 0 ) { next } # Shouldn't happen
8277 my $Kn = $K_outer_opening;
8278 my $nonblank_count = 0;
8282 # Here is an example of a long identifier chain which counts as a
8283 # single nonblank here (this spans about 10 K indexes):
8284 # if ( !Boucherot::SetOfConnections->new->handler->execute(
8287 my $Kn_first = $K_outer_opening;
8288 my $Kn_last_nonblank;
8291 my $Kn = $K_outer_opening + 1 ;
8292 $Kn <= $K_inner_opening ;
8296 next if ( $rLL->[$Kn]->[_TYPE_] eq 'b' );
8297 if ( !$nonblank_count ) { $Kn_first = $Kn }
8298 if ( $Kn eq $K_inner_opening ) { $nonblank_count++; last; }
8299 $Kn_last_nonblank = $Kn;
8301 # skip chain of identifier tokens
8302 my $last_type = $type;
8303 my $last_is_name = $is_name;
8304 $type = $rLL->[$Kn]->[_TYPE_];
8305 if ( $type eq '#' ) { $saw_comment = 1; last }
8306 $is_name = $is_name_type->{$type};
8307 next if ( $is_name && $last_is_name );
8310 last if ( $nonblank_count > 2 );
8313 # Do not weld across a comment .. fix for c058.
8314 next if ($saw_comment);
8316 # Patch for b1104: do not weld to a paren preceded by sort/map/grep
8317 # because the special line break rules may cause a blinking state
8318 if ( defined($Kn_last_nonblank)
8319 && $rLL->[$K_inner_opening]->[_TOKEN_] eq '('
8320 && $rLL->[$Kn_last_nonblank]->[_TYPE_] eq 'k' )
8322 my $token = $rLL->[$Kn_last_nonblank]->[_TOKEN_];
8324 # Turn off welding at sort/map/grep (
8325 if ( $is_sort_map_grep{$token} ) { $nonblank_count = 10 }
8330 # adjacent opening containers, like: do {{
8331 $nonblank_count == 1
8333 # short item following opening paren, like: fun( yyy (
8334 || ( $nonblank_count == 2
8335 && $rLL->[$K_outer_opening]->[_TOKEN_] eq '(' )
8337 # anonymous sub + prototype or sig: )->then( sub ($code) {
8338 # ... but it seems best not to stack two structural blocks, like
8340 # sub make_anon_with_my_sub { sub {
8341 # because it probably hides the structure a little too much.
8342 || ( $inner_blocktype
8343 && $inner_blocktype eq 'sub'
8344 && $rLL->[$Kn_first]->[_TOKEN_] eq 'sub'
8345 && !$outer_blocktype )
8349 [ $inner_seqno, $outer_seqno, $K_inner_closing ];
8354 # The weld routine expects the pairs in order in the form
8355 # [$seqno_inner, $seqno_outer]
8356 # And they must be in the same order as the inner closing tokens
8357 # (otherwise, welds of three or more adjacent tokens will not work). The K
8358 # value of this inner closing token has temporarily been stored for
8362 # Drop the K index after sorting (it would cause trouble downstream)
8363 map { [ $_->[0], $_->[1] ] }
8365 # Sort on the K values
8366 sort { $a->[2] <=> $b->[2] } @nested_pairs;
8368 return \@nested_pairs;
8371 sub match_paren_flag {
8373 # Decide if this paren is excluded by user request:
8374 # undef matches no parens
8375 # '*' matches all parens
8376 # 'k' matches only if the previous nonblank token is a perl builtin
8377 # keyword (such as 'if', 'while'),
8378 # 'K' matches if 'k' does not, meaning if the previous token is not a
8380 # 'f' matches if the previous token is a function other than a keyword.
8381 # 'F' matches if 'f' does not.
8382 # 'w' matches if either 'k' or 'f' match.
8383 # 'W' matches if 'w' does not.
8384 my ( $self, $KK, $flag ) = @_;
8386 return 0 unless ( defined($flag) );
8387 return 0 if $flag eq '0';
8388 return 1 if $flag eq '1';
8389 return 1 if $flag eq '*';
8390 return 0 unless ( defined($KK) );
8392 my $rLL = $self->[_rLL_];
8393 my $rtoken_vars = $rLL->[$KK];
8394 my $seqno = $rtoken_vars->[_TYPE_SEQUENCE_];
8395 return 0 unless ($seqno);
8396 my $token = $rtoken_vars->[_TOKEN_];
8397 my $K_opening = $KK;
8398 if ( !$is_opening_token{$token} ) {
8399 $K_opening = $self->[_K_opening_container_]->{$seqno};
8401 return unless ( defined($K_opening) );
8403 my ( $is_f, $is_k, $is_w );
8404 my $Kp = $self->K_previous_nonblank($K_opening);
8405 if ( defined($Kp) ) {
8406 my $type_p = $rLL->[$Kp]->[_TYPE_];
8409 $is_k = $type_p eq 'k';
8412 $is_f = $self->[_ris_function_call_paren_]->{$seqno};
8414 # either keyword or function call?
8415 $is_w = $is_k || $is_f;
8418 if ( $flag eq 'k' ) { $match = $is_k }
8419 elsif ( $flag eq 'K' ) { $match = !$is_k }
8420 elsif ( $flag eq 'f' ) { $match = $is_f }
8421 elsif ( $flag eq 'F' ) { $match = !$is_f }
8422 elsif ( $flag eq 'w' ) { $match = $is_w }
8423 elsif ( $flag eq 'W' ) { $match = !$is_w }
8427 sub is_excluded_weld {
8429 # decide if this weld is excluded by user request
8430 my ( $self, $KK, $is_leading ) = @_;
8431 my $rLL = $self->[_rLL_];
8432 my $rtoken_vars = $rLL->[$KK];
8433 my $token = $rtoken_vars->[_TOKEN_];
8434 my $rflags = $weld_nested_exclusion_rules{$token};
8435 return 0 unless ( defined($rflags) );
8436 my $flag = $is_leading ? $rflags->[0] : $rflags->[1];
8437 return 0 unless ( defined($flag) );
8438 return 1 if $flag eq '*';
8439 return $self->match_paren_flag( $KK, $flag );
8442 # hashes to simplify welding logic
8443 my %type_ok_after_bareword;
8445 my %has_tight_paren;
8449 # types needed for welding RULE 6
8450 my @q = qw# => -> { ( [ #;
8451 @type_ok_after_bareword{@q} = (1) x scalar(@q);
8454 @is_ternary{@q} = (1) x scalar(@q);
8456 # these types do not 'like' to be separated from a following paren
8457 @q = qw(w i q Q G C Z U);
8458 @{has_tight_paren}{@q} = (1) x scalar(@q);
8461 use constant DEBUG_WELD => 0;
8463 sub setup_new_weld_measurements {
8465 # Define quantities to check for excess line lengths when welded.
8466 # Called by sub 'weld_nested_containers' and sub 'weld_nested_quotes'
8468 my ( $self, $Kouter_opening, $Kinner_opening ) = @_;
8470 # Given indexes of outer and inner opening containers to be welded:
8471 # $Kouter_opening, $Kinner_opening
8473 # Returns these variables:
8474 # $new_weld_ok = true (new weld ok) or false (do not start new weld)
8475 # $starting_indent = starting indentation
8476 # $starting_lentot = starting cumulative length
8477 # $msg = diagnostic message for debugging
8479 my $rLL = $self->[_rLL_];
8480 my $rlines = $self->[_rlines_];
8484 my $starting_lentot;
8485 my $maximum_text_length;
8488 my $iline_oo = $rLL->[$Kouter_opening]->[_LINE_INDEX_];
8489 my $rK_range = $rlines->[$iline_oo]->{_rK_range};
8490 my ( $Kfirst, $Klast ) = @{$rK_range};
8492 #-------------------------------------------------------------------------
8493 # We now define a reference index, '$Kref', from which to start measuring
8494 # This choice turns out to be critical for keeping welds stable during
8495 # iterations, so we go through a number of STEPS...
8496 #-------------------------------------------------------------------------
8498 # STEP 1: Our starting guess is to use measure from the first token of the
8499 # current line. This is usually a good guess.
8502 # STEP 2: See if we should go back a little farther
8503 my $Kprev = $self->K_previous_nonblank($Kfirst);
8504 if ( defined($Kprev) ) {
8506 # Avoid measuring from between an opening paren and a previous token
8507 # which should stay close to it ... fixes b1185
8508 my $token_oo = $rLL->[$Kouter_opening]->[_TOKEN_];
8509 my $type_prev = $rLL->[$Kprev]->[_TYPE_];
8510 if ( $Kouter_opening == $Kfirst
8512 && $has_tight_paren{$type_prev} )
8517 # Back up and count length from a token like '=' or '=>' if -lp
8518 # is used (this fixes b520)
8519 # ...or if a break is wanted before there
8520 elsif ($rOpts_line_up_parentheses
8521 || $want_break_before{$type_prev} )
8524 # If there are other sequence items between the start of this line
8525 # and the opening token in question, then do not include tokens on
8526 # the previous line in length calculations. This check added to
8527 # fix case b1174 which had a '?' on the line
8528 my $no_previous_seq_item = $Kref == $Kouter_opening
8529 || $rLL->[$Kref]->[_KNEXT_SEQ_ITEM_] == $Kouter_opening;
8531 if ( $no_previous_seq_item
8532 && substr( $type_prev, 0, 1 ) eq '=' )
8536 # Fix for b1144 and b1112: backup to the first nonblank
8537 # character before the =>, or to the start of its line.
8538 if ( $type_prev eq '=>' ) {
8539 my $iline_prev = $rLL->[$Kprev]->[_LINE_INDEX_];
8540 my $rK_range = $rlines->[$iline_prev]->{_rK_range};
8541 my ( $Kfirst, $Klast ) = @{$rK_range};
8542 for ( my $KK = $Kref - 1 ; $KK >= $Kfirst ; $KK-- ) {
8543 next if ( $rLL->[$KK]->[_TYPE_] eq 'b' );
8552 # STEP 3: Now look ahead for a ternary and, if found, use it.
8553 # This fixes case b1182.
8554 # Also look for a ')' at the same level and, if found, use it.
8555 # This fixes case b1224.
8556 if ( $Kref < $Kouter_opening ) {
8557 my $Knext = $rLL->[$Kref]->[_KNEXT_SEQ_ITEM_];
8558 my $level_oo = $rLL->[$Kouter_opening]->[_LEVEL_];
8559 while ( $Knext < $Kouter_opening ) {
8560 if ( $rLL->[$Knext]->[_LEVEL_] == $level_oo ) {
8561 if ( $is_ternary{ $rLL->[$Knext]->[_TYPE_] }
8562 || $rLL->[$Knext]->[_TOKEN_] eq ')' )
8568 $Knext = $rLL->[$Knext]->[_KNEXT_SEQ_ITEM_];
8572 # Define the starting measurements we will need
8574 $Kref <= 0 ? 0 : $rLL->[ $Kref - 1 ]->[_CUMULATIVE_LENGTH_];
8575 $starting_level = $rLL->[$Kref]->[_LEVEL_];
8576 $starting_ci = $rLL->[$Kref]->[_CI_LEVEL_];
8578 $maximum_text_length = $maximum_text_length_at_level[$starting_level] -
8579 $starting_ci * $rOpts_continuation_indentation;
8581 # STEP 4: Switch to using the outer opening token as the reference
8582 # point if a line break before it would make a longer line.
8583 # Fixes case b1055 and is also an alternate fix for b1065.
8584 my $starting_level_oo = $rLL->[$Kouter_opening]->[_LEVEL_];
8585 if ( $Kref < $Kouter_opening ) {
8586 my $starting_ci_oo = $rLL->[$Kouter_opening]->[_CI_LEVEL_];
8587 my $lentot_oo = $rLL->[ $Kouter_opening - 1 ]->[_CUMULATIVE_LENGTH_];
8588 my $maximum_text_length_oo =
8589 $maximum_text_length_at_level[$starting_level_oo] -
8590 $starting_ci_oo * $rOpts_continuation_indentation;
8592 # The excess length to any cumulative length K = lenK is either
8593 # $excess = $lenk - ($lentot + $maximum_text_length), or
8594 # $excess = $lenk - ($lentot_oo + $maximum_text_length_oo),
8595 # so the worst case (maximum excess) corresponds to the configuration
8596 # with minimum value of the sum: $lentot + $maximum_text_length
8597 if ( $lentot_oo + $maximum_text_length_oo <
8598 $starting_lentot + $maximum_text_length )
8600 $Kref = $Kouter_opening;
8601 $starting_level = $starting_level_oo;
8602 $starting_ci = $starting_ci_oo;
8603 $starting_lentot = $lentot_oo;
8604 $maximum_text_length = $maximum_text_length_oo;
8608 my $new_weld_ok = 1;
8610 # STEP 5, fix b1020: Avoid problem areas with the -wn -lp combination. The
8611 # combination -wn -lp -dws -naws does not work well and can cause blinkers.
8612 # It will probably only occur in stress testing. For this situation we
8613 # will only start a new weld if we start at a 'good' location.
8614 # - Added 'if' to fix case b1032.
8615 # - Require blank before certain previous characters to fix b1111.
8616 # - Add ';' to fix case b1139
8617 # - Convert from '$ok_to_weld' to '$new_weld_ok' to fix b1162.
8618 # - relaxed constraints for b1227
8620 && $rOpts_line_up_parentheses
8621 && $rOpts_delete_old_whitespace
8622 && !$rOpts_add_whitespace
8623 && defined($Kprev) )
8625 my $type_first = $rLL->[$Kfirst]->[_TYPE_];
8626 my $token_first = $rLL->[$Kfirst]->[_TOKEN_];
8627 my $type_prev = $rLL->[$Kprev]->[_TYPE_];
8629 if ( $Kprev >= 0 ) { $type_pp = $rLL->[ $Kprev - 1 ]->[_TYPE_] }
8631 $type_prev =~ /^[\,\.\;]/
8632 || $type_prev =~ /^[=\{\[\(\L]/
8633 && ( $type_pp eq 'b' || $type_pp eq '}' || $type_first eq 'k' )
8634 || $type_first =~ /^[=\,\.\;\{\[\(\L]/
8635 || $type_first eq '||'
8638 && ( $token_first eq 'if'
8639 || $token_first eq 'or' )
8644 "Skipping weld: poor break with -lp and ci at type_first='$type_first' type_prev='$type_prev' type_pp=$type_pp\n";
8648 return ( $new_weld_ok, $maximum_text_length, $starting_lentot, $msg );
8651 sub excess_line_length_for_Krange {
8652 my ( $self, $Kfirst, $Klast ) = @_;
8654 # returns $excess_length =
8655 # by how many characters a line composed of tokens $Kfirst .. $Klast will
8656 # exceed the allowed line length
8658 my $rLL = $self->[_rLL_];
8659 my $length_before_Kfirst =
8662 : $rLL->[ $Kfirst - 1 ]->[_CUMULATIVE_LENGTH_];
8664 # backup before a side comment if necessary
8666 if ( $rOpts_ignore_side_comment_lengths
8667 && $rLL->[$Klast]->[_TYPE_] eq '#' )
8669 my $Kprev = $self->K_previous_nonblank($Klast);
8670 if ( defined($Kprev) && $Kprev >= $Kfirst ) { $Kend = $Kprev }
8673 # get the length of the text
8674 my $length = $rLL->[$Kend]->[_CUMULATIVE_LENGTH_] - $length_before_Kfirst;
8676 # get the size of the text window
8677 my $level = $rLL->[$Kfirst]->[_LEVEL_];
8678 my $ci_level = $rLL->[$Kfirst]->[_CI_LEVEL_];
8679 my $max_text_length = $maximum_text_length_at_level[$level] -
8680 $ci_level * $rOpts_continuation_indentation;
8682 my $excess_length = $length - $max_text_length;
8686 "Kfirst=$Kfirst, Klast=$Klast, Kend=$Kend, level=$level, ci=$ci_level, max_text_length=$max_text_length, length=$length\n";
8687 return ($excess_length);
8690 sub weld_nested_containers {
8693 # Called once per file for option '--weld-nested-containers'
8695 my $rK_weld_left = $self->[_rK_weld_left_];
8696 my $rK_weld_right = $self->[_rK_weld_right_];
8698 # This routine implements the -wn flag by "welding together"
8699 # the nested closing and opening tokens which were previously
8700 # identified by sub 'find_nested_pairs'. "welding" simply
8701 # involves setting certain hash values which will be checked
8702 # later during formatting.
8704 my $rLL = $self->[_rLL_];
8705 my $rlines = $self->[_rlines_];
8706 my $K_opening_container = $self->[_K_opening_container_];
8707 my $K_closing_container = $self->[_K_closing_container_];
8708 my $rblock_type_of_seqno = $self->[_rblock_type_of_seqno_];
8709 my $ris_excluded_lp_container = $self->[_ris_excluded_lp_container_];
8710 my $ris_asub_block = $self->[_ris_asub_block_];
8711 my $rOpts_asbl = $rOpts->{'opening-anonymous-sub-brace-on-new-line'};
8713 # Find nested pairs of container tokens for any welding.
8714 my $rnested_pairs = $self->find_nested_pairs();
8716 # Return unless there are nested pairs to weld
8717 return unless defined($rnested_pairs) && @{$rnested_pairs};
8719 my $rOpts_break_at_old_method_breakpoints =
8720 $rOpts->{'break-at-old-method-breakpoints'};
8722 # This array will hold the sequence numbers of the tokens to be welded.
8725 # Variables needed for estimating line lengths
8726 my $maximum_text_length; # maximum spaces available for text
8727 my $starting_lentot; # cumulative text to start of current line
8729 my $iline_outer_opening = -1;
8730 my $weld_count_this_start = 0;
8732 # OLD: $single_line_tol added to fix cases b1180 b1181
8733 # = $rOpts_continuation_indentation > $rOpts_indent_columns ? 1 : 0;
8734 # NEW: $single_line_tol=0; fixes b1212 and b1180-1181 work now
8735 my $single_line_tol = 0;
8737 my $multiline_tol = $single_line_tol + 1 +
8738 max( $rOpts_indent_columns, $rOpts_continuation_indentation );
8740 # Define a welding cutoff level: do not start a weld if the inside
8741 # container level equals or exceeds this level.
8743 # We use the minimum of two criteria, either of which may be more
8744 # restrictive. The 'alpha' value is more restrictive in (b1206, b1252) and
8745 # the 'beta' value is more restrictive in other cases (b1243).
8747 my $weld_cutoff_level = min( $stress_level_alpha, $stress_level_beta + 3 );
8749 # The vertical tightness flags can throw off line length calculations.
8750 # This patch was added to fix instability issue b1284.
8751 # It works to always use a tol of 1 for 1 line block length tests, but
8752 # this restricted value keeps test case wn6.wn working as before.
8753 # It may be necessary to include '[' and '{' here in the future.
8754 my $one_line_tol = $opening_vertical_tightness{'('} ? 1 : 0;
8756 my $length_to_opening_seqno = sub {
8758 my $KK = $K_opening_container->{$seqno};
8759 my $lentot = defined($KK)
8760 && $KK > 0 ? $rLL->[ $KK - 1 ]->[_CUMULATIVE_LENGTH_] : 0;
8764 my $length_to_closing_seqno = sub {
8766 my $KK = $K_closing_container->{$seqno};
8767 my $lentot = defined($KK)
8768 && $KK > 0 ? $rLL->[ $KK - 1 ]->[_CUMULATIVE_LENGTH_] : 0;
8773 # _oo=outer opening, i.e. first of { {
8774 # _io=inner opening, i.e. second of { {
8775 # _oc=outer closing, i.e. second of } {
8776 # _ic=inner closing, i.e. first of } }
8780 # Main loop over nested pairs...
8781 # We are working from outermost to innermost pairs so that
8782 # level changes will be complete when we arrive at the inner pairs.
8783 while ( my $item = pop( @{$rnested_pairs} ) ) {
8784 my ( $inner_seqno, $outer_seqno ) = @{$item};
8786 my $Kouter_opening = $K_opening_container->{$outer_seqno};
8787 my $Kinner_opening = $K_opening_container->{$inner_seqno};
8788 my $Kouter_closing = $K_closing_container->{$outer_seqno};
8789 my $Kinner_closing = $K_closing_container->{$inner_seqno};
8791 # RULE: do not weld if inner container has <= 3 tokens unless the next
8792 # token is a heredoc (so we know there will be multiple lines)
8793 if ( $Kinner_closing - $Kinner_opening <= 4 ) {
8794 my $Knext_nonblank = $self->K_next_nonblank($Kinner_opening);
8795 next unless defined($Knext_nonblank);
8796 my $type = $rLL->[$Knext_nonblank]->[_TYPE_];
8797 next unless ( $type eq 'h' );
8800 my $outer_opening = $rLL->[$Kouter_opening];
8801 my $inner_opening = $rLL->[$Kinner_opening];
8802 my $outer_closing = $rLL->[$Kouter_closing];
8803 my $inner_closing = $rLL->[$Kinner_closing];
8805 # RULE: do not weld to a hash brace. The reason is that it has a very
8806 # strong bond strength to the next token, so a line break after it
8807 # may not work. Previously we allowed welding to something like @{
8808 # but that caused blinking states (cases b751, b779).
8809 if ( $inner_opening->[_TYPE_] eq 'L' ) {
8813 # RULE: do not weld to a square bracket which does not contain commas
8814 if ( $inner_opening->[_TYPE_] eq '[' ) {
8815 my $rtype_count = $self->[_rtype_count_by_seqno_]->{$inner_seqno};
8816 next unless ($rtype_count);
8817 my $comma_count = $rtype_count->{','};
8818 next unless ($comma_count);
8820 # Do not weld if there is text before a '[' such as here:
8821 # curr_opt ( @beg [2,5] )
8822 # It will not break into the desired sandwich structure.
8823 # This fixes case b109, 110.
8824 my $Kdiff = $Kinner_opening - $Kouter_opening;
8825 next if ( $Kdiff > 2 );
8828 && $rLL->[ $Kouter_opening + 1 ]->[_TYPE_] ne 'b' );
8832 # RULE: Avoid welding under stress. The idea is that we need to have a
8833 # little space* within a welded container to avoid instability. Note
8834 # that after each weld the level values are reduced, so long multiple
8835 # welds can still be made. This rule will seldom be a limiting factor
8836 # in actual working code. Fixes b1206, b1243.
8837 my $inner_level = $inner_opening->[_LEVEL_];
8838 if ( $inner_level >= $weld_cutoff_level ) { next }
8840 # Set flag saying if this pair starts a new weld
8841 my $starting_new_weld = !( @welds && $outer_seqno == $welds[-1]->[0] );
8843 # Set flag saying if this pair is adjacent to the previous nesting pair
8844 # (even if previous pair was rejected as a weld)
8845 my $touch_previous_pair =
8846 defined($previous_pair) && $outer_seqno == $previous_pair->[0];
8847 $previous_pair = $item;
8849 my $do_not_weld_rule = 0;
8851 my $is_one_line_weld;
8853 my $iline_oo = $outer_opening->[_LINE_INDEX_];
8854 my $iline_io = $inner_opening->[_LINE_INDEX_];
8855 my $iline_ic = $inner_closing->[_LINE_INDEX_];
8856 my $iline_oc = $outer_closing->[_LINE_INDEX_];
8857 my $token_oo = $outer_opening->[_TOKEN_];
8858 my $token_io = $inner_opening->[_TOKEN_];
8860 my $is_multiline_weld =
8861 $iline_oo == $iline_io
8862 && $iline_ic == $iline_oc
8863 && $iline_io != $iline_ic;
8866 my $len_oo = $rLL->[$Kouter_opening]->[_CUMULATIVE_LENGTH_];
8867 my $len_io = $rLL->[$Kinner_opening]->[_CUMULATIVE_LENGTH_];
8869 Pair seqo=$outer_seqno seqi=$inner_seqno lines: loo=$iline_oo lio=$iline_io lic=$iline_ic loc=$iline_oc
8870 Koo=$Kouter_opening Kio=$Kinner_opening Kic=$Kinner_closing Koc=$Kouter_closing lenoo=$len_oo lenio=$len_io
8871 tokens '$token_oo' .. '$token_io'
8875 # DO-NOT-WELD RULE 0:
8876 # Avoid a new paren-paren weld if inner parens are 'sheared' (separated
8877 # by one line). This can produce instabilities (fixes b1250 b1251
8879 if ( !$is_multiline_weld
8880 && $iline_ic == $iline_io + 1
8882 && $token_io eq '(' )
8885 $Msg .= "RULE 0: Not welding due to sheared inner parens\n";
8891 # If this pair is not adjacent to the previous pair (skipped or not),
8892 # then measure lengths from the start of line of oo.
8894 !$touch_previous_pair
8896 # Also do this if restarting at a new line; fixes case b965, s001
8897 || ( !$weld_count_this_start && $iline_oo > $iline_outer_opening )
8901 # Remember the line we are using as a reference
8902 $iline_outer_opening = $iline_oo;
8903 $weld_count_this_start = 0;
8905 ( my $new_weld_ok, $maximum_text_length, $starting_lentot, my $msg )
8906 = $self->setup_new_weld_measurements( $Kouter_opening,
8911 && ( $iline_oo != $iline_io
8912 || $iline_ic != $iline_oc )
8915 if (DEBUG_WELD) { print $msg}
8919 my $rK_range = $rlines->[$iline_oo]->{_rK_range};
8920 my ( $Kfirst, $Klast ) = @{$rK_range};
8922 # An existing one-line weld is a line in which
8923 # (1) the containers are all on one line, and
8924 # (2) the line does not exceed the allowable length
8925 if ( $iline_oo == $iline_oc ) {
8927 # All the tokens are on one line, now check their length.
8928 # Start with the full line index range. We will reduce this
8929 # in the coding below in some cases.
8930 my $Kstart = $Kfirst;
8933 # Note that the following minimal choice for measuring will
8934 # work and will not cause any instabilities because it is
8937 ## my $Kstart = $Kouter_opening;
8938 ## my $Kstop = $Kouter_closing;
8940 # But that can lead to some undesirable welds. So a little
8941 # more complicated method has been developed.
8943 # We are trying to avoid creating bad two-line welds when we are
8944 # working on long, previously unwelded input text, such as
8946 # INPUT (example of a long input line weld candidate):
8947 ## $mutation->transpos( $self->RNA->position($mutation->label, $atg_label));
8949 # GOOD two-line break: (not welded; result marked too long):
8950 ## $mutation->transpos(
8951 ## $self->RNA->position($mutation->label, $atg_label));
8953 # BAD two-line break: (welded; result if we weld):
8954 ## $mutation->transpos($self->RNA->position(
8955 ## $mutation->label, $atg_label));
8957 # We can only get an approximate estimate of the final length,
8958 # since the line breaks may change, and for -lp mode because
8959 # even the indentation is not yet known.
8961 my $level_first = $rLL->[$Kfirst]->[_LEVEL_];
8962 my $level_last = $rLL->[$Klast]->[_LEVEL_];
8963 my $level_oo = $rLL->[$Kouter_opening]->[_LEVEL_];
8964 my $level_oc = $rLL->[$Kouter_closing]->[_LEVEL_];
8966 # - measure to the end of the original line if balanced
8967 # - measure to the closing container if unbalanced (fixes b1230)
8968 #if ( $level_first != $level_last ) { $Kstop = $Kouter_closing }
8969 if ( $level_oc > $level_last ) { $Kstop = $Kouter_closing }
8971 # - measure from the start of the original line if balanced
8972 # - measure from the most previous token with same level
8973 # if unbalanced (b1232)
8974 if ( $Kouter_opening > $Kfirst && $level_oo > $level_first ) {
8975 $Kstart = $Kouter_opening;
8977 my $KK = $Kouter_opening - 1 ;
8982 next if ( $rLL->[$KK]->[_TYPE_] eq 'b' );
8983 last if ( $rLL->[$KK]->[_LEVEL_] < $level_oo );
8989 $self->excess_line_length_for_Krange( $Kstart, $Kstop );
8991 # Coding simplified here for case b1219.
8992 # Increased tol from 0 to 1 when pvt>0 to fix b1284.
8993 $is_one_line_weld = $excess <= $one_line_tol;
8996 # DO-NOT-WELD RULE 1:
8997 # Do not weld something that looks like the start of a two-line
8998 # function call, like this: <<snippets/wn6.in>>
8999 # $trans->add_transformation(
9000 # PDL::Graphics::TriD::Scale->new( $sx, $sy, $sz ) );
9001 # We will look for a semicolon after the closing paren.
9003 # We want to weld something complex, like this though
9004 # my $compass = uc( opposite_direction( line_to_canvas_direction(
9005 # @{ $coords[0] }, @{ $coords[1] } ) ) );
9006 # Otherwise we will get a 'blinker'. For example, the following
9007 # would become a blinker without this rule:
9008 # $Self->_Add( $SortOrderDisplay{ $Field
9009 # ->GenerateFieldForSelectSQL() } );
9010 # But it is okay to weld a two-line statement if it looks like
9011 # it was already welded, meaning that the two opening containers are
9012 # on a different line that the two closing containers. This is
9013 # necessary to prevent blinking of something like this with
9014 # perltidy -wn -pbp (starting indentation two levels deep):
9016 # $top_label->set_text( gettext(
9017 # "Unable to create personal directory - check permissions.") );
9018 if ( $iline_oc == $iline_oo + 1
9019 && $iline_io == $iline_ic
9020 && $token_oo eq '(' )
9023 # Look for following semicolon...
9024 my $Knext_nonblank = $self->K_next_nonblank($Kouter_closing);
9025 my $next_nonblank_type =
9026 defined($Knext_nonblank)
9027 ? $rLL->[$Knext_nonblank]->[_TYPE_]
9029 if ( $next_nonblank_type eq ';' ) {
9031 # Then do not weld if no other containers between inner
9032 # opening and closing.
9033 my $Knext_seq_item = $inner_opening->[_KNEXT_SEQ_ITEM_];
9034 if ( $Knext_seq_item == $Kinner_closing ) {
9035 $do_not_weld_rule = 1;
9039 } ## end starting new weld sequence
9043 # set the 1-line flag if continuing a weld sequence; fixes b1239
9044 $is_one_line_weld = ( $iline_oo == $iline_oc );
9047 # DO-NOT-WELD RULE 2:
9048 # Do not weld an opening paren to an inner one line brace block
9049 # We will just use old line numbers for this test and require
9050 # iterations if necessary for convergence
9052 # For example, otherwise we could cause the opening paren
9053 # in the following example to separate from the caller name
9056 # $_[0]->code_handler
9057 # ( sub { $more .= $_[1] . ":" . $_[0] . "\n" } );
9059 # Here is another example where we do not want to weld:
9060 # $wrapped->add_around_modifier(
9061 # sub { push @tracelog => 'around 1'; $_[0]->(); } );
9063 # If the one line sub block gets broken due to length or by the
9064 # user, then we can weld. The result will then be:
9065 # $wrapped->add_around_modifier( sub {
9066 # push @tracelog => 'around 1';
9070 # Updated to fix cases b1082 b1102 b1106 b1115:
9071 # Also, do not weld to an intact inner block if the outer opening token
9072 # is on a different line. For example, this prevents oscillation
9073 # between these two states in case b1106:
9076 # ($_,[$self->$_(@_[1..$#_])])
9080 # $_, [ $self->$_( @_[ 1 .. $#_ ] ) ]
9083 # The effect of this change on typical code is very minimal. Sometimes
9084 # it may take a second iteration to converge, but this gives protection
9086 if ( !$do_not_weld_rule
9087 && !$is_one_line_weld
9088 && $iline_ic == $iline_io )
9090 $do_not_weld_rule = 2
9091 if ( $token_oo eq '(' || $iline_oo != $iline_io );
9094 # DO-NOT-WELD RULE 2A:
9095 # Do not weld an opening asub brace in -lp mode if -asbl is set. This
9096 # helps avoid instabilities in one-line block formation, and fixes
9097 # b1241. Previously, the '$is_one_line_weld' flag was tested here
9098 # instead of -asbl, and this fixed most cases. But it turns out that
9099 # the real problem was the -asbl flag, and switching to this was
9100 # necessary to fixe b1268. This also fixes b1269, b1277, b1278.
9103 ##&& $is_one_line_weld
9104 && $rOpts_line_up_parentheses
9106 && $ris_asub_block->{$outer_seqno}
9109 $do_not_weld_rule = '2A';
9112 # DO-NOT-WELD RULE 3:
9113 # Do not weld if this makes our line too long.
9114 # Use a tolerance which depends on if the old tokens were welded
9115 # (fixes cases b746 b748 b749 b750 b752 b753 b754 b755 b756 b758 b759)
9116 if ( !$do_not_weld_rule ) {
9118 # Measure to a little beyond the inner opening token if it is
9119 # followed by a bare word, which may have unusual line break rules.
9121 # NOTE: Originally this was OLD RULE 6: do not weld to a container
9122 # which is followed on the same line by an unknown bareword token.
9123 # This can cause blinkers (cases b626, b611). But OK to weld one
9124 # line welds to fix cases b1057 b1064. For generality, OLD RULE 6
9125 # has been merged into RULE 3 here to also fix cases b1078 b1091.
9127 my $K_for_length = $Kinner_opening;
9128 my $Knext_io = $self->K_next_nonblank($Kinner_opening);
9129 next unless ( defined($Knext_io) ); # shouldn't happen
9130 my $type_io_next = $rLL->[$Knext_io]->[_TYPE_];
9132 # Note: may need to eventually also include other types here,
9133 # such as 'Z' and 'Y': if ($type_io_next =~ /^[ZYw]$/) {
9134 if ( $type_io_next eq 'w' ) {
9135 my $Knext_io2 = $self->K_next_nonblank($Knext_io);
9136 next unless ( defined($Knext_io2) );
9137 my $type_io_next2 = $rLL->[$Knext_io2]->[_TYPE_];
9138 if ( !$type_ok_after_bareword{$type_io_next2} ) {
9139 $K_for_length = $Knext_io2;
9143 # Use a tolerance for welds over multiple lines to avoid blinkers.
9144 # We can use zero tolerance if it looks like we are working on an
9147 $is_one_line_weld || $is_multiline_weld
9151 # By how many characters does this exceed the text window?
9153 $self->cumulative_length_before_K($K_for_length) -
9154 $starting_lentot + 1 + $tol -
9155 $maximum_text_length;
9157 # Old patch: Use '>=0' instead of '> 0' here to fix cases b995 b998
9158 # b1000 b1001 b1007 b1008 b1009 b1010 b1011 b1012 b1016 b1017 b1018
9159 # Revised patch: New tolerance definition allows going back to '> 0'
9160 # here. This fixes case b1124. See also cases b1087 and b1087a.
9161 if ( $excess > 0 ) { $do_not_weld_rule = 3 }
9165 "RULE 3 test: excess length to K=$Kinner_opening is $excess > 0 with tol= $tol ?) \n";
9169 # DO-NOT-WELD RULE 4; implemented for git#10:
9170 # Do not weld an opening -ce brace if the next container is on a single
9171 # line, different from the opening brace. (This is very rare). For
9172 # example, given the following with -ce, we will avoid joining the {
9176 # [ $_, length($_) ]
9179 # because this would produce a terminal one-line block:
9181 # } else { [ $_, length($_) ] }
9183 # which may not be what is desired. But given this input:
9185 # } else { [ $_, length($_) ] }
9187 # then we will do the weld and retain the one-line block
9188 if ( !$do_not_weld_rule && $rOpts->{'cuddled-else'} ) {
9189 my $block_type = $rblock_type_of_seqno->{$outer_seqno};
9190 if ( $block_type && $rcuddled_block_types->{'*'}->{$block_type} ) {
9191 my $io_line = $inner_opening->[_LINE_INDEX_];
9192 my $ic_line = $inner_closing->[_LINE_INDEX_];
9193 my $oo_line = $outer_opening->[_LINE_INDEX_];
9194 if ( $oo_line < $io_line && $ic_line == $io_line ) {
9195 $do_not_weld_rule = 4;
9200 # DO-NOT-WELD RULE 5: do not include welds excluded by user
9203 && %weld_nested_exclusion_rules
9204 && ( $self->is_excluded_weld( $Kouter_opening, $starting_new_weld )
9205 || $self->is_excluded_weld( $Kinner_opening, 0 ) )
9208 $do_not_weld_rule = 5;
9211 # DO-NOT-WELD RULE 6: This has been merged into RULE 3 above.
9213 # DO-NOT-WELD RULE 7: Do not weld if this conflicts with -bom
9215 if ( !$do_not_weld_rule
9216 && $rOpts_break_at_old_method_breakpoints
9217 && $iline_io > $iline_oo )
9220 foreach my $iline ( $iline_oo + 1 .. $iline_io ) {
9221 my $rK_range = $rlines->[$iline]->{_rK_range};
9222 next unless defined($rK_range);
9223 my ( $Kfirst, $Klast ) = @{$rK_range};
9224 next unless defined($Kfirst);
9225 if ( $rLL->[$Kfirst]->[_TYPE_] eq '->' ) {
9226 $do_not_weld_rule = 7;
9232 if ($do_not_weld_rule) {
9234 # After neglecting a pair, we start measuring from start of point
9235 # io ... but not if previous type does not like to be separated
9236 # from its container (fixes case b1184)
9237 my $Kprev = $self->K_previous_nonblank($Kinner_opening);
9238 my $type_prev = defined($Kprev) ? $rLL->[$Kprev]->[_TYPE_] : 'w';
9239 if ( !$has_tight_paren{$type_prev} ) {
9240 my $starting_level = $inner_opening->[_LEVEL_];
9241 my $starting_ci_level = $inner_opening->[_CI_LEVEL_];
9243 $self->cumulative_length_before_K($Kinner_opening);
9244 $maximum_text_length =
9245 $maximum_text_length_at_level[$starting_level] -
9246 $starting_ci_level * $rOpts_continuation_indentation;
9250 $Msg .= "Not welding due to RULE $do_not_weld_rule\n";
9254 # Normally, a broken pair should not decrease indentation of
9255 # intermediate tokens:
9256 ## if ( $last_pair_broken ) { next }
9257 # However, for long strings of welded tokens, such as '{{{{{{...'
9258 # we will allow broken pairs to also remove indentation.
9259 # This will keep very long strings of opening and closing
9260 # braces from marching off to the right. We will do this if the
9261 # number of tokens in a weld before the broken weld is 4 or more.
9262 # This rule will mainly be needed for test scripts, since typical
9263 # welds have fewer than about 4 welded tokens.
9264 if ( !@welds || @{ $welds[-1] } < 4 ) { next }
9267 # otherwise start new weld ...
9268 elsif ($starting_new_weld) {
9269 $weld_count_this_start++;
9271 $Msg .= "Starting new weld\n";
9276 $rK_weld_right->{$Kouter_opening} = $Kinner_opening;
9277 $rK_weld_left->{$Kinner_opening} = $Kouter_opening;
9279 $rK_weld_right->{$Kinner_closing} = $Kouter_closing;
9280 $rK_weld_left->{$Kouter_closing} = $Kinner_closing;
9283 # ... or extend current weld
9285 $weld_count_this_start++;
9287 $Msg .= "Extending current weld\n";
9290 unshift @{ $welds[-1] }, $inner_seqno;
9291 $rK_weld_right->{$Kouter_opening} = $Kinner_opening;
9292 $rK_weld_left->{$Kinner_opening} = $Kouter_opening;
9294 $rK_weld_right->{$Kinner_closing} = $Kouter_closing;
9295 $rK_weld_left->{$Kouter_closing} = $Kinner_closing;
9298 # After welding, reduce the indentation level if all intermediate tokens
9299 my $dlevel = $outer_opening->[_LEVEL_] - $inner_opening->[_LEVEL_];
9300 if ( $dlevel != 0 ) {
9301 my $Kstart = $Kinner_opening;
9302 my $Kstop = $Kinner_closing;
9303 for ( my $KK = $Kstart ; $KK <= $Kstop ; $KK++ ) {
9304 $rLL->[$KK]->[_LEVEL_] += $dlevel;
9307 # Copy opening ci level to help break at = for -lp mode (case b1124)
9308 $rLL->[$Kinner_opening]->[_CI_LEVEL_] =
9309 $rLL->[$Kouter_opening]->[_CI_LEVEL_];
9311 # But do not copy the closing ci level ... it can give poor results
9312 ## $rLL->[$Kinner_closing]->[_CI_LEVEL_] =
9313 ## $rLL->[$Kouter_closing]->[_CI_LEVEL_];
9320 sub weld_nested_quotes {
9322 # Called once per file for option '--weld-nested-containers'. This
9323 # does welding on qw quotes.
9327 # See if quotes are excluded from welding
9328 my $rflags = $weld_nested_exclusion_rules{'q'};
9329 return if ( defined($rflags) && defined( $rflags->[1] ) );
9331 my $rK_weld_left = $self->[_rK_weld_left_];
9332 my $rK_weld_right = $self->[_rK_weld_right_];
9334 my $rLL = $self->[_rLL_];
9335 return unless ( defined($rLL) && @{$rLL} );
9338 my $K_opening_container = $self->[_K_opening_container_];
9339 my $K_closing_container = $self->[_K_closing_container_];
9340 my $rlines = $self->[_rlines_];
9342 my $starting_lentot;
9343 my $maximum_text_length;
9345 my $is_single_quote = sub {
9346 my ( $Kbeg, $Kend, $quote_type ) = @_;
9347 foreach my $K ( $Kbeg .. $Kend ) {
9348 my $test_type = $rLL->[$K]->[_TYPE_];
9349 next if ( $test_type eq 'b' );
9350 return if ( $test_type ne $quote_type );
9355 # Length tolerance - same as previously used for sub weld_nested
9357 1 + max( $rOpts_indent_columns, $rOpts_continuation_indentation );
9359 # look for single qw quotes nested in containers
9360 my $KNEXT = $self->[_K_first_seq_item_];
9361 while ( defined($KNEXT) ) {
9363 $KNEXT = $rLL->[$KNEXT]->[_KNEXT_SEQ_ITEM_];
9364 my $rtoken_vars = $rLL->[$KK];
9365 my $outer_seqno = $rtoken_vars->[_TYPE_SEQUENCE_];
9366 if ( !$outer_seqno ) {
9367 next if ( $KK == 0 ); # first token in file may not be container
9369 # A fault here implies that an error was made in the little loop at
9370 # the bottom of sub 'respace_tokens' which set the values of
9371 # _KNEXT_SEQ_ITEM_. Or an error has been introduced in the
9372 # loop control lines above.
9373 Fault("sequence = $outer_seqno not defined at K=$KK")
9378 my $token = $rtoken_vars->[_TOKEN_];
9379 if ( $is_opening_token{$token} ) {
9381 # see if the next token is a quote of some type
9384 if ( $Kn < $Num && $rLL->[$Kn]->[_TYPE_] eq 'b' );
9385 next unless ( $Kn < $Num );
9387 my $next_token = $rLL->[$Kn]->[_TOKEN_];
9388 my $next_type = $rLL->[$Kn]->[_TYPE_];
9390 unless ( ( $next_type eq 'q' || $next_type eq 'Q' )
9391 && $next_token =~ /^q/ );
9393 # The token before the closing container must also be a quote
9394 my $Kouter_closing = $K_closing_container->{$outer_seqno};
9395 my $Kinner_closing = $self->K_previous_nonblank($Kouter_closing);
9396 next unless $rLL->[$Kinner_closing]->[_TYPE_] eq $next_type;
9398 # This is an inner opening container
9399 my $Kinner_opening = $Kn;
9401 # Do not weld to single-line quotes. Nothing is gained, and it may
9403 next if ( $Kinner_closing == $Kinner_opening );
9405 # Only weld to quotes delimited with container tokens. This is
9406 # because welding to arbitrary quote delimiters can produce code
9407 # which is less readable than without welding.
9408 my $closing_delimiter =
9409 substr( $rLL->[$Kinner_closing]->[_TOKEN_], -1, 1 );
9411 unless ( $is_closing_token{$closing_delimiter}
9412 || $closing_delimiter eq '>' );
9414 # Now make sure that there is just a single quote in the container
9418 $Kinner_opening + 1,
9419 $Kinner_closing - 1,
9424 # OK: This is a candidate for welding
9428 my $Kouter_opening = $K_opening_container->{$outer_seqno};
9429 my $iline_oo = $rLL->[$Kouter_opening]->[_LINE_INDEX_];
9430 my $iline_io = $rLL->[$Kinner_opening]->[_LINE_INDEX_];
9431 my $iline_oc = $rLL->[$Kouter_closing]->[_LINE_INDEX_];
9432 my $iline_ic = $rLL->[$Kinner_closing]->[_LINE_INDEX_];
9434 ( $iline_oo == $iline_io && $iline_ic == $iline_oc );
9436 # Fix for case b1189. If quote is marked as type 'Q' then only weld
9437 # if the two closing tokens are on the same input line. Otherwise,
9438 # the closing line will be output earlier in the pipeline than
9439 # other CODE lines and welding will not actually occur. This will
9440 # leave a half-welded structure with potential formatting
9441 # instability. This might be fixed by adding a check for a weld on
9442 # a closing Q token and sending it down the normal channel, but it
9443 # would complicate the code and is potentially risky.
9446 && $next_type eq 'Q'
9447 && $iline_ic != $iline_oc );
9449 # If welded, the line must not exceed allowed line length
9450 ( my $ok_to_weld, $maximum_text_length, $starting_lentot, my $msg )
9451 = $self->setup_new_weld_measurements( $Kouter_opening,
9453 if ( !$ok_to_weld ) {
9454 if (DEBUG_WELD) { print $msg}
9459 $rLL->[$Kinner_opening]->[_CUMULATIVE_LENGTH_] - $starting_lentot;
9460 my $excess = $length + $multiline_tol - $maximum_text_length;
9462 my $excess_max = ( $is_old_weld ? $multiline_tol : 0 );
9463 if ( $excess >= $excess_max ) {
9468 if ( !$is_old_weld ) { $is_old_weld = "" }
9470 "excess=$excess>=$excess_max, multiline_tol=$multiline_tol, is_old_weld='$is_old_weld'\n";
9473 # Check weld exclusion rules for outer container
9474 if ( !$do_not_weld ) {
9475 my $is_leading = !defined( $rK_weld_left->{$Kouter_opening} );
9476 if ( $self->is_excluded_weld( $KK, $is_leading ) ) {
9479 "No qw weld due to weld exclusion rules for outer container\n";
9485 # Check the length of the last line (fixes case b1039)
9486 if ( !$do_not_weld ) {
9487 my $rK_range_ic = $rlines->[$iline_ic]->{_rK_range};
9488 my ( $Kfirst_ic, $Klast_ic ) = @{$rK_range_ic};
9490 $self->excess_line_length_for_Krange( $Kfirst_ic,
9493 # Allow extra space for additional welded closing container(s)
9494 # and a space and comma or semicolon.
9495 # NOTE: weld len has not been computed yet. Use 2 spaces
9496 # for now, correct for a single weld. This estimate could
9497 # be made more accurate if necessary.
9499 defined( $rK_weld_right->{$Kouter_closing} ) ? 2 : 0;
9500 if ( $excess_ic + $weld_len + 2 > 0 ) {
9503 "No qw weld due to excess ending line length=$excess_ic + $weld_len + 2 > 0\n";
9511 $Msg .= "Not Welding QW\n";
9519 $Msg .= "Welding QW\n";
9523 $rK_weld_right->{$Kouter_opening} = $Kinner_opening;
9524 $rK_weld_left->{$Kinner_opening} = $Kouter_opening;
9526 $rK_weld_right->{$Kinner_closing} = $Kouter_closing;
9527 $rK_weld_left->{$Kouter_closing} = $Kinner_closing;
9529 # Undo one indentation level if an extra level was added to this
9532 $self->[_rstarting_multiline_qw_seqno_by_K_]->{$Kinner_opening};
9534 && $self->[_rmultiline_qw_has_extra_level_]->{$qw_seqno} )
9536 foreach my $K ( $Kinner_opening + 1 .. $Kinner_closing - 1 ) {
9537 $rLL->[$K]->[_LEVEL_] -= 1;
9539 $rLL->[$Kinner_opening]->[_CI_LEVEL_] = 0;
9540 $rLL->[$Kinner_closing]->[_CI_LEVEL_] = 0;
9543 # undo CI for other welded quotes
9546 foreach my $K ( $Kinner_opening .. $Kinner_closing ) {
9547 $rLL->[$K]->[_CI_LEVEL_] = 0;
9551 # Change the level of a closing qw token to be that of the outer
9552 # containing token. This will allow -lp indentation to function
9553 # correctly in the vertical aligner.
9554 # Patch to fix c002: but not if it contains text
9555 if ( length( $rLL->[$Kinner_closing]->[_TOKEN_] ) == 1 ) {
9556 $rLL->[$Kinner_closing]->[_LEVEL_] =
9557 $rLL->[$Kouter_closing]->[_LEVEL_];
9564 sub is_welded_at_seqno {
9566 my ( $self, $seqno ) = @_;
9568 # given a sequence number:
9569 # return true if it is welded either left or right
9570 # return false otherwise
9571 return unless ( $total_weld_count && defined($seqno) );
9572 my $KK_o = $self->[_K_opening_container_]->{$seqno};
9573 return unless defined($KK_o);
9574 return defined( $self->[_rK_weld_left_]->{$KK_o} )
9575 || defined( $self->[_rK_weld_right_]->{$KK_o} );
9578 sub mark_short_nested_blocks {
9580 # This routine looks at the entire file and marks any short nested blocks
9581 # which should not be broken. The results are stored in the hash
9582 # $rshort_nested->{$type_sequence}
9583 # which will be true if the container should remain intact.
9585 # For example, consider the following line:
9587 # sub cxt_two { sort { $a <=> $b } test_if_list() }
9589 # The 'sort' block is short and nested within an outer sub block.
9590 # Normally, the existence of the 'sort' block will force the sub block to
9591 # break open, but this is not always desirable. Here we will set a flag for
9592 # the sort block to prevent this. To give the user control, we will
9593 # follow the input file formatting. If either of the blocks is broken in
9594 # the input file then we will allow it to remain broken. Otherwise we will
9595 # set a flag to keep it together in later formatting steps.
9597 # The flag which is set here will be checked in two places:
9598 # 'sub process_line_of_CODE' and 'sub starting_one_line_block'
9601 return if $rOpts->{'indent-only'};
9603 my $rLL = $self->[_rLL_];
9604 return unless ( defined($rLL) && @{$rLL} );
9606 return unless ( $rOpts->{'one-line-block-nesting'} );
9608 my $K_opening_container = $self->[_K_opening_container_];
9609 my $K_closing_container = $self->[_K_closing_container_];
9610 my $rbreak_container = $self->[_rbreak_container_];
9611 my $rshort_nested = $self->[_rshort_nested_];
9612 my $rlines = $self->[_rlines_];
9613 my $rblock_type_of_seqno = $self->[_rblock_type_of_seqno_];
9615 # Variables needed for estimating line lengths
9616 my $maximum_text_length;
9617 my $starting_lentot;
9620 my $excess_length_to_K = sub {
9623 # Estimate the length from the line start to a given token
9624 my $length = $self->cumulative_length_before_K($K) - $starting_lentot;
9625 my $excess_length = $length + $length_tol - $maximum_text_length;
9626 return ($excess_length);
9629 my $is_broken_block = sub {
9631 # a block is broken if the input line numbers of the braces differ
9633 my $K_opening = $K_opening_container->{$seqno};
9634 return unless ( defined($K_opening) );
9635 my $K_closing = $K_closing_container->{$seqno};
9636 return unless ( defined($K_closing) );
9637 return $rbreak_container->{$seqno}
9638 || $rLL->[$K_closing]->[_LINE_INDEX_] !=
9639 $rLL->[$K_opening]->[_LINE_INDEX_];
9642 # loop over all containers
9643 my @open_block_stack;
9645 my $KNEXT = $self->[_K_first_seq_item_];
9646 while ( defined($KNEXT) ) {
9648 $KNEXT = $rLL->[$KNEXT]->[_KNEXT_SEQ_ITEM_];
9649 my $rtoken_vars = $rLL->[$KK];
9650 my $type_sequence = $rtoken_vars->[_TYPE_SEQUENCE_];
9651 if ( !$type_sequence ) {
9652 next if ( $KK == 0 ); # first token in file may not be container
9654 # A fault here implies that an error was made in the little loop at
9655 # the bottom of sub 'respace_tokens' which set the values of
9656 # _KNEXT_SEQ_ITEM_. Or an error has been introduced in the
9657 # loop control lines above.
9658 Fault("sequence = $type_sequence not defined at K=$KK")
9663 # Patch: do not mark short blocks with welds.
9664 # In some cases blinkers can form (case b690).
9665 if ( $total_weld_count && $self->is_welded_at_seqno($type_sequence) ) {
9669 # We are just looking at code blocks
9670 my $token = $rtoken_vars->[_TOKEN_];
9671 my $type = $rtoken_vars->[_TYPE_];
9672 next unless ( $type eq $token );
9673 next unless ( $rblock_type_of_seqno->{$type_sequence} );
9675 # Keep a stack of all acceptable block braces seen.
9676 # Only consider blocks entirely on one line so dump the stack when line
9678 my $iline_last = $iline;
9679 $iline = $rLL->[$KK]->[_LINE_INDEX_];
9680 if ( $iline != $iline_last ) { @open_block_stack = () }
9682 if ( $token eq '}' ) {
9683 if (@open_block_stack) { pop @open_block_stack }
9685 next unless ( $token eq '{' );
9687 # block must be balanced (bad scripts may be unbalanced)
9688 my $K_opening = $K_opening_container->{$type_sequence};
9689 my $K_closing = $K_closing_container->{$type_sequence};
9690 next unless ( defined($K_opening) && defined($K_closing) );
9692 # require that this block be entirely on one line
9693 next if ( $is_broken_block->($type_sequence) );
9695 # See if this block fits on one line of allowed length (which may
9696 # be different from the input script)
9698 $KK <= 0 ? 0 : $rLL->[ $KK - 1 ]->[_CUMULATIVE_LENGTH_];
9699 my $level = $rLL->[$KK]->[_LEVEL_];
9700 my $ci_level = $rLL->[$KK]->[_CI_LEVEL_];
9701 $maximum_text_length =
9702 $maximum_text_length_at_level[$level] -
9703 $ci_level * $rOpts_continuation_indentation;
9705 # Dump the stack if block is too long and skip this block
9706 if ( $excess_length_to_K->($K_closing) > 0 ) {
9707 @open_block_stack = ();
9711 # OK, Block passes tests, remember it
9712 push @open_block_stack, $type_sequence;
9714 # We are only marking nested code blocks,
9715 # so check for a previous block on the stack
9716 next unless ( @open_block_stack > 1 );
9718 # Looks OK, mark this as a short nested block
9719 $rshort_nested->{$type_sequence} = 1;
9725 sub adjust_indentation_levels {
9729 # Called once per file to do special indentation adjustments.
9730 # These routines adjust levels either by changing _CI_LEVEL_ directly or
9731 # by setting modified levels in the array $self->[_radjusted_levels_].
9733 # Initialize the adjusted levels. These will be the levels actually used
9734 # for computing indentation.
9736 # NOTE: This routine is called after the weld routines, which may have
9737 # already adjusted _LEVEL_, so we are making adjustments on top of those
9738 # levels. It would be much nicer to have the weld routines also use this
9739 # adjustment, but that gets complicated when we combine -gnu -wn and have
9740 # some welded quotes.
9741 my $Klimit = $self->[_Klimit_];
9742 my $rLL = $self->[_rLL_];
9743 my $radjusted_levels = $self->[_radjusted_levels_];
9745 return unless ( defined($Klimit) );
9747 foreach my $KK ( 0 .. $Klimit ) {
9748 $radjusted_levels->[$KK] = $rLL->[$KK]->[_LEVEL_];
9751 # First set adjusted levels for any non-indenting braces.
9752 $self->non_indenting_braces();
9754 # Adjust breaks and indentation list containers
9755 $self->break_before_list_opening_containers();
9757 # Set adjusted levels for the whitespace cycle option.
9758 $self->whitespace_cycle_adjustment();
9760 $self->braces_left_setup();
9762 # Adjust continuation indentation if -bli is set
9763 $self->bli_adjustment();
9765 $self->extended_ci()
9766 if ($rOpts_extended_continuation_indentation);
9768 # Now clip any adjusted levels to be non-negative
9769 $self->clip_adjusted_levels();
9774 sub clip_adjusted_levels {
9776 # Replace any negative adjusted levels with zero.
9777 # Negative levels can occur in files with brace errors.
9779 my $radjusted_levels = $self->[_radjusted_levels_];
9780 return unless defined($radjusted_levels) && @{$radjusted_levels};
9781 foreach ( @{$radjusted_levels} ) { $_ = 0 if ( $_ < 0 ) }
9785 sub non_indenting_braces {
9787 # Called once per file to handle the --non-indenting-braces parameter.
9788 # Remove indentation within marked braces if requested
9790 return unless ( $rOpts->{'non-indenting-braces'} );
9792 my $rLL = $self->[_rLL_];
9793 return unless ( defined($rLL) && @{$rLL} );
9795 my $Klimit = $self->[_Klimit_];
9796 my $rblock_type_of_seqno = $self->[_rblock_type_of_seqno_];
9797 my $K_opening_container = $self->[_K_opening_container_];
9798 my $K_closing_container = $self->[_K_closing_container_];
9799 my $rspecial_side_comment_type = $self->[_rspecial_side_comment_type_];
9800 my $radjusted_levels = $self->[_radjusted_levels_];
9802 # First locate all of the marked blocks
9804 foreach my $seqno ( keys %{$rblock_type_of_seqno} ) {
9805 my $KK = $K_opening_container->{$seqno};
9807 # followed by a comment
9810 if ( $K_sc <= $Klimit && $rLL->[$K_sc]->[_TYPE_] eq 'b' );
9811 next unless ( $K_sc <= $Klimit );
9812 my $type_sc = $rLL->[$K_sc]->[_TYPE_];
9813 next unless ( $type_sc eq '#' );
9816 my $line_index = $rLL->[$KK]->[_LINE_INDEX_];
9817 my $line_index_sc = $rLL->[$K_sc]->[_LINE_INDEX_];
9818 next unless ( $line_index_sc == $line_index );
9820 # get the side comment text
9821 my $token_sc = $rLL->[$K_sc]->[_TOKEN_];
9823 # The pattern ends in \s but we have removed the newline, so
9824 # we added it back for the match. That way we require an exact
9825 # match to the special string and also allow additional text.
9827 next unless ( $token_sc =~ /$non_indenting_brace_pattern/ );
9828 $rspecial_side_comment_type->{$K_sc} = 'NIB';
9829 push @K_stack, [ $KK, 1 ];
9830 my $Kc = $K_closing_container->{$seqno};
9831 push @K_stack, [ $Kc, -1 ] if ( defined($Kc) );
9833 return unless (@K_stack);
9834 @K_stack = sort { $a->[0] <=> $b->[0] } @K_stack;
9836 # Then loop to remove indentation within marked blocks
9839 foreach my $item (@K_stack) {
9840 my ( $KK, $inc ) = @{$item};
9843 foreach ( $KK_last + 1 .. $KK ) {
9844 $radjusted_levels->[$_] -= $ndeep;
9847 # We just subtracted the old $ndeep value, which only applies to a
9848 # '{'. The new $ndeep applies to a '}', so we undo the error.
9849 if ( $inc < 0 ) { $radjusted_levels->[$KK] += 1 }
9858 sub whitespace_cycle_adjustment {
9862 # Called once per file to implement the --whitespace-cycle option
9863 my $rLL = $self->[_rLL_];
9864 return unless ( defined($rLL) && @{$rLL} );
9865 my $radjusted_levels = $self->[_radjusted_levels_];
9866 my $maximum_level = $self->[_maximum_level_];
9868 if ( $rOpts_whitespace_cycle
9869 && $rOpts_whitespace_cycle > 0
9870 && $rOpts_whitespace_cycle < $maximum_level )
9873 my $Kmax = @{$rLL} - 1;
9875 my $whitespace_last_level = -1;
9876 my @whitespace_level_stack = ();
9877 my $last_nonblank_type = 'b';
9878 my $last_nonblank_token = '';
9879 foreach my $KK ( 0 .. $Kmax ) {
9880 my $level_abs = $radjusted_levels->[$KK];
9881 my $level = $level_abs;
9882 if ( $level_abs < $whitespace_last_level ) {
9883 pop(@whitespace_level_stack);
9885 if ( !@whitespace_level_stack ) {
9886 push @whitespace_level_stack, $level_abs;
9888 elsif ( $level_abs > $whitespace_last_level ) {
9889 $level = $whitespace_level_stack[-1] +
9890 ( $level_abs - $whitespace_last_level );
9893 # 1 Try to break at a block brace
9895 $level > $rOpts_whitespace_cycle
9896 && $last_nonblank_type eq '{'
9897 && $last_nonblank_token eq '{'
9900 # 2 Then either a brace or bracket
9901 || ( $level > $rOpts_whitespace_cycle + 1
9902 && $last_nonblank_token =~ /^[\{\[]$/ )
9904 # 3 Then a paren too
9905 || $level > $rOpts_whitespace_cycle + 2
9910 push @whitespace_level_stack, $level;
9912 $level = $whitespace_level_stack[-1];
9913 $radjusted_levels->[$KK] = $level;
9915 $whitespace_last_level = $level_abs;
9916 my $type = $rLL->[$KK]->[_TYPE_];
9917 my $token = $rLL->[$KK]->[_TOKEN_];
9918 if ( $type ne 'b' ) {
9919 $last_nonblank_type = $type;
9920 $last_nonblank_token = $token;
9927 use constant DEBUG_BBX => 0;
9929 sub break_before_list_opening_containers {
9933 # This routine is called once per batch to implement parameters
9934 # --break-before-hash-brace=n and similar -bbx=n flags
9935 # and their associated indentation flags:
9936 # --break-before-hash-brace-and-indent and similar -bbxi=n
9938 # Nothing to do if none of the -bbx=n parameters has been set
9939 return unless %break_before_container_types;
9941 my $rLL = $self->[_rLL_];
9942 return unless ( defined($rLL) && @{$rLL} );
9944 # Loop over all opening container tokens
9945 my $K_opening_container = $self->[_K_opening_container_];
9946 my $K_closing_container = $self->[_K_closing_container_];
9947 my $ris_broken_container = $self->[_ris_broken_container_];
9948 my $ris_permanently_broken = $self->[_ris_permanently_broken_];
9949 my $rhas_list = $self->[_rhas_list_];
9950 my $rhas_broken_list = $self->[_rhas_broken_list_];
9951 my $rhas_broken_list_with_lec = $self->[_rhas_broken_list_with_lec_];
9952 my $radjusted_levels = $self->[_radjusted_levels_];
9953 my $rparent_of_seqno = $self->[_rparent_of_seqno_];
9954 my $rlines = $self->[_rlines_];
9955 my $rtype_count_by_seqno = $self->[_rtype_count_by_seqno_];
9956 my $rlec_count_by_seqno = $self->[_rlec_count_by_seqno_];
9957 my $rno_xci_by_seqno = $self->[_rno_xci_by_seqno_];
9958 my $rK_weld_right = $self->[_rK_weld_right_];
9959 my $rblock_type_of_seqno = $self->[_rblock_type_of_seqno_];
9962 max( 1, $rOpts_continuation_indentation, $rOpts_indent_columns );
9963 if ($rOpts_ignore_old_breakpoints) {
9965 # Patch suggested by b1231; the old tol was excessive.
9966 ## $length_tol += $rOpts_maximum_line_length;
9970 my $rbreak_before_container_by_seqno = {};
9971 my $rwant_reduced_ci = {};
9972 foreach my $seqno ( keys %{$K_opening_container} ) {
9974 #----------------------------------------------------------------
9975 # Part 1: Examine any -bbx=n flags
9976 #----------------------------------------------------------------
9978 next if ( $rblock_type_of_seqno->{$seqno} );
9979 my $KK = $K_opening_container->{$seqno};
9981 # This must be a list or contain a list.
9982 # Note1: switched from 'has_broken_list' to 'has_list' to fix b1024.
9983 # Note2: 'has_list' holds the depth to the sub-list. We will require
9985 my $is_list = $self->is_list_by_seqno($seqno);
9986 my $has_list = $rhas_list->{$seqno};
9988 # Fix for b1173: if welded opening container, use flag of innermost
9989 # seqno. Otherwise, the restriction $has_list==1 prevents triple and
9990 # higher welds from following the -BBX parameters.
9991 if ($total_weld_count) {
9992 my $KK_test = $rK_weld_right->{$KK};
9993 if ( defined($KK_test) ) {
9994 my $seqno_inner = $rLL->[$KK_test]->[_TYPE_SEQUENCE_];
9995 $is_list ||= $self->is_list_by_seqno($seqno_inner);
9996 $has_list = $rhas_list->{$seqno_inner};
10000 next unless ( $is_list || $has_list && $has_list == 1 );
10002 my $has_broken_list = $rhas_broken_list->{$seqno};
10003 my $has_list_with_lec = $rhas_broken_list_with_lec->{$seqno};
10005 # Only for types of container tokens with a non-default break option
10006 my $token = $rLL->[$KK]->[_TOKEN_];
10007 my $break_option = $break_before_container_types{$token};
10008 next unless ($break_option);
10010 # Do not use -bbx under stress for stability ... fixes b1300
10011 my $level = $rLL->[$KK]->[_LEVEL_];
10012 if ( $level >= $stress_level_beta ) {
10015 "BBX: Switching off at $seqno: level=$level exceeds beta stress level=$stress_level_beta\n";
10019 # Require previous nonblank to be '=' or '=>'
10020 my $Kprev = $KK - 1;
10021 next if ( $Kprev < 0 );
10022 my $prev_type = $rLL->[$Kprev]->[_TYPE_];
10023 if ( $prev_type eq 'b' ) {
10025 next if ( $Kprev < 0 );
10026 $prev_type = $rLL->[$Kprev]->[_TYPE_];
10028 next unless ( $is_equal_or_fat_comma{$prev_type} );
10030 my $ci = $rLL->[$KK]->[_CI_LEVEL_];
10032 #--------------------------------------------
10033 # New coding for option 2 (break if complex).
10034 #--------------------------------------------
10035 # This new coding uses clues which are invariant under formatting to
10036 # decide if a list is complex. For now it is only applied when -lp
10037 # and -vmll are used, but eventually it may become the standard method.
10038 # Fixes b1274, b1275, and others, including b1099.
10039 if ( $break_option == 2 ) {
10041 if ( $rOpts_line_up_parentheses
10042 || $rOpts_variable_maximum_line_length )
10045 # Start with the basic definition of a complex list...
10046 my $is_complex = $is_list && $has_list;
10048 # and it is also complex if the parent is a list
10049 if ( !$is_complex ) {
10050 my $parent = $rparent_of_seqno->{$seqno};
10051 if ( $self->is_list_by_seqno($parent) ) {
10056 # finally, we will call it complex if there are inner opening
10057 # and closing container tokens, not parens, within the outer
10058 # container tokens.
10059 if ( !$is_complex ) {
10060 my $Kp = $self->K_next_nonblank($KK);
10061 my $token_p = defined($Kp) ? $rLL->[$Kp]->[_TOKEN_] : 'b';
10062 if ( $is_opening_token{$token_p} && $token_p ne '(' ) {
10064 my $Kc = $K_closing_container->{$seqno};
10065 my $Km = $self->K_previous_nonblank($Kc);
10067 defined($Km) ? $rLL->[$Km]->[_TOKEN_] : 'b';
10069 # ignore any optional ending comma
10070 if ( $token_m eq ',' ) {
10071 $Km = $self->K_previous_nonblank($Km);
10073 defined($Km) ? $rLL->[$Km]->[_TOKEN_] : 'b';
10077 $is_closing_token{$token_m} && $token_m ne ')';
10081 # Convert to option 3 (always break) if complex
10082 next unless ($is_complex);
10087 # Fix for b1231: the has_list_with_lec does not cover all cases.
10088 # A broken container containing a list and with line-ending commas
10089 # will stay broken, so can be treated as if it had a list with lec.
10090 $has_list_with_lec ||=
10092 && $ris_broken_container->{$seqno}
10093 && $rlec_count_by_seqno->{$seqno};
10097 "BBX: Looking at seqno=$seqno, token = $token with option=$break_option\n";
10099 # -bbx=1 = stable, try to follow input
10100 if ( $break_option == 1 ) {
10102 my $iline = $rLL->[$KK]->[_LINE_INDEX_];
10103 my $rK_range = $rlines->[$iline]->{_rK_range};
10104 my ( $Kfirst, $Klast ) = @{$rK_range};
10105 next unless ( $KK == $Kfirst );
10108 # -bbx=2 => apply this style only for a 'complex' list
10109 elsif ( $break_option == 2 ) {
10111 # break if this list contains a broken list with line-ending comma
10114 if ($has_list_with_lec) {
10116 DEBUG_BBX && do { $Msg = "has list with lec;" };
10119 if ( !$ok_to_break ) {
10121 # Turn off -xci if -bbx=2 and this container has a sublist but
10122 # not a broken sublist. This avoids creating blinkers. The
10123 # problem is that -xci can cause one-line lists to break open,
10124 # and thereby creating formatting instability.
10125 # This fixes cases b1033 b1036 b1037 b1038 b1042 b1043 b1044
10126 # b1045 b1046 b1047 b1051 b1052 b1061.
10127 if ($has_list) { $rno_xci_by_seqno->{$seqno} = 1 }
10129 my $parent = $rparent_of_seqno->{$seqno};
10130 if ( $self->is_list_by_seqno($parent) ) {
10131 DEBUG_BBX && do { $Msg = "parent is list" };
10136 if ( !$ok_to_break ) {
10138 && print STDOUT "Not breaking at seqno=$seqno: $Msg\n";
10143 && print STDOUT "OK to break at seqno=$seqno: $Msg\n";
10145 # Patch: turn off -xci if -bbx=2 and -lp
10146 # This fixes cases b1090 b1095 b1101 b1116 b1118 b1121 b1122
10147 $rno_xci_by_seqno->{$seqno} = 1 if ($rOpts_line_up_parentheses);
10150 # -bbx=3 = always break
10151 elsif ( $break_option == 3 ) {
10156 # Shouldn't happen! Bad flag, but make behavior same as 3
10161 # Set a flag for actual implementation later in
10162 # sub insert_breaks_before_list_opening_containers
10163 $rbreak_before_container_by_seqno->{$seqno} = 1;
10165 && print STDOUT "BBX: ok to break at seqno=$seqno\n";
10167 # -bbxi=0: Nothing more to do if the ci value remains unchanged
10168 my $ci_flag = $container_indentation_options{$token};
10169 next unless ($ci_flag);
10171 # -bbxi=1: This option removes ci and is handled in
10172 # later sub final_indentation_adjustment
10173 if ( $ci_flag == 1 ) {
10174 $rwant_reduced_ci->{$seqno} = 1;
10180 #----------------------------------------------------------------
10181 # Part 2: Perform tests before committing to changing ci and level
10182 #----------------------------------------------------------------
10184 # Before changing the ci level of the opening container, we need
10185 # to be sure that the container will be broken in the later stages of
10186 # formatting. We have to do this because we are working early in the
10187 # formatting pipeline. A problem can occur if we change the ci or
10188 # level of the opening token but do not actually break the container
10189 # open as expected. In most cases it wouldn't make any difference if
10190 # we changed ci or not, but there are some edge cases where this
10191 # can cause blinking states, so we need to try to only change ci if
10192 # the container will really be broken.
10194 # Only consider containers already broken
10195 next if ( !$ris_broken_container->{$seqno} );
10197 # Patch to fix issue b1305: the combination of -naws and ci>i appears
10198 # to cause an instability. It should almost never occur in practice.
10200 if (!$rOpts_add_whitespace
10201 && $rOpts_continuation_indentation > $rOpts_indent_columns );
10203 # Always ok to change ci for permanently broken containers
10204 if ( $ris_permanently_broken->{$seqno} ) {
10208 # Always OK if this list contains a broken sub-container with
10209 # a non-terminal line-ending comma
10210 if ($has_list_with_lec) { goto OK }
10212 # From here on we are considering a single container...
10214 # A single container must have at least 1 line-ending comma:
10215 next unless ( $rlec_count_by_seqno->{$seqno} );
10217 # Since it has a line-ending comma, it will stay broken if the -boc
10219 if ($rOpts_break_at_old_comma_breakpoints) { goto OK }
10221 # OK if the container contains multiple fat commas
10222 # Better: multiple lines with fat commas
10223 if ( !$rOpts_ignore_old_breakpoints ) {
10224 my $rtype_count = $rtype_count_by_seqno->{$seqno};
10225 next unless ($rtype_count);
10226 my $fat_comma_count = $rtype_count->{'=>'};
10228 && print STDOUT "BBX: fat comma count=$fat_comma_count\n";
10229 if ( $fat_comma_count && $fat_comma_count >= 2 ) { goto OK }
10232 # The last check we can make is to see if this container could fit on a
10233 # single line. Use the least possble indentation in the estmate (ci=0),
10234 # so we are not subtracting $ci * $rOpts_continuation_indentation from
10235 # tablulated $maximum_text_length value.
10236 my $maximum_text_length = $maximum_text_length_at_level[$level];
10237 my $K_closing = $K_closing_container->{$seqno};
10238 my $length = $self->cumulative_length_before_K($K_closing) -
10239 $self->cumulative_length_before_K($KK);
10240 my $excess_length = $length - $maximum_text_length;
10243 "BBX: excess=$excess_length: maximum_text_length=$maximum_text_length, length=$length, ci=$ci\n";
10245 # OK if the net container definitely breaks on length
10246 if ( $excess_length > $length_tol ) {
10248 && print STDOUT "BBX: excess_length=$excess_length\n";
10252 # Otherwise skip it
10255 #################################################################
10256 # Part 3: Looks OK: apply -bbx=n and any related -bbxi=n flag
10257 #################################################################
10261 DEBUG_BBX && print STDOUT "BBX: OK to break\n";
10269 # n=0 default indentation (usually one ci)
10270 # n=1 outdent one ci
10271 # n=2 indent one level (minus one ci)
10272 # n=3 indent one extra ci [This may be dropped]
10274 # NOTE: We are adjusting indentation of the opening container. The
10275 # closing container will normally follow the indentation of the opening
10276 # container automatically, so this is not currently done.
10279 # option 1: outdent
10280 if ( $ci_flag == 1 ) {
10284 # option 2: indent one level
10285 elsif ( $ci_flag == 2 ) {
10287 $radjusted_levels->[$KK] += 1;
10292 # Shouldn't happen - leave ci unchanged
10295 $rLL->[$KK]->[_CI_LEVEL_] = $ci if ( $ci >= 0 );
10298 $self->[_rbreak_before_container_by_seqno_] =
10299 $rbreak_before_container_by_seqno;
10300 $self->[_rwant_reduced_ci_] = $rwant_reduced_ci;
10304 use constant DEBUG_XCI => 0;
10308 # This routine implements the -xci (--extended-continuation-indentation)
10309 # flag. We add CI to interior tokens of a container which itself has CI but
10310 # only if a token does not already have CI.
10312 # To do this, we will locate opening tokens which themselves have
10313 # continuation indentation (CI). We track them with their sequence
10314 # numbers. These sequence numbers are called 'controlling sequence
10315 # numbers'. They apply continuation indentation to the tokens that they
10316 # contain. These inner tokens remember their controlling sequence numbers.
10317 # Later, when these inner tokens are output, they have to see if the output
10318 # lines with their controlling tokens were output with CI or not. If not,
10319 # then they must remove their CI too.
10321 # The controlling CI concept works hierarchically. But CI itself is not
10322 # hierarchical; it is either on or off. There are some rare instances where
10323 # it would be best to have hierarchical CI too, but not enough to be worth
10324 # the programming effort.
10326 # The operations to remove unwanted CI are done in sub 'undo_ci'.
10330 my $rLL = $self->[_rLL_];
10331 return unless ( defined($rLL) && @{$rLL} );
10333 my $ris_list_by_seqno = $self->[_ris_list_by_seqno_];
10334 my $ris_seqno_controlling_ci = $self->[_ris_seqno_controlling_ci_];
10335 my $rseqno_controlling_my_ci = $self->[_rseqno_controlling_my_ci_];
10336 my $rlines = $self->[_rlines_];
10337 my $rno_xci_by_seqno = $self->[_rno_xci_by_seqno_];
10338 my $ris_bli_container = $self->[_ris_bli_container_];
10339 my $rblock_type_of_seqno = $self->[_rblock_type_of_seqno_];
10341 my %available_space;
10343 # Loop over all opening container tokens
10344 my $K_opening_container = $self->[_K_opening_container_];
10345 my $K_closing_container = $self->[_K_closing_container_];
10346 my $ris_broken_container = $self->[_ris_broken_container_];
10350 my $KNEXT = $self->[_K_first_seq_item_];
10352 # The following variable can be used to allow a little extra space to
10353 # avoid blinkers. A value $len_tol = 20 fixed the following
10354 # fixes cases: b1025 b1026 b1027 b1028 b1029 b1030 but NOT b1031.
10355 # It turned out that the real problem was misparsing a list brace as
10356 # a code block in a 'use' statement when the line length was extremely
10357 # small. A value of 0 works now, but a slightly larger value can
10358 # be used to minimize the chance of a blinker.
10361 while ( defined($KNEXT) ) {
10363 # Fix all tokens up to the next sequence item if we are changing CI
10366 my $is_list = $ris_list_by_seqno->{$seqno_top};
10367 my $space = $available_space{$seqno_top};
10368 my $length = $rLL->[$KLAST]->[_CUMULATIVE_LENGTH_];
10370 for ( my $Kt = $KLAST + 1 ; $Kt < $KNEXT ; $Kt++ ) {
10372 # But do not include tokens which might exceed the line length
10373 # and are not in a list.
10374 # ... This fixes case b1031
10375 my $length_before = $length;
10376 $length = $rLL->[$Kt]->[_CUMULATIVE_LENGTH_];
10378 !$rLL->[$Kt]->[_CI_LEVEL_]
10380 || $length - $length_before < $space
10381 || $rLL->[$Kt]->[_TYPE_] eq '#' )
10384 $rLL->[$Kt]->[_CI_LEVEL_] = 1;
10385 $rseqno_controlling_my_ci->{$Kt} = $seqno_top;
10389 $ris_seqno_controlling_ci->{$seqno_top} += $count;
10394 $KNEXT = $rLL->[$KNEXT]->[_KNEXT_SEQ_ITEM_];
10396 my $seqno = $rLL->[$KK]->[_TYPE_SEQUENCE_];
10397 my $K_opening = $K_opening_container->{$seqno};
10399 # see if we have reached the end of the current controlling container
10400 if ( $seqno_top && $seqno == $seqno_top ) {
10401 $seqno_top = pop @seqno_stack;
10404 # Patch to fix some block types...
10405 # Certain block types arrive from the tokenizer without CI but should
10406 # have it for this option. These include anonymous subs and
10407 # do sort map grep eval
10408 my $block_type = $rblock_type_of_seqno->{$seqno};
10409 if ( $block_type && $is_block_with_ci{$block_type} ) {
10410 $rLL->[$KK]->[_CI_LEVEL_] = 1;
10412 $rseqno_controlling_my_ci->{$KK} = $seqno_top;
10413 $ris_seqno_controlling_ci->{$seqno_top}++;
10417 # If this does not have ci, update ci if necessary and continue looking
10418 if ( !$rLL->[$KK]->[_CI_LEVEL_] ) {
10420 $rLL->[$KK]->[_CI_LEVEL_] = 1;
10421 $rseqno_controlling_my_ci->{$KK} = $seqno_top;
10422 $ris_seqno_controlling_ci->{$seqno_top}++;
10427 # Skip if requested by -bbx to avoid blinkers
10428 if ( $rno_xci_by_seqno->{$seqno} ) {
10432 # Skip if this is a -bli container (this fixes case b1065) Note: case
10433 # b1065 is also fixed by the update for b1055, so this update is not
10434 # essential now. But there does not seem to be a good reason to add
10435 # xci and bli together, so the update is retained.
10436 if ( $ris_bli_container->{$seqno} ) {
10440 # We are looking for opening container tokens with ci
10441 next unless ( defined($K_opening) && $KK == $K_opening );
10443 # Make sure there is a corresponding closing container
10444 # (could be missing if the script has a brace error)
10445 my $K_closing = $K_closing_container->{$seqno};
10446 next unless defined($K_closing);
10448 # Require different input lines. This will filter out a large number
10449 # of small hash braces and array brackets. If we accidentally filter
10450 # out an important container, it will get fixed on the next pass.
10452 $rLL->[$K_opening]->[_LINE_INDEX_] ==
10453 $rLL->[$K_closing]->[_LINE_INDEX_]
10454 && ( $rLL->[$K_closing]->[_CUMULATIVE_LENGTH_] -
10455 $rLL->[$K_opening]->[_CUMULATIVE_LENGTH_] >
10456 $rOpts_maximum_line_length )
10460 && print "XCI: Skipping seqno=$seqno, require different lines\n";
10464 # Do not apply -xci if adding extra ci will put the container contents
10465 # beyond the line length limit (fixes cases b899 b935)
10466 my $level = $rLL->[$K_opening]->[_LEVEL_];
10467 my $ci_level = $rLL->[$K_opening]->[_CI_LEVEL_];
10468 my $maximum_text_length =
10469 $maximum_text_length_at_level[$level] -
10470 $ci_level * $rOpts_continuation_indentation;
10472 # Fix for b1197 b1198 b1199 b1200 b1201 b1202
10473 # Do not apply -xci if we are running out of space
10474 if ( $level >= $stress_level_beta ) {
10477 "XCI: Skipping seqno=$seqno, level=$level exceeds stress level=$stress_level_beta\n";
10481 # remember how much space is available for patch b1031 above
10483 $maximum_text_length - $len_tol - $rOpts_continuation_indentation;
10485 if ( $space < 0 ) {
10486 DEBUG_XCI && print "XCI: Skipping seqno=$seqno, space=$space\n";
10489 DEBUG_XCI && print "XCI: OK seqno=$seqno, space=$space\n";
10491 $available_space{$seqno} = $space;
10493 # This becomes the next controlling container
10494 push @seqno_stack, $seqno_top if ($seqno_top);
10495 $seqno_top = $seqno;
10500 sub braces_left_setup {
10502 # Called once per file to mark all -bl, -sbl, and -asbl containers
10505 my $rOpts_bl = $rOpts->{'opening-brace-on-new-line'};
10506 my $rOpts_sbl = $rOpts->{'opening-sub-brace-on-new-line'};
10507 my $rOpts_asbl = $rOpts->{'opening-anonymous-sub-brace-on-new-line'};
10508 return unless ( $rOpts_bl || $rOpts_sbl || $rOpts_asbl );
10510 my $rLL = $self->[_rLL_];
10511 return unless ( defined($rLL) && @{$rLL} );
10513 # We will turn on this hash for braces controlled by these flags:
10514 my $rbrace_left = $self->[_rbrace_left_];
10516 my $rblock_type_of_seqno = $self->[_rblock_type_of_seqno_];
10517 my $ris_asub_block = $self->[_ris_asub_block_];
10518 my $ris_sub_block = $self->[_ris_sub_block_];
10519 foreach my $seqno ( keys %{$rblock_type_of_seqno} ) {
10521 my $block_type = $rblock_type_of_seqno->{$seqno};
10523 # use -asbl flag for an anonymous sub block
10524 if ( $ris_asub_block->{$seqno} ) {
10526 $rbrace_left->{$seqno} = 1;
10530 # use -sbl flag for a named sub
10531 elsif ( $ris_sub_block->{$seqno} ) {
10533 $rbrace_left->{$seqno} = 1;
10537 # use -bl flag if not a sub block of any type
10540 && $block_type =~ /$bl_pattern/
10541 && $block_type !~ /$bl_exclusion_pattern/ )
10543 $rbrace_left->{$seqno} = 1;
10550 sub bli_adjustment {
10552 # Called once per file to implement the --brace-left-and-indent option.
10553 # If -bli is set, adds one continuation indentation for certain braces
10555 return unless ( $rOpts->{'brace-left-and-indent'} );
10556 my $rLL = $self->[_rLL_];
10557 return unless ( defined($rLL) && @{$rLL} );
10559 my $rblock_type_of_seqno = $self->[_rblock_type_of_seqno_];
10560 my $ris_bli_container = $self->[_ris_bli_container_];
10561 my $rbrace_left = $self->[_rbrace_left_];
10562 my $K_opening_container = $self->[_K_opening_container_];
10563 my $K_closing_container = $self->[_K_closing_container_];
10565 foreach my $seqno ( keys %{$rblock_type_of_seqno} ) {
10566 my $block_type = $rblock_type_of_seqno->{$seqno};
10568 && $block_type =~ /$bli_pattern/
10569 && $block_type !~ /$bli_exclusion_pattern/ )
10571 $ris_bli_container->{$seqno} = 1;
10572 $rbrace_left->{$seqno} = 1;
10573 my $Ko = $K_opening_container->{$seqno};
10574 my $Kc = $K_closing_container->{$seqno};
10575 if ( defined($Ko) && defined($Kc) ) {
10576 $rLL->[$Kc]->[_CI_LEVEL_] = ++$rLL->[$Ko]->[_CI_LEVEL_];
10583 sub find_multiline_qw {
10587 # Multiline qw quotes are not sequenced items like containers { [ (
10588 # but behave in some respects in a similar way. So this routine finds them
10589 # and creates a separate sequence number system for later use.
10591 # This is straightforward because they always begin at the end of one line
10592 # and and at the beginning of a later line. This is true no matter how we
10593 # finally make our line breaks, so we can find them before deciding on new
10596 my $rstarting_multiline_qw_seqno_by_K = {};
10597 my $rending_multiline_qw_seqno_by_K = {};
10598 my $rKrange_multiline_qw_by_seqno = {};
10599 my $rmultiline_qw_has_extra_level = {};
10601 my $ris_excluded_lp_container = $self->[_ris_excluded_lp_container_];
10603 my $rlines = $self->[_rlines_];
10604 my $rLL = $self->[_rLL_];
10606 my $num_qw_seqno = 0;
10607 my $K_start_multiline_qw;
10609 foreach my $line_of_tokens ( @{$rlines} ) {
10611 my $line_type = $line_of_tokens->{_line_type};
10612 next unless ( $line_type eq 'CODE' );
10613 my $rK_range = $line_of_tokens->{_rK_range};
10614 my ( $Kfirst, $Klast ) = @{$rK_range};
10615 next unless ( defined($Kfirst) && defined($Klast) ); # skip blank line
10616 if ( defined($K_start_multiline_qw) ) {
10617 my $type = $rLL->[$Kfirst]->[_TYPE_];
10620 if ( $type ne 'q' ) {
10621 DEVEL_MODE && print STDERR <<EOM;
10622 STRANGE: started multiline qw at K=$K_start_multiline_qw but didn't see q qw at K=$Kfirst\n";
10624 $K_start_multiline_qw = undef;
10627 my $Kprev = $self->K_previous_nonblank($Kfirst);
10628 my $Knext = $self->K_next_nonblank($Kfirst);
10629 my $type_m = defined($Kprev) ? $rLL->[$Kprev]->[_TYPE_] : 'b';
10630 my $type_p = defined($Knext) ? $rLL->[$Knext]->[_TYPE_] : 'b';
10631 if ( $type_m eq 'q' && $type_p ne 'q' ) {
10632 $rending_multiline_qw_seqno_by_K->{$Kfirst} = $qw_seqno;
10633 $rKrange_multiline_qw_by_seqno->{$qw_seqno} =
10634 [ $K_start_multiline_qw, $Kfirst ];
10635 $K_start_multiline_qw = undef;
10639 if ( !defined($K_start_multiline_qw)
10640 && $rLL->[$Klast]->[_TYPE_] eq 'q' )
10642 my $Kprev = $self->K_previous_nonblank($Klast);
10643 my $Knext = $self->K_next_nonblank($Klast);
10644 my $type_m = defined($Kprev) ? $rLL->[$Kprev]->[_TYPE_] : 'b';
10645 my $type_p = defined($Knext) ? $rLL->[$Knext]->[_TYPE_] : 'b';
10646 if ( $type_m ne 'q' && $type_p eq 'q' ) {
10648 $qw_seqno = 'q' . $num_qw_seqno;
10649 $K_start_multiline_qw = $Klast;
10650 $rstarting_multiline_qw_seqno_by_K->{$Klast} = $qw_seqno;
10655 # Give multiline qw lists extra indentation instead of CI. This option
10656 # works well but is currently only activated when the -xci flag is set.
10657 # The reason is to avoid unexpected changes in formatting.
10658 if ($rOpts_extended_continuation_indentation) {
10659 while ( my ( $qw_seqno, $rKrange ) =
10660 each %{$rKrange_multiline_qw_by_seqno} )
10662 my ( $Kbeg, $Kend ) = @{$rKrange};
10664 # require isolated closing token
10665 my $token_end = $rLL->[$Kend]->[_TOKEN_];
10667 unless ( length($token_end) == 1
10668 && ( $is_closing_token{$token_end} || $token_end eq '>' ) );
10670 # require isolated opening token
10671 my $token_beg = $rLL->[$Kbeg]->[_TOKEN_];
10673 # allow space(s) after the qw
10674 if ( length($token_beg) > 3 && substr( $token_beg, 2, 1 ) =~ m/\s/ )
10676 $token_beg =~ s/\s+//;
10679 next unless ( length($token_beg) == 3 );
10681 foreach my $KK ( $Kbeg + 1 .. $Kend - 1 ) {
10682 $rLL->[$KK]->[_LEVEL_]++;
10683 $rLL->[$KK]->[_CI_LEVEL_] = 0;
10686 # set flag for -wn option, which will remove the level
10687 $rmultiline_qw_has_extra_level->{$qw_seqno} = 1;
10691 # For the -lp option we need to mark all parent containers of
10693 if ( $rOpts_line_up_parentheses && !$rOpts_extended_line_up_parentheses ) {
10695 while ( my ( $qw_seqno, $rKrange ) =
10696 each %{$rKrange_multiline_qw_by_seqno} )
10698 my ( $Kbeg, $Kend ) = @{$rKrange};
10699 my $parent_seqno = $self->parent_seqno_by_K($Kend);
10700 next unless ($parent_seqno);
10702 # If the parent container exactly surrounds this qw, then -lp
10703 # formatting seems to work so we will not mark it.
10704 my $is_tightly_contained;
10705 my $Kn = $self->K_next_nonblank($Kend);
10706 my $seqno_n = defined($Kn) ? $rLL->[$Kn]->[_TYPE_SEQUENCE_] : undef;
10707 if ( defined($seqno_n) && $seqno_n eq $parent_seqno ) {
10709 my $Kp = $self->K_previous_nonblank($Kbeg);
10711 defined($Kp) ? $rLL->[$Kp]->[_TYPE_SEQUENCE_] : undef;
10712 if ( defined($seqno_p) && $seqno_p eq $parent_seqno ) {
10713 $is_tightly_contained = 1;
10717 $ris_excluded_lp_container->{$parent_seqno} = 1
10718 unless ($is_tightly_contained);
10720 # continue up the tree marking parent containers
10722 $parent_seqno = $self->[_rparent_of_seqno_]->{$parent_seqno};
10724 unless ( defined($parent_seqno)
10725 && $parent_seqno ne SEQ_ROOT );
10726 $ris_excluded_lp_container->{$parent_seqno} = 1;
10731 $self->[_rstarting_multiline_qw_seqno_by_K_] =
10732 $rstarting_multiline_qw_seqno_by_K;
10733 $self->[_rending_multiline_qw_seqno_by_K_] =
10734 $rending_multiline_qw_seqno_by_K;
10735 $self->[_rKrange_multiline_qw_by_seqno_] = $rKrange_multiline_qw_by_seqno;
10736 $self->[_rmultiline_qw_has_extra_level_] = $rmultiline_qw_has_extra_level;
10741 use constant DEBUG_COLLAPSED_LENGTHS => 0;
10743 # Minimum space reserved for contents of a code block. A value of 40 has given
10744 # reasonable results. With a large line length, say -l=120, this will not
10745 # normally be noticable but it will prevent making a mess in some edge cases.
10746 use constant MIN_BLOCK_LEN => 40;
10748 my %is_handle_type;
10751 my @q = qw( w C U G i k => );
10752 @is_handle_type{@q} = (1) x scalar(@q);
10756 _max_prong_len_ => $i++,
10757 _handle_len_ => $i++,
10762 _interrupted_list_rule_ => $i++,
10766 sub collapsed_lengths {
10770 #----------------------------------------------------------------
10771 # Define the collapsed lengths of containers for -xlp indentation
10772 #----------------------------------------------------------------
10774 # We need an estimate of the minimum required line length starting at any
10775 # opening container for the -xlp style. This is needed to avoid using too
10776 # much indentation space for lower level containers and thereby running
10777 # out of space for outer container tokens due to the maximum line length
10780 # The basic idea is that at each node in the tree we imagine that we have a
10781 # fork with a handle and collapsable prongs:
10785 # ------------|-------
10786 # handle |------------
10790 # Each prong has a minimum collapsed length. The collapsed length at a node
10791 # is the maximum of these minimum lengths, plus the handle length. Each of
10792 # the prongs may itself be a tree node.
10794 # This is just a rough calculation to get an approximate starting point for
10795 # indentation. Later routines will be more precise. It is important that
10796 # these estimates be independent of the line breaks of the input stream in
10797 # order to avoid instabilities.
10799 my $rLL = $self->[_rLL_];
10800 my $Klimit = $self->[_Klimit_];
10801 my $rlines = $self->[_rlines_];
10802 my $K_opening_container = $self->[_K_opening_container_];
10803 my $K_closing_container = $self->[_K_closing_container_];
10804 my $rblock_type_of_seqno = $self->[_rblock_type_of_seqno_];
10805 my $rcollapsed_length_by_seqno = $self->[_rcollapsed_length_by_seqno_];
10806 my $ris_excluded_lp_container = $self->[_ris_excluded_lp_container_];
10807 my $ris_permanently_broken = $self->[_ris_permanently_broken_];
10808 my $ris_list_by_seqno = $self->[_ris_list_by_seqno_];
10809 my $rhas_broken_list = $self->[_rhas_broken_list_];
10811 my $K_start_multiline_qw;
10812 my $level_start_multiline_qw = 0;
10813 my $max_prong_len = 0;
10814 my $handle_len = 0;
10817 my $last_nonblank_type = 'b';
10819 [ $max_prong_len, $handle_len, SEQ_ROOT, undef, undef, undef, undef ];
10822 foreach my $line_of_tokens ( @{$rlines} ) {
10824 my $line_type = $line_of_tokens->{_line_type};
10825 next if ( $line_type ne 'CODE' );
10826 my $CODE_type = $line_of_tokens->{_code_type};
10828 # Always skip blank lines
10829 next if ( $CODE_type eq 'BL' );
10831 # Note on other line types:
10832 # 'FS' (Format Skipping) lines may contain opening/closing tokens so
10833 # we have to process them to keep the stack correctly sequenced.
10834 # 'VB' (Verbatim) lines could be skipped, but testing shows that
10835 # results look better if we include their lengths.
10837 # Also note that we could exclude -xlp formatting of containers with
10838 # 'FS' and 'VB' lines, but in testing that was not really beneficial.
10840 # So we process tokens in 'FS' and 'VB' lines like all the rest...
10842 my $rK_range = $line_of_tokens->{_rK_range};
10843 my ( $K_first, $K_last ) = @{$rK_range};
10844 next unless ( defined($K_first) && defined($K_last) );
10846 my $has_comment = $rLL->[$K_last]->[_TYPE_] eq '#';
10848 # Always ignore block comments
10849 next if ( $has_comment && $K_first == $K_last );
10851 # Handle an intermediate line of a multiline qw quote. These may
10852 # require including some -ci or -i spaces. See cases c098/x063.
10853 # Updated to check all lines (not just $K_first==$K_last) to fix b1316
10854 my $K_begin_loop = $K_first;
10855 if ( $rLL->[$K_first]->[_TYPE_] eq 'q' ) {
10858 my $level = $rLL->[$KK]->[_LEVEL_];
10859 my $ci_level = $rLL->[$KK]->[_CI_LEVEL_];
10861 # remember the level of the start
10862 if ( !defined($K_start_multiline_qw) ) {
10863 $K_start_multiline_qw = $K_first;
10864 $level_start_multiline_qw = $level;
10866 $self->[_rstarting_multiline_qw_seqno_by_K_]
10867 ->{$K_start_multiline_qw};
10868 if ( !$seqno_qw ) {
10869 my $Kp = $self->K_previous_nonblank($K_first);
10870 if ( defined($Kp) && $rLL->[$Kp]->[_TYPE_] eq 'q' ) {
10872 $K_start_multiline_qw = $Kp;
10873 $level_start_multiline_qw =
10874 $rLL->[$K_start_multiline_qw]->[_LEVEL_];
10879 $len = $rLL->[$KK]->[_CUMULATIVE_LENGTH_] -
10880 $rLL->[ $KK - 1 ]->[_CUMULATIVE_LENGTH_];
10882 # We may have to add the spaces of one level or ci level ... it
10883 # depends depends on the -xci flag, the -wn flag, and if the qw
10884 # uses a container token as the quote delimiter.
10886 # First rule: add ci if there is a $ci_level
10888 $len += $rOpts_continuation_indentation;
10891 # Second rule: otherwise, look for an extra indentation level
10892 # from the start and add one indentation level if found.
10893 elsif ( $level > $level_start_multiline_qw ) {
10894 $len += $rOpts_indent_columns;
10897 if ( $len > $max_prong_len ) { $max_prong_len = $len }
10899 $last_nonblank_type = 'q';
10901 $K_begin_loop = $K_first + 1;
10903 # We can skip to the next line if more tokens
10904 next if ( $K_begin_loop > $K_last );
10907 $K_start_multiline_qw = undef;
10909 # Find the terminal token, before any side comment
10910 my $K_terminal = $K_last;
10911 if ($has_comment) {
10914 if ( $rLL->[$K_terminal]->[_TYPE_] eq 'b'
10915 && $K_terminal > $K_first );
10918 # Use length to terminal comma if interrupded list rule applies
10919 if ( @stack && $stack[-1]->[_interrupted_list_rule_] ) {
10920 my $K_c = $stack[-1]->[_K_c_];
10923 && $rLL->[$K_terminal]->[_TYPE_] eq ','
10925 # Ignore a terminal comma, causes instability (b1297)
10926 && ( $K_c - $K_terminal > 2
10927 || $rLL->[ $K_terminal + 1 ]->[_TYPE_] eq 'b' )
10930 my $Kend = $K_terminal;
10932 # This caused an instability in b1311 by making the result
10933 # dependent on input. It is not really necessary because the
10934 # comment length is added at the end of the loop.
10935 ##if ( $has_comment
10936 ## && !$rOpts_ignore_side_comment_lengths )
10938 ## $Kend = $K_last;
10941 $len = $rLL->[$Kend]->[_CUMULATIVE_LENGTH_] -
10942 $rLL->[ $K_first - 1 ]->[_CUMULATIVE_LENGTH_];
10944 if ( $len > $max_prong_len ) { $max_prong_len = $len }
10948 # Loop over tokens on this line ...
10949 foreach my $KK ( $K_begin_loop .. $K_terminal ) {
10951 my $type = $rLL->[$KK]->[_TYPE_];
10952 next if ( $type eq 'b' );
10954 #------------------------
10955 # Handle sequenced tokens
10956 #------------------------
10957 my $seqno = $rLL->[$KK]->[_TYPE_SEQUENCE_];
10960 my $token = $rLL->[$KK]->[_TOKEN_];
10962 #----------------------------
10963 # Entering a new container...
10964 #----------------------------
10965 if ( $is_opening_token{$token} ) {
10967 # save current prong length
10968 $stack[-1]->[_max_prong_len_] = $max_prong_len;
10969 $max_prong_len = 0;
10971 # Start new prong one level deeper
10972 my $handle_len = 0;
10973 if ( $rblock_type_of_seqno->{$seqno} ) {
10975 # code blocks do not use -lp indentation, but behave as
10976 # if they had a handle of one indentation length
10977 $handle_len = $rOpts_indent_columns;
10980 elsif ( $is_handle_type{$last_nonblank_type} ) {
10981 $handle_len = $len;
10983 if ( $KK > 0 && $rLL->[ $KK - 1 ]->[_TYPE_] eq 'b' );
10986 # Set a flag if the 'Interrupted List Rule' will be applied
10987 # (see sub copy_old_breakpoints).
10988 # - Added check on has_broken_list to fix issue b1298
10990 my $interrupted_list_rule =
10991 $ris_permanently_broken->{$seqno}
10992 && $ris_list_by_seqno->{$seqno}
10993 && !$rhas_broken_list->{$seqno}
10994 && !$rOpts_ignore_old_breakpoints;
10996 # NOTES: Since we are looking at old line numbers we have
10997 # to be very careful not to introduce an instability.
10999 # This following causes instability (b1288-b1296):
11000 # $interrupted_list_rule ||=
11001 # $rOpts_break_at_old_comma_breakpoints;
11003 # - We could turn off the interrupted list rule if there is
11004 # a broken sublist, to follow 'Compound List Rule 1'.
11005 # - We could use the _rhas_broken_list_ flag for this.
11006 # - But it seems safer not to do this, to avoid
11007 # instability, since the broken sublist could be
11008 # temporary. It seems better to let the formatting
11009 # stabilize by itself after one or two iterations.
11010 # - So, not doing this for now
11012 # Include length to a comma ending this line
11013 if ( $interrupted_list_rule
11014 && $rLL->[$K_terminal]->[_TYPE_] eq ',' )
11016 my $Kend = $K_terminal;
11017 if ( $Kend < $K_last
11018 && !$rOpts_ignore_side_comment_lengths )
11023 # Measure from the next blank if any (fixes b1301)
11025 if ( $rLL->[ $Kbeg + 1 ]->[_TYPE_] eq 'b'
11031 my $len = $rLL->[$Kend]->[_CUMULATIVE_LENGTH_] -
11032 $rLL->[$Kbeg]->[_CUMULATIVE_LENGTH_];
11033 if ( $len > $max_prong_len ) { $max_prong_len = $len }
11036 my $K_c = $K_closing_container->{$seqno};
11040 $max_prong_len, $handle_len,
11043 $interrupted_list_rule
11047 #--------------------
11048 # Exiting a container
11049 #--------------------
11050 elsif ( $is_closing_token{$token} ) {
11053 # The current prong ends - get its handle
11054 my $item = pop @stack;
11055 my $handle_len = $item->[_handle_len_];
11056 my $seqno_o = $item->[_seqno_o_];
11057 my $iline_o = $item->[_iline_o_];
11058 my $K_o = $item->[_K_o_];
11059 my $K_c_expect = $item->[_K_c_];
11060 my $collapsed_len = $max_prong_len;
11062 if ( $seqno_o ne $seqno ) {
11064 # Shouldn't happen - must have skipped some lines.
11065 # Not fatal but -lp formatting could get messed up.
11068 sequence numbers differ; at CLOSING line $iline, seq=$seqno, Kc=$KK .. at OPENING line $iline_o, seq=$seqno_o, Ko=$K_o, expecting Kc=$K_c_expect
11073 #------------------------------------------
11074 # Rules to avoid scrunching code blocks ...
11075 #------------------------------------------
11077 # c098/x107 x108 x110 x112 x114 x115 x117 x118 x119
11078 if ( $rblock_type_of_seqno->{$seqno} ) {
11081 my $block_length = MIN_BLOCK_LEN;
11082 my $is_one_line_block;
11083 my $level = $rLL->[$K_o]->[_LEVEL_];
11084 if ( defined($K_o) && defined($K_c) ) {
11086 $rLL->[ $K_c - 1 ]->[_CUMULATIVE_LENGTH_] -
11087 $rLL->[$K_o]->[_CUMULATIVE_LENGTH_];
11088 $is_one_line_block = $iline == $iline_o;
11091 # Code block rule 1: Use the total block length if
11092 # it is less than the minimum.
11093 if ( $block_length < MIN_BLOCK_LEN ) {
11094 $collapsed_len = $block_length;
11097 # Code block rule 2: Use the full length of a
11098 # one-line block to avoid breaking it, unless
11099 # extremely long. We do not need to do a precise
11100 # check here, because if it breaks then it will
11101 # stay broken on later iterations.
11102 elsif ($is_one_line_block
11104 $maximum_line_length_at_level[$level] )
11106 $collapsed_len = $block_length;
11109 # Code block rule 3: Otherwise the length should be
11110 # at least MIN_BLOCK_LEN to avoid scrunching code
11112 elsif ( $collapsed_len < MIN_BLOCK_LEN ) {
11113 $collapsed_len = MIN_BLOCK_LEN;
11117 # Store the result. Some extra space, '2', allows for
11118 # length of an opening token, inside space, comma, ...
11119 # This constant has been tuned to give good overall
11121 $collapsed_len += 2;
11122 $rcollapsed_length_by_seqno->{$seqno} = $collapsed_len;
11124 # Restart scanning the lower level prong
11126 $max_prong_len = $stack[-1]->[_max_prong_len_];
11127 $collapsed_len += $handle_len;
11128 if ( $collapsed_len > $max_prong_len ) {
11129 $max_prong_len = $collapsed_len;
11135 # it is a ternary - no special processing for these yet
11141 $last_nonblank_type = $type;
11145 #----------------------------
11146 # Handle non-container tokens
11147 #----------------------------
11148 my $token_length = $rLL->[$KK]->[_TOKEN_LENGTH_];
11150 # Count lengths of things like 'xx => yy' as a single item
11151 if ( $type eq '=>' ) {
11152 $len += $token_length + 1;
11153 if ( $len > $max_prong_len ) { $max_prong_len = $len }
11155 elsif ( $last_nonblank_type eq '=>' ) {
11156 $len += $token_length;
11157 if ( $len > $max_prong_len ) { $max_prong_len = $len }
11159 # but only include one => per item
11160 if ( $last_nonblank_type eq '=>' ) { $len = $token_length }
11163 # include everthing to end of line after a here target
11164 elsif ( $type eq 'h' ) {
11165 $len = $rLL->[$K_last]->[_CUMULATIVE_LENGTH_] -
11166 $rLL->[ $KK - 1 ]->[_CUMULATIVE_LENGTH_];
11167 if ( $len > $max_prong_len ) { $max_prong_len = $len }
11170 # for everything else just use the token length
11172 $len = $token_length;
11173 if ( $len > $max_prong_len ) { $max_prong_len = $len }
11175 $last_nonblank_type = $type;
11177 } ## end loop over tokens on this line
11179 # Now take care of any side comment
11180 if ($has_comment) {
11181 if ($rOpts_ignore_side_comment_lengths) {
11186 # For a side comment when -iscl is not set, measure length from
11187 # the start of the previous nonblank token
11190 ? $rLL->[ $K_terminal - 1 ]->[_CUMULATIVE_LENGTH_]
11192 $len = $rLL->[$K_last]->[_CUMULATIVE_LENGTH_] - $len0;
11193 if ( $len > $max_prong_len ) { $max_prong_len = $len }
11197 } ## end loop over lines
11199 if (DEBUG_COLLAPSED_LENGTHS) {
11200 print "\nCollapsed lengths--\n";
11202 my $key ( sort { $a <=> $b } keys %{$rcollapsed_length_by_seqno} )
11204 my $clen = $rcollapsed_length_by_seqno->{$key};
11205 print "$key -> $clen\n";
11212 sub is_excluded_lp {
11214 # Decide if this container is excluded by user request:
11215 # returns true if this token is excluded (i.e., may not use -lp)
11216 # returns false otherwise
11218 # The control hash can either describe:
11219 # what to exclude: $line_up_parentheses_control_is_lxpl = 1, or
11220 # what to include: $line_up_parentheses_control_is_lxpl = 0
11222 my ( $self, $KK ) = @_;
11223 my $rLL = $self->[_rLL_];
11224 my $rtoken_vars = $rLL->[$KK];
11225 my $token = $rtoken_vars->[_TOKEN_];
11226 my $rflags = $line_up_parentheses_control_hash{$token};
11228 #-----------------------------------------------
11229 # TEST #1: check match to listed container types
11230 #-----------------------------------------------
11231 if ( !defined($rflags) ) {
11233 # There is no entry for this container, so we are done
11234 return !$line_up_parentheses_control_is_lxpl;
11237 my ( $flag1, $flag2 ) = @{$rflags};
11239 #-----------------------------------------------------------
11240 # TEST #2: check match to flag1, the preceding nonblank word
11241 #-----------------------------------------------------------
11242 my $match_flag1 = !defined($flag1) || $flag1 eq '*';
11243 if ( !$match_flag1 ) {
11245 # Find the previous token
11246 my ( $is_f, $is_k, $is_w );
11247 my $Kp = $self->K_previous_nonblank($KK);
11248 if ( defined($Kp) ) {
11249 my $type_p = $rLL->[$Kp]->[_TYPE_];
11250 my $seqno = $rtoken_vars->[_TYPE_SEQUENCE_];
11253 $is_k = $type_p eq 'k';
11256 $is_f = $self->[_ris_function_call_paren_]->{$seqno};
11258 # either keyword or function call?
11259 $is_w = $is_k || $is_f;
11262 # Check for match based on flag1 and the previous token:
11263 if ( $flag1 eq 'k' ) { $match_flag1 = $is_k }
11264 elsif ( $flag1 eq 'K' ) { $match_flag1 = !$is_k }
11265 elsif ( $flag1 eq 'f' ) { $match_flag1 = $is_f }
11266 elsif ( $flag1 eq 'F' ) { $match_flag1 = !$is_f }
11267 elsif ( $flag1 eq 'w' ) { $match_flag1 = $is_w }
11268 elsif ( $flag1 eq 'W' ) { $match_flag1 = !$is_w }
11271 # See if we can exclude this based on the flag1 test...
11272 if ($line_up_parentheses_control_is_lxpl) {
11273 return 1 if ($match_flag1);
11276 return 1 if ( !$match_flag1 );
11279 #-------------------------------------------------------------
11280 # TEST #3: exclusion based on flag2 and the container contents
11281 #-------------------------------------------------------------
11283 # Note that this is an exclusion test for both -lpxl or -lpil input methods
11285 # 0 or blank: ignore container contents
11286 # 1 exclude non-lists or lists with sublists
11287 # 2 same as 1 but also exclude lists with code blocks
11292 my $seqno = $rtoken_vars->[_TYPE_SEQUENCE_];
11294 my $is_list = $self->[_ris_list_by_seqno_]->{$seqno};
11295 my $has_list = $self->[_rhas_list_]->{$seqno};
11296 my $has_code_block = $self->[_rhas_code_block_]->{$seqno};
11297 my $has_ternary = $self->[_rhas_ternary_]->{$seqno};
11301 || $flag2 eq '2' && ( $has_code_block || $has_ternary ) )
11306 return $match_flag2;
11309 sub set_excluded_lp_containers {
11312 return unless ($rOpts_line_up_parentheses);
11313 my $rLL = $self->[_rLL_];
11314 return unless ( defined($rLL) && @{$rLL} );
11316 my $K_opening_container = $self->[_K_opening_container_];
11317 my $ris_excluded_lp_container = $self->[_ris_excluded_lp_container_];
11318 my $rblock_type_of_seqno = $self->[_rblock_type_of_seqno_];
11320 foreach my $seqno ( keys %{$K_opening_container} ) {
11322 # code blocks are always excluded by the -lp coding so we can skip them
11323 next if ( $rblock_type_of_seqno->{$seqno} );
11325 my $KK = $K_opening_container->{$seqno};
11326 next unless defined($KK);
11328 # see if a user exclusion rule turns off -lp for this container
11329 if ( $self->is_excluded_lp($KK) ) {
11330 $ris_excluded_lp_container->{$seqno} = 1;
11336 ######################################
11337 # CODE SECTION 6: Process line-by-line
11338 ######################################
11340 sub process_all_lines {
11342 #----------------------------------------------------------
11343 # Main loop to format all lines of a file according to type
11344 #----------------------------------------------------------
11347 my $rlines = $self->[_rlines_];
11348 my $sink_object = $self->[_sink_object_];
11349 my $fh_tee = $self->[_fh_tee_];
11350 my $rOpts_keep_old_blank_lines = $rOpts->{'keep-old-blank-lines'};
11351 my $file_writer_object = $self->[_file_writer_object_];
11352 my $logger_object = $self->[_logger_object_];
11353 my $vertical_aligner_object = $self->[_vertical_aligner_object_];
11354 my $save_logfile = $self->[_save_logfile_];
11356 # Note for RT#118553, leave only one newline at the end of a file.
11357 # Example code to do this is in comments below:
11358 # my $Opt_trim_ending_blank_lines = 0;
11359 # if ($Opt_trim_ending_blank_lines) {
11360 # while ( my $line_of_tokens = pop @{$rlines} ) {
11361 # my $line_type = $line_of_tokens->{_line_type};
11362 # if ( $line_type eq 'CODE' ) {
11363 # my $CODE_type = $line_of_tokens->{_code_type};
11364 # next if ( $CODE_type eq 'BL' );
11366 # push @{$rlines}, $line_of_tokens;
11371 # But while this would be a trivial update, it would have very undesirable
11372 # side effects when perltidy is run from within an editor on a small snippet.
11373 # So this is best done with a separate filter, such
11374 # as 'delete_ending_blank_lines.pl' in the examples folder.
11376 # Flag to prevent blank lines when POD occurs in a format skipping sect.
11377 my $in_format_skipping_section;
11379 # set locations for blanks around long runs of keywords
11380 my $rwant_blank_line_after = $self->keyword_group_scan();
11382 my $line_type = "";
11383 my $i_last_POD_END = -10;
11385 foreach my $line_of_tokens ( @{$rlines} ) {
11388 # insert blank lines requested for keyword sequences
11390 && defined( $rwant_blank_line_after->{ $i - 1 } )
11391 && $rwant_blank_line_after->{ $i - 1 } == 1 )
11393 $self->want_blank_line();
11396 my $last_line_type = $line_type;
11397 $line_type = $line_of_tokens->{_line_type};
11398 my $input_line = $line_of_tokens->{_line_text};
11400 # _line_type codes are:
11401 # SYSTEM - system-specific code before hash-bang line
11402 # CODE - line of perl code (including comments)
11403 # POD_START - line starting pod, such as '=head'
11404 # POD - pod documentation text
11405 # POD_END - last line of pod section, '=cut'
11406 # HERE - text of here-document
11407 # HERE_END - last line of here-doc (target word)
11408 # FORMAT - format section
11409 # FORMAT_END - last line of format section, '.'
11410 # SKIP - code skipping section
11411 # SKIP_END - last line of code skipping section, '#>>V'
11412 # DATA_START - __DATA__ line
11413 # DATA - unidentified text following __DATA__
11414 # END_START - __END__ line
11415 # END - unidentified text following __END__
11416 # ERROR - we are in big trouble, probably not a perl script
11418 # put a blank line after an =cut which comes before __END__ and __DATA__
11419 # (required by podchecker)
11420 if ( $last_line_type eq 'POD_END' && !$self->[_saw_END_or_DATA_] ) {
11421 $i_last_POD_END = $i;
11422 $file_writer_object->reset_consecutive_blank_lines();
11423 if ( !$in_format_skipping_section && $input_line !~ /^\s*$/ ) {
11424 $self->want_blank_line();
11428 # handle line of code..
11429 if ( $line_type eq 'CODE' ) {
11431 my $CODE_type = $line_of_tokens->{_code_type};
11432 $in_format_skipping_section = $CODE_type eq 'FS';
11434 # Handle blank lines
11435 if ( $CODE_type eq 'BL' ) {
11437 # Keep this blank? Start with the flag -kbl=n, where
11438 # n=0 ignore all old blank lines
11439 # n=1 stable: keep old blanks, but limited by -mbl=n
11440 # n=2 keep all old blank lines, regardless of -mbl=n
11441 # If n=0 we delete all old blank lines and let blank line
11442 # rules generate any needed blank lines.
11443 my $kgb_keep = $rOpts_keep_old_blank_lines;
11445 # Then delete lines requested by the keyword-group logic if
11447 if ( $kgb_keep == 1
11448 && defined( $rwant_blank_line_after->{$i} )
11449 && $rwant_blank_line_after->{$i} == 2 )
11454 # But always keep a blank line following an =cut
11455 if ( $i - $i_last_POD_END < 3 && !$kgb_keep ) {
11460 $self->flush($CODE_type);
11461 $file_writer_object->write_blank_code_line(
11462 $rOpts_keep_old_blank_lines == 2 );
11463 $self->[_last_line_leading_type_] = 'b';
11469 # Let logger see all non-blank lines of code. This is a slow
11470 # operation so we avoid it if it is not going to be saved.
11471 if ( $save_logfile && $logger_object ) {
11472 $logger_object->black_box( $line_of_tokens,
11473 $vertical_aligner_object->get_output_line_number );
11477 # Handle Format Skipping (FS) and Verbatim (VB) Lines
11478 if ( $CODE_type eq 'VB' || $CODE_type eq 'FS' ) {
11479 $self->write_unindented_line("$input_line");
11480 $file_writer_object->reset_consecutive_blank_lines();
11484 # Handle all other lines of code
11485 $self->process_line_of_CODE($line_of_tokens);
11488 # handle line of non-code..
11491 # set special flags
11493 if ( substr( $line_type, 0, 3 ) eq 'POD' ) {
11495 # Pod docs should have a preceding blank line. But stay
11496 # out of __END__ and __DATA__ sections, because
11497 # the user may be using this section for any purpose whatsoever
11498 if ( $rOpts->{'delete-pod'} ) { $skip_line = 1; }
11499 if ( $rOpts->{'trim-pod'} ) { $input_line =~ s/\s+$// }
11501 && !$in_format_skipping_section
11502 && $line_type eq 'POD_START'
11503 && !$self->[_saw_END_or_DATA_] )
11505 $self->want_blank_line();
11509 # leave the blank counters in a predictable state
11510 # after __END__ or __DATA__
11511 elsif ( $line_type eq 'END_START' || $line_type eq 'DATA_START' ) {
11512 $file_writer_object->reset_consecutive_blank_lines();
11513 $self->[_saw_END_or_DATA_] = 1;
11516 # Patch to avoid losing blank lines after a code-skipping block;
11518 elsif ( $line_type eq 'SKIP_END' ) {
11519 $file_writer_object->reset_consecutive_blank_lines();
11522 # write unindented non-code line
11523 if ( !$skip_line ) {
11524 $self->write_unindented_line($input_line);
11530 } ## end sub process_all_lines
11532 sub keyword_group_scan {
11535 #-------------------------------------------------------------------------
11536 # Called once per file to process any --keyword-group-blanks-* parameters.
11537 #-------------------------------------------------------------------------
11539 # Manipulate blank lines around keyword groups (kgb* flags)
11540 # Scan all lines looking for runs of consecutive lines beginning with
11541 # selected keywords. Example keywords are 'my', 'our', 'local', ... but
11542 # they may be anything. We will set flags requesting that blanks be
11543 # inserted around and within them according to input parameters. Note
11544 # that we are scanning the lines as they came in in the input stream, so
11545 # they are not necessarily well formatted.
11547 # The output of this sub is a return hash ref whose keys are the indexes of
11548 # lines after which we desire a blank line. For line index i:
11549 # $rhash_of_desires->{$i} = 1 means we want a blank line AFTER line $i
11550 # $rhash_of_desires->{$i} = 2 means we want blank line $i removed
11551 my $rhash_of_desires = {};
11553 # Nothing to do if no blanks can be output. This test added to fix
11555 if ( !$rOpts_maximum_consecutive_blank_lines ) {
11556 return $rhash_of_desires;
11559 my $Opt_blanks_before = $rOpts->{'keyword-group-blanks-before'}; # '-kgbb'
11560 my $Opt_blanks_after = $rOpts->{'keyword-group-blanks-after'}; # '-kgba'
11561 my $Opt_blanks_inside = $rOpts->{'keyword-group-blanks-inside'}; # '-kgbi'
11562 my $Opt_blanks_delete = $rOpts->{'keyword-group-blanks-delete'}; # '-kgbd'
11563 my $Opt_size = $rOpts->{'keyword-group-blanks-size'}; # '-kgbs'
11565 # A range of sizes can be input with decimal notation like 'min.max' with
11566 # any number of dots between the two numbers. Examples:
11567 # string => min max matches
11568 # 1.1 1 1 exactly 1
11569 # 1.3 1 3 1,2, or 3
11570 # 1..3 1 3 1,2, or 3
11575 my ( $Opt_size_min, $Opt_size_max ) = split /\.+/, $Opt_size;
11576 if ( $Opt_size_min && $Opt_size_min !~ /^\d+$/
11577 || $Opt_size_max && $Opt_size_max !~ /^\d+$/ )
11580 Unexpected value for -kgbs: '$Opt_size'; expecting 'min' or 'min.max';
11581 ignoring all -kgb flags
11584 # Turn this option off so that this message does not keep repeating
11585 # during iterations and other files.
11586 $rOpts->{'keyword-group-blanks-size'} = "";
11587 return $rhash_of_desires;
11589 $Opt_size_min = 1 unless ($Opt_size_min);
11591 if ( $Opt_size_max && $Opt_size_max < $Opt_size_min ) {
11592 return $rhash_of_desires;
11595 # codes for $Opt_blanks_before and $Opt_blanks_after:
11596 # 0 = never (delete if exist)
11597 # 1 = stable (keep unchanged)
11598 # 2 = always (insert if missing)
11600 return $rhash_of_desires
11601 unless $Opt_size_min > 0
11602 && ( $Opt_blanks_before != 1
11603 || $Opt_blanks_after != 1
11604 || $Opt_blanks_inside
11605 || $Opt_blanks_delete );
11607 my $Opt_pattern = $keyword_group_list_pattern;
11608 my $Opt_comment_pattern = $keyword_group_list_comment_pattern;
11609 my $Opt_repeat_count =
11610 $rOpts->{'keyword-group-blanks-repeat-count'}; # '-kgbr'
11612 my $rlines = $self->[_rlines_];
11613 my $rLL = $self->[_rLL_];
11614 my $K_closing_container = $self->[_K_closing_container_];
11615 my $K_opening_container = $self->[_K_opening_container_];
11616 my $rK_weld_right = $self->[_rK_weld_right_];
11618 # variables for the current group and subgroups:
11619 my ( $ibeg, $iend, $count, $level_beg, $K_closing, @iblanks, @group,
11623 # ($ibeg, $iend) = starting and ending line indexes of this entire group
11624 # $count = total number of keywords seen in this entire group
11625 # $level_beg = indententation level of this group
11626 # @group = [ $i, $token, $count ] =list of all keywords & blanks
11627 # @subgroup = $j, index of group where token changes
11628 # @iblanks = line indexes of blank lines in input stream in this group
11629 # where i=starting line index
11630 # token (the keyword)
11631 # count = number of this token in this subgroup
11632 # j = index in group where token changes
11634 # These vars will contain values for the most recently seen line:
11635 my ( $line_type, $CODE_type, $K_first, $K_last );
11637 my $number_of_groups_seen = 0;
11639 #-------------------
11640 # helper subroutines
11641 #-------------------
11643 my $insert_blank_after = sub {
11645 $rhash_of_desires->{$i} = 1;
11647 if ( defined( $rhash_of_desires->{$ip} )
11648 && $rhash_of_desires->{$ip} == 2 )
11650 $rhash_of_desires->{$ip} = 0;
11655 my $split_into_sub_groups = sub {
11657 # place blanks around long sub-groups of keywords
11659 return unless ($Opt_blanks_inside);
11661 # loop over sub-groups, index k
11662 push @subgroup, scalar @group;
11664 my $kend = @subgroup - 1;
11665 for ( my $k = $kbeg ; $k <= $kend ; $k++ ) {
11667 # index j runs through all keywords found
11668 my $j_b = $subgroup[ $k - 1 ];
11669 my $j_e = $subgroup[$k] - 1;
11671 # index i is the actual line number of a keyword
11672 my ( $i_b, $tok_b, $count_b ) = @{ $group[$j_b] };
11673 my ( $i_e, $tok_e, $count_e ) = @{ $group[$j_e] };
11674 my $num = $count_e - $count_b + 1;
11676 # This subgroup runs from line $ib to line $ie-1, but may contain
11678 if ( $num >= $Opt_size_min ) {
11680 # if there are blank lines, we require that at least $num lines
11681 # be non-blank up to the boundary with the next subgroup.
11682 my $nog_b = my $nog_e = 1;
11683 if ( @iblanks && !$Opt_blanks_delete ) {
11684 my $j_bb = $j_b + $num - 1;
11685 my ( $i_bb, $tok_bb, $count_bb ) = @{ $group[$j_bb] };
11686 $nog_b = $count_bb - $count_b + 1 == $num;
11688 my $j_ee = $j_e - ( $num - 1 );
11689 my ( $i_ee, $tok_ee, $count_ee ) = @{ $group[$j_ee] };
11690 $nog_e = $count_e - $count_ee + 1 == $num;
11692 if ( $nog_b && $k > $kbeg ) {
11693 $insert_blank_after->( $i_b - 1 );
11695 if ( $nog_e && $k < $kend ) {
11696 my ( $i_ep, $tok_ep, $count_ep ) = @{ $group[ $j_e + 1 ] };
11697 $insert_blank_after->( $i_ep - 1 );
11704 my $delete_if_blank = sub {
11707 # delete line $i if it is blank
11708 return unless ( $i >= 0 && $i < @{$rlines} );
11709 my $line_type = $rlines->[$i]->{_line_type};
11710 return if ( $line_type ne 'CODE' );
11711 my $code_type = $rlines->[$i]->{_code_type};
11712 if ( $code_type eq 'BL' ) { $rhash_of_desires->{$i} = 2; }
11716 my $delete_inner_blank_lines = sub {
11718 # always remove unwanted trailing blank lines from our list
11719 return unless (@iblanks);
11720 while ( my $ibl = pop(@iblanks) ) {
11721 if ( $ibl < $iend ) { push @iblanks, $ibl; last }
11725 # now mark mark interior blank lines for deletion if requested
11726 return unless ($Opt_blanks_delete);
11728 while ( my $ibl = pop(@iblanks) ) { $rhash_of_desires->{$ibl} = 2 }
11733 my $end_group = sub {
11735 # end a group of keywords
11736 my ($bad_ending) = @_;
11737 if ( defined($ibeg) && $ibeg >= 0 ) {
11739 # then handle sufficiently large groups
11740 if ( $count >= $Opt_size_min ) {
11742 $number_of_groups_seen++;
11744 # do any blank deletions regardless of the count
11745 $delete_inner_blank_lines->();
11748 my $code_type = $rlines->[ $ibeg - 1 ]->{_code_type};
11750 # patch for hash bang line which is not currently marked as
11751 # a comment; mark it as a comment
11752 if ( $ibeg == 1 && !$code_type ) {
11753 my $line_text = $rlines->[ $ibeg - 1 ]->{_line_text};
11755 if ( $line_text && $line_text =~ /^#/ );
11758 # Do not insert a blank after a comment
11759 # (this could be subject to a flag in the future)
11760 if ( $code_type !~ /(BC|SBC|SBCX)/ ) {
11761 if ( $Opt_blanks_before == INSERT ) {
11762 $insert_blank_after->( $ibeg - 1 );
11765 elsif ( $Opt_blanks_before == DELETE ) {
11766 $delete_if_blank->( $ibeg - 1 );
11771 # We will only put blanks before code lines. We could loosen
11772 # this rule a little, but we have to be very careful because
11773 # for example we certainly don't want to drop a blank line
11774 # after a line like this:
11776 if ( $line_type eq 'CODE' && defined($K_first) ) {
11778 # - Do not put a blank before a line of different level
11779 # - Do not put a blank line if we ended the search badly
11780 # - Do not put a blank at the end of the file
11781 # - Do not put a blank line before a hanging side comment
11782 my $level = $rLL->[$K_first]->[_LEVEL_];
11783 my $ci_level = $rLL->[$K_first]->[_CI_LEVEL_];
11785 if ( $level == $level_beg
11788 && $iend < @{$rlines}
11789 && $CODE_type ne 'HSC' )
11791 if ( $Opt_blanks_after == INSERT ) {
11792 $insert_blank_after->($iend);
11794 elsif ( $Opt_blanks_after == DELETE ) {
11795 $delete_if_blank->( $iend + 1 );
11800 $split_into_sub_groups->();
11803 # reset for another group
11807 $K_closing = undef;
11815 my $find_container_end = sub {
11817 # If the keyword line is continued onto subsequent lines, find the
11818 # closing token '$K_closing' so that we can easily skip past the
11819 # contents of the container.
11821 # We only set this value if we find a simple list, meaning
11822 # -contents only one level deep
11825 # First check: skip if next line is not one deeper
11826 my $Knext_nonblank = $self->K_next_nonblank($K_last);
11827 goto RETURN if ( !defined($Knext_nonblank) );
11828 my $level_next = $rLL->[$Knext_nonblank]->[_LEVEL_];
11829 goto RETURN if ( $level_next != $level_beg + 1 );
11831 # Find the parent container of the first token on the next line
11832 my $parent_seqno = $self->parent_seqno_by_K($Knext_nonblank);
11833 goto RETURN unless ( defined($parent_seqno) );
11835 # Must not be a weld (can be unstable)
11837 if ( $total_weld_count && $self->is_welded_at_seqno($parent_seqno) );
11839 # Opening container must exist and be on this line
11840 my $Ko = $K_opening_container->{$parent_seqno};
11841 goto RETURN unless ( defined($Ko) && $Ko > $K_first && $Ko <= $K_last );
11843 # Verify that the closing container exists and is on a later line
11844 my $Kc = $K_closing_container->{$parent_seqno};
11845 goto RETURN unless ( defined($Kc) && $Kc > $K_last );
11855 my $add_to_group = sub {
11856 my ( $i, $token, $level ) = @_;
11858 # End the previous group if we have reached the maximum
11860 if ( $Opt_size_max && @group >= $Opt_size_max ) {
11864 if ( @group == 0 ) {
11866 $level_beg = $level;
11874 if ( !@group || $token ne $group[-1]->[1] ) {
11875 push @subgroup, scalar(@group);
11877 push @group, [ $i, $token, $count ];
11879 # remember if this line ends in an open container
11880 $find_container_end->();
11885 #----------------------------------
11886 # loop over all lines of the source
11887 #----------------------------------
11890 foreach my $line_of_tokens ( @{$rlines} ) {
11894 if ( $Opt_repeat_count > 0
11895 && $number_of_groups_seen >= $Opt_repeat_count );
11900 $line_type = $line_of_tokens->{_line_type};
11902 # always end a group at non-CODE
11903 if ( $line_type ne 'CODE' ) { $end_group->(); next }
11905 $CODE_type = $line_of_tokens->{_code_type};
11907 # end any group at a format skipping line
11908 if ( $CODE_type && $CODE_type eq 'FS' ) {
11913 # continue in a verbatim (VB) type; it may be quoted text
11914 if ( $CODE_type eq 'VB' ) {
11915 if ( $ibeg >= 0 ) { $iend = $i; }
11919 # and continue in blank (BL) types
11920 if ( $CODE_type eq 'BL' ) {
11921 if ( $ibeg >= 0 ) {
11923 push @{iblanks}, $i;
11925 # propagate current subgroup token
11926 my $tok = $group[-1]->[1];
11927 push @group, [ $i, $tok, $count ];
11932 # examine the first token of this line
11933 my $rK_range = $line_of_tokens->{_rK_range};
11934 ( $K_first, $K_last ) = @{$rK_range};
11935 if ( !defined($K_first) ) {
11937 # Somewhat unexpected blank line..
11938 # $rK_range is normally defined for line type CODE, but this can
11939 # happen for example if the input line was a single semicolon which
11940 # is being deleted. In that case there was code in the input
11941 # file but it is not being retained. So we can silently return.
11942 return $rhash_of_desires;
11945 my $level = $rLL->[$K_first]->[_LEVEL_];
11946 my $type = $rLL->[$K_first]->[_TYPE_];
11947 my $token = $rLL->[$K_first]->[_TOKEN_];
11948 my $ci_level = $rLL->[$K_first]->[_CI_LEVEL_];
11950 # End a group 'badly' at an unexpected level. This will prevent
11951 # blank lines being incorrectly placed after the end of the group.
11952 # We are looking for any deviation from two acceptable patterns:
11953 # PATTERN 1: a simple list; secondary lines are at level+1
11954 # PATTERN 2: a long statement; all secondary lines same level
11955 # This was added as a fix for case b1177, in which a complex structure
11956 # got incorrectly inserted blank lines.
11957 if ( $ibeg >= 0 ) {
11959 # Check for deviation from PATTERN 1, simple list:
11960 if ( defined($K_closing) && $K_first < $K_closing ) {
11961 $end_group->(1) if ( $level != $level_beg + 1 );
11964 # Check for deviation from PATTERN 2, single statement:
11965 elsif ( $level != $level_beg ) { $end_group->(1) }
11968 # Do not look for keywords in lists ( keyword 'my' can occur in lists,
11969 # see case b760); fixed for c048.
11970 if ( $self->is_list_by_K($K_first) ) {
11971 if ( $ibeg >= 0 ) { $iend = $i }
11975 # see if this is a code type we seek (i.e. comment)
11977 && $Opt_comment_pattern
11978 && $CODE_type =~ /$Opt_comment_pattern/ )
11981 my $tok = $CODE_type;
11983 # Continuing a group
11984 if ( $ibeg >= 0 && $level == $level_beg ) {
11985 $add_to_group->( $i, $tok, $level );
11991 # first end old group if any; we might be starting new
11992 # keywords at different level
11993 if ( $ibeg >= 0 ) { $end_group->(); }
11994 $add_to_group->( $i, $tok, $level );
11999 # See if it is a keyword we seek, but never start a group in a
12000 # continuation line; the code may be badly formatted.
12001 if ( $ci_level == 0
12003 && $token =~ /$Opt_pattern/ )
12006 # Continuing a keyword group
12007 if ( $ibeg >= 0 && $level == $level_beg ) {
12008 $add_to_group->( $i, $token, $level );
12011 # Start new keyword group
12014 # first end old group if any; we might be starting new
12015 # keywords at different level
12016 if ( $ibeg >= 0 ) { $end_group->(); }
12017 $add_to_group->( $i, $token, $level );
12022 # This is not one of our keywords, but we are in a keyword group
12023 # so see if we should continue or quit
12024 elsif ( $ibeg >= 0 ) {
12026 # - bail out on a large level change; we may have walked into a
12027 # data structure or anoymous sub code.
12028 if ( $level > $level_beg + 1 || $level < $level_beg ) {
12033 # - keep going on a continuation line of the same level, since
12034 # it is probably a continuation of our previous keyword,
12035 # - and keep going past hanging side comments because we never
12036 # want to interrupt them.
12037 if ( ( ( $level == $level_beg ) && $ci_level > 0 )
12038 || $CODE_type eq 'HSC' )
12044 # - continue if if we are within in a container which started with
12045 # the line of the previous keyword.
12046 if ( defined($K_closing) && $K_first <= $K_closing ) {
12048 # continue if entire line is within container
12049 if ( $K_last <= $K_closing ) { $iend = $i; next }
12051 # continue at ); or }; or ];
12052 my $KK = $K_closing + 1;
12053 if ( $rLL->[$KK]->[_TYPE_] eq ';' ) {
12054 if ( $KK < $K_last ) {
12055 if ( $rLL->[ ++$KK ]->[_TYPE_] eq 'b' ) { ++$KK }
12056 if ( $KK > $K_last || $rLL->[$KK]->[_TYPE_] ne '#' ) {
12069 # - end the group if none of the above
12074 # not in a keyword group; continue
12078 # end of loop over all lines
12080 return $rhash_of_desires;
12082 } ## end sub keyword_group_scan
12084 #######################################
12085 # CODE SECTION 7: Process lines of code
12086 #######################################
12088 { ## begin closure process_line_of_CODE
12090 # The routines in this closure receive lines of code and combine them into
12091 # 'batches' and send them along. A 'batch' is the unit of code which can be
12092 # processed further as a unit. It has the property that it is the largest
12093 # amount of code into which which perltidy is free to place one or more
12094 # line breaks within it without violating any constraints.
12096 # When a new batch is formed it is sent to sub 'grind_batch_of_code'.
12098 # flags needed by the store routine
12099 my $line_of_tokens;
12100 my $no_internal_newlines;
12103 # range of K of tokens for the current line
12104 my ( $K_first, $K_last );
12106 my ( $rLL, $radjusted_levels, $rparent_of_seqno, $rdepth_of_opening_seqno,
12107 $rblock_type_of_seqno, $ri_starting_one_line_block );
12109 # past stored nonblank tokens and flags
12111 $K_last_nonblank_code, $K_last_last_nonblank_code,
12112 $looking_for_else, $is_static_block_comment,
12113 $batch_CODE_type, $last_line_had_side_comment,
12114 $next_parent_seqno, $next_slevel,
12117 # Called once at the start of a new file
12118 sub initialize_process_line_of_CODE {
12119 $K_last_nonblank_code = undef;
12120 $K_last_last_nonblank_code = undef;
12121 $looking_for_else = 0;
12122 $is_static_block_comment = 0;
12123 $batch_CODE_type = "";
12124 $last_line_had_side_comment = 0;
12125 $next_parent_seqno = SEQ_ROOT;
12126 $next_slevel = undef;
12130 # Batch variables: these describe the current batch of code being formed
12131 # and sent down the pipeline. They are initialized in the next
12133 my ( $rbrace_follower, $index_start_one_line_block,
12134 $semicolons_before_block_self_destruct,
12135 $starting_in_quote, $ending_in_quote, );
12137 # Called before the start of each new batch
12138 sub initialize_batch_variables {
12140 $max_index_to_go = UNDEFINED_INDEX;
12141 @summed_lengths_to_go = @nesting_depth_to_go = (0);
12142 $ri_starting_one_line_block = [];
12144 # The initialization code for the remaining batch arrays is as follows
12145 # and can be activated for testing. But profiling shows that it is
12146 # time-consuming to re-initialize the batch arrays and is not necessary
12147 # because the maximum valid token, $max_index_to_go, is carefully
12148 # controlled. This means however that it is not possible to do any
12149 # type of filter or map operation directly on these arrays. And it is
12150 # not possible to use negative indexes. As a precaution against program
12151 # changes which might do this, sub pad_array_to_go adds some undefs at
12152 # the end of the current batch of data.
12154 # So 'long story short': this is a waste of time
12156 @block_type_to_go = ();
12157 @type_sequence_to_go = ();
12158 @bond_strength_to_go = ();
12159 @forced_breakpoint_to_go = ();
12160 @token_lengths_to_go = ();
12161 @levels_to_go = ();
12162 @mate_index_to_go = ();
12163 @ci_levels_to_go = ();
12164 @nobreak_to_go = ();
12165 @old_breakpoint_to_go = ();
12166 @tokens_to_go = ();
12169 @leading_spaces_to_go = ();
12170 @reduced_spaces_to_go = ();
12173 @parent_seqno_to_go = ();
12176 $rbrace_follower = undef;
12177 $ending_in_quote = 0;
12178 destroy_one_line_block();
12182 sub leading_spaces_to_go {
12184 # return the number of indentation spaces for a token in the output
12188 return 0 if ( $ii < 0 );
12189 my $indentation = $leading_spaces_to_go[$ii];
12190 return ref($indentation) ? $indentation->get_spaces() : $indentation;
12193 sub create_one_line_block {
12194 ( $index_start_one_line_block, $semicolons_before_block_self_destruct )
12199 sub destroy_one_line_block {
12200 $index_start_one_line_block = UNDEFINED_INDEX;
12201 $semicolons_before_block_self_destruct = 0;
12205 # Routine to place the current token into the output stream.
12206 # Called once per output token.
12208 use constant DEBUG_STORE => 0;
12210 sub store_token_to_go {
12212 my ( $self, $Ktoken_vars, $rtoken_vars ) = @_;
12214 # Add one token to the next batch.
12215 # $Ktoken_vars = the index K in the global token array
12216 # $rtoken_vars = $rLL->[$Ktoken_vars] = the corresponding token values
12217 # unless they are temporarily being overridden
12219 my $type = $rtoken_vars->[_TYPE_];
12221 # Check for emergency flush...
12222 # The K indexes in the batch must always be a continuous sequence of
12223 # the global token array. The batch process programming assumes this.
12224 # If storing this token would cause this relation to fail we must dump
12225 # the current batch before storing the new token. It is extremely rare
12226 # for this to happen. One known example is the following two-line
12227 # snippet when run with parameters
12228 # --noadd-newlines --space-terminal-semicolon:
12229 # if ( $_ =~ /PENCIL/ ) { $pencil_flag= 1 } ; ;
12231 if ( $max_index_to_go >= 0 ) {
12232 my $Klast = $K_to_go[$max_index_to_go];
12233 if ( $Ktoken_vars != $Klast + 1 ) {
12234 $self->flush_batch_of_CODE();
12237 # Do not output consecutive blank tokens ... this should not
12238 # happen, but it is worth checking. Later code can then make the
12239 # simplifying assumption that blank tokens are not consecutive.
12240 elsif ( $type eq 'b' && $types_to_go[$max_index_to_go] eq 'b' ) {
12244 # if this happens, it is may be that consecutive blanks
12245 # were inserted into the token stream in 'respace_tokens'
12246 my $lno = $rLL->[$Ktoken_vars]->[_LINE_INDEX_] + 1;
12247 Fault("consecutive blanks near line $lno; please fix");
12253 # Do not start a batch with a blank token.
12254 # Fixes cases b149 b888 b984 b985 b986 b987
12256 if ( $type eq 'b' ) { return }
12259 ++$max_index_to_go;
12260 $batch_CODE_type = $CODE_type;
12261 $K_to_go[$max_index_to_go] = $Ktoken_vars;
12262 $types_to_go[$max_index_to_go] = $type;
12264 $old_breakpoint_to_go[$max_index_to_go] = 0;
12265 $forced_breakpoint_to_go[$max_index_to_go] = 0;
12266 $mate_index_to_go[$max_index_to_go] = -1;
12268 my $token = $tokens_to_go[$max_index_to_go] = $rtoken_vars->[_TOKEN_];
12269 my $ci_level = $ci_levels_to_go[$max_index_to_go] =
12270 $rtoken_vars->[_CI_LEVEL_];
12272 # Clip levels to zero if there are level errors in the file.
12273 # We had to wait until now for reasons explained in sub 'write_line'.
12274 my $level = $rtoken_vars->[_LEVEL_];
12275 if ( $level < 0 ) { $level = 0 }
12276 $levels_to_go[$max_index_to_go] = $level;
12278 my $seqno = $type_sequence_to_go[$max_index_to_go] =
12279 $rtoken_vars->[_TYPE_SEQUENCE_];
12281 if ( $max_index_to_go == 0 ) {
12283 # Update the next parent sequence number for each new batch.
12285 #------------------------------------------
12286 # Begin coding from sub parent_seqno_from_K
12287 #------------------------------------------
12289 ## $next_parent_seqno = $self->parent_seqno_by_K($Ktoken_vars);
12290 $next_parent_seqno = SEQ_ROOT;
12292 $next_parent_seqno = $rparent_of_seqno->{$seqno};
12295 my $Kt = $rLL->[$Ktoken_vars]->[_KNEXT_SEQ_ITEM_];
12296 if ( defined($Kt) ) {
12297 my $type_sequence = $rLL->[$Kt]->[_TYPE_SEQUENCE_];
12298 my $type = $rLL->[$Kt]->[_TYPE_];
12300 # if next container token is closing, it is the parent seqno
12301 if ( $is_closing_type{$type} ) {
12302 $next_parent_seqno = $type_sequence;
12305 # otherwise we want its parent container
12307 $next_parent_seqno =
12308 $rparent_of_seqno->{$type_sequence};
12312 $next_parent_seqno = SEQ_ROOT
12313 unless ( defined($next_parent_seqno) );
12315 #----------------------------------------
12316 # End coding from sub parent_seqno_from_K
12317 #----------------------------------------
12319 $next_slevel = $rdepth_of_opening_seqno->[$next_parent_seqno] + 1;
12322 # Initialize some sequence-dependent variables to their normal values
12323 my $parent_seqno = $next_parent_seqno;
12324 my $slevel = $next_slevel;
12325 my $block_type = "";
12327 # Then fix them at container tokens:
12329 if ( $is_opening_token{$token} ) {
12330 $next_parent_seqno = $seqno;
12331 $slevel = $rdepth_of_opening_seqno->[$seqno];
12332 $next_slevel = $slevel + 1;
12333 $block_type = $rblock_type_of_seqno->{$seqno};
12335 elsif ( $is_closing_token{$token} ) {
12336 $next_slevel = $rdepth_of_opening_seqno->[$seqno];
12337 $slevel = $next_slevel + 1;
12338 $block_type = $rblock_type_of_seqno->{$seqno};
12339 $parent_seqno = $rparent_of_seqno->{$seqno};
12340 $parent_seqno = SEQ_ROOT unless defined($parent_seqno);
12341 $next_parent_seqno = $parent_seqno;
12344 # ternary token: nothing to do
12346 $block_type = "" unless ( defined($block_type) );
12349 $parent_seqno_to_go[$max_index_to_go] = $parent_seqno;
12350 $nesting_depth_to_go[$max_index_to_go] = $slevel;
12351 $block_type_to_go[$max_index_to_go] = $block_type;
12352 $nobreak_to_go[$max_index_to_go] = $no_internal_newlines;
12354 my $length = $rtoken_vars->[_TOKEN_LENGTH_];
12356 # Safety check that length is defined. Should not be needed now.
12357 # Former patch for indent-only, in which the entire set of tokens is
12358 # turned into type 'q'. Lengths may have not been defined because sub
12359 # 'respace_tokens' is bypassed. We do not need lengths in this case,
12360 # but we will use the character count to have a defined value. In the
12361 # future, it would be nicer to have 'respace_tokens' convert the lines
12362 # to quotes and get correct lengths.
12363 if ( !defined($length) ) { $length = length($token) }
12365 $token_lengths_to_go[$max_index_to_go] = $length;
12367 # We keep a running sum of token lengths from the start of this batch:
12368 # summed_lengths_to_go[$i] = total length to just before token $i
12369 # summed_lengths_to_go[$i+1] = total length to just after token $i
12370 $summed_lengths_to_go[ $max_index_to_go + 1 ] =
12371 $summed_lengths_to_go[$max_index_to_go] + $length;
12373 my $in_continued_quote =
12374 ( $Ktoken_vars == $K_first ) && $line_of_tokens->{_starting_in_quote};
12375 if ( $max_index_to_go == 0 ) {
12376 $starting_in_quote = $in_continued_quote;
12379 # Define the indentation that this token will have in two cases:
12380 # Without CI = reduced_spaces_to_go
12381 # With CI = leading_spaces_to_go
12382 if ($in_continued_quote) {
12383 $leading_spaces_to_go[$max_index_to_go] = 0;
12384 $reduced_spaces_to_go[$max_index_to_go] = 0;
12387 $reduced_spaces_to_go[$max_index_to_go] = my $reduced_spaces =
12388 $rOpts_indent_columns * $radjusted_levels->[$Ktoken_vars];
12389 $leading_spaces_to_go[$max_index_to_go] =
12390 $reduced_spaces + $rOpts_continuation_indentation * $ci_level;
12392 $standard_spaces_to_go[$max_index_to_go] =
12393 $leading_spaces_to_go[$max_index_to_go];
12395 DEBUG_STORE && do {
12396 my ( $a, $b, $c ) = caller();
12398 "STORE: from $a $c: storing token $token type $type lev=$level at $max_index_to_go\n";
12403 sub flush_batch_of_CODE {
12405 # Finish any batch packaging and call the process routine.
12406 # This must be the only call to grind_batch_of_CODE()
12409 return unless ( $max_index_to_go >= 0 );
12411 # Create an array to hold variables for this batch
12412 my $this_batch = [];
12413 $this_batch->[_starting_in_quote_] = $starting_in_quote;
12414 $this_batch->[_ending_in_quote_] = $ending_in_quote;
12415 $this_batch->[_max_index_to_go_] = $max_index_to_go;
12416 $this_batch->[_batch_CODE_type_] = $batch_CODE_type;
12418 # The flag $is_static_block_comment applies to the line which just
12419 # arrived. So it only applies if we are outputting that line.
12420 $this_batch->[_is_static_block_comment_] =
12422 && $max_index_to_go == 0
12423 && $K_to_go[0] == $K_first ? $is_static_block_comment : 0;
12425 $this_batch->[_ri_starting_one_line_block_] =
12426 $ri_starting_one_line_block;
12428 $self->[_this_batch_] = $this_batch;
12430 $last_line_had_side_comment =
12431 $max_index_to_go > 0 && $types_to_go[$max_index_to_go] eq '#';
12433 $self->grind_batch_of_CODE();
12435 # Done .. this batch is history
12436 $self->[_this_batch_] = [];
12438 initialize_batch_variables();
12439 initialize_forced_breakpoint_vars();
12446 # end the current batch, EXCEPT for a few special cases
12449 if ( $max_index_to_go < 0 ) {
12451 # This is harmless but should be elimintated in development
12453 Fault("End batch called with nothing to do; please fix\n");
12458 # Exceptions when a line does not end with a comment... (fixes c058)
12459 if ( $types_to_go[$max_index_to_go] ne '#' ) {
12461 # Exception 1: Do not end line in a weld
12463 if ( $total_weld_count
12464 && $self->[_rK_weld_right_]->{ $K_to_go[$max_index_to_go] } );
12466 # Exception 2: just set a tentative breakpoint if we might be in a
12468 if ( $index_start_one_line_block != UNDEFINED_INDEX ) {
12469 $self->set_forced_breakpoint($max_index_to_go);
12474 $self->flush_batch_of_CODE();
12478 sub flush_vertical_aligner {
12480 my $vao = $self->[_vertical_aligner_object_];
12485 # flush is called to output any tokens in the pipeline, so that
12486 # an alternate source of lines can be written in the correct order
12488 my ( $self, $CODE_type ) = @_;
12490 # end the current batch with 1 exception
12492 destroy_one_line_block();
12494 # Exception: if we are flushing within the code stream only to insert
12495 # blank line(s), then we can keep the batch intact at a weld. This
12496 # improves formatting of -ce. See test 'ce1.ce'
12497 if ( $CODE_type && $CODE_type eq 'BL' ) {
12498 $self->end_batch() if ( $max_index_to_go >= 0 );
12501 # otherwise, we have to shut things down completely.
12502 else { $self->flush_batch_of_CODE() }
12504 $self->flush_vertical_aligner();
12508 sub process_line_of_CODE {
12510 my ( $self, $my_line_of_tokens ) = @_;
12512 #----------------------------------------------------------------
12513 # This routine is called once per INPUT line to format all of the
12514 # tokens on that line.
12515 #----------------------------------------------------------------
12517 # It outputs full-line comments and blank lines immediately.
12519 # The tokens are copied one-by-one from the global token array $rLL to
12520 # a set of '_to_go' arrays which collect batches of tokens for a
12521 # further processing via calls to 'sub store_token_to_go', until a well
12522 # defined 'structural' break point* or 'forced' breakpoint* is reached.
12523 # Then, the batch of collected '_to_go' tokens is passed along to 'sub
12524 # grind_batch_of_CODE' for further processing.
12526 # * 'structural' break points are basically line breaks corresponding
12527 # to code blocks. An example is a chain of if-elsif-else statements,
12528 # which should typically be broken at the opening and closing braces.
12530 # * 'forced' break points are breaks required by side comments or by
12531 # special user controls.
12533 # So this routine is just making an initial set of required line
12534 # breaks, basically regardless of the maximum requested line length.
12535 # The subsequent stage of formating make additional line breaks
12536 # appropriate for lists and logical structures, and to keep line
12537 # lengths below the requested maximum line length.
12539 #-----------------------------------
12540 # begin initialize closure variables
12541 #-----------------------------------
12542 $line_of_tokens = $my_line_of_tokens;
12543 $CODE_type = $line_of_tokens->{_code_type};
12544 my $rK_range = $line_of_tokens->{_rK_range};
12545 ( $K_first, $K_last ) = @{$rK_range};
12546 if ( !defined($K_first) ) {
12548 # Empty line: This can happen if tokens are deleted, for example
12549 # with the -mangle parameter
12552 $rLL = $self->[_rLL_];
12553 $radjusted_levels = $self->[_radjusted_levels_];
12554 $rparent_of_seqno = $self->[_rparent_of_seqno_];
12555 $rdepth_of_opening_seqno = $self->[_rdepth_of_opening_seqno_];
12556 $rblock_type_of_seqno = $self->[_rblock_type_of_seqno_];
12558 #---------------------------------
12559 # end initialize closure variables
12560 #---------------------------------
12562 # This flag will become nobreak_to_go and should be set to 2 to prevent
12563 # a line break AFTER the current token.
12564 $no_internal_newlines = 0;
12565 if ( !$rOpts_add_newlines || $CODE_type eq 'NIN' ) {
12566 $no_internal_newlines = 2;
12569 my $input_line = $line_of_tokens->{_line_text};
12572 ( $K_first == $K_last && $rLL->[$K_first]->[_TYPE_] eq '#' );
12573 my $is_static_block_comment_without_leading_space =
12574 $CODE_type eq 'SBCX';
12575 $is_static_block_comment =
12576 $CODE_type eq 'SBC' || $is_static_block_comment_without_leading_space;
12577 my $is_hanging_side_comment = $CODE_type eq 'HSC';
12578 my $is_VERSION_statement = $CODE_type eq 'VER';
12580 if ($is_VERSION_statement) {
12581 $self->[_saw_VERSION_in_this_file_] = 1;
12582 $no_internal_newlines = 2;
12585 # Add interline blank if any
12586 my $last_old_nonblank_type = "b";
12587 my $first_new_nonblank_token = "";
12588 my $K_first_true = $K_first;
12589 if ( $max_index_to_go >= 0 ) {
12590 $last_old_nonblank_type = $types_to_go[$max_index_to_go];
12591 $first_new_nonblank_token = $rLL->[$K_first]->[_TOKEN_];
12593 && $types_to_go[$max_index_to_go] ne 'b'
12595 && $rLL->[ $K_first - 1 ]->[_TYPE_] eq 'b' )
12601 my $rtok_first = $rLL->[$K_first];
12603 my $in_quote = $line_of_tokens->{_ending_in_quote};
12604 $ending_in_quote = $in_quote;
12606 #------------------------------------
12607 # Handle a block (full-line) comment.
12608 #------------------------------------
12611 if ( $rOpts->{'delete-block-comments'} ) {
12616 destroy_one_line_block();
12617 $self->end_batch() if ( $max_index_to_go >= 0 );
12619 # output a blank line before block comments
12621 # unless we follow a blank or comment line
12622 $self->[_last_line_leading_type_] ne '#'
12623 && $self->[_last_line_leading_type_] ne 'b'
12626 && $rOpts->{'blanks-before-comments'}
12628 # if this is NOT an empty comment, unless it follows a side
12629 # comment and could become a hanging side comment.
12631 $rtok_first->[_TOKEN_] ne '#'
12632 || ( $last_line_had_side_comment
12633 && $rLL->[$K_first]->[_LEVEL_] > 0 )
12636 # not after a short line ending in an opening token
12637 # because we already have space above this comment.
12638 # Note that the first comment in this if block, after
12639 # the 'if (', does not get a blank line because of this.
12640 && !$self->[_last_output_short_opening_token_]
12642 # never before static block comments
12643 && !$is_static_block_comment
12646 $self->flush(); # switching to new output stream
12647 my $file_writer_object = $self->[_file_writer_object_];
12648 $file_writer_object->write_blank_code_line();
12649 $self->[_last_line_leading_type_] = 'b';
12653 $rOpts->{'indent-block-comments'}
12654 && ( !$rOpts->{'indent-spaced-block-comments'}
12655 || $input_line =~ /^\s+/ )
12656 && !$is_static_block_comment_without_leading_space
12659 my $Ktoken_vars = $K_first;
12660 my $rtoken_vars = $rLL->[$Ktoken_vars];
12661 $self->store_token_to_go( $Ktoken_vars, $rtoken_vars );
12662 $self->end_batch();
12666 # switching to new output stream
12669 # Note that last arg in call here is 'undef' for comments
12670 my $file_writer_object = $self->[_file_writer_object_];
12671 $file_writer_object->write_code_line(
12672 $rtok_first->[_TOKEN_] . "\n", undef );
12673 $self->[_last_line_leading_type_] = '#';
12678 # compare input/output indentation except for continuation lines
12679 # (because they have an unknown amount of initial blank space)
12680 # and lines which are quotes (because they may have been outdented)
12681 my $guessed_indentation_level =
12682 $line_of_tokens->{_guessed_indentation_level};
12683 unless ( $is_hanging_side_comment
12684 || $rtok_first->[_CI_LEVEL_] > 0
12685 || $guessed_indentation_level == 0 && $rtok_first->[_TYPE_] eq 'Q' )
12687 my $input_line_number = $line_of_tokens->{_line_number};
12688 $self->compare_indentation_levels( $K_first,
12689 $guessed_indentation_level, $input_line_number );
12692 #------------------------
12693 # Handle indentation-only
12694 #------------------------
12696 # NOTE: In previous versions we sent all qw lines out immediately here.
12697 # No longer doing this: also write a line which is entirely a 'qw' list
12698 # to allow stacking of opening and closing tokens. Note that interior
12699 # qw lines will still go out at the end of this routine.
12700 if ( $CODE_type eq 'IO' ) {
12702 my $line = $input_line;
12704 # Fix for rt #125506 Unexpected string formating
12705 # in which leading space of a terminal quote was removed
12707 $line =~ s/^\s+// unless ( $line_of_tokens->{_starting_in_quote} );
12709 my $Ktoken_vars = $K_first;
12711 # We work with a copy of the token variables and change the
12712 # first token to be the entire line as a quote variable
12713 my $rtoken_vars = $rLL->[$Ktoken_vars];
12714 $rtoken_vars = copy_token_as_type( $rtoken_vars, 'q', $line );
12716 # Patch: length is not really important here
12717 $rtoken_vars->[_TOKEN_LENGTH_] = length($line);
12719 $self->store_token_to_go( $Ktoken_vars, $rtoken_vars );
12720 $self->end_batch();
12724 #---------------------------
12725 # Handle all other lines ...
12726 #---------------------------
12728 # If we just saw the end of an elsif block, write nag message
12729 # if we do not see another elseif or an else.
12730 if ($looking_for_else) {
12732 unless ( $rLL->[$K_first]->[_TOKEN_] =~ /^(elsif|else)$/ ) {
12733 write_logfile_entry("(No else block)\n");
12735 $looking_for_else = 0;
12738 # This is a good place to kill incomplete one-line blocks
12739 if ( $max_index_to_go >= 0 ) {
12742 ( $semicolons_before_block_self_destruct == 0 )
12743 && ( $last_old_nonblank_type eq ';' )
12744 && ( $first_new_nonblank_token ne '}' )
12747 # Patch for RT #98902. Honor request to break at old commas.
12748 || ( $rOpts_break_at_old_comma_breakpoints
12749 && $last_old_nonblank_type eq ',' )
12752 $forced_breakpoint_to_go[$max_index_to_go] = 1
12753 if ($rOpts_break_at_old_comma_breakpoints);
12754 destroy_one_line_block();
12755 $self->end_batch();
12758 # Keep any requested breaks before this line. Note that we have to
12759 # use the original K_first because it may have been reduced above
12760 # to add a blank. The value of the flag is as follows:
12761 # 1 => hard break, flush the batch
12762 # 2 => soft break, set breakpoint and continue building the batch
12763 if ( $self->[_rbreak_before_Kfirst_]->{$K_first_true} ) {
12764 destroy_one_line_block();
12765 if ( $self->[_rbreak_before_Kfirst_]->{$K_first_true} == 2 ) {
12766 $self->set_forced_breakpoint($max_index_to_go);
12769 $self->end_batch() if ( $max_index_to_go >= 0 );
12774 #--------------------------------------
12775 # loop to process the tokens one-by-one
12776 #--------------------------------------
12778 # We do not want a leading blank if the previous batch just got output
12779 if ( $max_index_to_go < 0 && $rLL->[$K_first]->[_TYPE_] eq 'b' ) {
12783 foreach my $Ktoken_vars ( $K_first .. $K_last ) {
12785 my $rtoken_vars = $rLL->[$Ktoken_vars];
12786 my $type = $rtoken_vars->[_TYPE_];
12788 # If we are continuing after seeing a right curly brace, flush
12789 # buffer unless we see what we are looking for, as in
12791 if ( $rbrace_follower && $type ne 'b' ) {
12792 my $token = $rtoken_vars->[_TOKEN_];
12793 unless ( $rbrace_follower->{$token} ) {
12794 $self->end_batch() if ( $max_index_to_go >= 0 );
12796 $rbrace_follower = undef;
12800 $block_type, $type_sequence,
12801 $is_opening_BLOCK, $is_closing_BLOCK,
12802 $nobreak_BEFORE_BLOCK
12804 if ( $rtoken_vars->[_TYPE_SEQUENCE_] ) {
12806 my $token = $rtoken_vars->[_TOKEN_];
12807 $type_sequence = $rtoken_vars->[_TYPE_SEQUENCE_];
12808 $block_type = $rblock_type_of_seqno->{$type_sequence};
12812 && $block_type ne 't'
12813 && !$self->[_rshort_nested_]->{$type_sequence} )
12816 if ( $type eq '{' ) {
12817 $is_opening_BLOCK = 1;
12818 $nobreak_BEFORE_BLOCK = $no_internal_newlines;
12820 elsif ( $type eq '}' ) {
12821 $is_closing_BLOCK = 1;
12822 $nobreak_BEFORE_BLOCK = $no_internal_newlines;
12827 # Find next nonblank token on this line and look for a side comment
12828 my ( $Knnb, $side_comment_follows );
12830 # if before last token ...
12831 if ( $Ktoken_vars < $K_last ) {
12832 $Knnb = $Ktoken_vars + 1;
12833 if ( $Knnb < $K_last
12834 && $rLL->[$Knnb]->[_TYPE_] eq 'b' )
12839 if ( $rLL->[$Knnb]->[_TYPE_] eq '#' ) {
12840 $side_comment_follows = 1;
12842 # Do not allow breaks which would promote a side comment to
12844 $no_internal_newlines = 2;
12848 # if at last token ...
12851 #---------------------
12852 # handle side comments
12853 #---------------------
12854 if ( $type eq '#' ) {
12855 $self->store_token_to_go( $Ktoken_vars, $rtoken_vars );
12863 if ( $type eq 'b' ) {
12864 $self->store_token_to_go( $Ktoken_vars, $rtoken_vars );
12868 # Process non-blank and non-comment tokens ...
12873 if ( $type eq ';' ) {
12875 my $next_nonblank_token_type = 'b';
12876 my $next_nonblank_token = '';
12877 if ( defined($Knnb) ) {
12878 $next_nonblank_token = $rLL->[$Knnb]->[_TOKEN_];
12879 $next_nonblank_token_type = $rLL->[$Knnb]->[_TYPE_];
12882 my $break_before_semicolon = ( $Ktoken_vars == $K_first )
12883 && $rOpts_break_at_old_semicolon_breakpoints;
12885 # kill one-line blocks with too many semicolons
12886 $semicolons_before_block_self_destruct--;
12888 $break_before_semicolon
12889 || ( $semicolons_before_block_self_destruct < 0 )
12890 || ( $semicolons_before_block_self_destruct == 0
12891 && $next_nonblank_token_type !~ /^[b\}]$/ )
12894 destroy_one_line_block();
12896 if ( $break_before_semicolon
12897 && $max_index_to_go >= 0 );
12900 $self->store_token_to_go( $Ktoken_vars, $rtoken_vars );
12904 $no_internal_newlines
12905 || ( $rOpts_keep_interior_semicolons
12906 && $Ktoken_vars < $K_last )
12907 || ( $next_nonblank_token eq '}' )
12915 elsif ($is_opening_BLOCK) {
12917 # Tentatively output this token. This is required before
12918 # calling starting_one_line_block. We may have to unstore
12919 # it, though, if we have to break before it.
12920 $self->store_token_to_go( $Ktoken_vars, $rtoken_vars );
12922 # Look ahead to see if we might form a one-line block..
12924 $self->starting_one_line_block( $Ktoken_vars,
12925 $K_last_nonblank_code, $K_last );
12926 $self->clear_breakpoint_undo_stack();
12928 # to simplify the logic below, set a flag to indicate if
12929 # this opening brace is far from the keyword which introduces it
12930 my $keyword_on_same_line = 1;
12932 $max_index_to_go >= 0
12933 && defined($K_last_nonblank_code)
12934 && $rLL->[$K_last_nonblank_code]->[_TYPE_] eq ')'
12935 && ( ( $rtoken_vars->[_LEVEL_] < $levels_to_go[0] )
12939 $keyword_on_same_line = 0;
12942 # Break before '{' if requested with -bl or -bli flag
12943 my $want_break = $self->[_rbrace_left_]->{$type_sequence};
12945 # But do not break if this token is welded to the left
12946 if ( $total_weld_count
12947 && defined( $self->[_rK_weld_left_]->{$Ktoken_vars} ) )
12952 # Break BEFORE an opening '{' ...
12958 # and we were unable to start looking for a block,
12959 && $index_start_one_line_block == UNDEFINED_INDEX
12961 # or if it will not be on same line as its keyword, so that
12962 # it will be outdented (eval.t, overload.t), and the user
12963 # has not insisted on keeping it on the right
12964 || ( !$keyword_on_same_line
12965 && !$rOpts_opening_brace_always_on_right )
12969 # but only if allowed
12970 unless ($nobreak_BEFORE_BLOCK) {
12972 # since we already stored this token, we must unstore it
12973 $self->unstore_token_to_go();
12975 # then output the line
12976 $self->end_batch() if ( $max_index_to_go >= 0 );
12978 # and now store this token at the start of a new line
12979 $self->store_token_to_go( $Ktoken_vars, $rtoken_vars );
12983 # now output this line
12985 if ( $max_index_to_go >= 0 && !$no_internal_newlines );
12991 elsif ($is_closing_BLOCK) {
12993 my $next_nonblank_token_type = 'b';
12994 my $next_nonblank_token = '';
12995 if ( defined($Knnb) ) {
12996 $next_nonblank_token = $rLL->[$Knnb]->[_TOKEN_];
12997 $next_nonblank_token_type = $rLL->[$Knnb]->[_TYPE_];
13000 # If there is a pending one-line block ..
13001 if ( $index_start_one_line_block != UNDEFINED_INDEX ) {
13003 # Fix for b1208: if a side comment follows this closing
13004 # brace then we must include its length in the length test
13005 # ... unless the -issl flag is set (fixes b1307-1309).
13006 # Assume a minimum of 1 blank space to the comment.
13008 $side_comment_follows
13009 && !$rOpts_ignore_side_comment_lengths
13010 ? 1 + $rLL->[$Knnb]->[_TOKEN_LENGTH_]
13013 # we have to terminate it if..
13016 # it is too long (final length may be different from
13017 # initial estimate). note: must allow 1 space for this
13019 $self->excess_line_length( $index_start_one_line_block,
13020 $max_index_to_go ) + $added_length >= 0
13022 # or if it has too many semicolons
13023 || ( $semicolons_before_block_self_destruct == 0
13024 && defined($K_last_nonblank_code)
13025 && $rLL->[$K_last_nonblank_code]->[_TYPE_] ne ';' )
13028 destroy_one_line_block();
13032 # put a break before this closing curly brace if appropriate
13034 if ( $max_index_to_go >= 0
13035 && !$nobreak_BEFORE_BLOCK
13036 && $index_start_one_line_block == UNDEFINED_INDEX );
13038 # store the closing curly brace
13039 $self->store_token_to_go( $Ktoken_vars, $rtoken_vars );
13041 # ok, we just stored a closing curly brace. Often, but
13042 # not always, we want to end the line immediately.
13043 # So now we have to check for special cases.
13045 # if this '}' successfully ends a one-line block..
13046 my $is_one_line_block = 0;
13047 my $keep_going = 0;
13048 if ( $index_start_one_line_block != UNDEFINED_INDEX ) {
13050 # Remember the type of token just before the
13051 # opening brace. It would be more general to use
13052 # a stack, but this will work for one-line blocks.
13053 $is_one_line_block =
13054 $types_to_go[$index_start_one_line_block];
13056 # we have to actually make it by removing tentative
13057 # breaks that were set within it
13058 $self->undo_forced_breakpoint_stack(0);
13060 # For -lp, extend the nobreak to include a trailing
13061 # terminal ','. This is because the -lp indentation was
13062 # not known when making one-line blocks, so we may be able
13063 # to move the line back to fit. Otherwise we may create a
13064 # needlessly stranded comma on the next line.
13065 my $iend_nobreak = $max_index_to_go - 1;
13066 if ( $rOpts_line_up_parentheses
13067 && $next_nonblank_token_type eq ','
13068 && $Knnb eq $K_last )
13070 my $p_seqno = $parent_seqno_to_go[$max_index_to_go];
13072 $self->[_ris_excluded_lp_container_]->{$p_seqno};
13073 $iend_nobreak = $max_index_to_go if ( !$is_excluded );
13076 $self->set_nobreaks( $index_start_one_line_block,
13079 # save starting block indexes so that sub correct_lp can
13080 # check and adjust -lp indentation (c098)
13081 push @{$ri_starting_one_line_block},
13082 $index_start_one_line_block;
13084 # then re-initialize for the next one-line block
13085 destroy_one_line_block();
13087 # then decide if we want to break after the '}' ..
13088 # We will keep going to allow certain brace followers as in:
13089 # do { $ifclosed = 1; last } unless $losing;
13091 # But make a line break if the curly ends a
13092 # significant block:
13095 $is_block_without_semicolon{$block_type}
13097 # Follow users break point for
13098 # one line block types U & G, such as a 'try' block
13099 || $is_one_line_block =~ /^[UG]$/
13100 && $Ktoken_vars == $K_last
13103 # if needless semicolon follows we handle it later
13104 && $next_nonblank_token ne ';'
13108 unless ($no_internal_newlines);
13112 # set string indicating what we need to look for brace follower
13114 if ( $block_type eq 'do' ) {
13115 $rbrace_follower = \%is_do_follower;
13117 $self->tight_paren_follows( $K_to_go[0], $Ktoken_vars )
13120 $rbrace_follower = { ')' => 1 };
13123 elsif ( $block_type =~ /^(if|elsif|unless)$/ ) {
13124 $rbrace_follower = \%is_if_brace_follower;
13126 elsif ( $block_type eq 'else' ) {
13127 $rbrace_follower = \%is_else_brace_follower;
13130 # added eval for borris.t
13131 elsif ($is_sort_map_grep_eval{$block_type}
13132 || $is_one_line_block eq 'G' )
13134 $rbrace_follower = undef;
13139 elsif ( $self->[_ris_asub_block_]->{$type_sequence} ) {
13140 if ($is_one_line_block) {
13142 $rbrace_follower = \%is_anon_sub_1_brace_follower;
13144 # Exceptions to help keep -lp intact, see git #74 ...
13145 # Exception 1: followed by '}' on this line
13146 if ( $Ktoken_vars < $K_last
13147 && $next_nonblank_token eq '}' )
13149 $rbrace_follower = undef;
13153 # Exception 2: followed by '}' on next line if -lp set.
13154 # The -lp requirement allows the formatting to follow
13155 # old breaks when -lp is not used, minimizing changes.
13156 # Fixes issue c087.
13157 elsif ($Ktoken_vars == $K_last
13158 && $rOpts_line_up_parentheses )
13160 my $K_closing_container =
13161 $self->[_K_closing_container_];
13162 my $K_opening_container =
13163 $self->[_K_opening_container_];
13164 my $p_seqno = $parent_seqno_to_go[$max_index_to_go];
13165 my $Kc = $K_closing_container->{$p_seqno};
13167 $self->[_ris_excluded_lp_container_]->{$p_seqno};
13169 && $rLL->[$Kc]->[_TOKEN_] eq '}'
13171 && $Kc - $Ktoken_vars <= 2 )
13173 $rbrace_follower = undef;
13179 $rbrace_follower = \%is_anon_sub_brace_follower;
13183 # None of the above: specify what can follow a closing
13184 # brace of a block which is not an
13185 # if/elsif/else/do/sort/map/grep/eval
13187 # 'Toolbar.pm', 'Menubar.pm', bless.t, '3rules.pl', 'break1.t
13189 $rbrace_follower = \%is_other_brace_follower;
13192 # See if an elsif block is followed by another elsif or else;
13194 if ( $block_type eq 'elsif' ) {
13196 if ( $next_nonblank_token_type eq 'b' ) { # end of line?
13197 $looking_for_else = 1; # ok, check on next line
13201 unless ( $next_nonblank_token =~ /^(elsif|else)$/ ) {
13202 write_logfile_entry("No else block :(\n");
13207 # keep going after certain block types (map,sort,grep,eval)
13208 # added eval for borris.t
13214 # if no more tokens, postpone decision until re-entring
13215 elsif ( ( $next_nonblank_token_type eq 'b' )
13216 && $rOpts_add_newlines )
13218 unless ($rbrace_follower) {
13220 unless ($no_internal_newlines);
13224 elsif ($rbrace_follower) {
13226 unless ( $rbrace_follower->{$next_nonblank_token} ) {
13228 unless ( $no_internal_newlines
13229 || $max_index_to_go < 0 );
13231 $rbrace_follower = undef;
13236 unless ( $no_internal_newlines
13237 || $max_index_to_go < 0 );
13240 } ## end treatment of closing block token
13242 #------------------------------
13243 # handle here_doc target string
13244 #------------------------------
13245 elsif ( $type eq 'h' ) {
13247 # no newlines after seeing here-target
13248 $no_internal_newlines = 2;
13249 ## destroy_one_line_block(); # deleted to fix case b529
13250 $self->store_token_to_go( $Ktoken_vars, $rtoken_vars );
13253 #-----------------------------
13254 # handle all other token types
13255 #-----------------------------
13258 $self->store_token_to_go( $Ktoken_vars, $rtoken_vars );
13260 # break after a label if requested
13261 if ( $type eq 'J' && $rOpts_break_after_labels == 1 ) {
13263 unless ($no_internal_newlines);
13267 # remember two previous nonblank, non-comment OUTPUT tokens
13268 $K_last_last_nonblank_code = $K_last_nonblank_code;
13269 $K_last_nonblank_code = $Ktoken_vars;
13271 } ## end of loop over all tokens in this line
13273 my $type = $rLL->[$K_last]->[_TYPE_];
13274 my $break_flag = $self->[_rbreak_after_Klast_]->{$K_last};
13276 # we have to flush ..
13279 # if there is a side comment...
13282 # if this line ends in a quote
13283 # NOTE: This is critically important for insuring that quoted lines
13284 # do not get processed by things like -sot and -sct
13287 # if this is a VERSION statement
13288 || $is_VERSION_statement
13290 # to keep a label at the end of a line
13291 || ( $type eq 'J' && $rOpts_break_after_labels != 2 )
13293 # if we have a hard break request
13294 || $break_flag && $break_flag != 2
13296 # if we are instructed to keep all old line breaks
13297 || !$rOpts->{'delete-old-newlines'}
13299 # if this is a line of the form 'use overload'. A break here
13300 # in the input file is a good break because it will allow
13301 # the operators which follow to be formatted well. Without
13302 # this break the formatting with -ci=4 -xci is poor, for example.
13306 # print length $_[2], "\n";
13307 # my ( $x, $y ) = _order(@_);
13308 # Number::Roman->new( int $x + $y );
13311 # my ( $x, $y ) = _order(@_);
13312 # Number::Roman->new( int $x - $y );
13314 || ( $max_index_to_go == 2
13315 && $types_to_go[0] eq 'k'
13316 && $tokens_to_go[0] eq 'use'
13317 && $tokens_to_go[$max_index_to_go] eq 'overload' )
13320 destroy_one_line_block();
13321 $self->end_batch() if ( $max_index_to_go >= 0 );
13324 # Check for a soft break request
13325 if ( $max_index_to_go >= 0 && $break_flag && $break_flag == 2 ) {
13326 $self->set_forced_breakpoint($max_index_to_go);
13329 # mark old line breakpoints in current output stream
13331 $max_index_to_go >= 0
13332 && ( !$rOpts_ignore_old_breakpoints
13333 || $self->[_ris_essential_old_breakpoint_]->{$K_last} )
13336 my $jobp = $max_index_to_go;
13337 if ( $types_to_go[$max_index_to_go] eq 'b' && $max_index_to_go > 0 )
13341 $old_breakpoint_to_go[$jobp] = 1;
13344 } ## end sub process_line_of_CODE
13345 } ## end closure process_line_of_CODE
13347 sub tight_paren_follows {
13349 my ( $self, $K_to_go_0, $K_ic ) = @_;
13351 # Input parameters:
13352 # $K_to_go_0 = first token index K of this output batch (=K_to_go[0])
13353 # $K_ic = index of the closing do brace (=K_to_go[$max_index_to_go])
13354 # Return parameter:
13355 # false if we want a break after the closing do brace
13356 # true if we do not want a break after the closing do brace
13358 # We are at the closing brace of a 'do' block. See if this brace is
13359 # followed by a closing paren, and if so, set a flag which indicates
13360 # that we do not want a line break between the '}' and ')'.
13362 # xxxxx ( ...... do { ... } ) {
13363 # ^-------looking at this brace, K_ic
13365 # Subscript notation:
13366 # _i = inner container (braces in this case)
13367 # _o = outer container (parens in this case)
13368 # _io = inner opening = '{'
13369 # _ic = inner closing = '}'
13370 # _oo = outer opening = '('
13371 # _oc = outer closing = ')'
13373 # |--K_oo |--K_oc = outer container
13374 # xxxxx ( ...... do { ...... } ) {
13375 # |--K_io |--K_ic = inner container
13377 # In general, the safe thing to do is return a 'false' value
13378 # if the statement appears to be complex. This will have
13379 # the downstream side-effect of opening up outer containers
13380 # to help make complex code readable. But for simpler
13381 # do blocks it can be preferable to keep the code compact
13382 # by returning a 'true' value.
13384 return unless defined($K_ic);
13385 my $rLL = $self->[_rLL_];
13387 # we should only be called at a closing block
13388 my $seqno_i = $rLL->[$K_ic]->[_TYPE_SEQUENCE_];
13389 return unless ($seqno_i); # shouldn't happen;
13391 # This only applies if the next nonblank is a ')'
13392 my $K_oc = $self->K_next_nonblank($K_ic);
13393 return unless defined($K_oc);
13394 my $token_next = $rLL->[$K_oc]->[_TOKEN_];
13395 return unless ( $token_next eq ')' );
13397 my $seqno_o = $rLL->[$K_oc]->[_TYPE_SEQUENCE_];
13398 my $K_io = $self->[_K_opening_container_]->{$seqno_i};
13399 my $K_oo = $self->[_K_opening_container_]->{$seqno_o};
13400 return unless ( defined($K_io) && defined($K_oo) );
13402 # RULE 1: Do not break before a closing signature paren
13403 # (regardless of complexity). This is a fix for issue git#22.
13404 # Looking for something like:
13405 # sub xxx ( ... do { ... } ) {
13406 # ^----- next block_type
13407 my $K_test = $self->K_next_nonblank($K_oc);
13408 if ( defined($K_test) && $rLL->[$K_test]->[_TYPE_] eq '{' ) {
13409 my $seqno_test = $rLL->[$K_test]->[_TYPE_SEQUENCE_];
13411 if ( $self->[_ris_asub_block_]->{$seqno_test}
13412 || $self->[_ris_sub_block_]->{$seqno_test} )
13419 # RULE 2: Break if the contents within braces appears to be 'complex'. We
13420 # base this decision on the number of tokens between braces.
13422 # xxxxx ( ... do { ... } ) {
13425 # Although very simple, it has the advantages of (1) being insensitive to
13426 # changes in lengths of identifier names, (2) easy to understand, implement
13427 # and test. A test case for this is 't/snippets/long_line.in'.
13429 # Example: $K_ic - $K_oo = 9 [Pass Rule 2]
13430 # if ( do { $2 !~ /&/ } ) { ... }
13432 # Example: $K_ic - $K_oo = 10 [Pass Rule 2]
13433 # for ( split /\s*={70,}\s*/, do { local $/; <DATA> }) { ... }
13435 # Example: $K_ic - $K_oo = 20 [Fail Rule 2]
13436 # test_zero_args( "do-returned list slice", do { ( 10, 11 )[ 2, 3 ]; });
13438 return if ( $K_ic - $K_io > 16 );
13440 # RULE 3: break if the code between the opening '(' and the '{' is 'complex'
13441 # As with the previous rule, we decide based on the token count
13443 # xxxxx ( ... do { ... } ) {
13446 # Example: $K_ic - $K_oo = 9 [Pass Rule 2]
13447 # $K_io - $K_oo = 4 [Pass Rule 3]
13448 # if ( do { $2 !~ /&/ } ) { ... }
13450 # Example: $K_ic - $K_oo = 10 [Pass rule 2]
13451 # $K_io - $K_oo = 9 [Pass rule 3]
13452 # for ( split /\s*={70,}\s*/, do { local $/; <DATA> }) { ... }
13454 return if ( $K_io - $K_oo > 9 );
13456 # RULE 4: Break if we have already broken this batch of output tokens
13457 return if ( $K_oo < $K_to_go_0 );
13459 # RULE 5: Break if input is not on one line
13460 # For example, we will set the flag for the following expression
13461 # written in one line:
13463 # This has: $K_ic - $K_oo = 10 [Pass rule 2]
13464 # $K_io - $K_oo = 8 [Pass rule 3]
13465 # $self->debug( 'Error: ' . do { local $/; <$err> } );
13467 # but we break after the brace if it is on multiple lines on input, since
13468 # the user may prefer it on multiple lines:
13472 # 'Error: ' . do { local $/; <$err> }
13475 if ( !$rOpts_ignore_old_breakpoints ) {
13476 my $iline_oo = $rLL->[$K_oo]->[_LINE_INDEX_];
13477 my $iline_oc = $rLL->[$K_oc]->[_LINE_INDEX_];
13478 return if ( $iline_oo != $iline_oc );
13481 # OK to keep the paren tight
13485 my %is_brace_semicolon_colon;
13488 my @q = qw( { } ; : );
13489 @is_brace_semicolon_colon{@q} = (1) x scalar(@q);
13492 sub starting_one_line_block {
13494 # after seeing an opening curly brace, look for the closing brace and see
13495 # if the entire block will fit on a line. This routine is not always right
13496 # so a check is made later (at the closing brace) to make sure we really
13497 # have a one-line block. We have to do this preliminary check, though,
13498 # because otherwise we would always break at a semicolon within a one-line
13499 # block if the block contains multiple statements.
13501 my ( $self, $Kj, $K_last_nonblank, $K_last ) = @_;
13503 my $rbreak_container = $self->[_rbreak_container_];
13504 my $rshort_nested = $self->[_rshort_nested_];
13505 my $rLL = $self->[_rLL_];
13506 my $K_opening_container = $self->[_K_opening_container_];
13507 my $rblock_type_of_seqno = $self->[_rblock_type_of_seqno_];
13509 # kill any current block - we can only go 1 deep
13510 destroy_one_line_block();
13513 # 1=distance from start of block to opening brace exceeds line length
13518 # This routine should not have been called if there are no tokens in the
13519 # 'to_go' arrays of previously stored tokens. A previous call to
13520 # 'store_token_to_go' should have stored an opening brace. An error here
13521 # indicates that a programming change may have caused a flush operation to
13522 # clean out the previously stored tokens.
13523 if ( !defined($max_index_to_go) || $max_index_to_go < 0 ) {
13524 Fault("program bug: store_token_to_go called incorrectly\n")
13529 # Return if block should be broken
13530 my $type_sequence = $rLL->[$Kj]->[_TYPE_SEQUENCE_];
13531 if ( $rbreak_container->{$type_sequence} ) {
13535 my $ris_bli_container = $self->[_ris_bli_container_];
13536 my $is_bli = $ris_bli_container->{$type_sequence};
13538 my $block_type = $rblock_type_of_seqno->{$type_sequence};
13539 $block_type = "" unless ( defined($block_type) );
13540 my $index_max_forced_break = get_index_max_forced_break();
13542 my $previous_nonblank_token = '';
13543 my $i_last_nonblank = -1;
13544 if ( defined($K_last_nonblank) ) {
13545 $i_last_nonblank = $K_last_nonblank - $K_to_go[0];
13546 if ( $i_last_nonblank >= 0 ) {
13547 $previous_nonblank_token = $rLL->[$K_last_nonblank]->[_TOKEN_];
13551 # find the starting keyword for this block (such as 'if', 'else', ...)
13553 $max_index_to_go == 0
13554 ##|| $block_type =~ /^[\{\}\;\:]$/
13555 || $is_brace_semicolon_colon{$block_type}
13556 || substr( $block_type, 0, 7 ) eq 'package'
13559 $i_start = $max_index_to_go;
13562 # the previous nonblank token should start these block types
13564 $i_last_nonblank >= 0
13565 && ( $previous_nonblank_token eq $block_type
13566 || $self->[_ris_asub_block_]->{$type_sequence}
13567 || $self->[_ris_sub_block_]->{$type_sequence}
13568 || substr( $block_type, -2, 2 ) eq '()' )
13571 $i_start = $i_last_nonblank;
13573 # For signatures and extended syntax ...
13574 # If this brace follows a parenthesized list, we should look back to
13575 # find the keyword before the opening paren because otherwise we might
13576 # form a one line block which stays intack, and cause the parenthesized
13577 # expression to break open. That looks bad.
13578 if ( $tokens_to_go[$i_start] eq ')' ) {
13580 # Find the opening paren
13581 my $K_start = $K_to_go[$i_start];
13582 return 0 unless defined($K_start);
13583 my $seqno = $type_sequence_to_go[$i_start];
13584 return 0 unless ($seqno);
13585 my $K_opening = $K_opening_container->{$seqno};
13586 return 0 unless defined($K_opening);
13587 my $i_opening = $i_start + ( $K_opening - $K_start );
13589 # give up if not on this line
13590 return 0 unless ( $i_opening >= 0 );
13591 $i_start = $i_opening; ##$index_max_forced_break + 1;
13593 # go back one token before the opening paren
13594 if ( $i_start > 0 ) { $i_start-- }
13595 if ( $types_to_go[$i_start] eq 'b' && $i_start > 0 ) { $i_start--; }
13596 my $lev = $levels_to_go[$i_start];
13597 if ( $lev > $rLL->[$Kj]->[_LEVEL_] ) { return 0 }
13601 elsif ( $previous_nonblank_token eq ')' ) {
13603 # For something like "if (xxx) {", the keyword "if" will be
13604 # just after the most recent break. This will be 0 unless
13605 # we have just killed a one-line block and are starting another.
13607 # Note: cannot use inext_index_to_go[] here because that array
13608 # is still being constructed.
13609 $i_start = $index_max_forced_break + 1;
13610 if ( $types_to_go[$i_start] eq 'b' ) {
13614 # Patch to avoid breaking short blocks defined with extended_syntax:
13615 # Strip off any trailing () which was added in the parser to mark
13616 # the opening keyword. For example, in the following
13617 # create( TypeFoo $e) {$bubba}
13618 # the blocktype would be marked as create()
13619 my $stripped_block_type = $block_type;
13620 if ( substr( $block_type, -2, 2 ) eq '()' ) {
13621 $stripped_block_type = substr( $block_type, 0, -2 );
13623 unless ( $tokens_to_go[$i_start] eq $stripped_block_type ) {
13628 # patch for SWITCH/CASE to retain one-line case/when blocks
13629 elsif ( $block_type eq 'case' || $block_type eq 'when' ) {
13631 # Note: cannot use inext_index_to_go[] here because that array
13632 # is still being constructed.
13633 $i_start = $index_max_forced_break + 1;
13634 if ( $types_to_go[$i_start] eq 'b' ) {
13637 unless ( $tokens_to_go[$i_start] eq $block_type ) {
13646 my $pos = total_line_length( $i_start, $max_index_to_go ) - 1;
13648 my $maximum_line_length =
13649 $maximum_line_length_at_level[ $levels_to_go[$i_start] ];
13651 # see if block starting location is too great to even start
13652 if ( $pos > $maximum_line_length ) {
13656 # See if everything to the closing token will fit on one line
13657 # This is part of an update to fix cases b562 .. b983
13658 my $K_closing = $self->[_K_closing_container_]->{$type_sequence};
13659 return 0 unless ( defined($K_closing) );
13660 my $container_length = $rLL->[$K_closing]->[_CUMULATIVE_LENGTH_] -
13661 $rLL->[$Kj]->[_CUMULATIVE_LENGTH_];
13663 my $excess = $pos + 1 + $container_length - $maximum_line_length;
13665 # Add a small tolerance for welded tokens (case b901)
13666 if ( $total_weld_count && $self->is_welded_at_seqno($type_sequence) ) {
13670 if ( $excess > 0 ) {
13672 # line is too long... there is no chance of forming a one line block
13673 # if the excess is more than 1 char
13674 return 0 if ( $excess > 1 );
13676 # ... and give up if it is not a one-line block on input.
13677 # note: for a one-line block on input, it may be possible to keep
13678 # it as a one-line block (by removing a needless semicolon ).
13679 my $K_start = $K_to_go[$i_start];
13681 $rLL->[$K_closing]->[_LINE_INDEX_] - $rLL->[$K_start]->[_LINE_INDEX_];
13682 return 0 if ($ldiff);
13685 foreach my $Ki ( $Kj + 1 .. $K_last ) {
13687 # old whitespace could be arbitrarily large, so don't use it
13688 if ( $rLL->[$Ki]->[_TYPE_] eq 'b' ) { $pos += 1 }
13689 else { $pos += $rLL->[$Ki]->[_TOKEN_LENGTH_] }
13691 # ignore some small blocks
13692 my $type_sequence = $rLL->[$Ki]->[_TYPE_SEQUENCE_];
13693 my $nobreak = $rshort_nested->{$type_sequence};
13695 # Return false result if we exceed the maximum line length,
13696 if ( $pos > $maximum_line_length ) {
13700 # keep going for non-containers
13701 elsif ( !$type_sequence ) {
13705 # return if we encounter another opening brace before finding the
13707 elsif ($rLL->[$Ki]->[_TOKEN_] eq '{'
13708 && $rLL->[$Ki]->[_TYPE_] eq '{'
13709 && $rblock_type_of_seqno->{$type_sequence}
13715 # if we find our closing brace..
13716 elsif ($rLL->[$Ki]->[_TOKEN_] eq '}'
13717 && $rLL->[$Ki]->[_TYPE_] eq '}'
13718 && $rblock_type_of_seqno->{$type_sequence}
13722 # be sure any trailing comment also fits on the line
13723 my $Ki_nonblank = $Ki;
13724 if ( $Ki_nonblank < $K_last ) {
13726 if ( $rLL->[$Ki_nonblank]->[_TYPE_] eq 'b'
13727 && $Ki_nonblank < $K_last )
13733 # Patch for one-line sort/map/grep/eval blocks with side comments:
13734 # We will ignore the side comment length for sort/map/grep/eval
13735 # because this can lead to statements which change every time
13736 # perltidy is run. Here is an example from Denis Moskowitz which
13737 # oscillates between these two states without this patch:
13740 ## grep { $_->foo ne 'bar' } # asdfa asdf asdf asdf asdf asdf asdf asdf asdf asdf asdf
13744 ## $_->foo ne 'bar'
13745 ## } # asdfa asdf asdf asdf asdf asdf asdf asdf asdf asdf asdf
13749 # When the first line is input it gets broken apart by the main
13750 # line break logic in sub process_line_of_CODE.
13751 # When the second line is input it gets recombined by
13752 # process_line_of_CODE and passed to the output routines. The
13753 # output routines (break_long_lines) do not break it apart
13754 # because the bond strengths are set to the highest possible value
13755 # for grep/map/eval/sort blocks, so the first version gets output.
13756 # It would be possible to fix this by changing bond strengths,
13757 # but they are high to prevent errors in older versions of perl.
13758 # See c100 for eval test.
13760 && $rLL->[$K_last]->[_TYPE_] eq '#'
13761 && $rLL->[$K_last]->[_LEVEL_] == $rLL->[$Ki]->[_LEVEL_]
13762 && !$rOpts_ignore_side_comment_lengths
13763 && !$is_sort_map_grep_eval{$block_type}
13764 && $K_last - $Ki_nonblank <= 2 )
13766 # Only include the side comment for if/else/elsif/unless if it
13767 # immediately follows (because the current '$rbrace_follower'
13768 # logic for these will give an immediate brake after these
13769 # closing braces). So for example a line like this
13770 # if (...) { ... } ; # very long comment......
13771 # will already break like this:
13773 # ; # very long comment......
13774 # so we do not need to include the length of the comment, which
13775 # would break the block. Project 'bioperl' has coding like this.
13776 if ( $block_type !~ /^(if|else|elsif|unless)$/
13777 || $K_last == $Ki_nonblank )
13779 $Ki_nonblank = $K_last;
13780 $pos += $rLL->[$Ki_nonblank]->[_TOKEN_LENGTH_];
13782 if ( $Ki_nonblank > $Ki + 1 ) {
13784 # source whitespace could be anything, assume
13785 # at least one space before the hash on output
13786 if ( $rLL->[ $Ki + 1 ]->[_TYPE_] eq 'b' ) {
13789 else { $pos += $rLL->[ $Ki + 1 ]->[_TOKEN_LENGTH_] }
13792 if ( $pos >= $maximum_line_length ) {
13798 # ok, it's a one-line block
13799 create_one_line_block( $i_start, 20 );
13803 # just keep going for other characters
13808 # We haven't hit the closing brace, but there is still space. So the
13809 # question here is, should we keep going to look at more lines in hopes of
13810 # forming a new one-line block, or should we stop right now. The problem
13811 # with continuing is that we will not be able to honor breaks before the
13812 # opening brace if we continue.
13814 # Typically we will want to keep trying to make one-line blocks for things
13815 # like sort/map/grep/eval. But it is not always a good idea to make as
13816 # many one-line blocks as possible, so other types are not done. The user
13817 # can always use -mangle.
13819 # If we want to keep going, we will create a new one-line block.
13820 # The blocks which we can keep going are in a hash, but we never want
13821 # to continue if we are at a '-bli' block.
13822 if ( $want_one_line_block{$block_type} && !$is_bli ) {
13823 create_one_line_block( $i_start, 1 );
13828 sub unstore_token_to_go {
13830 # remove most recent token from output stream
13832 if ( $max_index_to_go > 0 ) {
13833 $max_index_to_go--;
13836 $max_index_to_go = UNDEFINED_INDEX;
13841 sub compare_indentation_levels {
13843 # Check to see if output line tabbing agrees with input line
13844 # this can be very useful for debugging a script which has an extra
13845 # or missing brace.
13847 my ( $self, $K_first, $guessed_indentation_level, $line_number ) = @_;
13848 return unless ( defined($K_first) );
13850 my $rLL = $self->[_rLL_];
13852 my $structural_indentation_level = $rLL->[$K_first]->[_LEVEL_];
13853 my $radjusted_levels = $self->[_radjusted_levels_];
13854 if ( defined($radjusted_levels) && @{$radjusted_levels} == @{$rLL} ) {
13855 $structural_indentation_level = $radjusted_levels->[$K_first];
13858 # record max structural depth for log file
13859 if ( $structural_indentation_level > $self->[_maximum_BLOCK_level_] ) {
13860 $self->[_maximum_BLOCK_level_] = $structural_indentation_level;
13861 $self->[_maximum_BLOCK_level_at_line_] = $line_number;
13864 my $type_sequence = $rLL->[$K_first]->[_TYPE_SEQUENCE_];
13865 my $is_closing_block =
13867 && $self->[_rblock_type_of_seqno_]->{$type_sequence}
13868 && $rLL->[$K_first]->[_TYPE_] eq '}';
13870 if ( $guessed_indentation_level ne $structural_indentation_level ) {
13871 $self->[_last_tabbing_disagreement_] = $line_number;
13873 if ($is_closing_block) {
13875 if ( !$self->[_in_brace_tabbing_disagreement_] ) {
13876 $self->[_in_brace_tabbing_disagreement_] = $line_number;
13878 if ( !$self->[_first_brace_tabbing_disagreement_] ) {
13879 $self->[_first_brace_tabbing_disagreement_] = $line_number;
13883 if ( !$self->[_in_tabbing_disagreement_] ) {
13884 $self->[_tabbing_disagreement_count_]++;
13886 if ( $self->[_tabbing_disagreement_count_] <= MAX_NAG_MESSAGES ) {
13887 write_logfile_entry(
13888 "Start indentation disagreement: input=$guessed_indentation_level; output=$structural_indentation_level\n"
13891 $self->[_in_tabbing_disagreement_] = $line_number;
13892 $self->[_first_tabbing_disagreement_] = $line_number
13893 unless ( $self->[_first_tabbing_disagreement_] );
13898 $self->[_in_brace_tabbing_disagreement_] = 0 if ($is_closing_block);
13900 my $in_tabbing_disagreement = $self->[_in_tabbing_disagreement_];
13901 if ($in_tabbing_disagreement) {
13903 if ( $self->[_tabbing_disagreement_count_] <= MAX_NAG_MESSAGES ) {
13904 write_logfile_entry(
13905 "End indentation disagreement from input line $in_tabbing_disagreement\n"
13908 if ( $self->[_tabbing_disagreement_count_] == MAX_NAG_MESSAGES )
13910 write_logfile_entry(
13911 "No further tabbing disagreements will be noted\n");
13914 $self->[_in_tabbing_disagreement_] = 0;
13921 ###################################################
13922 # CODE SECTION 8: Utilities for setting breakpoints
13923 ###################################################
13925 { ## begin closure set_forced_breakpoint
13927 my $forced_breakpoint_count;
13928 my $forced_breakpoint_undo_count;
13929 my @forced_breakpoint_undo_stack;
13930 my $index_max_forced_break;
13932 # Break before or after certain tokens based on user settings
13933 my %break_before_or_after_token;
13937 # Updated to use all operators. This fixes case b1054
13938 # Here is the previous simplified version:
13939 ## my @q = qw( . : ? and or xor && || );
13940 my @q = @all_operators;
13943 @break_before_or_after_token{@q} = (1) x scalar(@q);
13946 sub initialize_forced_breakpoint_vars {
13947 $forced_breakpoint_count = 0;
13948 $index_max_forced_break = UNDEFINED_INDEX;
13949 $forced_breakpoint_undo_count = 0;
13950 @forced_breakpoint_undo_stack = ();
13954 sub get_forced_breakpoint_count {
13955 return $forced_breakpoint_count;
13958 sub get_forced_breakpoint_undo_count {
13959 return $forced_breakpoint_undo_count;
13962 sub get_index_max_forced_break {
13963 return $index_max_forced_break;
13966 sub set_fake_breakpoint {
13968 # Just bump up the breakpoint count as a signal that there are breaks.
13969 # This is useful if we have breaks but may want to postpone deciding
13970 # where to make them.
13971 $forced_breakpoint_count++;
13975 use constant DEBUG_FORCE => 0;
13977 sub set_forced_breakpoint {
13978 my ( $self, $i ) = @_;
13980 # Set a breakpoint AFTER the token at index $i in the _to_go arrays.
13983 # - If the token at index $i is a blank, backup to $i-1 to
13984 # get to the previous nonblank token.
13985 # - For certain tokens, the break may be placed BEFORE the token
13986 # at index $i, depending on user break preference settings.
13987 # - If a break is made after an opening token, then a break will
13988 # also be made before the corresponding closing token.
13990 # Returns '$i_nonblank':
13991 # = index of the token after which the breakpoint was actually placed
13992 # = undef if breakpoint was not set.
13995 if ( !defined($i) || $i < 0 ) {
13997 # Calls with bad index $i are harmless but waste time and should
13998 # be caught and eliminated during code development.
14000 my ( $a, $b, $c ) = caller();
14002 "Bad call to forced breakpoint from $a $b $c ; called with i=$i; please fix\n"
14008 # Break after token $i
14009 $i_nonblank = $self->set_forced_breakpoint_AFTER($i);
14011 # If we break at an opening container..break at the closing
14013 if ( defined($i_nonblank)
14014 && $is_opening_sequence_token{ $tokens_to_go[$i_nonblank] } )
14017 $self->set_closing_breakpoint($i_nonblank);
14020 DEBUG_FORCE && do {
14021 my ( $a, $b, $c ) = caller();
14023 "FORCE $forced_breakpoint_count after call from $a $c with i=$i max=$max_index_to_go";
14024 if ( !defined($i_nonblank) ) {
14025 $i = "" unless defined($i);
14026 $msg .= " but could not set break after i='$i'\n";
14030 set break after $i_nonblank: tok=$tokens_to_go[$i_nonblank] type=$types_to_go[$i_nonblank] nobr=$nobreak_to_go[$i_nonblank]
14032 if ( defined($set_closing) ) {
14034 " Also set closing breakpoint corresponding to this token\n";
14040 return $i_nonblank;
14043 sub set_forced_breakpoint_AFTER {
14044 my ( $self, $i ) = @_;
14046 # This routine is only called by sub set_forced_breakpoint and
14047 # sub set_closing_breakpoint.
14049 # Set a breakpoint AFTER the token at index $i in the _to_go arrays.
14052 # - If the token at index $i is a blank, backup to $i-1 to
14053 # get to the previous nonblank token.
14054 # - For certain tokens, the break may be placed BEFORE the token
14055 # at index $i, depending on user break preference settings.
14058 # - the index of the token after which the break was set, or
14059 # - undef if no break was set
14061 return unless ( defined($i) && $i >= 0 );
14063 # Back up at a blank so we have a token to examine.
14064 # This was added to fix for cases like b932 involving an '=' break.
14065 if ( $i > 0 && $types_to_go[$i] eq 'b' ) { $i-- }
14067 # Never break between welded tokens
14069 if ( $total_weld_count
14070 && $self->[_rK_weld_right_]->{ $K_to_go[$i] } );
14072 my $token = $tokens_to_go[$i];
14073 my $type = $types_to_go[$i];
14075 # For certain tokens, use user settings to decide if we break before or
14077 if ( $break_before_or_after_token{$token}
14078 && ( $type eq $token || $type eq 'k' ) )
14080 if ( $want_break_before{$token} && $i >= 0 ) { $i-- }
14083 # breaks are forced before 'if' and 'unless'
14084 elsif ( $is_if_unless{$token} && $type eq 'k' ) { $i-- }
14086 if ( $i >= 0 && $i <= $max_index_to_go ) {
14087 my $i_nonblank = ( $types_to_go[$i] ne 'b' ) ? $i : $i - 1;
14089 if ( $i_nonblank >= 0
14090 && $nobreak_to_go[$i_nonblank] == 0
14091 && !$forced_breakpoint_to_go[$i_nonblank] )
14093 $forced_breakpoint_to_go[$i_nonblank] = 1;
14095 if ( $i_nonblank > $index_max_forced_break ) {
14096 $index_max_forced_break = $i_nonblank;
14098 $forced_breakpoint_count++;
14099 $forced_breakpoint_undo_stack[ $forced_breakpoint_undo_count++ ]
14103 return $i_nonblank;
14109 sub clear_breakpoint_undo_stack {
14111 $forced_breakpoint_undo_count = 0;
14115 use constant DEBUG_UNDOBP => 0;
14117 sub undo_forced_breakpoint_stack {
14119 my ( $self, $i_start ) = @_;
14121 # Given $i_start, a non-negative index the 'undo stack' of breakpoints,
14122 # remove all breakpoints from the top of the 'undo stack' down to and
14123 # including index $i_start.
14125 # The 'undo stack' is a stack of all breakpoints made for a batch of
14128 if ( $i_start < 0 ) {
14130 my ( $a, $b, $c ) = caller();
14132 # Bad call, can only be due to a recent programming change.
14134 "Program Bug: undo_forced_breakpoint_stack from $a $c has bad i=$i_start "
14139 while ( $forced_breakpoint_undo_count > $i_start ) {
14141 $forced_breakpoint_undo_stack[ --$forced_breakpoint_undo_count ];
14142 if ( $i >= 0 && $i <= $max_index_to_go ) {
14143 $forced_breakpoint_to_go[$i] = 0;
14144 $forced_breakpoint_count--;
14146 DEBUG_UNDOBP && do {
14147 my ( $a, $b, $c ) = caller();
14149 "UNDOBP: undo forced_breakpoint i=$i $forced_breakpoint_undo_count from $a $c max=$max_index_to_go\n";
14153 # shouldn't happen, but not a critical error
14155 DEBUG_UNDOBP && do {
14156 my ( $a, $b, $c ) = caller();
14158 "Program Bug: undo_forced_breakpoint from $a $c has i=$i but max=$max_index_to_go";
14164 } ## end closure set_forced_breakpoint
14166 { ## begin closure set_closing_breakpoint
14168 my %postponed_breakpoint;
14170 sub initialize_postponed_breakpoint {
14171 %postponed_breakpoint = ();
14175 sub has_postponed_breakpoint {
14177 return $postponed_breakpoint{$seqno};
14180 sub set_closing_breakpoint {
14182 # set a breakpoint at a matching closing token
14183 my ( $self, $i_break ) = @_;
14185 if ( $mate_index_to_go[$i_break] >= 0 ) {
14187 # Don't reduce the '2' in the statement below.
14188 # Test files: attrib.t, BasicLyx.pm.html
14189 if ( $mate_index_to_go[$i_break] > $i_break + 2 ) {
14191 # break before } ] and ), but sub set_forced_breakpoint will decide
14192 # to break before or after a ? and :
14193 my $inc = ( $tokens_to_go[$i_break] eq '?' ) ? 0 : 1;
14194 $self->set_forced_breakpoint_AFTER(
14195 $mate_index_to_go[$i_break] - $inc );
14199 my $type_sequence = $type_sequence_to_go[$i_break];
14200 if ($type_sequence) {
14201 my $closing_token = $matching_token{ $tokens_to_go[$i_break] };
14202 $postponed_breakpoint{$type_sequence} = 1;
14207 } ## end closure set_closing_breakpoint
14209 #########################################
14210 # CODE SECTION 9: Process batches of code
14211 #########################################
14213 { ## begin closure grind_batch_of_CODE
14215 # The routines in this closure begin the processing of a 'batch' of code.
14217 # A variable to keep track of consecutive nonblank lines so that we can
14218 # insert occasional blanks
14219 my @nonblank_lines_at_depth;
14221 # A variable to remember maximum size of previous batches; this is needed
14222 # by the logical padding routine
14223 my $peak_batch_size;
14226 # variables to keep track of unbalanced containers.
14227 my %saved_opening_indentation;
14228 my @unmatched_opening_indexes_in_this_batch;
14230 sub initialize_grind_batch_of_CODE {
14231 @nonblank_lines_at_depth = ();
14232 $peak_batch_size = 0;
14234 %saved_opening_indentation = ();
14238 # sub grind_batch_of_CODE receives sections of code which are the longest
14239 # possible lines without a break. In other words, it receives what is left
14240 # after applying all breaks forced by blank lines, block comments, side
14241 # comments, pod text, and structural braces. Its job is to break this code
14242 # down into smaller pieces, if necessary, which fit within the maximum
14243 # allowed line length. Then it sends the resulting lines of code on down
14244 # the pipeline to the VerticalAligner package, breaking the code into
14245 # continuation lines as necessary. The batch of tokens are in the "to_go"
14246 # arrays. The name 'grind' is slightly suggestive of a machine continually
14247 # breaking down long lines of code, but mainly it is unique and easy to
14248 # remember and find with an editor search.
14250 # The two routines 'process_line_of_CODE' and 'grind_batch_of_CODE' work
14251 # together in the following way:
14253 # - 'process_line_of_CODE' receives the original INPUT lines one-by-one and
14254 # combines them into the largest sequences of tokens which might form a new
14256 # - 'grind_batch_of_CODE' determines which tokens will form the OUTPUT
14259 # So sub 'process_line_of_CODE' builds up the longest possible continouus
14260 # sequences of tokens, regardless of line length, and then
14261 # grind_batch_of_CODE breaks these sequences back down into the new output
14264 # Sub 'grind_batch_of_CODE' ships its output lines to the vertical aligner.
14266 use constant DEBUG_GRIND => 0;
14268 sub check_grind_input {
14270 # Check for valid input to sub grind_batch_of_CODE. An error here
14271 # would most likely be due to an error in 'sub store_token_to_go'.
14274 # Be sure there are tokens in the batch
14275 if ( $max_index_to_go < 0 ) {
14277 sub grind incorrectly called with max_index_to_go=$max_index_to_go
14280 my $Klimit = $self->[_Klimit_];
14282 # The local batch tokens must be a continous part of the global token
14285 foreach my $ii ( 0 .. $max_index_to_go ) {
14289 $KK = $K_to_go[$ii];
14290 if ( !defined($KK) || $KK < 0 || $KK > $Klimit ) {
14291 $KK = '(undef)' unless defined($KK);
14293 at batch index at i=$ii, the value of K_to_go[$ii] = '$KK' is out of the valid range (0 - $Klimit)
14297 if ( $ii > 0 && $KK != $Km + 1 ) {
14300 Non-sequential K indexes: i=$im has Km=$Km; but i=$ii has K=$KK; expecting K = Km+1
14307 sub grind_batch_of_CODE {
14311 my $this_batch = $self->[_this_batch_];
14314 $self->check_grind_input() if (DEVEL_MODE);
14316 # This routine is only called from sub flush_batch_of_code, so that
14317 # routine is a better spot for debugging.
14318 DEBUG_GRIND && do {
14319 my $token = my $type = "";
14320 if ( $max_index_to_go >= 0 ) {
14321 $token = $tokens_to_go[$max_index_to_go];
14322 $type = $types_to_go[$max_index_to_go];
14324 my $output_str = "";
14325 if ( $max_index_to_go > 20 ) {
14326 my $mm = $max_index_to_go - 10;
14327 $output_str = join( "", @tokens_to_go[ 0 .. 10 ] ) . " ... "
14328 . join( "", @tokens_to_go[ $mm .. $max_index_to_go ] );
14331 $output_str = join "", @tokens_to_go[ 0 .. $max_index_to_go ];
14333 print STDERR <<EOM;
14334 grind got batch number $batch_count with $max_index_to_go tokens, last type '$type' tok='$token', text:
14339 return if ( $max_index_to_go < 0 );
14341 $self->set_lp_indentation()
14342 if ($rOpts_line_up_parentheses);
14344 #----------------------------
14345 # Shortcut for block comments
14346 #----------------------------
14348 $max_index_to_go == 0
14349 && $types_to_go[0] eq '#'
14351 # this shortcut does not work for -lp yet
14352 && !$rOpts_line_up_parentheses
14356 $this_batch->[_ri_first_] = [$ibeg];
14357 $this_batch->[_ri_last_] = [$ibeg];
14358 $this_batch->[_peak_batch_size_] = $peak_batch_size;
14359 $this_batch->[_do_not_pad_] = 0;
14360 $this_batch->[_batch_count_] = $batch_count;
14361 $this_batch->[_rix_seqno_controlling_ci_] = [];
14363 $self->convey_batch_to_vertical_aligner();
14365 my $level = $levels_to_go[$ibeg];
14366 $self->[_last_last_line_leading_level_] =
14367 $self->[_last_line_leading_level_];
14368 $self->[_last_line_leading_type_] = $types_to_go[$ibeg];
14369 $self->[_last_line_leading_level_] = $level;
14370 $nonblank_lines_at_depth[$level] = 1;
14378 my $rLL = $self->[_rLL_];
14379 my $ris_seqno_controlling_ci = $self->[_ris_seqno_controlling_ci_];
14380 my $rwant_container_open = $self->[_rwant_container_open_];
14382 my $starting_in_quote = $this_batch->[_starting_in_quote_];
14383 my $ending_in_quote = $this_batch->[_ending_in_quote_];
14384 my $is_static_block_comment = $this_batch->[_is_static_block_comment_];
14386 #-------------------------------------------------------
14387 # Loop over the batch to initialize some batch variables
14388 #-------------------------------------------------------
14389 my $comma_count_in_batch = 0;
14390 my $ilast_nonblank = -1;
14392 my @ix_seqno_controlling_ci;
14393 my %comma_arrow_count = ();
14394 my $comma_arrow_count_contained = 0;
14395 my @unmatched_closing_indexes_in_this_batch;
14397 @unmatched_opening_indexes_in_this_batch = ();
14399 for ( my $i = 0 ; $i <= $max_index_to_go ; $i++ ) {
14400 $bond_strength_to_go[$i] = 0;
14401 $iprev_to_go[$i] = $ilast_nonblank;
14402 $inext_to_go[$i] = $i + 1;
14404 my $type = $types_to_go[$i];
14405 if ( $type ne 'b' ) {
14406 if ( $ilast_nonblank >= 0 ) {
14407 $inext_to_go[$ilast_nonblank] = $i;
14409 # just in case there are two blanks in a row (shouldn't
14411 if ( ++$ilast_nonblank < $i ) {
14412 $inext_to_go[$ilast_nonblank] = $i;
14415 $ilast_nonblank = $i;
14417 # This is a good spot to efficiently collect information needed
14418 # for breaking lines...
14420 # gather info needed by sub break_long_lines
14421 if ( $type_sequence_to_go[$i] ) {
14422 my $seqno = $type_sequence_to_go[$i];
14423 my $token = $tokens_to_go[$i];
14425 # remember indexes of any tokens controlling xci
14426 # in this batch. This list is needed by sub undo_ci.
14427 if ( $ris_seqno_controlling_ci->{$seqno} ) {
14428 push @ix_seqno_controlling_ci, $i;
14431 if ( $is_opening_sequence_token{$token} ) {
14432 if ( $rwant_container_open->{$seqno} ) {
14433 $self->set_forced_breakpoint($i);
14435 push @unmatched_opening_indexes_in_this_batch, $i;
14436 if ( $type eq '?' ) {
14437 push @colon_list, $type;
14440 elsif ( $is_closing_sequence_token{$token} ) {
14442 if ( $i > 0 && $rwant_container_open->{$seqno} ) {
14443 $self->set_forced_breakpoint( $i - 1 );
14447 pop @unmatched_opening_indexes_in_this_batch;
14448 if ( defined($i_mate) && $i_mate >= 0 ) {
14449 if ( $type_sequence_to_go[$i_mate] ==
14450 $type_sequence_to_go[$i] )
14452 $mate_index_to_go[$i] = $i_mate;
14453 $mate_index_to_go[$i_mate] = $i;
14454 my $seqno = $type_sequence_to_go[$i];
14455 if ( $comma_arrow_count{$seqno} ) {
14456 $comma_arrow_count_contained +=
14457 $comma_arrow_count{$seqno};
14461 push @unmatched_opening_indexes_in_this_batch,
14463 push @unmatched_closing_indexes_in_this_batch,
14468 push @unmatched_closing_indexes_in_this_batch, $i;
14470 if ( $type eq ':' ) {
14471 push @colon_list, $type;
14473 } ## end elsif ( $is_closing_sequence_token...)
14475 } ## end if ($seqno)
14477 elsif ( $type eq ',' ) { $comma_count_in_batch++; }
14478 elsif ( $tokens_to_go[$i] eq '=>' ) {
14479 if (@unmatched_opening_indexes_in_this_batch) {
14480 my $j = $unmatched_opening_indexes_in_this_batch[-1];
14481 my $seqno = $type_sequence_to_go[$j];
14482 $comma_arrow_count{$seqno}++;
14485 } ## end if ( $type ne 'b' )
14486 } ## end for ( my $i = 0 ; $i <=...)
14488 my $is_unbalanced_batch = @unmatched_opening_indexes_in_this_batch +
14489 @unmatched_closing_indexes_in_this_batch;
14491 #------------------------
14492 # Set special breakpoints
14493 #------------------------
14494 # If this line ends in a code block brace, set breaks at any
14495 # previous closing code block braces to breakup a chain of code
14496 # blocks on one line. This is very rare but can happen for
14497 # user-defined subs. For example we might be looking at this:
14498 # BOOL { $server_data{uptime} > 0; } NUM { $server_data{load}; } STR {
14499 my $saw_good_break = 0; # flag to force breaks even if short line
14502 # looking for opening or closing block brace
14503 $block_type_to_go[$max_index_to_go]
14505 # never any good breaks if just one token
14506 && $max_index_to_go > 0
14508 # but not one of these which are never duplicated on a line:
14509 # until|while|for|if|elsif|else
14510 && !$is_block_without_semicolon{ $block_type_to_go[$max_index_to_go]
14514 my $lev = $nesting_depth_to_go[$max_index_to_go];
14516 # Walk backwards from the end and
14517 # set break at any closing block braces at the same level.
14518 # But quit if we are not in a chain of blocks.
14519 for ( my $i = $max_index_to_go - 1 ; $i >= 0 ; $i-- ) {
14520 last if ( $levels_to_go[$i] < $lev ); # stop at a lower level
14521 next if ( $levels_to_go[$i] > $lev ); # skip past higher level
14523 if ( $block_type_to_go[$i] ) {
14524 if ( $tokens_to_go[$i] eq '}' ) {
14525 $self->set_forced_breakpoint($i);
14526 $saw_good_break = 1;
14530 # quit if we see anything besides words, function, blanks
14532 elsif ( $types_to_go[$i] !~ /^[\(\)Gwib]$/ ) { last }
14536 #-----------------------------------------------
14537 # insertion of any blank lines before this batch
14538 #-----------------------------------------------
14541 my $imax = $max_index_to_go;
14543 # trim any blank tokens
14544 if ( $types_to_go[$imin] eq 'b' ) { $imin++ }
14545 if ( $types_to_go[$imax] eq 'b' ) { $imax-- }
14547 if ( $imin > $imax ) {
14549 my $K0 = $K_to_go[0];
14551 if ( defined($K0) ) { $lno = $rLL->[$K0]->[_LINE_INDEX_] + 1 }
14553 Strange: received batch containing only blanks near input line $lno: after trimming imin=$imin, imax=$imax
14559 my $last_line_leading_type = $self->[_last_line_leading_type_];
14560 my $last_line_leading_level = $self->[_last_line_leading_level_];
14561 my $last_last_line_leading_level =
14562 $self->[_last_last_line_leading_level_];
14564 # add a blank line before certain key types but not after a comment
14565 if ( $last_line_leading_type ne '#' ) {
14566 my $want_blank = 0;
14567 my $leading_token = $tokens_to_go[$imin];
14568 my $leading_type = $types_to_go[$imin];
14570 # blank lines before subs except declarations and one-liners
14571 if ( $leading_type eq 'i' ) {
14576 substr( $leading_token, 0, 3 ) eq 'sub'
14577 || $rOpts_sub_alias_list
14581 && $leading_token =~ /$SUB_PATTERN/
14584 $want_blank = $rOpts->{'blank-lines-before-subs'}
14585 if ( terminal_type_i( $imin, $imax ) !~ /^[\;\}\,]$/ );
14588 # break before all package declarations
14589 elsif ( substr( $leading_token, 0, 8 ) eq 'package ' ) {
14590 $want_blank = $rOpts->{'blank-lines-before-packages'};
14594 # break before certain key blocks except one-liners
14595 if ( $leading_type eq 'k' ) {
14596 if ( $leading_token eq 'BEGIN' || $leading_token eq 'END' ) {
14597 $want_blank = $rOpts->{'blank-lines-before-subs'}
14598 if ( terminal_type_i( $imin, $imax ) ne '}' );
14601 # Break before certain block types if we haven't had a
14602 # break at this level for a while. This is the
14603 # difficult decision..
14604 elsif ($last_line_leading_type ne 'b'
14605 && $is_if_unless_while_until_for_foreach{$leading_token} )
14607 my $lc = $nonblank_lines_at_depth[$last_line_leading_level];
14608 if ( !defined($lc) ) { $lc = 0 }
14610 # patch for RT #128216: no blank line inserted at a level
14612 if ( $levels_to_go[$imin] != $last_line_leading_level ) {
14617 $rOpts->{'blanks-before-blocks'}
14618 && $lc >= $rOpts->{'long-block-line-count'}
14619 && $self->consecutive_nonblank_lines() >=
14620 $rOpts->{'long-block-line-count'}
14621 && terminal_type_i( $imin, $imax ) ne '}';
14625 # Check for blank lines wanted before a closing brace
14626 if ( $leading_token eq '}' ) {
14627 if ( $rOpts->{'blank-lines-before-closing-block'}
14628 && $block_type_to_go[$imin]
14629 && $block_type_to_go[$imin] =~
14630 /$blank_lines_before_closing_block_pattern/ )
14632 my $nblanks = $rOpts->{'blank-lines-before-closing-block'};
14633 if ( $nblanks > $want_blank ) {
14634 $want_blank = $nblanks;
14641 # future: send blank line down normal path to VerticalAligner
14642 $self->flush_vertical_aligner();
14643 my $file_writer_object = $self->[_file_writer_object_];
14644 $file_writer_object->require_blank_code_lines($want_blank);
14648 # update blank line variables and count number of consecutive
14649 # non-blank, non-comment lines at this level
14650 $last_last_line_leading_level = $last_line_leading_level;
14651 $last_line_leading_level = $levels_to_go[$imin];
14652 if ( $last_line_leading_level < 0 ) { $last_line_leading_level = 0 }
14653 $last_line_leading_type = $types_to_go[$imin];
14654 if ( $last_line_leading_level == $last_last_line_leading_level
14655 && $last_line_leading_type ne 'b'
14656 && $last_line_leading_type ne '#'
14657 && defined( $nonblank_lines_at_depth[$last_line_leading_level] ) )
14659 $nonblank_lines_at_depth[$last_line_leading_level]++;
14662 $nonblank_lines_at_depth[$last_line_leading_level] = 1;
14665 $self->[_last_line_leading_type_] = $last_line_leading_type;
14666 $self->[_last_line_leading_level_] = $last_line_leading_level;
14667 $self->[_last_last_line_leading_level_] = $last_last_line_leading_level;
14669 #--------------------------
14670 # scan lists and long lines
14671 #--------------------------
14673 # Flag to remember if we called sub 'pad_array_to_go'.
14674 # Some routines (break_lists(), break_long_lines() ) need some
14675 # extra tokens added at the end of the batch. Most batches do not
14676 # use these routines, so we will avoid calling 'pad_array_to_go'
14677 # unless it is needed.
14678 my $called_pad_array_to_go;
14680 # set all forced breakpoints for good list formatting
14681 my $is_long_line = $max_index_to_go > 0
14682 && $self->excess_line_length( $imin, $max_index_to_go ) > 0;
14684 my $old_line_count_in_batch = 1;
14685 if ( $max_index_to_go > 0 ) {
14686 my $Kbeg = $K_to_go[0];
14687 my $Kend = $K_to_go[$max_index_to_go];
14688 $old_line_count_in_batch +=
14689 $rLL->[$Kend]->[_LINE_INDEX_] - $rLL->[$Kbeg]->[_LINE_INDEX_];
14694 || $old_line_count_in_batch > 1
14696 # must always call break_lists() with unbalanced batches because
14697 # it is maintaining some stacks
14698 || $is_unbalanced_batch
14700 # call break_lists if we might want to break at commas
14702 $comma_count_in_batch
14703 && ( $rOpts_maximum_fields_per_table > 0
14704 && $rOpts_maximum_fields_per_table <= $comma_count_in_batch
14705 || $rOpts_comma_arrow_breakpoints == 0 )
14708 # call break_lists if user may want to break open some one-line
14710 || ( $comma_arrow_count_contained
14711 && $rOpts_comma_arrow_breakpoints != 3 )
14714 # add a couple of extra terminal blank tokens
14715 $self->pad_array_to_go();
14716 $called_pad_array_to_go = 1;
14718 my $sgb = $self->break_lists($is_long_line);
14719 $saw_good_break ||= $sgb;
14722 # let $ri_first and $ri_last be references to lists of
14723 # first and last tokens of line fragments to output..
14724 my ( $ri_first, $ri_last );
14726 #-------------------------
14727 # write a single line if..
14728 #-------------------------
14731 # we aren't allowed to add any newlines
14732 !$rOpts_add_newlines
14737 # this line is 'short'
14740 # and we didn't see a good breakpoint
14741 && !$saw_good_break
14743 # and we don't already have an interior breakpoint
14744 && !get_forced_breakpoint_count()
14748 @{$ri_first} = ($imin);
14749 @{$ri_last} = ($imax);
14752 #-----------------------------
14753 # otherwise use multiple lines
14754 #-----------------------------
14757 # add a couple of extra terminal blank tokens if we haven't
14759 $self->pad_array_to_go() unless ($called_pad_array_to_go);
14761 ( $ri_first, $ri_last ) =
14762 $self->break_long_lines( $saw_good_break, \@colon_list );
14764 $self->break_all_chain_tokens( $ri_first, $ri_last );
14766 $self->break_equals( $ri_first, $ri_last );
14768 # now we do a correction step to clean this up a bit
14769 # (The only time we would not do this is for debugging)
14770 $self->recombine_breakpoints( $ri_first, $ri_last )
14771 if ( $rOpts_recombine && @{$ri_first} > 1 );
14773 $self->insert_final_ternary_breaks( $ri_first, $ri_last )
14777 $self->insert_breaks_before_list_opening_containers( $ri_first,
14779 if ( %break_before_container_types && $max_index_to_go > 0 );
14781 #-------------------
14782 # -lp corrector step
14783 #-------------------
14784 my $do_not_pad = 0;
14785 if ($rOpts_line_up_parentheses) {
14786 $do_not_pad = $self->correct_lp_indentation( $ri_first, $ri_last );
14789 #--------------------------
14790 # unmask phantom semicolons
14791 #--------------------------
14792 if ( !$tokens_to_go[$imax] && $types_to_go[$imax] eq ';' ) {
14796 if ( $want_left_space{';'} != WS_NO ) {
14800 $tokens_to_go[$i] = $tok;
14801 $token_lengths_to_go[$i] = $tok_len;
14802 my $KK = $K_to_go[$i];
14803 $rLL->[$KK]->[_TOKEN_] = $tok;
14804 $rLL->[$KK]->[_TOKEN_LENGTH_] = $tok_len;
14805 my $line_number = 1 + $rLL->[$KK]->[_LINE_INDEX_];
14806 $self->note_added_semicolon($line_number);
14808 foreach ( $imax .. $max_index_to_go ) {
14809 $summed_lengths_to_go[ $_ + 1 ] += $tok_len;
14813 if ( $rOpts_one_line_block_semicolons == 0 ) {
14814 $self->delete_one_line_semicolons( $ri_first, $ri_last );
14817 #--------------------
14818 # ship this batch out
14819 #--------------------
14820 $this_batch->[_ri_first_] = $ri_first;
14821 $this_batch->[_ri_last_] = $ri_last;
14822 $this_batch->[_peak_batch_size_] = $peak_batch_size;
14823 $this_batch->[_do_not_pad_] = $do_not_pad;
14824 $this_batch->[_batch_count_] = $batch_count;
14825 $this_batch->[_rix_seqno_controlling_ci_] = \@ix_seqno_controlling_ci;
14827 $self->convey_batch_to_vertical_aligner();
14829 #-------------------------------------------------------------------
14830 # Write requested number of blank lines after an opening block brace
14831 #-------------------------------------------------------------------
14832 if ($rOpts_blank_lines_after_opening_block) {
14834 if ( $types_to_go[$iterm] eq '#' && $iterm > $imin ) {
14836 if ( $types_to_go[$iterm] eq 'b' && $iterm > $imin ) {
14841 if ( $types_to_go[$iterm] eq '{'
14842 && $block_type_to_go[$iterm]
14843 && $block_type_to_go[$iterm] =~
14844 /$blank_lines_after_opening_block_pattern/ )
14846 my $nblanks = $rOpts_blank_lines_after_opening_block;
14847 $self->flush_vertical_aligner();
14848 my $file_writer_object = $self->[_file_writer_object_];
14849 $file_writer_object->require_blank_code_lines($nblanks);
14853 # Remember the largest batch size processed. This is needed by the
14854 # logical padding routine to avoid padding the first nonblank token
14855 if ( $max_index_to_go && $max_index_to_go > $peak_batch_size ) {
14856 $peak_batch_size = $max_index_to_go;
14862 sub save_opening_indentation {
14864 # This should be called after each batch of tokens is output. It
14865 # saves indentations of lines of all unmatched opening tokens.
14866 # These will be used by sub get_opening_indentation.
14868 my ( $self, $ri_first, $ri_last, $rindentation_list ) = @_;
14870 # QW INDENTATION PATCH 1:
14871 # Also save indentation for multiline qw quotes
14873 my $seqno_qw_opening;
14874 if ( $types_to_go[$max_index_to_go] eq 'q' ) {
14875 my $KK = $K_to_go[$max_index_to_go];
14876 $seqno_qw_opening =
14877 $self->[_rstarting_multiline_qw_seqno_by_K_]->{$KK};
14878 if ($seqno_qw_opening) {
14879 push @i_qw, $max_index_to_go;
14883 # we need to save indentations of any unmatched opening tokens
14884 # in this batch because we may need them in a subsequent batch.
14885 foreach ( @unmatched_opening_indexes_in_this_batch, @i_qw ) {
14887 my $seqno = $type_sequence_to_go[$_];
14890 if ( $seqno_qw_opening && $_ == $max_index_to_go ) {
14891 $seqno = $seqno_qw_opening;
14896 $seqno = 'UNKNOWN';
14900 $saved_opening_indentation{$seqno} = [
14901 lookup_opening_indentation(
14902 $_, $ri_first, $ri_last, $rindentation_list
14909 sub get_saved_opening_indentation {
14911 my ( $indent, $offset, $is_leading, $exists ) = ( 0, 0, 0, 0 );
14914 if ( $saved_opening_indentation{$seqno} ) {
14915 ( $indent, $offset, $is_leading ) =
14916 @{ $saved_opening_indentation{$seqno} };
14921 # some kind of serious error it doesn't exist
14922 # (example is badfile.t)
14924 return ( $indent, $offset, $is_leading, $exists );
14926 } ## end closure grind_batch_of_CODE
14928 sub lookup_opening_indentation {
14930 # get the indentation of the line in the current output batch
14931 # which output a selected opening token
14934 # $i_opening - index of an opening token in the current output batch
14935 # whose line indentation we need
14936 # $ri_first - reference to list of the first index $i for each output
14937 # line in this batch
14938 # $ri_last - reference to list of the last index $i for each output line
14940 # $rindentation_list - reference to a list containing the indentation
14941 # used for each line. (NOTE: the first slot in
14942 # this list is the last returned line number, and this is
14943 # followed by the list of indentations).
14946 # -the indentation of the line which contained token $i_opening
14947 # -and its offset (number of columns) from the start of the line
14949 my ( $i_opening, $ri_start, $ri_last, $rindentation_list ) = @_;
14951 if ( !@{$ri_last} ) {
14953 # An error here implies a bug introduced by a recent program change.
14954 # Every batch of code has lines, so this should never happen.
14956 Fault("Error in opening_indentation: no lines");
14958 return ( 0, 0, 0 );
14961 my $nline = $rindentation_list->[0]; # line number of previous lookup
14963 # reset line location if necessary
14964 $nline = 0 if ( $i_opening < $ri_start->[$nline] );
14966 # find the correct line
14967 unless ( $i_opening > $ri_last->[-1] ) {
14968 while ( $i_opening > $ri_last->[$nline] ) { $nline++; }
14971 # Error - token index is out of bounds - shouldn't happen
14972 # A program bug has been introduced in one of the calling routines.
14973 # We better stop here.
14975 my $i_last_line = $ri_last->[-1];
14978 Program bug in call to lookup_opening_indentation - index out of range
14979 called with index i_opening=$i_opening > $i_last_line = max index of last line
14980 This batch has max index = $max_index_to_go,
14983 $nline = $#{$ri_last};
14986 $rindentation_list->[0] =
14987 $nline; # save line number to start looking next call
14988 my $ibeg = $ri_start->[$nline];
14989 my $offset = token_sequence_length( $ibeg, $i_opening ) - 1;
14990 my $is_leading = ( $ibeg == $i_opening );
14991 return ( $rindentation_list->[ $nline + 1 ], $offset, $is_leading );
14994 sub terminal_type_i {
14996 # returns type of last token on this line (terminal token), as follows:
14997 # returns # for a full-line comment
14998 # returns ' ' for a blank line
14999 # otherwise returns final token type
15001 my ( $ibeg, $iend ) = @_;
15003 # Start at the end and work backwards
15005 my $type_i = $types_to_go[$i];
15007 # Check for side comment
15008 if ( $type_i eq '#' ) {
15010 if ( $i < $ibeg ) {
15011 return wantarray ? ( $type_i, $ibeg ) : $type_i;
15013 $type_i = $types_to_go[$i];
15016 # Skip past a blank
15017 if ( $type_i eq 'b' ) {
15019 if ( $i < $ibeg ) {
15020 return wantarray ? ( $type_i, $ibeg ) : $type_i;
15022 $type_i = $types_to_go[$i];
15025 # Found it..make sure it is a BLOCK termination,
15026 # but hide a terminal } after sort/map/grep/eval/do because it is not
15027 # necessarily the end of the line. (terminal.t)
15028 my $block_type = $block_type_to_go[$i];
15032 || $is_sort_map_grep_eval_do{$block_type} )
15037 return wantarray ? ( $type_i, $i ) : $type_i;
15040 sub pad_array_to_go {
15042 # To simplify coding in break_lists and set_bond_strengths, it helps to
15043 # create some extra blank tokens at the end of the arrays. We also add
15044 # some undef's to help guard against using invalid data.
15046 $K_to_go[ $max_index_to_go + 1 ] = undef;
15047 $tokens_to_go[ $max_index_to_go + 1 ] = '';
15048 $tokens_to_go[ $max_index_to_go + 2 ] = '';
15049 $tokens_to_go[ $max_index_to_go + 3 ] = undef;
15050 $types_to_go[ $max_index_to_go + 1 ] = 'b';
15051 $types_to_go[ $max_index_to_go + 2 ] = 'b';
15052 $types_to_go[ $max_index_to_go + 3 ] = undef;
15053 $nesting_depth_to_go[ $max_index_to_go + 2 ] = undef;
15054 $nesting_depth_to_go[ $max_index_to_go + 1 ] =
15055 $nesting_depth_to_go[$max_index_to_go];
15058 if ( $is_closing_type{ $types_to_go[$max_index_to_go] } ) {
15059 if ( $nesting_depth_to_go[$max_index_to_go] <= 0 ) {
15061 # Nesting depths are set to be >=0 in sub write_line, so it should
15062 # not be possible to get here unless the code has a bracing error
15063 # which leaves a closing brace with zero nesting depth.
15064 unless ( get_saw_brace_error() ) {
15067 Program bug in pad_array_to_go: hit nesting error which should have been caught
15073 $nesting_depth_to_go[ $max_index_to_go + 1 ] -= 1;
15078 elsif ( $is_opening_type{ $types_to_go[$max_index_to_go] } ) {
15079 $nesting_depth_to_go[ $max_index_to_go + 1 ] += 1;
15084 sub break_all_chain_tokens {
15086 # scan the current breakpoints looking for breaks at certain "chain
15087 # operators" (. : && || + etc) which often occur repeatedly in a long
15088 # statement. If we see a break at any one, break at all similar tokens
15089 # within the same container.
15091 my ( $self, $ri_left, $ri_right ) = @_;
15093 my %saw_chain_type;
15094 my %left_chain_type;
15095 my %right_chain_type;
15096 my %interior_chain_type;
15097 my $nmax = @{$ri_right} - 1;
15099 # scan the left and right end tokens of all lines
15101 for my $n ( 0 .. $nmax ) {
15102 my $il = $ri_left->[$n];
15103 my $ir = $ri_right->[$n];
15104 my $typel = $types_to_go[$il];
15105 my $typer = $types_to_go[$ir];
15106 $typel = '+' if ( $typel eq '-' ); # treat + and - the same
15107 $typer = '+' if ( $typer eq '-' );
15108 $typel = '*' if ( $typel eq '/' ); # treat * and / the same
15109 $typer = '*' if ( $typer eq '/' );
15110 my $tokenl = $tokens_to_go[$il];
15111 my $tokenr = $tokens_to_go[$ir];
15113 if ( $is_chain_operator{$tokenl} && $want_break_before{$typel} ) {
15114 next if ( $typel eq '?' );
15115 push @{ $left_chain_type{$typel} }, $il;
15116 $saw_chain_type{$typel} = 1;
15119 if ( $is_chain_operator{$tokenr} && !$want_break_before{$typer} ) {
15120 next if ( $typer eq '?' );
15121 push @{ $right_chain_type{$typer} }, $ir;
15122 $saw_chain_type{$typer} = 1;
15126 return unless $count;
15128 # now look for any interior tokens of the same types
15130 for my $n ( 0 .. $nmax ) {
15131 my $il = $ri_left->[$n];
15132 my $ir = $ri_right->[$n];
15133 foreach my $i ( $il + 1 .. $ir - 1 ) {
15134 my $type = $types_to_go[$i];
15135 $type = '+' if ( $type eq '-' );
15136 $type = '*' if ( $type eq '/' );
15137 if ( $saw_chain_type{$type} ) {
15138 push @{ $interior_chain_type{$type} }, $i;
15143 return unless $count;
15145 # now make a list of all new break points
15148 # loop over all chain types
15149 foreach my $type ( keys %saw_chain_type ) {
15151 # quit if just ONE continuation line with leading . For example--
15152 # print LATEXFILE '\framebox{\parbox[c][' . $h . '][t]{' . $w . '}{'
15154 last if ( $nmax == 1 && $type =~ /^[\.\+]$/ );
15156 # loop over all interior chain tokens
15157 foreach my $itest ( @{ $interior_chain_type{$type} } ) {
15159 # loop over all left end tokens of same type
15160 if ( $left_chain_type{$type} ) {
15161 next if $nobreak_to_go[ $itest - 1 ];
15162 foreach my $i ( @{ $left_chain_type{$type} } ) {
15163 next unless $self->in_same_container_i( $i, $itest );
15164 push @insert_list, $itest - 1;
15166 # Break at matching ? if this : is at a different level.
15167 # For example, the ? before $THRf_DEAD in the following
15168 # should get a break if its : gets a break.
15171 # ( $_ & 1 ) ? ( $_ & 4 ) ? $THRf_DEAD : $THRf_ZOMBIE
15172 # : ( $_ & 4 ) ? $THRf_R_DETACHED
15173 # : $THRf_R_JOINABLE;
15175 && $levels_to_go[$i] != $levels_to_go[$itest] )
15177 my $i_question = $mate_index_to_go[$itest];
15178 if ( $i_question > 0 ) {
15179 push @insert_list, $i_question - 1;
15186 # loop over all right end tokens of same type
15187 if ( $right_chain_type{$type} ) {
15188 next if $nobreak_to_go[$itest];
15189 foreach my $i ( @{ $right_chain_type{$type} } ) {
15190 next unless $self->in_same_container_i( $i, $itest );
15191 push @insert_list, $itest;
15193 # break at matching ? if this : is at a different level
15195 && $levels_to_go[$i] != $levels_to_go[$itest] )
15197 my $i_question = $mate_index_to_go[$itest];
15198 if ( $i_question >= 0 ) {
15199 push @insert_list, $i_question;
15208 # insert any new break points
15209 if (@insert_list) {
15210 $self->insert_additional_breaks( \@insert_list, $ri_left, $ri_right );
15215 sub insert_additional_breaks {
15217 # this routine will add line breaks at requested locations after
15218 # sub break_long_lines has made preliminary breaks.
15220 my ( $self, $ri_break_list, $ri_first, $ri_last ) = @_;
15223 my $line_number = 0;
15224 foreach my $i_break_left ( sort { $a <=> $b } @{$ri_break_list} ) {
15226 next if ( $nobreak_to_go[$i_break_left] );
15228 $i_f = $ri_first->[$line_number];
15229 $i_l = $ri_last->[$line_number];
15230 while ( $i_break_left >= $i_l ) {
15233 # shouldn't happen unless caller passes bad indexes
15234 if ( $line_number >= @{$ri_last} ) {
15237 Non-fatal program bug: couldn't set break at $i_break_left
15242 $i_f = $ri_first->[$line_number];
15243 $i_l = $ri_last->[$line_number];
15246 # Do not leave a blank at the end of a line; back up if necessary
15247 if ( $types_to_go[$i_break_left] eq 'b' ) { $i_break_left-- }
15249 my $i_break_right = $inext_to_go[$i_break_left];
15250 if ( $i_break_left >= $i_f
15251 && $i_break_left < $i_l
15252 && $i_break_right > $i_f
15253 && $i_break_right <= $i_l )
15255 splice( @{$ri_first}, $line_number, 1, ( $i_f, $i_break_right ) );
15256 splice( @{$ri_last}, $line_number, 1, ( $i_break_left, $i_l ) );
15262 { ## begin closure in_same_container_i
15263 my $ris_break_token;
15264 my $ris_comma_token;
15268 # all cases break on seeing commas at same level
15271 @{$ris_comma_token}{@q} = (1) x scalar(@q);
15273 # Non-ternary text also breaks on seeing any of qw(? : || or )
15274 # Example: we would not want to break at any of these .'s
15275 # : "<A HREF=\"#item_" . htmlify( 0, $s2 ) . "\">$str</A>"
15276 push @q, qw( or || ? : );
15277 @{$ris_break_token}{@q} = (1) x scalar(@q);
15280 sub in_same_container_i {
15282 # Check to see if tokens at i1 and i2 are in the same container, and
15283 # not separated by certain characters: => , ? : || or
15284 # This is an interface between the _to_go arrays to the rLL array
15285 my ( $self, $i1, $i2 ) = @_;
15288 my $parent_seqno_1 = $parent_seqno_to_go[$i1];
15289 return if ( $parent_seqno_to_go[$i2] ne $parent_seqno_1 );
15291 if ( $i2 < $i1 ) { ( $i1, $i2 ) = ( $i2, $i1 ) }
15292 my $K1 = $K_to_go[$i1];
15293 my $K2 = $K_to_go[$i2];
15294 my $rLL = $self->[_rLL_];
15296 my $depth_1 = $nesting_depth_to_go[$i1];
15297 return if ( $depth_1 < 0 );
15299 # Shouldn't happen since i1 and i2 have same parent:
15300 return unless ( $nesting_depth_to_go[$i2] == $depth_1 );
15302 # Select character set to scan for
15303 my $type_1 = $types_to_go[$i1];
15304 my $rbreak = ( $type_1 ne ':' ) ? $ris_break_token : $ris_comma_token;
15306 # Fast preliminary loop to verify that tokens are in the same container
15309 $KK = $rLL->[$KK]->[_KNEXT_SEQ_ITEM_];
15310 last if !defined($KK);
15311 last if ( $KK >= $K2 );
15312 my $ii = $i1 + $KK - $K1;
15313 my $depth_i = $nesting_depth_to_go[$ii];
15314 return if ( $depth_i < $depth_1 );
15315 next if ( $depth_i > $depth_1 );
15316 if ( $type_1 ne ':' ) {
15317 my $tok_i = $tokens_to_go[$ii];
15318 return if ( $tok_i eq '?' || $tok_i eq ':' );
15322 # Slow loop checking for certain characters
15324 #-----------------------------------------------------
15325 # This is potentially a slow routine and not critical.
15326 # For safety just give up for large differences.
15327 # See test file 'infinite_loop.txt'
15328 #-----------------------------------------------------
15329 return if ( $i2 - $i1 > 200 );
15331 foreach my $ii ( $i1 + 1 .. $i2 - 1 ) {
15333 my $depth_i = $nesting_depth_to_go[$ii];
15334 next if ( $depth_i > $depth_1 );
15335 return if ( $depth_i < $depth_1 );
15336 my $tok_i = $tokens_to_go[$ii];
15337 return if ( $rbreak->{$tok_i} );
15341 } ## end closure in_same_container_i
15345 # Look for assignment operators that could use a breakpoint.
15346 # For example, in the following snippet
15348 # $HOME = $ENV{HOME}
15351 # || die "no home directory for user $<";
15353 # we could break at the = to get this, which is a little nicer:
15358 # || die "no home directory for user $<";
15360 # The logic here follows the logic in set_logical_padding, which
15361 # will add the padding in the second line to improve alignment.
15363 my ( $self, $ri_left, $ri_right ) = @_;
15364 my $nmax = @{$ri_right} - 1;
15365 return unless ( $nmax >= 2 );
15367 # scan the left ends of first two lines
15370 for my $n ( 1 .. 2 ) {
15371 my $il = $ri_left->[$n];
15372 my $typel = $types_to_go[$il];
15373 my $tokenl = $tokens_to_go[$il];
15375 my $has_leading_op = ( $tokenl =~ /^\w/ )
15376 ? $is_chain_operator{$tokenl} # + - * / : ? && ||
15377 : $is_chain_operator{$typel}; # and, or
15378 return unless ($has_leading_op);
15381 unless ( $tokenl eq $tokbeg
15382 && $nesting_depth_to_go[$il] eq $depth_beg );
15385 $depth_beg = $nesting_depth_to_go[$il];
15388 # now look for any interior tokens of the same types
15389 my $il = $ri_left->[0];
15390 my $ir = $ri_right->[0];
15392 # now make a list of all new break points
15394 for ( my $i = $ir - 1 ; $i > $il ; $i-- ) {
15395 my $type = $types_to_go[$i];
15396 if ( $is_assignment{$type}
15397 && $nesting_depth_to_go[$i] eq $depth_beg )
15399 if ( $want_break_before{$type} ) {
15400 push @insert_list, $i - 1;
15403 push @insert_list, $i;
15408 # Break after a 'return' followed by a chain of operators
15409 # return ( $^O !~ /win32|dos/i )
15410 # && ( $^O ne 'VMS' )
15411 # && ( $^O ne 'OS2' )
15412 # && ( $^O ne 'MacOS' );
15415 # ( $^O !~ /win32|dos/i )
15416 # && ( $^O ne 'VMS' )
15417 # && ( $^O ne 'OS2' )
15418 # && ( $^O ne 'MacOS' );
15420 if ( $types_to_go[$i] eq 'k'
15421 && $tokens_to_go[$i] eq 'return'
15423 && $nesting_depth_to_go[$i] eq $depth_beg )
15425 push @insert_list, $i;
15428 return unless (@insert_list);
15430 # One final check...
15431 # scan second and third lines and be sure there are no assignments
15432 # we want to avoid breaking at an = to make something like this:
15434 # $html_icons{"$type-$state"}
15435 # or $icon = $html_icons{$type}
15436 # or $icon = $html_icons{$state} )
15437 for my $n ( 1 .. 2 ) {
15438 my $il = $ri_left->[$n];
15439 my $ir = $ri_right->[$n];
15440 foreach my $i ( $il + 1 .. $ir ) {
15441 my $type = $types_to_go[$i];
15443 if ( $is_assignment{$type}
15444 && $nesting_depth_to_go[$i] eq $depth_beg );
15448 # ok, insert any new break point
15449 if (@insert_list) {
15450 $self->insert_additional_breaks( \@insert_list, $ri_left, $ri_right );
15455 { ## begin closure recombine_breakpoints
15457 # This routine is called once per batch to see if it would be better
15458 # to combine some of the lines into which the batch has been broken.
15470 @is_amp_amp{@q} = (1) x scalar(@q);
15473 @is_ternary{@q} = (1) x scalar(@q);
15475 @q = qw( + - * / );
15476 @is_math_op{@q} = (1) x scalar(@q);
15479 @is_plus_minus{@q} = (1) x scalar(@q);
15482 @is_mult_div{@q} = (1) x scalar(@q);
15485 sub Debug_dump_breakpoints {
15487 # Debug routine to dump current breakpoints...not normally called
15488 # We are given indexes to the current lines:
15489 # $ri_beg = ref to array of BEGinning indexes of each line
15490 # $ri_end = ref to array of ENDing indexes of each line
15491 my ( $self, $ri_beg, $ri_end, $msg ) = @_;
15492 print STDERR "----Dumping breakpoints from: $msg----\n";
15493 for my $n ( 0 .. @{$ri_end} - 1 ) {
15494 my $ibeg = $ri_beg->[$n];
15495 my $iend = $ri_end->[$n];
15497 foreach my $i ( $ibeg .. $iend ) {
15498 $text .= $tokens_to_go[$i];
15500 print STDERR "$n ($ibeg:$iend) $text\n";
15502 print STDERR "----\n";
15506 sub delete_one_line_semicolons {
15508 my ( $self, $ri_beg, $ri_end ) = @_;
15509 my $rLL = $self->[_rLL_];
15510 my $K_opening_container = $self->[_K_opening_container_];
15512 # Walk down the lines of this batch and delete any semicolons
15513 # terminating one-line blocks;
15514 my $nmax = @{$ri_end} - 1;
15516 foreach my $n ( 0 .. $nmax ) {
15517 my $i_beg = $ri_beg->[$n];
15518 my $i_e = $ri_end->[$n];
15519 my $K_beg = $K_to_go[$i_beg];
15520 my $K_e = $K_to_go[$i_e];
15522 my $type_end = $rLL->[$K_end]->[_TYPE_];
15523 if ( $type_end eq '#' ) {
15524 $K_end = $self->K_previous_nonblank($K_end);
15525 if ( defined($K_end) ) { $type_end = $rLL->[$K_end]->[_TYPE_]; }
15528 # we are looking for a line ending in closing brace
15530 unless ( $type_end eq '}' && $rLL->[$K_end]->[_TOKEN_] eq '}' );
15532 # ...and preceded by a semicolon on the same line
15533 my $K_semicolon = $self->K_previous_nonblank($K_end);
15534 next unless defined($K_semicolon);
15535 my $i_semicolon = $i_beg + ( $K_semicolon - $K_beg );
15536 next if ( $i_semicolon <= $i_beg );
15537 next unless ( $rLL->[$K_semicolon]->[_TYPE_] eq ';' );
15539 # Safety check - shouldn't happen - not critical
15540 # This is not worth throwing a Fault, except in DEVEL_MODE
15541 if ( $types_to_go[$i_semicolon] ne ';' ) {
15543 && Fault("unexpected type looking for semicolon");
15547 # ... with the corresponding opening brace on the same line
15548 my $type_sequence = $rLL->[$K_end]->[_TYPE_SEQUENCE_];
15549 my $K_opening = $K_opening_container->{$type_sequence};
15550 next unless ( defined($K_opening) );
15551 my $i_opening = $i_beg + ( $K_opening - $K_beg );
15552 next if ( $i_opening < $i_beg );
15554 # ... and only one semicolon between these braces
15555 my $semicolon_count = 0;
15556 foreach my $K ( $K_opening + 1 .. $K_semicolon - 1 ) {
15557 if ( $rLL->[$K]->[_TYPE_] eq ';' ) {
15558 $semicolon_count++;
15562 next if ($semicolon_count);
15564 # ...ok, then make the semicolon invisible
15565 my $len = $token_lengths_to_go[$i_semicolon];
15566 $tokens_to_go[$i_semicolon] = "";
15567 $token_lengths_to_go[$i_semicolon] = 0;
15568 $rLL->[$K_semicolon]->[_TOKEN_] = "";
15569 $rLL->[$K_semicolon]->[_TOKEN_LENGTH_] = 0;
15570 foreach ( $i_semicolon .. $max_index_to_go ) {
15571 $summed_lengths_to_go[ $_ + 1 ] -= $len;
15577 use constant DEBUG_RECOMBINE => 0;
15579 sub recombine_breakpoints {
15581 # We are given indexes to the current lines:
15582 # $ri_beg = ref to array of BEGinning indexes of each line
15583 # $ri_end = ref to array of ENDing indexes of each line
15584 my ( $self, $ri_beg, $ri_end ) = @_;
15586 # sub break_long_lines is very liberal in setting line breaks
15587 # for long lines, always setting breaks at good breakpoints, even
15588 # when that creates small lines. Sometimes small line fragments
15589 # are produced which would look better if they were combined.
15590 # That's the task of this routine.
15592 # do nothing under extreme stress
15593 return if ( $stress_level_alpha < 1 && !DEVEL_MODE );
15595 my $rK_weld_right = $self->[_rK_weld_right_];
15596 my $rK_weld_left = $self->[_rK_weld_left_];
15598 my $nmax = @{$ri_end} - 1;
15599 return if ( $nmax <= 0 );
15601 my $nmax_start = $nmax;
15603 # Make a list of all good joining tokens between the lines
15607 # Break the total batch sub-sections with lengths short enough to
15609 my $rsections = [];
15612 my $nmax_section = 0;
15613 foreach my $nn ( 1 .. $nmax ) {
15614 my $ibeg_1 = $ri_beg->[ $nn - 1 ];
15615 my $iend_1 = $ri_end->[ $nn - 1 ];
15616 my $iend_2 = $ri_end->[$nn];
15617 my $ibeg_2 = $ri_beg->[$nn];
15619 # Define the joint variable
15620 my ( $itok, $itokp, $itokm );
15621 foreach my $itest ( $iend_1, $ibeg_2 ) {
15622 my $type = $types_to_go[$itest];
15623 if ( $is_math_op{$type}
15624 || $is_amp_amp{$type}
15625 || $is_assignment{$type}
15631 $joint[$nn] = [$itok];
15633 # Update the section list
15634 my $excess = $self->excess_line_length( $ibeg_1, $iend_2, 1 );
15638 # The number 5 here is an arbitrary small number intended
15639 # to keep most small matches in one sub-section.
15640 || ( defined($nend) && ( $nn < 5 || $nmax - $nn < 5 ) )
15646 if ( defined($nend) ) {
15647 push @{$rsections}, [ $nbeg, $nend ];
15648 my $num = $nend - $nbeg;
15649 if ( $num > $nmax_section ) { $nmax_section = $num }
15656 if ( defined($nend) ) {
15657 push @{$rsections}, [ $nbeg, $nend ];
15658 my $num = $nend - $nbeg;
15659 if ( $num > $nmax_section ) { $nmax_section = $num }
15662 my $num_sections = @{$rsections};
15664 # This is potentially an O(n-squared) loop, but not critical, so we can
15665 # put a finite limit on the total number of iterations. This is
15666 # suggested by issue c118, which pushed about 5.e5 lines through here
15667 # and caused an excessive run time.
15669 # Three lines of defence have been put in place to prevent excessive
15671 # 1. do nothing if formatting under stress (c118 was under stress)
15672 # 2. break into small sub-sections to decrease the maximum n-squared.
15673 # 3. put a finite limit on the number of iterations.
15675 # Testing shows that most batches only require one or two iterations.
15676 # A very large batch which is broken into sub-sections can require one
15677 # iteration per section. This suggests the limit here, which allows
15678 # up to 10 iterations plus one pass per sub-section.
15681 10 + int( 1000 / ( 1 + $nmax_section ) ) + $num_sections;
15683 if ( DEBUG_RECOMBINE > 1 ) {
15685 print STDERR "-----\n$num_sections sections found for nmax=$nmax\n";
15686 foreach my $sect ( @{$rsections} ) {
15687 my ( $nbeg, $nend ) = @{$sect};
15688 my $num = $nend - $nbeg;
15689 if ( $num > $max ) { $max = $num }
15690 print STDERR "$nbeg $nend\n";
15692 print STDERR "max size=$max of $nmax lines\n";
15695 # Loop over all sub-sections. Note that we have to work backwards
15696 # from the end of the batch since the sections use original line
15697 # numbers, and the line numbers change as we go.
15698 while ( my $section = pop @{$rsections} ) {
15699 my ( $nbeg, $nend ) = @{$section};
15701 # number of ending lines to leave untouched in this pass
15702 $nmax = @{$ri_end} - 1;
15703 my $num_freeze = $nmax - $nend;
15705 my $more_to_do = 1;
15707 # We keep looping over all of the lines of this batch
15708 # until there are no more possible recombinations
15709 my $nmax_last = $nmax + 1;
15712 while ($more_to_do) {
15714 # Safety check for excess total iterations
15716 if ( $it_count > $it_count_max ) {
15722 my $nmax = @{$ri_end} - 1;
15724 # Safety check for infinite loop: the line count must decrease
15725 unless ( $nmax < $nmax_last ) {
15727 # Shouldn't happen because splice below decreases nmax on
15728 # each iteration. An error can only be due to a recent
15729 # programming change. We better stop here.
15732 "Program bug-infinite loop in recombine breakpoints\n"
15738 $nmax_last = $nmax;
15740 my $skip_Section_3;
15741 my $leading_amp_count = 0;
15742 my $this_line_is_semicolon_terminated;
15744 # loop over all remaining lines in this batch
15745 my $nstop = $nmax - $num_freeze;
15746 for my $iter ( $nbeg + 1 .. $nstop ) {
15748 # alternating sweep direction gives symmetric results
15749 # for recombining lines which exceed the line length
15750 # such as eval {{{{.... }}}}
15752 if ($reverse) { $n = $nbeg + 1 + $nstop - $iter; }
15753 else { $n = $iter }
15755 #----------------------------------------------------------
15756 # If we join the current pair of lines,
15757 # line $n-1 will become the left part of the joined line
15758 # line $n will become the right part of the joined line
15760 # Here are Indexes of the endpoint tokens of the two lines:
15762 # -----line $n-1--- | -----line $n-----
15763 # $ibeg_1 $iend_1 | $ibeg_2 $iend_2
15766 # We want to decide if we should remove the line break
15767 # between the tokens at $iend_1 and $ibeg_2
15769 # We will apply a number of ad-hoc tests to see if joining
15770 # here will look ok. The code will just issue a 'next'
15771 # command if the join doesn't look good. If we get through
15772 # the gauntlet of tests, the lines will be recombined.
15773 #----------------------------------------------------------
15775 # beginning and ending tokens of the lines we are working on
15776 my $ibeg_1 = $ri_beg->[ $n - 1 ];
15777 my $iend_1 = $ri_end->[ $n - 1 ];
15778 my $iend_2 = $ri_end->[$n];
15779 my $ibeg_2 = $ri_beg->[$n];
15780 my $ibeg_nmax = $ri_beg->[$nmax];
15782 # combined line cannot be too long
15784 $self->excess_line_length( $ibeg_1, $iend_2, 1 );
15785 next if ( $excess > 0 );
15787 my $type_iend_1 = $types_to_go[$iend_1];
15788 my $type_iend_2 = $types_to_go[$iend_2];
15789 my $type_ibeg_1 = $types_to_go[$ibeg_1];
15790 my $type_ibeg_2 = $types_to_go[$ibeg_2];
15792 # terminal token of line 2 if any side comment is ignored:
15793 my $iend_2t = $iend_2;
15794 my $type_iend_2t = $type_iend_2;
15796 # some beginning indexes of other lines, which may not exist
15797 my $ibeg_0 = $n > 1 ? $ri_beg->[ $n - 2 ] : -1;
15798 my $ibeg_3 = $n < $nmax ? $ri_beg->[ $n + 1 ] : -1;
15799 my $ibeg_4 = $n + 2 <= $nmax ? $ri_beg->[ $n + 2 ] : -1;
15803 #my $depth_increase=( $nesting_depth_to_go[$ibeg_2] -
15804 # $nesting_depth_to_go[$ibeg_1] );
15806 DEBUG_RECOMBINE > 1 && do {
15808 "RECOMBINE: n=$n imid=$iend_1 if=$ibeg_1 type=$type_ibeg_1 =$tokens_to_go[$ibeg_1] next_type=$type_ibeg_2 next_tok=$tokens_to_go[$ibeg_2]\n";
15811 # If line $n is the last line, we set some flags and
15812 # do any special checks for it
15813 if ( $n == $nmax ) {
15815 # a terminal '{' should stay where it is
15816 # unless preceded by a fat comma
15817 next if ( $type_ibeg_2 eq '{' && $type_iend_1 ne '=>' );
15819 if ( $type_iend_2 eq '#'
15820 && $iend_2 - $ibeg_2 >= 2
15821 && $types_to_go[ $iend_2 - 1 ] eq 'b' )
15823 $iend_2t = $iend_2 - 2;
15824 $type_iend_2t = $types_to_go[$iend_2t];
15827 $this_line_is_semicolon_terminated =
15828 $type_iend_2t eq ';';
15831 #----------------------------------------------------------
15832 # Recombine Section 0:
15833 # Examine the special token joining this line pair, if any.
15834 # Put as many tests in this section to avoid duplicate code
15835 # and to make formatting independent of whether breaks are
15836 # to the left or right of an operator.
15837 #----------------------------------------------------------
15839 my ($itok) = @{ $joint[$n] };
15842 my $type = $types_to_go[$itok];
15844 if ( $type eq ':' ) {
15846 # do not join at a colon unless it disobeys the
15848 if ( $itok eq $iend_1 ) {
15849 next unless $want_break_before{$type};
15852 $leading_amp_count++;
15853 next if $want_break_before{$type};
15857 # handle math operators + - * /
15858 elsif ( $is_math_op{$type} ) {
15860 # Combine these lines if this line is a single
15861 # number, or if it is a short term with same
15862 # operator as the previous line. For example, in
15863 # the following code we will combine all of the
15864 # short terms $A, $B, $C, $D, $E, $F, together
15865 # instead of leaving them one per line:
15867 # $A * $B * $C * $D * $E * $F *
15868 # ( 2. * $eps * $sigma * $area ) *
15869 # ( 1. / $tcold**3 - 1. / $thot**3 );
15871 # This can be important in math-intensive code.
15875 my $itokp = min( $inext_to_go[$itok], $iend_2 );
15876 my $itokpp = min( $inext_to_go[$itokp], $iend_2 );
15877 my $itokm = max( $iprev_to_go[$itok], $ibeg_1 );
15878 my $itokmm = max( $iprev_to_go[$itokm], $ibeg_1 );
15880 # check for a number on the right
15881 if ( $types_to_go[$itokp] eq 'n' ) {
15883 # ok if nothing else on right
15884 if ( $itokp == $iend_2 ) {
15889 # look one more token to right..
15890 # okay if math operator or some termination
15892 ( ( $itokpp == $iend_2 )
15893 && $is_math_op{ $types_to_go[$itokpp]
15895 || $types_to_go[$itokpp] =~ /^[#,;]$/;
15899 # check for a number on the left
15900 if ( !$good_combo && $types_to_go[$itokm] eq 'n' ) {
15902 # okay if nothing else to left
15903 if ( $itokm == $ibeg_1 ) {
15907 # otherwise look one more token to left
15910 # okay if math operator, comma, or assignment
15911 $good_combo = ( $itokmm == $ibeg_1 )
15912 && ( $is_math_op{ $types_to_go[$itokmm] }
15913 || $types_to_go[$itokmm] =~ /^[,]$/
15914 || $is_assignment{ $types_to_go[$itokmm]
15919 # look for a single short token either side of the
15921 if ( !$good_combo ) {
15923 # Slight adjustment factor to make results
15924 # independent of break before or after operator
15925 # in long summed lists. (An operator and a
15926 # space make two spaces).
15927 my $two = ( $itok eq $iend_1 ) ? 2 : 0;
15931 # numbers or id's on both sides of this joint
15932 $types_to_go[$itokp] =~ /^[in]$/
15933 && $types_to_go[$itokm] =~ /^[in]$/
15935 # one of the two lines must be short:
15938 # no more than 2 nonblank tokens right
15943 && token_sequence_length(
15946 $rOpts_short_concatenation_item_length
15949 # no more than 2 nonblank tokens left of
15954 && token_sequence_length(
15957 $rOpts_short_concatenation_item_length
15962 # keep pure terms; don't mix +- with */
15964 $is_plus_minus{$type}
15965 && ( $is_mult_div{ $types_to_go[$itokmm] }
15966 || $is_mult_div{ $types_to_go[$itokpp] }
15970 $is_mult_div{$type}
15971 && ( $is_plus_minus{ $types_to_go[$itokmm] }
15972 || $is_plus_minus{ $types_to_go[$itokpp]
15979 # it is also good to combine if we can reduce to 2
15981 if ( !$good_combo ) {
15983 # index on other line where same token would be
15986 ( $itok == $iend_1 ) ? $iend_2 : $ibeg_1;
15991 && $types_to_go[$iother] ne $type;
15994 next unless ($good_combo);
15998 elsif ( $is_amp_amp{$type} ) {
16002 elsif ( $is_assignment{$type} ) {
16004 } ## end assignment
16007 #----------------------------------------------------------
16008 # Recombine Section 1:
16009 # Join welded nested containers immediately
16010 #----------------------------------------------------------
16014 && ( $type_sequence_to_go[$iend_1]
16015 && defined( $rK_weld_right->{ $K_to_go[$iend_1] } )
16016 || $type_sequence_to_go[$ibeg_2]
16017 && defined( $rK_weld_left->{ $K_to_go[$ibeg_2] } ) )
16026 #----------------------------------------------------------
16027 # Recombine Section 2:
16028 # Examine token at $iend_1 (right end of first line of pair)
16029 #----------------------------------------------------------
16031 # an isolated '}' may join with a ';' terminated segment
16032 if ( $type_iend_1 eq '}' ) {
16034 # Check for cases where combining a semicolon terminated
16035 # statement with a previous isolated closing paren will
16036 # allow the combined line to be outdented. This is
16037 # generally a good move. For example, we can join up
16038 # the last two lines here:
16040 # $dev, $ino, $mode, $nlink, $uid, $gid, $rdev,
16041 # $size, $atime, $mtime, $ctime, $blksize, $blocks
16047 # $dev, $ino, $mode, $nlink, $uid, $gid, $rdev,
16048 # $size, $atime, $mtime, $ctime, $blksize, $blocks
16051 # which makes the parens line up.
16053 # Another example, from Joe Matarazzo, probably looks best
16054 # with the 'or' clause appended to the trailing paren:
16055 # $self->some_method(
16058 # ) or die "Some_method didn't work";
16060 # But we do not want to do this for something like the -lp
16061 # option where the paren is not outdentable because the
16062 # trailing clause will be far to the right.
16064 # The logic here is synchronized with the logic in sub
16065 # sub final_indentation_adjustment, which actually does
16068 $skip_Section_3 ||= $this_line_is_semicolon_terminated
16070 # only one token on last line
16071 && $ibeg_1 == $iend_1
16073 # must be structural paren
16074 && $tokens_to_go[$iend_1] eq ')'
16076 # style must allow outdenting,
16077 && !$closing_token_indentation{')'}
16079 # only leading '&&', '||', and ':' if no others seen
16080 # (but note: our count made below could be wrong
16081 # due to intervening comments)
16082 && ( $leading_amp_count == 0
16083 || $type_ibeg_2 !~ /^(:|\&\&|\|\|)$/ )
16085 # but leading colons probably line up with a
16086 # previous colon or question (count could be wrong).
16087 && $type_ibeg_2 ne ':'
16089 # only one step in depth allowed. this line must not
16090 # begin with a ')' itself.
16091 && ( $nesting_depth_to_go[$iend_1] ==
16092 $nesting_depth_to_go[$iend_2] + 1 );
16094 # YVES patch 2 of 2:
16095 # Allow cuddled eval chains, like this:
16102 # This patch works together with a patch in
16103 # setting adjusted indentation (where the closing eval
16104 # brace is outdented if possible).
16105 # The problem is that an 'eval' block has continuation
16106 # indentation and it looks better to undo it in some
16107 # cases. If we do not use this patch we would get:
16115 # The alternative, for uncuddled style, is to create
16116 # a patch in final_indentation_adjustment which undoes
16117 # the indentation of a leading line like 'or do {'.
16118 # This doesn't work well with -icb through
16120 $block_type_to_go[$iend_1] eq 'eval'
16121 && !ref( $leading_spaces_to_go[$iend_1] )
16122 && !$rOpts_indent_closing_brace
16123 && $tokens_to_go[$iend_2] eq '{'
16125 ( $type_ibeg_2 =~ /^(\&\&|\|\|)$/ )
16126 || ( $type_ibeg_2 eq 'k'
16127 && $is_and_or{ $tokens_to_go[$ibeg_2] } )
16128 || $is_if_unless{ $tokens_to_go[$ibeg_2] }
16132 $skip_Section_3 ||= 1;
16139 # handle '.' and '?' specially below
16140 || ( $type_ibeg_2 =~ /^[\.\?]$/ )
16144 elsif ( $type_iend_1 eq '{' ) {
16147 # honor breaks at opening brace
16148 # Added to prevent recombining something like this:
16149 # } || eval { package main;
16150 next if $forced_breakpoint_to_go[$iend_1];
16153 # do not recombine lines with ending &&, ||,
16154 elsif ( $is_amp_amp{$type_iend_1} ) {
16155 next unless $want_break_before{$type_iend_1};
16158 # Identify and recombine a broken ?/: chain
16159 elsif ( $type_iend_1 eq '?' ) {
16161 # Do not recombine different levels
16164 $levels_to_go[$ibeg_1] ne $levels_to_go[$ibeg_2] );
16166 # do not recombine unless next line ends in :
16167 next unless $type_iend_2 eq ':';
16170 # for lines ending in a comma...
16171 elsif ( $type_iend_1 eq ',' ) {
16173 # Do not recombine at comma which is following the
16175 # TODO: might be best to make a special flag
16176 next if ( $old_breakpoint_to_go[$iend_1] );
16178 # An isolated '},' may join with an identifier + ';'
16179 # This is useful for the class of a 'bless' statement
16181 if ( $type_ibeg_1 eq '}'
16182 && $type_ibeg_2 eq 'i' )
16185 unless ( ( $ibeg_1 == ( $iend_1 - 1 ) )
16186 && ( $iend_2 == ( $ibeg_2 + 1 ) )
16187 && $this_line_is_semicolon_terminated );
16189 # override breakpoint
16190 $forced_breakpoint_to_go[$iend_1] = 0;
16196 # do not recombine after a comma unless this will
16197 # leave just 1 more line
16198 next unless ( $n + 1 >= $nmax );
16200 # do not recombine if there is a change in
16201 # indentation depth
16203 if ( $levels_to_go[$iend_1] !=
16204 $levels_to_go[$iend_2] );
16206 # do not recombine a "complex expression" after a
16207 # comma. "complex" means no parens.
16209 foreach my $ii ( $ibeg_2 .. $iend_2 ) {
16210 if ( $tokens_to_go[$ii] eq '(' ) {
16215 next if $saw_paren;
16220 elsif ( $type_iend_1 eq '(' ) {
16222 # No longer doing this
16225 elsif ( $type_iend_1 eq ')' ) {
16227 # No longer doing this
16230 # keep a terminal for-semicolon
16231 elsif ( $type_iend_1 eq 'f' ) {
16235 # if '=' at end of line ...
16236 elsif ( $is_assignment{$type_iend_1} ) {
16238 # keep break after = if it was in input stream
16239 # this helps prevent 'blinkers'
16242 $old_breakpoint_to_go[$iend_1]
16244 # don't strand an isolated '='
16245 && $iend_1 != $ibeg_1
16248 my $is_short_quote =
16249 ( $type_ibeg_2 eq 'Q'
16250 && $ibeg_2 == $iend_2
16251 && token_sequence_length( $ibeg_2, $ibeg_2 ) <
16252 $rOpts_short_concatenation_item_length );
16254 $type_ibeg_1 eq '?' && ( $ibeg_3 >= 0
16255 && $types_to_go[$ibeg_3] eq ':' )
16258 # always join an isolated '=', a short quote, or if this
16259 # will put ?/: at start of adjacent lines
16260 if ( $ibeg_1 != $iend_1
16261 && !$is_short_quote
16268 # unless we can reduce this to two lines
16271 # or three lines, the last with a leading
16273 || ( $nmax == $n + 2
16274 && $types_to_go[$ibeg_nmax] eq ';' )
16276 # or the next line ends with a here doc
16277 || $type_iend_2 eq 'h'
16279 # or the next line ends in an open paren or
16280 # brace and the break hasn't been forced
16282 || ( !$forced_breakpoint_to_go[$iend_1]
16283 && $type_iend_2 eq '{' )
16286 # do not recombine if the two lines might align
16287 # well this is a very approximate test for this
16290 # RT#127633 - the leading tokens are not
16292 ( $type_ibeg_2 ne $tokens_to_go[$ibeg_2] )
16294 # or they are different
16297 $types_to_go[$ibeg_3] )
16303 # Recombine if we can make two lines
16306 # -lp users often prefer this:
16307 # my $title = function($env, $env, $sysarea,
16308 # "bubba Borrower Entry");
16309 # so we will recombine if -lp is used we have
16313 && ref( $leading_spaces_to_go[$ibeg_3] )
16314 && $type_iend_2 eq ','
16319 # otherwise, scan the rhs line up to last token
16320 # for complexity. Note that we are not
16321 # counting the last token in case it is an
16324 my $depth = $nesting_depth_to_go[$ibeg_2];
16325 foreach my $i ( $ibeg_2 + 1 .. $iend_2 - 1 ) {
16326 if ( $nesting_depth_to_go[$i] != $depth ) {
16328 last if ( $tv > 1 );
16330 $depth = $nesting_depth_to_go[$i];
16333 # ok to recombine if no level changes before
16337 # otherwise, do not recombine if more than
16338 # two level changes.
16339 next if ( $tv > 1 );
16341 # check total complexity of the two
16342 # adjacent lines that will occur if we do
16346 ? $ri_end->[ $n + 1 ]
16348 foreach my $i ( $iend_2 .. $istop ) {
16350 $nesting_depth_to_go[$i] != $depth )
16353 last if ( $tv > 2 );
16355 $depth = $nesting_depth_to_go[$i];
16358 # do not recombine if total is more than 2
16360 next if ( $tv > 2 );
16365 unless ( $tokens_to_go[$ibeg_2] =~ /^[\{\(\[]$/ ) {
16366 $forced_breakpoint_to_go[$iend_1] = 0;
16371 elsif ( $type_iend_1 eq 'k' ) {
16373 # make major control keywords stand out
16378 #/^(last|next|redo|return)$/
16379 $is_last_next_redo_return{ $tokens_to_go[$iend_1] }
16381 # but only if followed by multiple lines
16385 if ( $is_and_or{ $tokens_to_go[$iend_1] } ) {
16387 unless $want_break_before{ $tokens_to_go[$iend_1]
16392 #----------------------------------------------------------
16393 # Recombine Section 3:
16394 # Examine token at $ibeg_2 (left end of second line of pair)
16395 #----------------------------------------------------------
16397 # join lines identified above as capable of
16398 # causing an outdented line with leading closing paren
16399 # Note that we are skipping the rest of this section
16400 # and the rest of the loop to do the join
16401 if ($skip_Section_3) {
16402 $forced_breakpoint_to_go[$iend_1] = 0;
16407 # handle lines with leading &&, ||
16408 elsif ( $is_amp_amp{$type_ibeg_2} ) {
16410 $leading_amp_count++;
16412 # ok to recombine if it follows a ? or :
16413 # and is followed by an open paren..
16415 ( $is_ternary{$type_ibeg_1}
16416 && $tokens_to_go[$iend_2] eq '(' )
16418 # or is followed by a ? or : at same depth
16420 # We are looking for something like this. We can
16421 # recombine the && line with the line above to make the
16422 # structure more clear:
16424 # exists $G->{Attr}->{V}
16425 # && exists $G->{Attr}->{V}->{$u}
16426 # ? %{ $G->{Attr}->{V}->{$u} }
16429 # We should probably leave something like this alone:
16431 # exists $G->{Attr}->{E}
16432 # && exists $G->{Attr}->{E}->{$u}
16433 # && exists $G->{Attr}->{E}->{$u}->{$v}
16434 # ? %{ $G->{Attr}->{E}->{$u}->{$v} }
16436 # so that we either have all of the &&'s (or ||'s)
16437 # on one line, as in the first example, or break at
16438 # each one as in the second example. However, it
16439 # sometimes makes things worse to check for this because
16440 # it prevents multiple recombinations. So this is not done.
16442 && $is_ternary{ $types_to_go[$ibeg_3] }
16443 && $nesting_depth_to_go[$ibeg_3] ==
16444 $nesting_depth_to_go[$ibeg_2] );
16446 # Combine a trailing && term with an || term: fix for
16447 # c060 This is rare but can happen.
16450 && $type_ibeg_2 eq '&&'
16451 && $type_ibeg_1 eq '||'
16452 && $nesting_depth_to_go[$ibeg_2] ==
16453 $nesting_depth_to_go[$ibeg_1] );
16455 next if !$ok && $want_break_before{$type_ibeg_2};
16456 $forced_breakpoint_to_go[$iend_1] = 0;
16458 # tweak the bond strength to give this joint priority
16463 # Identify and recombine a broken ?/: chain
16464 elsif ( $type_ibeg_2 eq '?' ) {
16466 # Do not recombine different levels
16467 my $lev = $levels_to_go[$ibeg_2];
16468 next if ( $lev ne $levels_to_go[$ibeg_1] );
16470 # Do not recombine a '?' if either next line or
16471 # previous line does not start with a ':'. The reasons
16472 # are that (1) no alignment of the ? will be possible
16473 # and (2) the expression is somewhat complex, so the
16474 # '?' is harder to see in the interior of the line.
16475 my $follows_colon = $ibeg_1 >= 0 && $type_ibeg_1 eq ':';
16476 my $precedes_colon =
16477 $ibeg_3 >= 0 && $types_to_go[$ibeg_3] eq ':';
16478 next unless ( $follows_colon || $precedes_colon );
16480 # we will always combining a ? line following a : line
16481 if ( !$follows_colon ) {
16483 # ...otherwise recombine only if it looks like a
16484 # chain. we will just look at a few nearby lines
16485 # to see if this looks like a chain.
16486 my $local_count = 0;
16488 my $ii ( $ibeg_0, $ibeg_1, $ibeg_3, $ibeg_4 )
16492 && $types_to_go[$ii] eq ':'
16493 && $levels_to_go[$ii] == $lev;
16495 next unless ( $local_count > 1 );
16497 $forced_breakpoint_to_go[$iend_1] = 0;
16500 # do not recombine lines with leading '.'
16501 elsif ( $type_ibeg_2 eq '.' ) {
16502 my $i_next_nonblank =
16503 min( $inext_to_go[$ibeg_2], $iend_2 );
16507 # ... unless there is just one and we can reduce
16508 # this to two lines if we do. For example, this
16512 # '($dummy, $pat) = &get_next_tex_cmd;' . '$args .= $pat;'
16514 # looks better than this:
16515 # $bodyA .= '($dummy, $pat) = &get_next_tex_cmd;'
16516 # . '$args .= $pat;'
16521 && $type_ibeg_1 ne $type_ibeg_2
16524 # ... or this would strand a short quote , like this
16525 # . "some long quote"
16528 || ( $types_to_go[$i_next_nonblank] eq 'Q'
16529 && $i_next_nonblank >= $iend_2 - 1
16530 && $token_lengths_to_go[$i_next_nonblank] <
16531 $rOpts_short_concatenation_item_length )
16535 # handle leading keyword..
16536 elsif ( $type_ibeg_2 eq 'k' ) {
16538 # handle leading "or"
16539 if ( $tokens_to_go[$ibeg_2] eq 'or' ) {
16542 $this_line_is_semicolon_terminated
16544 $type_ibeg_1 eq '}'
16547 # following 'if' or 'unless' or 'or'
16548 $type_ibeg_1 eq 'k'
16549 && $is_if_unless{ $tokens_to_go[$ibeg_1]
16552 # important: only combine a very simple
16553 # or statement because the step below
16554 # may have combined a trailing 'and'
16555 # with this or, and we do not want to
16556 # then combine everything together
16557 && ( $iend_2 - $ibeg_2 <= 7 )
16563 $forced_breakpoint_to_go[$iend_1] = 0
16564 unless ( $old_breakpoint_to_go[$iend_1] );
16567 # handle leading 'and' and 'xor'
16568 elsif ($tokens_to_go[$ibeg_2] eq 'and'
16569 || $tokens_to_go[$ibeg_2] eq 'xor' )
16572 # Decide if we will combine a single terminal 'and'
16573 # after an 'if' or 'unless'.
16575 # This looks best with the 'and' on the same
16576 # line as the 'if':
16579 # if $seconds and $nu < 2;
16581 # But this looks better as shown:
16584 # if !$this->{Parents}{$_}
16585 # or $this->{Parents}{$_} eq $_;
16589 $this_line_is_semicolon_terminated
16592 # following 'if' or 'unless' or 'or'
16593 $type_ibeg_1 eq 'k'
16594 && ( $is_if_unless{ $tokens_to_go[$ibeg_1] }
16595 || $tokens_to_go[$ibeg_1] eq 'or' )
16600 # handle leading "if" and "unless"
16601 elsif ( $is_if_unless{ $tokens_to_go[$ibeg_2] } ) {
16603 # Combine something like:
16605 # if ( $lang !~ /${l}$/i );
16607 # next if ( $lang !~ /${l}$/i );
16610 $this_line_is_semicolon_terminated
16612 # previous line begins with 'and' or 'or'
16613 && $type_ibeg_1 eq 'k'
16614 && $is_and_or{ $tokens_to_go[$ibeg_1] }
16619 # handle all other leading keywords
16622 # keywords look best at start of lines,
16623 # but combine things like "1 while"
16624 unless ( $is_assignment{$type_iend_1} ) {
16626 if ( ( $type_iend_1 ne 'k' )
16627 && ( $tokens_to_go[$ibeg_2] ne 'while' ) );
16632 # similar treatment of && and || as above for 'and' and
16633 # 'or': NOTE: This block of code is currently bypassed
16634 # because of a previous block but is retained for possible
16636 elsif ( $is_amp_amp{$type_ibeg_2} ) {
16638 # maybe looking at something like:
16639 # unless $TEXTONLY || $item =~ m%</?(hr>|p>|a|img)%i;
16643 $this_line_is_semicolon_terminated
16645 # previous line begins with an 'if' or 'unless'
16647 && $type_ibeg_1 eq 'k'
16648 && $is_if_unless{ $tokens_to_go[$ibeg_1] }
16653 # handle line with leading = or similar
16654 elsif ( $is_assignment{$type_ibeg_2} ) {
16655 next unless ( $n == 1 || $n == $nmax );
16656 next if ( $old_breakpoint_to_go[$iend_1] );
16660 # unless we can reduce this to two lines
16663 # or three lines, the last with a leading semicolon
16664 || ( $nmax == 3 && $types_to_go[$ibeg_nmax] eq ';' )
16666 # or the next line ends with a here doc
16667 || $type_iend_2 eq 'h'
16669 # or this is a short line ending in ;
16671 && $this_line_is_semicolon_terminated )
16673 $forced_breakpoint_to_go[$iend_1] = 0;
16676 #----------------------------------------------------------
16677 # Recombine Section 4:
16678 # Combine the lines if we arrive here and it is possible
16679 #----------------------------------------------------------
16681 # honor hard breakpoints
16682 next if ( $forced_breakpoint_to_go[$iend_1] > 0 );
16684 my $bs = $bond_strength_to_go[$iend_1] + $bs_tweak;
16686 # Require a few extra spaces before recombining lines if we are
16687 # at an old breakpoint unless this is a simple list or terminal
16688 # line. The goal is to avoid oscillating between two
16689 # quasi-stable end states. For example this snippet caused
16693 ## TText => "[" . ( join ',', map { "\"$_\"" } split "\n", $_ ) . "]"
16697 if ( $old_breakpoint_to_go[$iend_1]
16698 && !$this_line_is_semicolon_terminated
16701 && $type_iend_2 ne ',' );
16703 # do not recombine if we would skip in indentation levels
16704 if ( $n < $nmax ) {
16705 my $if_next = $ri_beg->[ $n + 1 ];
16708 $levels_to_go[$ibeg_1] < $levels_to_go[$ibeg_2]
16709 && $levels_to_go[$ibeg_2] < $levels_to_go[$if_next]
16711 # but an isolated 'if (' is undesirable
16714 && $iend_1 - $ibeg_1 <= 2
16715 && $type_ibeg_1 eq 'k'
16716 && $tokens_to_go[$ibeg_1] eq 'if'
16717 && $tokens_to_go[$iend_1] ne '('
16723 ## next if ( $bs >= NO_BREAK - 1 ); # removed for b1257
16725 # remember the pair with the greatest bond strength
16732 if ( $bs > $bs_best ) {
16739 # recombine the pair with the greatest bond strength
16741 splice @{$ri_beg}, $n_best, 1;
16742 splice @{$ri_end}, $n_best - 1, 1;
16743 splice @joint, $n_best, 1;
16745 # keep going if we are still making progress
16748 } # end iteration loop
16750 } # end loop over sections
16754 if (DEBUG_RECOMBINE) {
16755 my $nmax = @{$ri_end} - 1;
16757 "exiting recombine with $nmax lines, starting lines=$nmax_start, iterations=$it_count, max_it=$it_count_max numsec=$num_sections\n";
16761 } ## end closure recombine_breakpoints
16763 sub insert_final_ternary_breaks {
16765 my ( $self, $ri_left, $ri_right ) = @_;
16767 # Called once per batch to look for and do any final line breaks for
16768 # long ternary chains
16770 my $nmax = @{$ri_right} - 1;
16772 # scan the left and right end tokens of all lines
16774 my $i_first_colon = -1;
16775 for my $n ( 0 .. $nmax ) {
16776 my $il = $ri_left->[$n];
16777 my $ir = $ri_right->[$n];
16778 my $typel = $types_to_go[$il];
16779 my $typer = $types_to_go[$ir];
16780 return if ( $typel eq '?' );
16781 return if ( $typer eq '?' );
16782 if ( $typel eq ':' ) { $i_first_colon = $il; last; }
16783 elsif ( $typer eq ':' ) { $i_first_colon = $ir; last; }
16786 # For long ternary chains,
16787 # if the first : we see has its ? is in the interior
16788 # of a preceding line, then see if there are any good
16789 # breakpoints before the ?.
16790 if ( $i_first_colon > 0 ) {
16791 my $i_question = $mate_index_to_go[$i_first_colon];
16792 if ( $i_question > 0 ) {
16794 for ( my $ii = $i_question - 1 ; $ii >= 0 ; $ii -= 1 ) {
16795 my $token = $tokens_to_go[$ii];
16796 my $type = $types_to_go[$ii];
16798 # For now, a good break is either a comma or,
16799 # in a long chain, a 'return'.
16800 # Patch for RT #126633: added the $nmax>1 check to avoid
16801 # breaking after a return for a simple ternary. For longer
16802 # chains the break after return allows vertical alignment, so
16803 # it is still done. So perltidy -wba='?' will not break
16804 # immediately after the return in the following statement:
16806 # return 0 ? 'aaaaaaaaaaaaaaaaaaaaa' :
16807 # 'bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb';
16812 || $type eq 'k' && ( $nmax > 1 && $token eq 'return' )
16814 && $self->in_same_container_i( $ii, $i_question )
16817 push @insert_list, $ii;
16822 # insert any new break points
16823 if (@insert_list) {
16824 $self->insert_additional_breaks( \@insert_list, $ri_left,
16832 sub insert_breaks_before_list_opening_containers {
16834 my ( $self, $ri_left, $ri_right ) = @_;
16836 # This routine is called once per batch to implement the parameters
16837 # --break-before-hash-brace, etc.
16839 # Nothing to do if none of these parameters has been set
16840 return unless %break_before_container_types;
16842 my $nmax = @{$ri_right} - 1;
16843 return unless ( $nmax >= 0 );
16845 my $rLL = $self->[_rLL_];
16847 my $rbreak_before_container_by_seqno =
16848 $self->[_rbreak_before_container_by_seqno_];
16849 my $rK_weld_left = $self->[_rK_weld_left_];
16851 # scan the ends of all lines
16853 for my $n ( 0 .. $nmax ) {
16854 my $il = $ri_left->[$n];
16855 my $ir = $ri_right->[$n];
16856 next unless ( $ir > $il );
16857 my $Kl = $K_to_go[$il];
16858 my $Kr = $K_to_go[$ir];
16860 my $type_end = $rLL->[$Kr]->[_TYPE_];
16862 # Backup before any side comment
16863 if ( $type_end eq '#' ) {
16864 $Kend = $self->K_previous_nonblank($Kr);
16865 next unless defined($Kend);
16866 $type_end = $rLL->[$Kend]->[_TYPE_];
16869 # Backup to the start of any weld; fix for b1173.
16870 if ($total_weld_count) {
16871 my $Kend_test = $rK_weld_left->{$Kend};
16872 if ( defined($Kend_test) && $Kend_test > $Kl ) {
16873 $Kend = $Kend_test;
16874 $Kend_test = $rK_weld_left->{$Kend};
16877 # Do not break if we did not back up to the start of a weld
16878 # (shouldn't happen)
16879 next if ( defined($Kend_test) );
16882 my $token = $rLL->[$Kend]->[_TOKEN_];
16883 next unless ( $is_opening_token{$token} );
16884 next unless ( $Kl < $Kend - 1 );
16886 my $seqno = $rLL->[$Kend]->[_TYPE_SEQUENCE_];
16887 next unless ( defined($seqno) );
16889 # Use the flag which was previously set
16890 next unless ( $rbreak_before_container_by_seqno->{$seqno} );
16892 # Install a break before this opening token.
16893 my $Kbreak = $self->K_previous_nonblank($Kend);
16894 my $ibreak = $Kbreak - $Kl + $il;
16895 next if ( $ibreak < $il );
16896 next if ( $nobreak_to_go[$ibreak] );
16897 push @insert_list, $ibreak;
16900 # insert any new break points
16901 if (@insert_list) {
16902 $self->insert_additional_breaks( \@insert_list, $ri_left, $ri_right );
16907 sub note_added_semicolon {
16908 my ( $self, $line_number ) = @_;
16909 $self->[_last_added_semicolon_at_] = $line_number;
16910 if ( $self->[_added_semicolon_count_] == 0 ) {
16911 $self->[_first_added_semicolon_at_] = $line_number;
16913 $self->[_added_semicolon_count_]++;
16914 write_logfile_entry("Added ';' here\n");
16918 sub note_deleted_semicolon {
16919 my ( $self, $line_number ) = @_;
16920 $self->[_last_deleted_semicolon_at_] = $line_number;
16921 if ( $self->[_deleted_semicolon_count_] == 0 ) {
16922 $self->[_first_deleted_semicolon_at_] = $line_number;
16924 $self->[_deleted_semicolon_count_]++;
16925 write_logfile_entry("Deleted unnecessary ';' at line $line_number\n");
16929 sub note_embedded_tab {
16930 my ( $self, $line_number ) = @_;
16931 $self->[_embedded_tab_count_]++;
16932 $self->[_last_embedded_tab_at_] = $line_number;
16933 if ( !$self->[_first_embedded_tab_at_] ) {
16934 $self->[_first_embedded_tab_at_] = $line_number;
16937 if ( $self->[_embedded_tab_count_] <= MAX_NAG_MESSAGES ) {
16938 write_logfile_entry("Embedded tabs in quote or pattern\n");
16943 use constant DEBUG_CORRECT_LP => 0;
16945 sub correct_lp_indentation {
16947 # When the -lp option is used, we need to make a last pass through
16948 # each line to correct the indentation positions in case they differ
16949 # from the predictions. This is necessary because perltidy uses a
16950 # predictor/corrector method for aligning with opening parens. The
16951 # predictor is usually good, but sometimes stumbles. The corrector
16952 # tries to patch things up once the actual opening paren locations
16954 my ( $self, $ri_first, $ri_last ) = @_;
16955 my $K_opening_container = $self->[_K_opening_container_];
16956 my $K_closing_container = $self->[_K_closing_container_];
16957 my $do_not_pad = 0;
16959 # Note on flag '$do_not_pad':
16960 # We want to avoid a situation like this, where the aligner inserts
16961 # whitespace before the '=' to align it with a previous '=', because
16962 # otherwise the parens might become mis-aligned in a situation like
16963 # this, where the '=' has become aligned with the previous line,
16964 # pushing the opening '(' forward beyond where we want it.
16966 # $mkFloor::currentRoom = '';
16967 # $mkFloor::c_entry = $c->Entry(
16969 # -relief => 'sunken',
16973 # We leave it to the aligner to decide how to do this.
16975 # first remove continuation indentation if appropriate
16976 my $rLL = $self->[_rLL_];
16977 my $max_line = @{$ri_first} - 1;
16979 #---------------------------------------------------------------------------
16980 # PASS 1: reduce indentation if necessary at any long one-line blocks (c098)
16981 #---------------------------------------------------------------------------
16983 # The point is that sub 'starting_one_line_block' made one-line blocks based
16984 # on default indentation, not -lp indentation. So some of the one-line
16985 # blocks may be too long when given -lp indentation. We will fix that now
16986 # if possible, using the list of these closing block indexes.
16987 my $ri_starting_one_line_block =
16988 $self->[_this_batch_]->[_ri_starting_one_line_block_];
16989 if ( @{$ri_starting_one_line_block} ) {
16990 my @ilist = @{$ri_starting_one_line_block};
16991 my $inext = shift(@ilist);
16993 # loop over lines, checking length of each with a one-line block
16994 my ( $ibeg, $iend );
16995 foreach my $line ( 0 .. $max_line ) {
16996 $iend = $ri_last->[$line];
16997 next if ( $inext > $iend );
16998 $ibeg = $ri_first->[$line];
17000 # This is just for lines with indentation objects (c098)
17002 ref( $leading_spaces_to_go[$ibeg] )
17003 ? $self->excess_line_length( $ibeg, $iend )
17006 if ( $excess > 0 ) {
17007 my $available_spaces = $self->get_available_spaces_to_go($ibeg);
17009 if ( $available_spaces > 0 ) {
17010 my $delete_want = min( $available_spaces, $excess );
17011 my $deleted_spaces =
17012 $self->reduce_lp_indentation( $ibeg, $delete_want );
17013 $available_spaces =
17014 $self->get_available_spaces_to_go($ibeg);
17018 # skip forward to next one-line block to check
17020 $inext = shift @ilist;
17021 next if ( $inext <= $iend );
17022 last if ( $inext > $iend );
17024 last if ( $inext <= $iend );
17028 #-------------------------------------------------------------------
17029 # PASS 2: look for and fix other problems in each line of this batch
17030 #-------------------------------------------------------------------
17032 # look at each output line ...
17033 my ( $ibeg, $iend );
17034 foreach my $line ( 0 .. $max_line ) {
17035 $ibeg = $ri_first->[$line];
17036 $iend = $ri_last->[$line];
17038 # looking at each token in this output line ...
17039 foreach my $i ( $ibeg .. $iend ) {
17041 # How many space characters to place before this token
17042 # for special alignment. Actual padding is done in the
17045 # looking for next unvisited indentation item ...
17046 my $indentation = $leading_spaces_to_go[$i];
17048 # This is just for indentation objects (c098)
17049 next unless ( ref($indentation) );
17051 # Visit each indentation object just once
17052 next if ( $indentation->get_marked() );
17055 $indentation->set_marked(1);
17057 # Skip indentation objects which do not align with container tokens
17058 my $align_seqno = $indentation->get_align_seqno();
17059 next unless ($align_seqno);
17061 # Skip a container which is entirely on this line
17062 my $Ko = $K_opening_container->{$align_seqno};
17063 my $Kc = $K_closing_container->{$align_seqno};
17064 if ( defined($Ko) && defined($Kc) ) {
17065 next if ( $Ko >= $K_to_go[$ibeg] && $Kc <= $K_to_go[$iend] );
17068 if ( $line == 1 && $i == $ibeg ) {
17072 #--------------------------------------------
17073 # Now see what the error is and try to fix it
17074 #--------------------------------------------
17075 my $closing_index = $indentation->get_closed();
17076 my $predicted_pos = $indentation->get_spaces();
17078 # Find actual position:
17081 if ( $i == $ibeg ) {
17083 # Case 1: token is first character of of batch - table lookup
17084 if ( $line == 0 ) {
17086 $actual_pos = $predicted_pos;
17088 my ( $indent, $offset, $is_leading, $exists ) =
17089 get_saved_opening_indentation($align_seqno);
17090 if ( defined($indent) ) {
17092 # FIXME: should use '1' here if no space after opening
17093 # and '2' if want space; hardwired at 1 like -gnu-style
17094 $actual_pos = get_spaces($indent) + $offset + 1;
17098 # Case 2: token starts a new line - use length of previous line
17101 my $ibegm = $ri_first->[ $line - 1 ];
17102 my $iendm = $ri_last->[ $line - 1 ];
17103 $actual_pos = total_line_length( $ibegm, $iendm );
17107 if ( $types_to_go[ $iendm + 1 ] eq 'b' );
17112 # Case 3: $i>$ibeg: token is mid-line - use length to previous token
17115 $actual_pos = total_line_length( $ibeg, $i - 1 );
17117 # for mid-line token, we must check to see if all
17118 # additional lines have continuation indentation,
17119 # and remove it if so. Otherwise, we do not get
17121 if ( $closing_index > $iend ) {
17122 my $ibeg_next = $ri_first->[ $line + 1 ];
17123 if ( $ci_levels_to_go[$ibeg_next] > 0 ) {
17124 $self->undo_lp_ci( $line, $i, $closing_index,
17125 $ri_first, $ri_last );
17130 # By how many spaces (plus or minus) would we need to increase the
17131 # indentation to get alignment with the opening token?
17132 my $move_right = $actual_pos - $predicted_pos;
17134 if (DEBUG_CORRECT_LP) {
17135 my $tok = substr( $tokens_to_go[$i], 0, 8 );
17136 my $avail = $self->get_available_spaces_to_go($ibeg);
17138 "CORRECT_LP for seq=$align_seqno, predicted pos=$predicted_pos actual=$actual_pos => move right=$move_right available=$avail i=$i max=$max_index_to_go tok=$tok\n";
17141 # nothing more to do if no error to correct (gnu2.t)
17142 if ( $move_right == 0 ) {
17143 $indentation->set_recoverable_spaces($move_right);
17147 # Get any collapsed length defined for -xlp
17148 my $collapsed_length =
17149 $self->[_rcollapsed_length_by_seqno_]->{$align_seqno};
17150 $collapsed_length = 0 unless ( defined($collapsed_length) );
17152 if (DEBUG_CORRECT_LP) {
17154 "CORRECT_LP for seq=$align_seqno, collapsed length is $collapsed_length\n";
17157 # if we have not seen closure for this indentation in this batch,
17158 # and do not have a collapsed length estimate, we can only pass on
17159 # a request to the vertical aligner
17160 if ( $closing_index < 0 && !$collapsed_length ) {
17161 $indentation->set_recoverable_spaces($move_right);
17165 # If necessary, look ahead to see if there is really any leading
17166 # whitespace dependent on this whitespace, and also find the
17167 # longest line using this whitespace. Since it is always safe to
17168 # move left if there are no dependents, we only need to do this if
17169 # we may have dependent nodes or need to move right.
17171 my $have_child = $indentation->get_have_child();
17172 my %saw_indentation;
17173 my $line_count = 1;
17174 $saw_indentation{$indentation} = $indentation;
17176 # How far can we move right before we hit the limit?
17177 # let $right_margen = the number of spaces that we can increase
17178 # the current indentation before hitting the maximum line length.
17179 my $right_margin = 0;
17181 if ( $have_child || $move_right > 0 ) {
17184 # include estimated collapsed length for incomplete containers
17185 my $max_length = 0;
17186 if ( $Kc > $K_to_go[$max_index_to_go] ) {
17187 $max_length = $collapsed_length + $predicted_pos;
17190 if ( $i == $ibeg ) {
17191 my $length = total_line_length( $ibeg, $iend );
17192 if ( $length > $max_length ) { $max_length = $length }
17195 # look ahead at the rest of the lines of this batch..
17196 foreach my $line_t ( $line + 1 .. $max_line ) {
17197 my $ibeg_t = $ri_first->[$line_t];
17198 my $iend_t = $ri_last->[$line_t];
17199 last if ( $closing_index <= $ibeg_t );
17201 # remember all different indentation objects
17202 my $indentation_t = $leading_spaces_to_go[$ibeg_t];
17203 $saw_indentation{$indentation_t} = $indentation_t;
17206 # remember longest line in the group
17207 my $length_t = total_line_length( $ibeg_t, $iend_t );
17208 if ( $length_t > $max_length ) {
17209 $max_length = $length_t;
17214 $maximum_line_length_at_level[ $levels_to_go[$ibeg] ] -
17216 if ( $right_margin < 0 ) { $right_margin = 0 }
17219 my $first_line_comma_count =
17220 grep { $_ eq ',' } @types_to_go[ $ibeg .. $iend ];
17221 my $comma_count = $indentation->get_comma_count();
17222 my $arrow_count = $indentation->get_arrow_count();
17224 # This is a simple approximate test for vertical alignment:
17225 # if we broke just after an opening paren, brace, bracket,
17226 # and there are 2 or more commas in the first line,
17227 # and there are no '=>'s,
17228 # then we are probably vertically aligned. We could set
17229 # an exact flag in sub break_lists, but this is good
17231 my $indentation_count = keys %saw_indentation;
17232 my $is_vertically_aligned =
17234 && $first_line_comma_count > 1
17235 && $indentation_count == 1
17236 && ( $arrow_count == 0 || $arrow_count == $line_count ) );
17238 # Make the move if possible ..
17241 # we can always move left
17246 # incomplete container
17247 || ( $rOpts_extended_line_up_parentheses
17248 && $Kc > $K_to_go[$max_index_to_go] )
17249 || $closing_index < 0
17251 # but we should only move right if we are sure it will
17252 # not spoil vertical alignment
17253 || ( $comma_count == 0 )
17254 || ( $comma_count > 0 && !$is_vertically_aligned )
17258 ( $move_right <= $right_margin )
17262 if (DEBUG_CORRECT_LP) {
17264 "CORRECT_LP for seq=$align_seqno, moving $move spaces\n";
17267 foreach ( keys %saw_indentation ) {
17268 $saw_indentation{$_}
17269 ->permanently_decrease_available_spaces( -$move );
17273 # Otherwise, record what we want and the vertical aligner
17274 # will try to recover it.
17276 $indentation->set_recoverable_spaces($move_right);
17278 } ## end loop over tokens in a line
17279 } ## end loop over lines
17280 return $do_not_pad;
17285 # If there is a single, long parameter within parens, like this:
17287 # $self->command( "/msg "
17288 # . $infoline->chan
17289 # . " You said $1, but did you know that it's square was "
17290 # . $1 * $1 . " ?" );
17292 # we can remove the continuation indentation of the 2nd and higher lines
17293 # to achieve this effect, which is more pleasing:
17295 # $self->command("/msg "
17296 # . $infoline->chan
17297 # . " You said $1, but did you know that it's square was "
17298 # . $1 * $1 . " ?");
17300 my ( $self, $line_open, $i_start, $closing_index, $ri_first, $ri_last ) =
17302 my $max_line = @{$ri_first} - 1;
17304 # must be multiple lines
17305 return unless $max_line > $line_open;
17307 my $lev_start = $levels_to_go[$i_start];
17308 my $ci_start_plus = 1 + $ci_levels_to_go[$i_start];
17310 # see if all additional lines in this container have continuation
17313 my $line_1 = 1 + $line_open;
17314 for ( $n = $line_1 ; $n <= $max_line ; ++$n ) {
17315 my $ibeg = $ri_first->[$n];
17316 my $iend = $ri_last->[$n];
17317 if ( $ibeg eq $closing_index ) { $n--; last }
17318 return if ( $lev_start != $levels_to_go[$ibeg] );
17319 return if ( $ci_start_plus != $ci_levels_to_go[$ibeg] );
17320 last if ( $closing_index <= $iend );
17323 # we can reduce the indentation of all continuation lines
17324 my $continuation_line_count = $n - $line_open;
17325 @ci_levels_to_go[ @{$ri_first}[ $line_1 .. $n ] ] =
17326 (0) x ($continuation_line_count);
17327 @leading_spaces_to_go[ @{$ri_first}[ $line_1 .. $n ] ] =
17328 @reduced_spaces_to_go[ @{$ri_first}[ $line_1 .. $n ] ];
17332 ###############################################
17333 # CODE SECTION 10: Code to break long statments
17334 ###############################################
17336 sub break_long_lines {
17338 #-----------------------------------------------------------
17339 # Break a batch of tokens into lines which do not exceed the
17340 # maximum line length.
17341 #-----------------------------------------------------------
17343 # Define an array of indexes for inserting newline characters to
17344 # keep the line lengths below the maximum desired length. There is
17345 # an implied break after the last token, so it need not be included.
17348 # This routine is part of series of routines which adjust line
17349 # lengths. It is only called if a statement is longer than the
17350 # maximum line length, or if a preliminary scanning located
17351 # desirable break points. Sub break_lists has already looked at
17352 # these tokens and set breakpoints (in array
17353 # $forced_breakpoint_to_go[$i]) where it wants breaks (for example
17354 # after commas, after opening parens, and before closing parens).
17355 # This routine will honor these breakpoints and also add additional
17356 # breakpoints as necessary to keep the line length below the maximum
17357 # requested. It bases its decision on where the 'bond strength' is
17360 # Output: returns references to the arrays:
17363 # which contain the indexes $i of the first and last tokens on each
17366 # In addition, the array:
17367 # $forced_breakpoint_to_go[$i]
17368 # may be updated to be =1 for any index $i after which there must be
17369 # a break. This signals later routines not to undo the breakpoint.
17371 my ( $self, $saw_good_break, $rcolon_list ) = @_;
17373 # @{$rcolon_list} is a list of all the ? and : tokens in the batch, in
17376 use constant DEBUG_BREAK_LINES => 0;
17378 my @i_first = (); # the first index to output
17379 my @i_last = (); # the last index to output
17380 my @i_colon_breaks = (); # needed to decide if we have to break at ?'s
17381 if ( $types_to_go[0] eq ':' ) { push @i_colon_breaks, 0 }
17383 $self->set_bond_strengths();
17386 my $imax = $max_index_to_go;
17387 if ( $types_to_go[$imin] eq 'b' ) { $imin++ }
17388 if ( $types_to_go[$imax] eq 'b' ) { $imax-- }
17389 my $i_begin = $imin; # index for starting next iteration
17391 my $leading_spaces = leading_spaces_to_go($imin);
17392 my $line_count = 0;
17393 my $last_break_strength = NO_BREAK;
17394 my $i_last_break = -1;
17395 my $max_bias = 0.001;
17396 my $tiny_bias = 0.0001;
17397 my $leading_alignment_token = "";
17398 my $leading_alignment_type = "";
17400 # see if any ?/:'s are in order
17401 my $colons_in_order = 1;
17403 foreach ( @{$rcolon_list} ) {
17404 if ( $_ eq $last_tok ) { $colons_in_order = 0; last }
17408 # This is a sufficient but not necessary condition for colon chain
17409 my $is_colon_chain = ( $colons_in_order && @{$rcolon_list} > 2 );
17413 #-------------------------------------------------------
17414 # BEGINNING of main loop to set continuation breakpoints
17415 # Keep iterating until we reach the end
17416 #-------------------------------------------------------
17417 while ( $i_begin <= $imax ) {
17418 my $lowest_strength = NO_BREAK;
17419 my $starting_sum = $summed_lengths_to_go[$i_begin];
17422 my $lowest_next_token = '';
17423 my $lowest_next_type = 'b';
17424 my $i_lowest_next_nonblank = -1;
17425 my $maximum_line_length =
17426 $maximum_line_length_at_level[ $levels_to_go[$i_begin] ];
17428 # Do not separate an isolated bare word from an opening paren.
17429 # Alternate Fix #2 for issue b1299. This waits as long as possible
17430 # to make the decision.
17431 if ( $types_to_go[$i_begin] eq 'i'
17432 && substr( $tokens_to_go[$i_begin], 0, 1 ) =~ /\w/ )
17434 my $i_next_nonblank = $inext_to_go[$i_begin];
17435 if ( $tokens_to_go[$i_next_nonblank] eq '(' ) {
17436 $bond_strength_to_go[$i_begin] = NO_BREAK;
17440 #-------------------------------------------------------
17441 # BEGINNING of inner loop to find the best next breakpoint
17442 #-------------------------------------------------------
17443 my $strength = NO_BREAK;
17444 for ( $i_test = $i_begin ; $i_test <= $imax ; $i_test++ ) {
17445 my $type = $types_to_go[$i_test];
17446 my $token = $tokens_to_go[$i_test];
17447 my $next_type = $types_to_go[ $i_test + 1 ];
17448 my $next_token = $tokens_to_go[ $i_test + 1 ];
17449 my $i_next_nonblank = $inext_to_go[$i_test];
17450 my $next_nonblank_type = $types_to_go[$i_next_nonblank];
17451 my $next_nonblank_token = $tokens_to_go[$i_next_nonblank];
17452 my $next_nonblank_block_type = $block_type_to_go[$i_next_nonblank];
17454 # adjustments to the previous bond strength may have been made, and
17455 # we must keep the bond strength of a token and its following blank
17457 my $last_strength = $strength;
17458 $strength = $bond_strength_to_go[$i_test];
17459 if ( $type eq 'b' ) { $strength = $last_strength }
17461 # reduce strength a bit to break ties at an old comma breakpoint ...
17464 $old_breakpoint_to_go[$i_test]
17466 # Patch: limited to just commas to avoid blinking states
17469 # which is a 'good' breakpoint, meaning ...
17470 # we don't want to break before it
17471 && !$want_break_before{$type}
17473 # and either we want to break before the next token
17474 # or the next token is not short (i.e. not a '*', '/' etc.)
17475 && $i_next_nonblank <= $imax
17476 && ( $want_break_before{$next_nonblank_type}
17477 || $token_lengths_to_go[$i_next_nonblank] > 2
17478 || $next_nonblank_type eq ','
17479 || $is_opening_type{$next_nonblank_type} )
17482 $strength -= $tiny_bias;
17483 DEBUG_BREAK_LINES && do { $Msg .= " :-bias at i=$i_test" };
17486 # otherwise increase strength a bit if this token would be at the
17487 # maximum line length. This is necessary to avoid blinking
17488 # in the above example when the -iob flag is added.
17492 $summed_lengths_to_go[ $i_test + 1 ] -
17494 if ( $len >= $maximum_line_length ) {
17495 $strength += $tiny_bias;
17496 DEBUG_BREAK_LINES && do { $Msg .= " :+bias at i=$i_test" };
17500 my $must_break = 0;
17502 # Force an immediate break at certain operators
17503 # with lower level than the start of the line,
17504 # unless we've already seen a better break.
17506 #------------------------------------
17507 # Note on an issue with a preceding ?
17508 #------------------------------------
17509 # We don't include a ? in the above list, but there may
17510 # be a break at a previous ? if the line is long.
17511 # Because of this we do not want to force a break if
17512 # there is a previous ? on this line. For now the best way
17513 # to do this is to not break if we have seen a lower strength
17514 # point, which is probably a ?.
17516 # Example of unwanted breaks we are avoiding at a '.' following a ?
17517 # from pod2html using perltidy -gnu:
17519 # ? "\n<A NAME=\""
17521 # . "\">\n$text</A>\n"
17522 # : "\n$type$pod2.html\#" . $value . "\">$text<\/A>\n";
17524 ( $strength <= $lowest_strength )
17525 && ( $nesting_depth_to_go[$i_begin] >
17526 $nesting_depth_to_go[$i_next_nonblank] )
17528 $next_nonblank_type =~ /^(\.|\&\&|\|\|)$/
17529 || ( $next_nonblank_type eq 'k'
17530 && $next_nonblank_token =~ /^(and|or)$/ )
17534 $self->set_forced_breakpoint($i_next_nonblank);
17536 && do { $Msg .= " :Forced break at i=$i_next_nonblank" };
17541 # Try to put a break where requested by break_lists
17542 $forced_breakpoint_to_go[$i_test]
17544 # break between ) { in a continued line so that the '{' can
17546 # See similar logic in break_lists which catches instances
17547 # where a line is just something like ') {'. We have to
17548 # be careful because the corresponding block keyword might
17549 # not be on the first line, such as 'for' here:
17553 # for $x ( 1, 2 ) { local $_ = "b"; s/(.*)/+$1/ }
17559 && ( $token eq ')' )
17560 && ( $next_nonblank_type eq '{' )
17561 && ($next_nonblank_block_type)
17562 && ( $next_nonblank_block_type ne $tokens_to_go[$i_begin] )
17564 # RT #104427: Dont break before opening sub brace because
17565 # sub block breaks handled at higher level, unless
17566 # it looks like the preceding list is long and broken
17570 $next_nonblank_block_type =~ /$SUB_PATTERN/
17571 || $next_nonblank_block_type =~ /$ASUB_PATTERN/
17573 && ( $nesting_depth_to_go[$i_begin] ==
17574 $nesting_depth_to_go[$i_next_nonblank] )
17577 && !$rOpts_opening_brace_always_on_right
17580 # There is an implied forced break at a terminal opening brace
17581 || ( ( $type eq '{' ) && ( $i_test == $imax ) )
17585 # Forced breakpoints must sometimes be overridden, for example
17586 # because of a side comment causing a NO_BREAK. It is easier
17587 # to catch this here than when they are set.
17588 if ( $strength < NO_BREAK - 1 ) {
17589 $strength = $lowest_strength - $tiny_bias;
17592 && do { $Msg .= " :set must_break at i=$i_next_nonblank" };
17596 # quit if a break here would put a good terminal token on
17597 # the next line and we already have a possible break
17600 && ( $next_nonblank_type eq ';' || $next_nonblank_type eq ',' )
17604 $summed_lengths_to_go[ $i_next_nonblank + 1 ] -
17606 ) > $maximum_line_length
17610 if ( $i_lowest >= 0 ) {
17611 DEBUG_BREAK_LINES && do {
17612 $Msg .= " :quit at good terminal='$next_nonblank_type'";
17618 # Avoid a break which would strand a single punctuation
17619 # token. For example, we do not want to strand a leading
17620 # '.' which is followed by a long quoted string.
17621 # But note that we do want to do this with -extrude (l=1)
17622 # so please test any changes to this code on -extrude.
17625 && ( $i_test == $i_begin )
17626 && ( $i_test < $imax )
17627 && ( $token eq $type )
17631 $summed_lengths_to_go[ $i_test + 1 ] -
17633 ) < $maximum_line_length
17637 $i_test = min( $imax, $inext_to_go[$i_test] );
17638 DEBUG_BREAK_LINES && do {
17639 $Msg .= " :redo at i=$i_test";
17644 if ( ( $strength <= $lowest_strength ) && ( $strength < NO_BREAK ) )
17647 # break at previous best break if it would have produced
17648 # a leading alignment of certain common tokens, and it
17649 # is different from the latest candidate break
17650 if ($leading_alignment_type) {
17651 DEBUG_BREAK_LINES && do {
17653 " :last at leading_alignment='$leading_alignment_type'";
17658 # Force at least one breakpoint if old code had good
17659 # break It is only called if a breakpoint is required or
17660 # desired. This will probably need some adjustments
17661 # over time. A goal is to try to be sure that, if a new
17662 # side comment is introduced into formatted text, then
17663 # the same breakpoints will occur. scbreak.t
17665 $i_test == $imax # we are at the end
17666 && !get_forced_breakpoint_count()
17667 && $saw_good_break # old line had good break
17668 && $type =~ /^[#;\{]$/ # and this line ends in
17669 # ';' or side comment
17670 && $i_last_break < 0 # and we haven't made a break
17671 && $i_lowest >= 0 # and we saw a possible break
17672 && $i_lowest < $imax - 1 # (but not just before this ;)
17673 && $strength - $lowest_strength < 0.5 * WEAK # and it's good
17677 DEBUG_BREAK_LINES && do {
17678 $Msg .= " :last at good old break\n";
17683 # Do not skip past an important break point in a short final
17684 # segment. For example, without this check we would miss the
17685 # break at the final / in the following code:
17688 # ( $tau * $mass_pellet * $q_0 *
17689 # ( 1. - exp( -$t_stop / $tau ) ) -
17690 # 4. * $pi * $factor * $k_ice *
17691 # ( $t_melt - $t_ice ) *
17694 # ( $rho_ice * $Qs * $pi * $r_pellet**2 );
17698 && $i_lowest >= 0 # and we saw a possible break
17699 && $i_lowest < $i_test
17700 && $i_test > $imax - 2
17701 && $nesting_depth_to_go[$i_begin] >
17702 $nesting_depth_to_go[$i_lowest]
17703 && $lowest_strength < $last_break_strength - .5 * WEAK
17706 # Make this break for math operators for now
17707 my $ir = $inext_to_go[$i_lowest];
17708 my $il = $iprev_to_go[$ir];
17709 if ( $types_to_go[$il] =~ /^[\/\*\+\-\%]$/
17710 || $types_to_go[$ir] =~ /^[\/\*\+\-\%]$/ )
17712 DEBUG_BREAK_LINES && do {
17713 $Msg .= " :last-noskip_short";
17719 # Update the minimum bond strength location
17720 $lowest_strength = $strength;
17721 $i_lowest = $i_test;
17722 $lowest_next_token = $next_nonblank_token;
17723 $lowest_next_type = $next_nonblank_type;
17724 $i_lowest_next_nonblank = $i_next_nonblank;
17726 DEBUG_BREAK_LINES && do {
17727 $Msg .= " :last-must_break";
17732 # set flags to remember if a break here will produce a
17733 # leading alignment of certain common tokens
17734 if ( $line_count > 0
17736 && ( $lowest_strength - $last_break_strength <= $max_bias )
17739 my $i_last_end = $iprev_to_go[$i_begin];
17740 my $tok_beg = $tokens_to_go[$i_begin];
17741 my $type_beg = $types_to_go[$i_begin];
17744 # check for leading alignment of certain tokens
17746 $tok_beg eq $next_nonblank_token
17747 && $is_chain_operator{$tok_beg}
17748 && ( $type_beg eq 'k'
17749 || $type_beg eq $tok_beg )
17750 && $nesting_depth_to_go[$i_begin] >=
17751 $nesting_depth_to_go[$i_next_nonblank]
17754 || ( $tokens_to_go[$i_last_end] eq $token
17755 && $is_chain_operator{$token}
17756 && ( $type eq 'k' || $type eq $token )
17757 && $nesting_depth_to_go[$i_last_end] >=
17758 $nesting_depth_to_go[$i_test] )
17761 $leading_alignment_token = $next_nonblank_token;
17762 $leading_alignment_type = $next_nonblank_type;
17767 my $too_long = ( $i_test >= $imax );
17768 if ( !$too_long ) {
17771 $summed_lengths_to_go[ $i_test + 2 ] -
17773 $too_long = $next_length > $maximum_line_length;
17775 # To prevent blinkers we will avoid leaving a token exactly at
17776 # the line length limit unless it is the last token or one of
17777 # several "good" types.
17779 # The following code was a blinker with -pbp before this
17781 ## $last_nonblank_token eq '('
17782 ## && $is_indirect_object_taker{ $paren_type
17783 ## [$paren_depth] }
17784 # The issue causing the problem is that if the
17785 # term [$paren_depth] gets broken across a line then
17786 # the whitespace routine doesn't see both opening and closing
17787 # brackets and will format like '[ $paren_depth ]'. This
17788 # leads to an oscillation in length depending if we break
17789 # before the closing bracket or not.
17791 && $i_test + 1 < $imax
17792 && $next_nonblank_type ne ','
17793 && !$is_closing_type{$next_nonblank_type} )
17795 $too_long = $next_length >= $maximum_line_length;
17796 DEBUG_BREAK_LINES && do {
17797 $Msg .= " :too_long=$too_long" if ($too_long);
17802 DEBUG_BREAK_LINES && do {
17804 my $rtok = $next_nonblank_token ? $next_nonblank_token : "";
17805 my $i_testp2 = $i_test + 2;
17806 if ( $i_testp2 > $max_index_to_go + 1 ) {
17807 $i_testp2 = $max_index_to_go + 1;
17809 if ( length($ltok) > 6 ) { $ltok = substr( $ltok, 0, 8 ) }
17810 if ( length($rtok) > 6 ) { $rtok = substr( $rtok, 0, 8 ) }
17812 "BREAK: i=$i_test imax=$imax $types_to_go[$i_test] $next_nonblank_type sp=($leading_spaces) lnext= $summed_lengths_to_go[$i_testp2] 2long=$too_long str=$strength $ltok $rtok\n";
17815 # allow one extra terminal token after exceeding line length
17816 # if it would strand this token.
17817 if ( $rOpts_fuzzy_line_length
17819 && $i_lowest == $i_test
17820 && $token_lengths_to_go[$i_test] > 1
17821 && ( $next_nonblank_type eq ';' || $next_nonblank_type eq ',' )
17825 DEBUG_BREAK_LINES && do {
17826 $Msg .= " :do_not_strand next='$next_nonblank_type'";
17830 # we are done if...
17833 # ... no more space and we have a break
17834 $too_long && $i_lowest >= 0
17836 # ... or no more tokens
17837 || $i_test == $imax
17840 DEBUG_BREAK_LINES && do {
17842 " :Done-too_long=$too_long or i_lowest=$i_lowest or $i_test==imax";
17848 #-------------------------------------------------------
17849 # END of inner loop to find the best next breakpoint
17850 # Now decide exactly where to put the breakpoint
17851 #-------------------------------------------------------
17853 # it's always ok to break at imax if no other break was found
17854 if ( $i_lowest < 0 ) { $i_lowest = $imax }
17856 # semi-final index calculation
17857 my $i_next_nonblank = $inext_to_go[$i_lowest];
17858 my $next_nonblank_type = $types_to_go[$i_next_nonblank];
17859 my $next_nonblank_token = $tokens_to_go[$i_next_nonblank];
17861 #-------------------------------------------------------
17862 # ?/: rule 1 : if a break here will separate a '?' on this
17863 # line from its closing ':', then break at the '?' instead.
17864 #-------------------------------------------------------
17865 foreach my $i ( $i_begin + 1 .. $i_lowest - 1 ) {
17866 next unless ( $tokens_to_go[$i] eq '?' );
17868 # do not break if probable sequence of ?/: statements
17869 next if ($is_colon_chain);
17871 # do not break if statement is broken by side comment
17873 if ( $tokens_to_go[$max_index_to_go] eq '#'
17874 && terminal_type_i( 0, $max_index_to_go ) !~ /^[\;\}]$/ );
17876 # no break needed if matching : is also on the line
17878 if ( $mate_index_to_go[$i] >= 0
17879 && $mate_index_to_go[$i] <= $i_next_nonblank );
17882 if ( $want_break_before{'?'} ) { $i_lowest-- }
17886 #-------------------------------------------------------
17887 # END of inner loop to find the best next breakpoint:
17888 # Break the line after the token with index i=$i_lowest
17889 #-------------------------------------------------------
17891 # final index calculation
17892 $i_next_nonblank = $inext_to_go[$i_lowest];
17893 $next_nonblank_type = $types_to_go[$i_next_nonblank];
17894 $next_nonblank_token = $tokens_to_go[$i_next_nonblank];
17898 "BREAK: best is i = $i_lowest strength = $lowest_strength;\nReason>> $Msg\n";
17901 #-------------------------------------------------------
17902 # ?/: rule 2 : if we break at a '?', then break at its ':'
17904 # Note: this rule is also in sub break_lists to handle a break
17905 # at the start and end of a line (in case breaks are dictated
17906 # by side comments).
17907 #-------------------------------------------------------
17908 if ( $next_nonblank_type eq '?' ) {
17909 $self->set_closing_breakpoint($i_next_nonblank);
17911 elsif ( $types_to_go[$i_lowest] eq '?' ) {
17912 $self->set_closing_breakpoint($i_lowest);
17915 #-------------------------------------------------------
17916 # ?/: rule 3 : if we break at a ':' then we save
17917 # its location for further work below. We may need to go
17918 # back and break at its '?'.
17919 #-------------------------------------------------------
17920 if ( $next_nonblank_type eq ':' ) {
17921 push @i_colon_breaks, $i_next_nonblank;
17923 elsif ( $types_to_go[$i_lowest] eq ':' ) {
17924 push @i_colon_breaks, $i_lowest;
17927 # here we should set breaks for all '?'/':' pairs which are
17928 # separated by this line
17932 # save this line segment, after trimming blanks at the ends
17934 ( $types_to_go[$i_begin] eq 'b' ) ? $i_begin + 1 : $i_begin );
17936 ( $types_to_go[$i_lowest] eq 'b' ) ? $i_lowest - 1 : $i_lowest );
17938 # set a forced breakpoint at a container opening, if necessary, to
17939 # signal a break at a closing container. Excepting '(' for now.
17942 $tokens_to_go[$i_lowest] eq '{'
17943 || $tokens_to_go[$i_lowest] eq '['
17945 && !$forced_breakpoint_to_go[$i_lowest]
17948 $self->set_closing_breakpoint($i_lowest);
17951 # get ready to go again
17952 $i_begin = $i_lowest + 1;
17953 $last_break_strength = $lowest_strength;
17954 $i_last_break = $i_lowest;
17955 $leading_alignment_token = "";
17956 $leading_alignment_type = "";
17957 $lowest_next_token = '';
17958 $lowest_next_type = 'b';
17960 if ( ( $i_begin <= $imax ) && ( $types_to_go[$i_begin] eq 'b' ) ) {
17964 # update indentation size
17965 if ( $i_begin <= $imax ) {
17966 $leading_spaces = leading_spaces_to_go($i_begin);
17969 "updating leading spaces to be $leading_spaces at i=$i_begin\n";
17973 #-------------------------------------------------------
17974 # END of main loop to set continuation breakpoints
17975 # Now go back and make any necessary corrections
17976 #-------------------------------------------------------
17978 #-------------------------------------------------------
17979 # ?/: rule 4 -- if we broke at a ':', then break at
17980 # corresponding '?' unless this is a chain of ?: expressions
17981 #-------------------------------------------------------
17982 if (@i_colon_breaks) {
17984 # using a simple method for deciding if we are in a ?/: chain --
17985 # this is a chain if it has multiple ?/: pairs all in order;
17987 # Note that if line starts in a ':' we count that above as a break
17988 my $is_chain = ( $colons_in_order && @i_colon_breaks > 1 );
17990 unless ($is_chain) {
17991 my @insert_list = ();
17992 foreach (@i_colon_breaks) {
17993 my $i_question = $mate_index_to_go[$_];
17994 if ( $i_question >= 0 ) {
17995 if ( $want_break_before{'?'} ) {
17996 $i_question = $iprev_to_go[$i_question];
17999 if ( $i_question >= 0 ) {
18000 push @insert_list, $i_question;
18003 $self->insert_additional_breaks( \@insert_list, \@i_first,
18008 return ( \@i_first, \@i_last );
18011 ###########################################
18012 # CODE SECTION 11: Code to break long lists
18013 ###########################################
18015 { ## begin closure break_lists
18017 # These routines and variables are involved in finding good
18018 # places to break long lists.
18020 use constant DEBUG_BREAK_LISTS => 0;
18023 $block_type, $current_depth,
18025 $i_last_nonblank_token, $last_nonblank_token,
18026 $last_nonblank_type, $last_nonblank_block_type,
18027 $last_old_breakpoint_count, $minimum_depth,
18028 $next_nonblank_block_type, $next_nonblank_token,
18029 $next_nonblank_type, $old_breakpoint_count,
18030 $starting_breakpoint_count, $starting_depth,
18036 @breakpoint_stack, @breakpoint_undo_stack,
18037 @comma_index, @container_type,
18038 @identifier_count_stack, @index_before_arrow,
18039 @interrupted_list, @item_count_stack,
18040 @last_comma_index, @last_dot_index,
18041 @last_nonblank_type, @old_breakpoint_count_stack,
18042 @opening_structure_index_stack, @rfor_semicolon_list,
18043 @has_old_logical_breakpoints, @rand_or_list,
18044 @i_equals, @override_cab3,
18045 @type_sequence_stack,
18048 # these arrays must retain values between calls
18049 my ( @has_broken_sublist, @dont_align, @want_comma_break );
18053 my $list_stress_level;
18055 sub initialize_break_lists {
18057 @has_broken_sublist = ();
18058 @want_comma_break = ();
18060 #---------------------------------------------------
18061 # Set tolerances to prevent formatting instabilities
18062 #---------------------------------------------------
18064 # Define tolerances to use when checking if closed
18065 # containers will fit on one line. This is necessary to avoid
18066 # formatting instability. The basic tolerance is based on the
18069 # - Always allow for at least one extra space after a closing token so
18070 # that we do not strand a comma or semicolon. (oneline.t).
18072 # - Use an increased line length tolerance when -ci > -i to avoid
18073 # blinking states (case b923 and others).
18075 1 + max( 0, $rOpts_continuation_indentation - $rOpts_indent_columns );
18077 # In addition, it may be necessary to use a few extra tolerance spaces
18078 # when -lp is used and/or when -xci is used. The history of this
18079 # so far is as follows:
18081 # FIX1: At least 3 characters were been found to be required for -lp
18082 # to fixes cases b1059 b1063 b1117.
18084 # FIX2: Further testing showed that we need a total of 3 extra spaces
18085 # when -lp is set for non-lists, and at least 2 spaces when -lp and
18087 # Fixes cases b1063 b1103 b1134 b1135 b1136 b1138 b1140 b1143 b1144
18088 # b1145 b1146 b1147 b1148 b1151 b1152 b1153 b1154 b1156 b1157 b1164
18091 # FIX3: To fix cases b1169 b1170 b1171, an update was made in sub
18092 # 'find_token_starting_list' to go back before an initial blank space.
18093 # This fixed these three cases, and allowed the tolerances to be
18094 # reduced to continue to fix all other known cases of instability.
18095 # This gives the current tolerance formulation.
18099 if ($rOpts_line_up_parentheses) {
18101 # boost tol for combination -lp -xci
18102 if ($rOpts_extended_continuation_indentation) {
18106 # boost tol for combination -lp and any -vtc > 0, but only for
18107 # non-list containers
18109 foreach ( keys %closing_vertical_tightness ) {
18111 unless ( $closing_vertical_tightness{$_} );
18112 $lp_tol_boost = 1; # Fixes B1193;
18118 # Define a level where list formatting becomes highly stressed and
18119 # needs to be simplified. Introduced for case b1262.
18120 $list_stress_level = min( $stress_level_alpha, $stress_level_beta + 2 );
18125 # routine to define essential variables when we go 'up' to
18127 sub check_for_new_minimum_depth {
18129 if ( $depth < $minimum_depth ) {
18131 $minimum_depth = $depth;
18133 # these arrays need not retain values between calls
18134 $breakpoint_stack[$depth] = $starting_breakpoint_count;
18135 $container_type[$depth] = "";
18136 $identifier_count_stack[$depth] = 0;
18137 $index_before_arrow[$depth] = -1;
18138 $interrupted_list[$depth] = 1;
18139 $item_count_stack[$depth] = 0;
18140 $last_nonblank_type[$depth] = "";
18141 $opening_structure_index_stack[$depth] = -1;
18143 $breakpoint_undo_stack[$depth] = undef;
18144 $comma_index[$depth] = undef;
18145 $last_comma_index[$depth] = undef;
18146 $last_dot_index[$depth] = undef;
18147 $old_breakpoint_count_stack[$depth] = undef;
18148 $has_old_logical_breakpoints[$depth] = 0;
18149 $rand_or_list[$depth] = [];
18150 $rfor_semicolon_list[$depth] = [];
18151 $i_equals[$depth] = -1;
18153 # these arrays must retain values between calls
18154 if ( !defined( $has_broken_sublist[$depth] ) ) {
18155 $dont_align[$depth] = 0;
18156 $has_broken_sublist[$depth] = 0;
18157 $want_comma_break[$depth] = 0;
18163 # routine to decide which commas to break at within a container;
18165 # $bp_count = number of comma breakpoints set
18166 # $do_not_break_apart = a flag indicating if container need not
18168 sub set_comma_breakpoints {
18170 my ( $self, $dd ) = @_;
18172 my $do_not_break_apart = 0;
18174 # Do not break a list unless there are some non-line-ending commas.
18175 # This avoids getting different results with only non-essential commas,
18177 my $seqno = $type_sequence_stack[$dd];
18178 my $real_comma_count =
18179 $seqno ? $self->[_rtype_count_by_seqno_]->{$seqno}->{','} : 1;
18182 if ( $item_count_stack[$dd] ) {
18184 # handle commas not in containers...
18185 if ( $dont_align[$dd] ) {
18186 $self->do_uncontained_comma_breaks($dd);
18189 # handle commas within containers...
18190 elsif ($real_comma_count) {
18191 my $fbc = get_forced_breakpoint_count();
18193 # always open comma lists not preceded by keywords,
18194 # barewords, identifiers (that is, anything that doesn't
18195 # look like a function call)
18196 my $must_break_open = $last_nonblank_type[$dd] !~ /^[kwiU]$/;
18198 $self->set_comma_breakpoints_do(
18201 i_opening_paren => $opening_structure_index_stack[$dd],
18202 i_closing_paren => $i,
18203 item_count => $item_count_stack[$dd],
18204 identifier_count => $identifier_count_stack[$dd],
18205 rcomma_index => $comma_index[$dd],
18206 next_nonblank_type => $next_nonblank_type,
18207 list_type => $container_type[$dd],
18208 interrupted => $interrupted_list[$dd],
18209 rdo_not_break_apart => \$do_not_break_apart,
18210 must_break_open => $must_break_open,
18211 has_broken_sublist => $has_broken_sublist[$dd],
18214 $bp_count = get_forced_breakpoint_count() - $fbc;
18215 $do_not_break_apart = 0 if $must_break_open;
18218 return ( $bp_count, $do_not_break_apart );
18221 # These types are excluded at breakpoints to prevent blinking
18222 # Switched from excluded to included as part of fix for b1214
18223 ##my %is_uncontained_comma_break_excluded_type;
18224 my %is_uncontained_comma_break_included_type;
18227 ##my @q = qw< L { ( [ ? : + - =~ >;
18228 ##@is_uncontained_comma_break_excluded_type{@q} = (1) x scalar(@q);
18230 my @q = qw< k R } ) ] Y Z U w i q Q .
18231 = **= += *= &= <<= &&= -= /= |= >>= ||= //= .= %= ^= x=>;
18232 @is_uncontained_comma_break_included_type{@q} = (1) x scalar(@q);
18235 sub do_uncontained_comma_breaks {
18237 # Handle commas not in containers...
18238 # This is a catch-all routine for commas that we
18239 # don't know what to do with because the don't fall
18240 # within containers. We will bias the bond strength
18241 # to break at commas which ended lines in the input
18242 # file. This usually works better than just trying
18243 # to put as many items on a line as possible. A
18244 # downside is that if the input file is garbage it
18245 # won't work very well. However, the user can always
18246 # prevent following the old breakpoints with the
18248 my ( $self, $dd ) = @_;
18250 my $old_comma_break_count = 0;
18251 foreach my $ii ( @{ $comma_index[$dd] } ) {
18252 if ( $old_breakpoint_to_go[$ii] ) {
18253 $old_comma_break_count++;
18254 $bond_strength_to_go[$ii] = $bias;
18256 # reduce bias magnitude to force breaks in order
18261 # Also put a break before the first comma if
18262 # (1) there was a break there in the input, and
18263 # (2) there was exactly one old break before the first comma break
18264 # (3) OLD: there are multiple old comma breaks
18265 # (3) NEW: there are one or more old comma breaks (see return example)
18266 # (4) the first comma is at the starting level ...
18267 # ... fixes cases b064 b065 b068 b210 b747
18268 # (5) the batch does not start with a ci>0 [ignore a ci change by -xci]
18269 # ... fixes b1220. If ci>0 we are in the middle of a snippet,
18270 # maybe because -boc has been forcing out previous lines.
18272 # For example, we will follow the user and break after
18273 # 'print' in this snippet:
18275 # "conformability (Not the same dimension)\n",
18276 # "\t", $have, " is ", text_unit($hu), "\n",
18277 # "\t", $want, " is ", text_unit($wu), "\n",
18280 # Another example, just one comma, where we will break after
18283 # $x * cos($a) - $y * sin($a),
18284 # $x * sin($a) + $y * cos($a);
18286 # Breaking a print statement:
18288 # ( $? & 127 ) ? " (SIG#" . ( $? & 127 ) . ")" : "",
18289 # ( $? & 128 ) ? " -- core dumped" : "", "\n";
18291 # But we will not force a break after the opening paren here
18292 # (causes a blinker):
18293 # $heap->{stream}->set_output_filter(
18294 # poe::filter::reference->new('myotherfreezer') ),
18297 my $i_first_comma = $comma_index[$dd]->[0];
18298 my $level_comma = $levels_to_go[$i_first_comma];
18299 my $ci_start = $ci_levels_to_go[0];
18301 # Here we want to use the value of ci before any -xci adjustment
18302 if ( $ci_start && $rOpts_extended_continuation_indentation ) {
18303 my $K0 = $K_to_go[0];
18304 if ( $self->[_rseqno_controlling_my_ci_]->{$K0} ) { $ci_start = 0 }
18307 && $old_breakpoint_to_go[$i_first_comma]
18308 && $level_comma == $levels_to_go[0] )
18312 for ( my $ii = $i_first_comma - 1 ; $ii >= 0 ; $ii -= 1 ) {
18313 if ( $old_breakpoint_to_go[$ii] ) {
18315 last if ( $obp_count > 1 );
18317 if ( $levels_to_go[$ii] == $level_comma );
18321 # Changed rule from multiple old commas to just one here:
18322 if ( $ibreak >= 0 && $obp_count == 1 && $old_comma_break_count > 0 )
18324 my $ibreak_m = $ibreak;
18325 $ibreak_m-- if ( $types_to_go[$ibreak_m] eq 'b' );
18326 if ( $ibreak_m >= 0 ) {
18328 # In order to avoid blinkers we have to be fairly
18332 # Rule 1: Do not to break before an opening token
18333 # Rule 2: avoid breaking at ternary operators
18334 # (see b931, which is similar to the above print example)
18335 # Rule 3: Do not break at chain operators to fix case b1119
18336 # - The previous test was '$typem !~ /^[\(\{\[L\?\:]$/'
18338 # NEW Rule, replaced above rules after case b1214:
18339 # only break at one of the included types
18341 # Be sure to test any changes to these rules against runs
18342 # with -l=0 such as the 'bbvt' test (perltidyrc_colin)
18344 my $type_m = $types_to_go[$ibreak_m];
18346 # Switched from excluded to included for b1214. If necessary
18347 # the token could also be checked if type_m eq 'k'
18348 ##if ( !$is_uncontained_comma_break_excluded_type{$type_m} ) {
18349 ##my $token_m = $tokens_to_go[$ibreak_m];
18350 if ( $is_uncontained_comma_break_included_type{$type_m} ) {
18351 $self->set_forced_breakpoint($ibreak);
18359 my %is_logical_container;
18363 my @q = qw# if elsif unless while and or err not && | || ? : ! #;
18364 @is_logical_container{@q} = (1) x scalar(@q);
18366 # This filter will allow most tokens to skip past a section of code
18367 %quick_filter = %is_assignment;
18368 @q = qw# => . ; < > ~ #;
18370 @quick_filter{@q} = (1) x scalar(@q);
18373 sub set_for_semicolon_breakpoints {
18374 my ( $self, $dd ) = @_;
18375 foreach ( @{ $rfor_semicolon_list[$dd] } ) {
18376 $self->set_forced_breakpoint($_);
18381 sub set_logical_breakpoints {
18382 my ( $self, $dd ) = @_;
18384 $item_count_stack[$dd] == 0
18385 && $is_logical_container{ $container_type[$dd] }
18387 || $has_old_logical_breakpoints[$dd]
18391 # Look for breaks in this order:
18394 foreach my $i ( 0 .. 3 ) {
18395 if ( $rand_or_list[$dd][$i] ) {
18396 foreach ( @{ $rand_or_list[$dd][$i] } ) {
18397 $self->set_forced_breakpoint($_);
18400 # break at any 'if' and 'unless' too
18401 foreach ( @{ $rand_or_list[$dd][4] } ) {
18402 $self->set_forced_breakpoint($_);
18404 $rand_or_list[$dd] = [];
18412 sub is_unbreakable_container {
18414 # never break a container of one of these types
18415 # because bad things can happen (map1.t)
18417 return $is_sort_map_grep{ $container_type[$dd] };
18422 my ( $self, $is_long_line ) = @_;
18424 #----------------------------------------------------------------------
18425 # This routine is called once per batch, if the batch is a list, to set
18426 # line breaks so that hierarchical structure can be displayed and so
18427 # that list items can be vertically aligned. The output of this
18428 # routine is stored in the array @forced_breakpoint_to_go, which is
18429 # used by sub 'break_long_lines' to set final breakpoints.
18430 #----------------------------------------------------------------------
18432 my $rLL = $self->[_rLL_];
18433 my $ris_list_by_seqno = $self->[_ris_list_by_seqno_];
18434 my $ris_broken_container = $self->[_ris_broken_container_];
18435 my $rbreak_before_container_by_seqno =
18436 $self->[_rbreak_before_container_by_seqno_];
18438 $starting_depth = $nesting_depth_to_go[0];
18441 $current_depth = $starting_depth;
18443 $last_nonblank_token = ';';
18444 $last_nonblank_type = ';';
18445 $last_nonblank_block_type = ' ';
18446 $last_old_breakpoint_count = 0;
18447 $minimum_depth = $current_depth + 1; # forces update in check below
18448 $old_breakpoint_count = 0;
18449 $starting_breakpoint_count = get_forced_breakpoint_count();
18452 $type_sequence = '';
18454 my $total_depth_variation = 0;
18455 my $i_old_assignment_break;
18456 my $depth_last = $starting_depth;
18457 my $comma_follows_last_closing_token;
18459 check_for_new_minimum_depth($current_depth);
18461 my $want_previous_breakpoint = -1;
18463 my $saw_good_breakpoint;
18464 my $i_line_end = -1;
18465 my $i_line_start = -1;
18466 my $i_last_colon = -1;
18468 #----------------------------------------
18469 # Main loop over all tokens in this batch
18470 #----------------------------------------
18471 while ( ++$i <= $max_index_to_go ) {
18472 if ( $type ne 'b' ) {
18473 $i_last_nonblank_token = $i - 1;
18474 $last_nonblank_type = $type;
18475 $last_nonblank_token = $token;
18476 $last_nonblank_block_type = $block_type;
18477 } ## end if ( $type ne 'b' )
18478 $type = $types_to_go[$i];
18479 $block_type = $block_type_to_go[$i];
18480 $token = $tokens_to_go[$i];
18481 $type_sequence = $type_sequence_to_go[$i];
18482 my $next_type = $types_to_go[ $i + 1 ];
18483 my $next_token = $tokens_to_go[ $i + 1 ];
18484 my $i_next_nonblank = ( ( $next_type eq 'b' ) ? $i + 2 : $i + 1 );
18485 $next_nonblank_type = $types_to_go[$i_next_nonblank];
18486 $next_nonblank_token = $tokens_to_go[$i_next_nonblank];
18487 $next_nonblank_block_type = $block_type_to_go[$i_next_nonblank];
18489 # set break if flag was set
18490 if ( $want_previous_breakpoint >= 0 ) {
18491 $self->set_forced_breakpoint($want_previous_breakpoint);
18492 $want_previous_breakpoint = -1;
18495 $last_old_breakpoint_count = $old_breakpoint_count;
18497 # Fixed for case b1097 to not consider old breaks at highly
18498 # stressed locations, such as types 'L' and 'R'. It might be
18499 # useful to generalize this concept in the future by looking at
18500 # actual bond strengths.
18501 if ( $old_breakpoint_to_go[$i]
18503 && $next_nonblank_type ne 'R' )
18506 $i_line_start = $i_next_nonblank;
18508 $old_breakpoint_count++;
18510 # Break before certain keywords if user broke there and
18511 # this is a 'safe' break point. The idea is to retain
18512 # any preferred breaks for sequential list operations,
18513 # like a schwartzian transform.
18514 if ($rOpts_break_at_old_keyword_breakpoints) {
18516 $next_nonblank_type eq 'k'
18517 && $is_keyword_returning_list{$next_nonblank_token}
18518 && ( $type =~ /^[=\)\]\}Riw]$/
18520 && $is_keyword_returning_list{$token} )
18524 # we actually have to set this break next time through
18525 # the loop because if we are at a closing token (such
18526 # as '}') which forms a one-line block, this break might
18529 # And do not do this at an equals if the user wants
18530 # breaks before an equals (blinker cases b434 b903)
18531 unless ( $type eq '=' && $want_break_before{$type} ) {
18532 $want_previous_breakpoint = $i;
18534 } ## end if ( $next_nonblank_type...)
18535 } ## end if ($rOpts_break_at_old_keyword_breakpoints)
18537 # Break before attributes if user broke there
18538 if ($rOpts_break_at_old_attribute_breakpoints) {
18539 if ( $next_nonblank_type eq 'A' ) {
18540 $want_previous_breakpoint = $i;
18544 # remember an = break as possible good break point
18545 if ( $is_assignment{$type} ) {
18546 $i_old_assignment_break = $i;
18548 elsif ( $is_assignment{$next_nonblank_type} ) {
18549 $i_old_assignment_break = $i_next_nonblank;
18551 } ## end if ( $old_breakpoint_to_go...)
18553 next if ( $type eq 'b' );
18554 $depth = $nesting_depth_to_go[ $i + 1 ];
18556 $total_depth_variation += abs( $depth - $depth_last );
18557 $depth_last = $depth;
18559 # safety check - be sure we always break after a comment
18560 # Shouldn't happen .. an error here probably means that the
18561 # nobreak flag did not get turned off correctly during
18563 if ( $type eq '#' ) {
18564 if ( $i != $max_index_to_go ) {
18567 Non-fatal program bug: backup logic required to break after a comment
18570 $nobreak_to_go[$i] = 0;
18571 $self->set_forced_breakpoint($i);
18572 } ## end if ( $i != $max_index_to_go)
18573 } ## end if ( $type eq '#' )
18575 # Force breakpoints at certain tokens in long lines.
18576 # Note that such breakpoints will be undone later if these tokens
18577 # are fully contained within parens on a line.
18580 # break before a keyword within a line
18584 # if one of these keywords:
18585 && $is_if_unless_while_until_for_foreach{$token}
18587 # but do not break at something like '1 while'
18588 && ( $last_nonblank_type ne 'n' || $i > 2 )
18590 # and let keywords follow a closing 'do' brace
18591 && $last_nonblank_block_type ne 'do'
18596 # or container is broken (by side-comment, etc)
18597 || ( $next_nonblank_token eq '('
18598 && $mate_index_to_go[$i_next_nonblank] < $i )
18602 $self->set_forced_breakpoint( $i - 1 );
18603 } ## end if ( $type eq 'k' && $i...)
18605 # remember locations of '||' and '&&' for possible breaks if we
18606 # decide this is a long logical expression.
18607 if ( $type eq '||' ) {
18608 push @{ $rand_or_list[$depth][2] }, $i;
18609 ++$has_old_logical_breakpoints[$depth]
18610 if ( ( $i == $i_line_start || $i == $i_line_end )
18611 && $rOpts_break_at_old_logical_breakpoints );
18612 } ## end elsif ( $type eq '||' )
18613 elsif ( $type eq '&&' ) {
18614 push @{ $rand_or_list[$depth][3] }, $i;
18615 ++$has_old_logical_breakpoints[$depth]
18616 if ( ( $i == $i_line_start || $i == $i_line_end )
18617 && $rOpts_break_at_old_logical_breakpoints );
18618 } ## end elsif ( $type eq '&&' )
18619 elsif ( $type eq 'f' ) {
18620 push @{ $rfor_semicolon_list[$depth] }, $i;
18622 elsif ( $type eq 'k' ) {
18623 if ( $token eq 'and' ) {
18624 push @{ $rand_or_list[$depth][1] }, $i;
18625 ++$has_old_logical_breakpoints[$depth]
18626 if ( ( $i == $i_line_start || $i == $i_line_end )
18627 && $rOpts_break_at_old_logical_breakpoints );
18628 } ## end if ( $token eq 'and' )
18630 # break immediately at 'or's which are probably not in a logical
18631 # block -- but we will break in logical breaks below so that
18632 # they do not add to the forced_breakpoint_count
18633 elsif ( $token eq 'or' ) {
18634 push @{ $rand_or_list[$depth][0] }, $i;
18635 ++$has_old_logical_breakpoints[$depth]
18636 if ( ( $i == $i_line_start || $i == $i_line_end )
18637 && $rOpts_break_at_old_logical_breakpoints );
18638 if ( $is_logical_container{ $container_type[$depth] } ) {
18641 if ($is_long_line) { $self->set_forced_breakpoint($i) }
18642 elsif ( ( $i == $i_line_start || $i == $i_line_end )
18643 && $rOpts_break_at_old_logical_breakpoints )
18645 $saw_good_breakpoint = 1;
18647 } ## end else [ if ( $is_logical_container...)]
18648 } ## end elsif ( $token eq 'or' )
18649 elsif ( $token eq 'if' || $token eq 'unless' ) {
18650 push @{ $rand_or_list[$depth][4] }, $i;
18651 if ( ( $i == $i_line_start || $i == $i_line_end )
18652 && $rOpts_break_at_old_logical_breakpoints )
18654 $self->set_forced_breakpoint($i);
18656 } ## end elsif ( $token eq 'if' ||...)
18657 } ## end elsif ( $type eq 'k' )
18658 elsif ( $is_assignment{$type} ) {
18659 $i_equals[$depth] = $i;
18662 if ($type_sequence) {
18664 # handle any postponed closing breakpoints
18665 if ( $is_closing_sequence_token{$token} ) {
18666 if ( $type eq ':' ) {
18667 $i_last_colon = $i;
18669 # retain break at a ':' line break
18670 if ( ( $i == $i_line_start || $i == $i_line_end )
18671 && $rOpts_break_at_old_ternary_breakpoints
18672 && $levels_to_go[$i] < $list_stress_level )
18675 $self->set_forced_breakpoint($i);
18677 # Break at a previous '=', but only if it is before
18678 # the mating '?'. Mate_index test fixes b1287.
18679 my $ieq = $i_equals[$depth];
18680 if ( $ieq > 0 && $ieq < $mate_index_to_go[$i] ) {
18681 $self->set_forced_breakpoint(
18682 $i_equals[$depth] );
18683 $i_equals[$depth] = -1;
18685 } ## end if ( ( $i == $i_line_start...))
18686 } ## end if ( $type eq ':' )
18687 if ( has_postponed_breakpoint($type_sequence) ) {
18688 my $inc = ( $type eq ':' ) ? 0 : 1;
18689 if ( $i >= $inc ) {
18690 $self->set_forced_breakpoint( $i - $inc );
18693 } ## end if ( $is_closing_sequence_token{$token} )
18695 # set breaks at ?/: if they will get separated (and are
18696 # not a ?/: chain), or if the '?' is at the end of the
18698 elsif ( $token eq '?' ) {
18699 my $i_colon = $mate_index_to_go[$i];
18701 $i_colon <= 0 # the ':' is not in this batch
18702 || $i == 0 # this '?' is the first token of the line
18704 $max_index_to_go # or this '?' is the last token
18708 # don't break if # this has a side comment, and
18709 # don't break at a '?' if preceded by ':' on
18710 # this line of previous ?/: pair on this line.
18711 # This is an attempt to preserve a chain of ?/:
18712 # expressions (elsif2.t).
18716 || $parent_seqno_to_go[$i_last_colon] !=
18717 $parent_seqno_to_go[$i]
18719 && $tokens_to_go[$max_index_to_go] ne '#'
18722 $self->set_forced_breakpoint($i);
18724 $self->set_closing_breakpoint($i);
18725 } ## end if ( $i_colon <= 0 ||...)
18726 } ## end elsif ( $token eq '?' )
18728 elsif ( $is_opening_token{$token} ) {
18730 # do requeste -lp breaks at the OPENING token for BROKEN
18731 # blocks. NOTE: this can be done for both -lp and -xlp,
18732 # but only -xlp can really take advantage of this. So this
18733 # is currently restricted to -xlp to avoid excess changes to
18734 # existing -lp formatting.
18735 if ( $rOpts_extended_line_up_parentheses
18736 && $mate_index_to_go[$i] < 0 )
18739 $self->[_rlp_object_by_seqno_]->{$type_sequence};
18741 my $K_begin_line = $lp_object->get_K_begin_line();
18742 my $i_begin_line = $K_begin_line - $K_to_go[0];
18743 $self->set_forced_lp_break( $i_begin_line, $i );
18748 } ## end if ($type_sequence)
18750 #print "LISTX sees: i=$i type=$type tok=$token block=$block_type depth=$depth\n";
18752 #------------------------------------------------------------
18753 # Handle Increasing Depth..
18755 # prepare for a new list when depth increases
18756 # token $i is a '(','{', or '['
18757 #------------------------------------------------------------
18758 # hardened against bad input syntax: depth jump must be 1 and type
18759 # must be opening..fixes c102
18760 if ( $depth == $current_depth + 1 && $is_opening_type{$type} ) {
18762 $type_sequence_stack[$depth] = $type_sequence;
18763 $override_cab3[$depth] =
18764 $rOpts_comma_arrow_breakpoints == 3
18766 && $self->[_roverride_cab3_]->{$type_sequence};
18767 $breakpoint_stack[$depth] = get_forced_breakpoint_count();
18768 $breakpoint_undo_stack[$depth] =
18769 get_forced_breakpoint_undo_count();
18770 $has_broken_sublist[$depth] = 0;
18771 $identifier_count_stack[$depth] = 0;
18772 $index_before_arrow[$depth] = -1;
18773 $interrupted_list[$depth] = 0;
18774 $item_count_stack[$depth] = 0;
18775 $last_comma_index[$depth] = undef;
18776 $last_dot_index[$depth] = undef;
18777 $last_nonblank_type[$depth] = $last_nonblank_type;
18778 $old_breakpoint_count_stack[$depth] = $old_breakpoint_count;
18779 $opening_structure_index_stack[$depth] = $i;
18780 $rand_or_list[$depth] = [];
18781 $rfor_semicolon_list[$depth] = [];
18782 $i_equals[$depth] = -1;
18783 $want_comma_break[$depth] = 0;
18784 $container_type[$depth] =
18787 $is_container_label_type{$last_nonblank_type}
18788 ? $last_nonblank_token
18790 $has_old_logical_breakpoints[$depth] = 0;
18792 # if line ends here then signal closing token to break
18793 if ( $next_nonblank_type eq 'b' || $next_nonblank_type eq '#' )
18795 $self->set_closing_breakpoint($i);
18798 # Not all lists of values should be vertically aligned..
18799 $dont_align[$depth] =
18801 # code BLOCKS are handled at a higher level
18802 ( $block_type ne "" )
18804 # certain paren lists
18805 || ( $type eq '(' ) && (
18807 # it does not usually look good to align a list of
18808 # identifiers in a parameter list, as in:
18809 # my($var1, $var2, ...)
18810 # (This test should probably be refined, for now I'm just
18811 # testing for any keyword)
18812 ( $last_nonblank_type eq 'k' )
18814 # a trailing '(' usually indicates a non-list
18815 || ( $next_nonblank_type eq '(' )
18818 # patch to outdent opening brace of long if/for/..
18819 # statements (like this one). See similar coding in
18820 # set_continuation breaks. We have also catch it here for
18821 # short line fragments which otherwise will not go through
18822 # break_long_lines.
18826 # if we have the ')' but not its '(' in this batch..
18827 && ( $last_nonblank_token eq ')' )
18828 && $mate_index_to_go[$i_last_nonblank_token] < 0
18830 # and user wants brace to left
18831 && !$rOpts_opening_brace_always_on_right
18833 && ( $type eq '{' ) # should be true
18834 && ( $token eq '{' ) # should be true
18837 $self->set_forced_breakpoint( $i - 1 );
18838 } ## end if ( $block_type && ( ...))
18839 } ## end if ( $depth > $current_depth)
18841 #------------------------------------------------------------
18842 # Handle Decreasing Depth..
18844 # finish off any old list when depth decreases
18845 # token $i is a ')','}', or ']'
18846 #------------------------------------------------------------
18847 # hardened against bad input syntax: depth jump must be 1 and type
18848 # must be closing .. fixes c102
18849 elsif ( $depth == $current_depth - 1 && $is_closing_type{$type} ) {
18851 check_for_new_minimum_depth($depth);
18853 $comma_follows_last_closing_token =
18854 $next_nonblank_type eq ',' || $next_nonblank_type eq '=>';
18856 # force all outer logical containers to break after we see on
18858 $has_old_logical_breakpoints[$depth] ||=
18859 $has_old_logical_breakpoints[$current_depth];
18861 # Patch to break between ') {' if the paren list is broken.
18862 # There is similar logic in break_long_lines for
18863 # non-broken lists.
18865 && $next_nonblank_block_type
18866 && $interrupted_list[$current_depth]
18867 && $next_nonblank_type eq '{'
18868 && !$rOpts_opening_brace_always_on_right )
18870 $self->set_forced_breakpoint($i);
18871 } ## end if ( $token eq ')' && ...
18873 #print "LISTY sees: i=$i type=$type tok=$token block=$block_type depth=$depth next=$next_nonblank_type next_block=$next_nonblank_block_type inter=$interrupted_list[$current_depth]\n";
18875 # set breaks at commas if necessary
18876 my ( $bp_count, $do_not_break_apart ) =
18877 $self->set_comma_breakpoints($current_depth);
18879 my $i_opening = $opening_structure_index_stack[$current_depth];
18880 my $saw_opening_structure = ( $i_opening >= 0 );
18882 if ( $rOpts_line_up_parentheses && $saw_opening_structure ) {
18883 $lp_object = $self->[_rlp_object_by_seqno_]
18884 ->{ $type_sequence_to_go[$i_opening] };
18887 # this term is long if we had to break at interior commas..
18888 my $is_long_term = $bp_count > 0;
18890 # If this is a short container with one or more comma arrows,
18891 # then we will mark it as a long term to open it if requested.
18892 # $rOpts_comma_arrow_breakpoints =
18893 # 0 - open only if comma precedes closing brace
18894 # 1 - stable: except for one line blocks
18895 # 2 - try to form 1 line blocks
18897 # 4 - always open up if vt=0
18898 # 5 - stable: even for one line blocks if vt=0
18900 # PATCH: Modify the -cab flag if we are not processing a list:
18901 # We only want the -cab flag to apply to list containers, so
18902 # for non-lists we use the default and stable -cab=5 value.
18903 # Fixes case b939a.
18904 my $cab_flag = $rOpts_comma_arrow_breakpoints;
18905 if ( $type_sequence && !$ris_list_by_seqno->{$type_sequence} ) {
18909 # Ignore old breakpoints when under stress.
18910 # Fixes b1203 b1204 as well as b1197-b1200.
18911 # But not if -lp: fixes b1264, b1265. NOTE: rechecked with
18912 # b1264 to see if this check is still required at all, and
18913 # these still require a check, but at higher level beta+3
18914 # instead of beta: b1193 b780
18915 if ( $saw_opening_structure
18917 && $levels_to_go[$i_opening] >= $list_stress_level )
18921 # Do not break hash braces under stress (fixes b1238)
18922 $do_not_break_apart ||= $types_to_go[$i_opening] eq 'L';
18924 # This option fixes b1235, b1237, b1240 with old and new -lp,
18925 # but formatting is nicer with next option.
18926 ## $is_long_term ||=
18927 ## $levels_to_go[$i_opening] > $stress_level_beta + 1;
18929 # This option fixes b1240 but not b1235, b1237 with new -lp,
18930 # but this gives better formatting than the previous option.
18931 $do_not_break_apart ||=
18932 $levels_to_go[$i_opening] > $stress_level_beta;
18935 if ( !$is_long_term
18936 && $saw_opening_structure
18937 && $is_opening_token{ $tokens_to_go[$i_opening] }
18938 && $index_before_arrow[ $depth + 1 ] > 0
18939 && !$opening_vertical_tightness{ $tokens_to_go[$i_opening] }
18944 || $cab_flag == 0 && $last_nonblank_token eq ','
18945 || $cab_flag == 5 && $old_breakpoint_to_go[$i_opening];
18946 } ## end if ( !$is_long_term &&...)
18948 # mark term as long if the length between opening and closing
18949 # parens exceeds allowed line length
18950 if ( !$is_long_term && $saw_opening_structure ) {
18952 my $i_opening_minus =
18953 $self->find_token_starting_list($i_opening);
18956 $self->excess_line_length( $i_opening_minus, $i );
18958 # Use standard spaces for indentation of lists in -lp mode
18959 # if it gives a longer line length. This helps to avoid an
18960 # instability due to forming and breaking one-line blocks.
18961 # This fixes case b1314.
18962 my $indentation = $leading_spaces_to_go[$i_opening_minus];
18963 if ( ref($indentation)
18964 && $ris_broken_container->{$type_sequence} )
18966 my $lp_spaces = $indentation->get_spaces();
18968 $standard_spaces_to_go[$i_opening_minus];
18969 my $diff = $std_spaces - $lp_spaces;
18970 if ( $diff > 0 ) { $excess += $diff }
18973 my $tol = $length_tol;
18975 # boost tol for an -lp container
18979 && ( $rOpts_extended_continuation_indentation
18980 || !$ris_list_by_seqno->{$type_sequence} )
18983 $tol += $lp_tol_boost;
18986 # Patch to avoid blinking with -bbxi=2 and -cab=2
18987 # in which variations in -ci cause unstable formatting
18988 # in edge cases. We just always add one ci level so that
18989 # the formatting is independent of the -BBX results.
18990 # Fixes cases b1137 b1149 b1150 b1155 b1158 b1159 b1160
18991 # b1161 b1166 b1167 b1168
18992 if ( !$ci_levels_to_go[$i_opening]
18993 && $rbreak_before_container_by_seqno->{$type_sequence} )
18995 $tol += $rOpts->{'continuation-indentation'};
18998 $is_long_term = $excess + $tol > 0;
19000 } ## end if ( !$is_long_term &&...)
19002 # We've set breaks after all comma-arrows. Now we have to
19003 # undo them if this can be a one-line block
19004 # (the only breakpoints set will be due to comma-arrows)
19008 # user doesn't require breaking after all comma-arrows
19009 ( $cab_flag != 0 ) && ( $cab_flag != 4 )
19011 # and if the opening structure is in this batch
19012 && $saw_opening_structure
19014 # and either on the same old line
19016 $old_breakpoint_count_stack[$current_depth] ==
19017 $last_old_breakpoint_count
19019 # or user wants to form long blocks with arrows
19022 # if -cab=3 is overridden then use -cab=2 behavior
19023 || $cab_flag == 3 && $override_cab3[$current_depth]
19026 # and we made breakpoints between the opening and closing
19027 && ( $breakpoint_undo_stack[$current_depth] <
19028 get_forced_breakpoint_undo_count() )
19030 # and this block is short enough to fit on one line
19031 # Note: use < because need 1 more space for possible comma
19036 $self->undo_forced_breakpoint_stack(
19037 $breakpoint_undo_stack[$current_depth] );
19038 } ## end if ( ( $rOpts_comma_arrow_breakpoints...))
19040 # now see if we have any comma breakpoints left
19041 my $has_comma_breakpoints =
19042 ( $breakpoint_stack[$current_depth] !=
19043 get_forced_breakpoint_count() );
19045 # update broken-sublist flag of the outer container
19046 $has_broken_sublist[$depth] =
19047 $has_broken_sublist[$depth]
19048 || $has_broken_sublist[$current_depth]
19050 || $has_comma_breakpoints;
19052 # Having come to the closing ')', '}', or ']', now we have to decide if we
19053 # should 'open up' the structure by placing breaks at the opening and
19054 # closing containers. This is a tricky decision. Here are some of the
19055 # basic considerations:
19057 # -If this is a BLOCK container, then any breakpoints will have already
19058 # been set (and according to user preferences), so we need do nothing here.
19060 # -If we have a comma-separated list for which we can align the list items,
19061 # then we need to do so because otherwise the vertical aligner cannot
19062 # currently do the alignment.
19064 # -If this container does itself contain a container which has been broken
19065 # open, then it should be broken open to properly show the structure.
19067 # -If there is nothing to align, and no other reason to break apart,
19068 # then do not do it.
19070 # We will not break open the parens of a long but 'simple' logical expression.
19073 # This is an example of a simple logical expression and its formatting:
19075 # if ( $bigwasteofspace1 && $bigwasteofspace2
19076 # || $bigwasteofspace3 && $bigwasteofspace4 )
19078 # Most people would prefer this than the 'spacey' version:
19081 # $bigwasteofspace1 && $bigwasteofspace2
19082 # || $bigwasteofspace3 && $bigwasteofspace4
19085 # To illustrate the rules for breaking logical expressions, consider:
19089 # and ( exists $ids_excl_uc{$id_uc}
19090 # or grep $id_uc =~ /$_/, @ids_excl_uc ))
19092 # This is on the verge of being difficult to read. The current default is to
19093 # open it up like this:
19098 # and ( exists $ids_excl_uc{$id_uc}
19099 # or grep $id_uc =~ /$_/, @ids_excl_uc )
19102 # This is a compromise which tries to avoid being too dense and to spacey.
19103 # A more spaced version would be:
19109 # exists $ids_excl_uc{$id_uc}
19110 # or grep $id_uc =~ /$_/, @ids_excl_uc
19114 # Some people might prefer the spacey version -- an option could be added. The
19115 # innermost expression contains a long block '( exists $ids_... ')'.
19117 # Here is how the logic goes: We will force a break at the 'or' that the
19118 # innermost expression contains, but we will not break apart its opening and
19119 # closing containers because (1) it contains no multi-line sub-containers itself,
19120 # and (2) there is no alignment to be gained by breaking it open like this
19123 # exists $ids_excl_uc{$id_uc}
19124 # or grep $id_uc =~ /$_/, @ids_excl_uc
19127 # (although this looks perfectly ok and might be good for long expressions). The
19128 # outer 'if' container, though, contains a broken sub-container, so it will be
19129 # broken open to avoid too much density. Also, since it contains no 'or's, there
19130 # will be a forced break at its 'and'.
19132 # set some flags telling something about this container..
19133 my $is_simple_logical_expression = 0;
19134 if ( $item_count_stack[$current_depth] == 0
19135 && $saw_opening_structure
19136 && $tokens_to_go[$i_opening] eq '('
19137 && $is_logical_container{ $container_type[$current_depth] }
19141 # This seems to be a simple logical expression with
19142 # no existing breakpoints. Set a flag to prevent
19144 if ( !$has_comma_breakpoints ) {
19145 $is_simple_logical_expression = 1;
19148 # This seems to be a simple logical expression with
19149 # breakpoints (broken sublists, for example). Break
19150 # at all 'or's and '||'s.
19152 $self->set_logical_breakpoints($current_depth);
19154 } ## end if ( $item_count_stack...)
19157 && @{ $rfor_semicolon_list[$current_depth] } )
19159 $self->set_for_semicolon_breakpoints($current_depth);
19161 # open up a long 'for' or 'foreach' container to allow
19162 # leading term alignment unless -lp is used.
19163 $has_comma_breakpoints = 1 unless ($lp_object);
19164 } ## end if ( $is_long_term && ...)
19168 # breaks for code BLOCKS are handled at a higher level
19171 # we do not need to break at the top level of an 'if'
19173 && !$is_simple_logical_expression
19175 ## modification to keep ': (' containers vertically tight;
19176 ## but probably better to let user set -vt=1 to avoid
19177 ## inconsistency with other paren types
19178 ## && ($container_type[$current_depth] ne ':')
19180 # otherwise, we require one of these reasons for breaking:
19183 # - this term has forced line breaks
19184 $has_comma_breakpoints
19186 # - the opening container is separated from this batch
19187 # for some reason (comment, blank line, code block)
19188 # - this is a non-paren container spanning multiple lines
19189 || !$saw_opening_structure
19191 # - this is a long block contained in another breakable
19193 || $is_long_term && !$self->is_in_block_by_i($i_opening)
19198 # do special -lp breaks at the CLOSING token for INTACT
19199 # blocks (because we might not do them if the block does
19202 my $K_begin_line = $lp_object->get_K_begin_line();
19203 my $i_begin_line = $K_begin_line - $K_to_go[0];
19204 $self->set_forced_lp_break( $i_begin_line, $i_opening );
19207 # break after opening structure.
19208 # note: break before closing structure will be automatic
19209 if ( $minimum_depth <= $current_depth ) {
19211 if ( $i_opening >= 0 ) {
19212 $self->set_forced_breakpoint($i_opening)
19213 unless ( $do_not_break_apart
19214 || is_unbreakable_container($current_depth) );
19217 # break at ',' of lower depth level before opening token
19218 if ( $last_comma_index[$depth] ) {
19219 $self->set_forced_breakpoint(
19220 $last_comma_index[$depth] );
19223 # break at '.' of lower depth level before opening token
19224 if ( $last_dot_index[$depth] ) {
19225 $self->set_forced_breakpoint(
19226 $last_dot_index[$depth] );
19229 # break before opening structure if preceded by another
19230 # closing structure and a comma. This is normally
19231 # done by the previous closing brace, but not
19232 # if it was a one-line block.
19233 if ( $i_opening > 2 ) {
19235 ( $types_to_go[ $i_opening - 1 ] eq 'b' )
19240 $types_to_go[$i_prev] eq ','
19241 && ( $types_to_go[ $i_prev - 1 ] eq ')'
19242 || $types_to_go[ $i_prev - 1 ] eq '}' )
19245 $self->set_forced_breakpoint($i_prev);
19248 # also break before something like ':(' or '?('
19251 $types_to_go[$i_prev] =~ /^([k\:\?]|&&|\|\|)$/ )
19253 my $token_prev = $tokens_to_go[$i_prev];
19254 if ( $want_break_before{$token_prev} ) {
19255 $self->set_forced_breakpoint($i_prev);
19257 } ## end elsif ( $types_to_go[$i_prev...])
19258 } ## end if ( $i_opening > 2 )
19259 } ## end if ( $minimum_depth <=...)
19261 # break after comma following closing structure
19262 if ( $next_type eq ',' ) {
19263 $self->set_forced_breakpoint( $i + 1 );
19266 # break before an '=' following closing structure
19268 $is_assignment{$next_nonblank_type}
19269 && ( $breakpoint_stack[$current_depth] !=
19270 get_forced_breakpoint_count() )
19273 $self->set_forced_breakpoint($i);
19274 } ## end if ( $is_assignment{$next_nonblank_type...})
19276 # break at any comma before the opening structure Added
19277 # for -lp, but seems to be good in general. It isn't
19278 # obvious how far back to look; the '5' below seems to
19279 # work well and will catch the comma in something like
19280 # push @list, myfunc( $param, $param, ..
19282 my $icomma = $last_comma_index[$depth];
19283 if ( defined($icomma) && ( $i_opening - $icomma ) < 5 ) {
19284 unless ( $forced_breakpoint_to_go[$icomma] ) {
19285 $self->set_forced_breakpoint($icomma);
19288 } ## end logic to open up a container
19290 # Break open a logical container open if it was already open
19291 elsif ($is_simple_logical_expression
19292 && $has_old_logical_breakpoints[$current_depth] )
19294 $self->set_logical_breakpoints($current_depth);
19297 # Handle long container which does not get opened up
19298 elsif ($is_long_term) {
19300 # must set fake breakpoint to alert outer containers that
19302 set_fake_breakpoint();
19303 } ## end elsif ($is_long_term)
19305 } ## end elsif ( $depth < $current_depth)
19307 #------------------------------------------------------------
19308 # Handle this token
19309 #------------------------------------------------------------
19311 $current_depth = $depth;
19313 # most token types can skip the rest of this loop
19314 next unless ( $quick_filter{$type} );
19316 # handle comma-arrow
19317 if ( $type eq '=>' ) {
19318 next if ( $last_nonblank_type eq '=>' );
19319 next if $rOpts_break_at_old_comma_breakpoints;
19321 if ( $rOpts_comma_arrow_breakpoints == 3
19322 && !$override_cab3[$depth] );
19323 $want_comma_break[$depth] = 1;
19324 $index_before_arrow[$depth] = $i_last_nonblank_token;
19326 } ## end if ( $type eq '=>' )
19328 elsif ( $type eq '.' ) {
19329 $last_dot_index[$depth] = $i;
19332 # Turn off alignment if we are sure that this is not a list
19333 # environment. To be safe, we will do this if we see certain
19334 # non-list tokens, such as ';', and also the environment is
19335 # not a list. Note that '=' could be in any of the = operators
19336 # (lextest.t). We can't just use the reported environment
19337 # because it can be incorrect in some cases.
19338 elsif ( ( $type =~ /^[\;\<\>\~]$/ || $is_assignment{$type} )
19339 && !$self->is_in_list_by_i($i) )
19341 $dont_align[$depth] = 1;
19342 $want_comma_break[$depth] = 0;
19343 $index_before_arrow[$depth] = -1;
19344 } ## end elsif ( ( $type =~ /^[\;\<\>\~]$/...))
19346 # now just handle any commas
19347 next unless ( $type eq ',' );
19349 $last_dot_index[$depth] = undef;
19350 $last_comma_index[$depth] = $i;
19352 # break here if this comma follows a '=>'
19353 # but not if there is a side comment after the comma
19354 if ( $want_comma_break[$depth] ) {
19356 if ( $next_nonblank_type =~ /^[\)\}\]R]$/ ) {
19357 if ($rOpts_comma_arrow_breakpoints) {
19358 $want_comma_break[$depth] = 0;
19363 $self->set_forced_breakpoint($i)
19364 unless ( $next_nonblank_type eq '#' );
19366 # break before the previous token if it looks safe
19367 # Example of something that we will not try to break before:
19368 # DBI::SQL_SMALLINT() => $ado_consts->{adSmallInt},
19369 # Also we don't want to break at a binary operator (like +):
19373 # $y - $R, -fill => 'black',
19375 my $ibreak = $index_before_arrow[$depth] - 1;
19377 && $tokens_to_go[ $ibreak + 1 ] !~ /^[\)\}\]]$/ )
19379 if ( $tokens_to_go[$ibreak] eq '-' ) { $ibreak-- }
19380 if ( $types_to_go[$ibreak] eq 'b' ) { $ibreak-- }
19381 if ( $types_to_go[$ibreak] =~ /^[,wiZCUG\(\{\[]$/ ) {
19383 # don't break pointer calls, such as the following:
19384 # File::Spec->curdir => 1,
19385 # (This is tokenized as adjacent 'w' tokens)
19386 ##if ( $tokens_to_go[ $ibreak + 1 ] !~ /^->/ ) {
19388 # And don't break before a comma, as in the following:
19389 # ( LONGER_THAN,=> 1,
19390 # EIGHTY_CHARACTERS,=> 2,
19391 # CAUSES_FORMATTING,=> 3,
19394 # This example is for -tso but should be general rule
19395 if ( $tokens_to_go[ $ibreak + 1 ] ne '->'
19396 && $tokens_to_go[ $ibreak + 1 ] ne ',' )
19398 $self->set_forced_breakpoint($ibreak);
19400 } ## end if ( $types_to_go[$ibreak...])
19401 } ## end if ( $ibreak > 0 && $tokens_to_go...)
19403 $want_comma_break[$depth] = 0;
19404 $index_before_arrow[$depth] = -1;
19406 # handle list which mixes '=>'s and ','s:
19407 # treat any list items so far as an interrupted list
19408 $interrupted_list[$depth] = 1;
19410 } ## end if ( $want_comma_break...)
19412 # Break after all commas above starting depth...
19413 # But only if the last closing token was followed by a comma,
19414 # to avoid breaking a list operator (issue c119)
19415 if ( $depth < $starting_depth
19416 && $comma_follows_last_closing_token
19417 && !$dont_align[$depth] )
19419 $self->set_forced_breakpoint($i)
19420 unless ( $next_nonblank_type eq '#' );
19424 # add this comma to the list..
19425 my $item_count = $item_count_stack[$depth];
19426 if ( $item_count == 0 ) {
19428 # but do not form a list with no opening structure
19431 # open INFILE_COPY, ">$input_file_copy"
19432 # or die ("very long message");
19433 if ( ( $opening_structure_index_stack[$depth] < 0 )
19434 && $self->is_in_block_by_i($i) )
19436 $dont_align[$depth] = 1;
19438 } ## end if ( $item_count == 0 )
19440 $comma_index[$depth][$item_count] = $i;
19441 ++$item_count_stack[$depth];
19442 if ( $last_nonblank_type =~ /^[iR\]]$/ ) {
19443 $identifier_count_stack[$depth]++;
19445 } ## end while ( ++$i <= $max_index_to_go)
19447 #-------------------------------------------
19448 # end of loop over all tokens in this batch
19449 #-------------------------------------------
19451 # set breaks for any unfinished lists ..
19452 for ( my $dd = $current_depth ; $dd >= $minimum_depth ; $dd-- ) {
19454 $interrupted_list[$dd] = 1;
19455 $has_broken_sublist[$dd] = 1 if ( $dd < $current_depth );
19456 $self->set_comma_breakpoints($dd);
19457 $self->set_logical_breakpoints($dd)
19458 if ( $has_old_logical_breakpoints[$dd] );
19459 $self->set_for_semicolon_breakpoints($dd);
19461 # break open container...
19462 my $i_opening = $opening_structure_index_stack[$dd];
19463 if ( defined($i_opening) && $i_opening >= 0 ) {
19464 $self->set_forced_breakpoint($i_opening)
19466 is_unbreakable_container($dd)
19468 # Avoid a break which would place an isolated ' or "
19471 && $i_opening >= $max_index_to_go - 2
19472 && ( $token eq "'" || $token eq '"' ) )
19475 } ## end for ( my $dd = $current_depth...)
19477 # Return a flag indicating if the input file had some good breakpoints.
19478 # This flag will be used to force a break in a line shorter than the
19479 # allowed line length.
19480 if ( $has_old_logical_breakpoints[$current_depth] ) {
19481 $saw_good_breakpoint = 1;
19484 # A complex line with one break at an = has a good breakpoint.
19485 # This is not complex ($total_depth_variation=0):
19489 # This is complex ($total_depth_variation=6):
19491 # (is_boundp("a", 'self-insert') && is_boundp("b", 'self-insert'));
19492 elsif ($i_old_assignment_break
19493 && $total_depth_variation > 4
19494 && $old_breakpoint_count == 1 )
19496 $saw_good_breakpoint = 1;
19497 } ## end elsif ( $i_old_assignment_break...)
19499 return $saw_good_breakpoint;
19500 } ## end sub break_lists
19501 } ## end closure break_lists
19508 # Added 'w' to fix b1172
19509 my @q = qw(k w i Z ->);
19510 @is_kwiZ{@q} = (1) x scalar(@q);
19512 # added = for b1211
19513 @q = qw<( [ { L R } ] ) = b>;
19515 @is_key_type{@q} = (1) x scalar(@q);
19518 use constant DEBUG_FIND_START => 0;
19520 sub find_token_starting_list {
19522 # When testing to see if a block will fit on one line, some
19523 # previous token(s) may also need to be on the line; particularly
19524 # if this is a sub call. So we will look back at least one
19526 my ( $self, $i_opening_paren ) = @_;
19528 # This will be the return index
19529 my $i_opening_minus = $i_opening_paren;
19531 goto RETURN if ( $i_opening_minus <= 0 );
19533 my $im1 = $i_opening_paren - 1;
19534 my ( $iprev_nb, $type_prev_nb ) = ( $im1, $types_to_go[$im1] );
19535 if ( $type_prev_nb eq 'b' && $iprev_nb > 0 ) {
19537 $type_prev_nb = $types_to_go[$iprev_nb];
19540 if ( $type_prev_nb eq ',' ) {
19542 # a previous comma is a good break point
19543 # $i_opening_minus = $i_opening_paren;
19547 $tokens_to_go[$i_opening_paren] eq '('
19549 # non-parens added here to fix case b1186
19550 || $is_kwiZ{$type_prev_nb}
19553 $i_opening_minus = $im1;
19555 # Walk back to improve length estimate...
19556 # FIX for cases b1169 b1170 b1171: start walking back
19557 # at the previous nonblank. This makes the result insensitive
19558 # to the flag --space-function-paren, and similar.
19559 # previous loop: for ( my $j = $im1 ; $j >= 0 ; $j-- ) {
19560 for ( my $j = $iprev_nb ; $j >= 0 ; $j-- ) {
19561 ##last if ( $types_to_go[$j] =~ /^[\(\[\{L\}\]\)Rb,]$/ );
19562 ##last if ( $is_key_type{ $types_to_go[$j] } );
19563 if ( $is_key_type{ $types_to_go[$j] } ) {
19566 if ( $types_to_go[$j] eq '=' ) { $i_opening_minus = $j }
19569 $i_opening_minus = $j;
19571 if ( $types_to_go[$i_opening_minus] eq 'b' ) { $i_opening_minus++ }
19576 DEBUG_FIND_START && print <<EOM;
19577 FIND_START: i=$i_opening_paren tok=$tokens_to_go[$i_opening_paren] => im=$i_opening_minus tok=$tokens_to_go[$i_opening_minus]
19580 return $i_opening_minus;
19583 { ## begin closure set_comma_breakpoints_do
19585 my %is_keyword_with_special_leading_term;
19589 # These keywords have prototypes which allow a special leading item
19590 # followed by a list
19592 qw(formline grep kill map printf sprintf push chmod join pack unshift);
19593 @is_keyword_with_special_leading_term{@q} = (1) x scalar(@q);
19596 use constant DEBUG_SPARSE => 0;
19598 sub set_comma_breakpoints_do {
19600 # Given a list with some commas, set breakpoints at some of the
19601 # commas, if necessary, to make it easy to read.
19603 my ( $self, $rinput_hash ) = @_;
19605 my $depth = $rinput_hash->{depth};
19606 my $i_opening_paren = $rinput_hash->{i_opening_paren};
19607 my $i_closing_paren = $rinput_hash->{i_closing_paren};
19608 my $item_count = $rinput_hash->{item_count};
19609 my $identifier_count = $rinput_hash->{identifier_count};
19610 my $rcomma_index = $rinput_hash->{rcomma_index};
19611 my $next_nonblank_type = $rinput_hash->{next_nonblank_type};
19612 my $list_type = $rinput_hash->{list_type};
19613 my $interrupted = $rinput_hash->{interrupted};
19614 my $rdo_not_break_apart = $rinput_hash->{rdo_not_break_apart};
19615 my $must_break_open = $rinput_hash->{must_break_open};
19616 my $has_broken_sublist = $rinput_hash->{has_broken_sublist};
19618 # nothing to do if no commas seen
19619 return if ( $item_count < 1 );
19621 my $i_first_comma = $rcomma_index->[0];
19622 my $i_true_last_comma = $rcomma_index->[ $item_count - 1 ];
19623 my $i_last_comma = $i_true_last_comma;
19624 if ( $i_last_comma >= $max_index_to_go ) {
19625 $i_last_comma = $rcomma_index->[ --$item_count - 1 ];
19626 return if ( $item_count < 1 );
19628 my $is_lp_formatting = ref( $leading_spaces_to_go[$i_first_comma] );
19630 #---------------------------------------------------------------
19631 # find lengths of all items in the list to calculate page layout
19632 #---------------------------------------------------------------
19633 my $comma_count = $item_count;
19639 my @max_length = ( 0, 0 );
19640 my $first_term_length;
19641 my $i = $i_opening_paren;
19644 foreach my $j ( 0 .. $comma_count - 1 ) {
19645 $is_odd = 1 - $is_odd;
19646 $i_prev_plus = $i + 1;
19647 $i = $rcomma_index->[$j];
19650 ( $i == 0 || $types_to_go[ $i - 1 ] eq 'b' ) ? $i - 2 : $i - 1;
19652 ( $types_to_go[$i_prev_plus] eq 'b' )
19655 push @i_term_begin, $i_term_begin;
19656 push @i_term_end, $i_term_end;
19657 push @i_term_comma, $i;
19659 # note: currently adding 2 to all lengths (for comma and space)
19661 2 + token_sequence_length( $i_term_begin, $i_term_end );
19662 push @item_lengths, $length;
19665 $first_term_length = $length;
19669 if ( $length > $max_length[$is_odd] ) {
19670 $max_length[$is_odd] = $length;
19675 # now we have to make a distinction between the comma count and item
19676 # count, because the item count will be one greater than the comma
19677 # count if the last item is not terminated with a comma
19679 ( $types_to_go[ $i_last_comma + 1 ] eq 'b' )
19680 ? $i_last_comma + 1
19683 ( $types_to_go[ $i_closing_paren - 1 ] eq 'b' )
19684 ? $i_closing_paren - 2
19685 : $i_closing_paren - 1;
19686 my $i_effective_last_comma = $i_last_comma;
19688 my $last_item_length = token_sequence_length( $i_b + 1, $i_e );
19690 if ( $last_item_length > 0 ) {
19692 # add 2 to length because other lengths include a comma and a blank
19693 $last_item_length += 2;
19694 push @item_lengths, $last_item_length;
19695 push @i_term_begin, $i_b + 1;
19696 push @i_term_end, $i_e;
19697 push @i_term_comma, undef;
19699 my $i_odd = $item_count % 2;
19701 if ( $last_item_length > $max_length[$i_odd] ) {
19702 $max_length[$i_odd] = $last_item_length;
19706 $i_effective_last_comma = $i_e + 1;
19708 if ( $types_to_go[ $i_b + 1 ] =~ /^[iR\]]$/ ) {
19709 $identifier_count++;
19713 #---------------------------------------------------------------
19714 # End of length calculations
19715 #---------------------------------------------------------------
19717 #---------------------------------------------------------------
19718 # Compound List Rule 1:
19719 # Break at (almost) every comma for a list containing a broken
19720 # sublist. This has higher priority than the Interrupted List
19722 #---------------------------------------------------------------
19723 if ($has_broken_sublist) {
19725 # Break at every comma except for a comma between two
19726 # simple, small terms. This prevents long vertical
19727 # columns of, say, just 0's.
19728 my $small_length = 10; # 2 + actual maximum length wanted
19730 # We'll insert a break in long runs of small terms to
19731 # allow alignment in uniform tables.
19732 my $skipped_count = 0;
19733 my $columns = table_columns_available($i_first_comma);
19734 my $fields = int( $columns / $small_length );
19735 if ( $rOpts_maximum_fields_per_table
19736 && $fields > $rOpts_maximum_fields_per_table )
19738 $fields = $rOpts_maximum_fields_per_table;
19740 my $max_skipped_count = $fields - 1;
19742 my $is_simple_last_term = 0;
19743 my $is_simple_next_term = 0;
19744 foreach my $j ( 0 .. $item_count ) {
19745 $is_simple_last_term = $is_simple_next_term;
19746 $is_simple_next_term = 0;
19747 if ( $j < $item_count
19748 && $i_term_end[$j] == $i_term_begin[$j]
19749 && $item_lengths[$j] <= $small_length )
19751 $is_simple_next_term = 1;
19754 if ( $is_simple_last_term
19755 && $is_simple_next_term
19756 && $skipped_count < $max_skipped_count )
19761 $skipped_count = 0;
19762 my $i = $i_term_comma[ $j - 1 ];
19763 last unless defined $i;
19764 $self->set_forced_breakpoint($i);
19768 # always break at the last comma if this list is
19769 # interrupted; we wouldn't want to leave a terminal '{', for
19771 if ($interrupted) {
19772 $self->set_forced_breakpoint($i_true_last_comma);
19777 #my ( $a, $b, $c ) = caller();
19778 #print "LISTX: in set_list $a $c interrupt=$interrupted count=$item_count
19779 #i_first = $i_first_comma i_last=$i_last_comma max=$max_index_to_go\n";
19780 #print "depth=$depth has_broken=$has_broken_sublist[$depth] is_multi=$is_multiline opening_paren=($i_opening_paren) \n";
19782 #---------------------------------------------------------------
19783 # Interrupted List Rule:
19784 # A list is forced to use old breakpoints if it was interrupted
19785 # by side comments or blank lines, or requested by user.
19786 #---------------------------------------------------------------
19787 if ( $rOpts_break_at_old_comma_breakpoints
19789 || $i_opening_paren < 0 )
19791 $self->copy_old_breakpoints( $i_first_comma, $i_true_last_comma );
19795 #---------------------------------------------------------------
19796 # Looks like a list of items. We have to look at it and size it up.
19797 #---------------------------------------------------------------
19799 my $opening_token = $tokens_to_go[$i_opening_paren];
19800 my $opening_is_in_block = $self->is_in_block_by_i($i_opening_paren);
19802 #-------------------------------------------------------------------
19803 # Return if this will fit on one line
19804 #-------------------------------------------------------------------
19806 # The -bbxi=2 parameters can add an extra hidden level of indentation;
19807 # this needs a tolerance to avoid instability. Fixes b1259, 1260.
19809 if ( $break_before_container_types{$opening_token}
19810 && $container_indentation_options{$opening_token}
19811 && $container_indentation_options{$opening_token} == 2 )
19813 $tol = $rOpts_indent_columns;
19816 my $i_opening_minus = $self->find_token_starting_list($i_opening_paren);
19818 unless $self->excess_line_length( $i_opening_minus, $i_closing_paren )
19821 #-------------------------------------------------------------------
19822 # Now we know that this block spans multiple lines; we have to set
19823 # at least one breakpoint -- real or fake -- as a signal to break
19824 # open any outer containers.
19825 #-------------------------------------------------------------------
19826 set_fake_breakpoint();
19828 # be sure we do not extend beyond the current list length
19829 if ( $i_effective_last_comma >= $max_index_to_go ) {
19830 $i_effective_last_comma = $max_index_to_go - 1;
19833 # Set a flag indicating if we need to break open to keep -lp
19834 # items aligned. This is necessary if any of the list terms
19835 # exceeds the available space after the '('.
19836 my $need_lp_break_open = $must_break_open;
19837 if ( $is_lp_formatting && !$must_break_open ) {
19838 my $columns_if_unbroken =
19839 $maximum_line_length_at_level[ $levels_to_go[$i_opening_minus] ]
19840 - total_line_length( $i_opening_minus, $i_opening_paren );
19841 $need_lp_break_open =
19842 ( $max_length[0] > $columns_if_unbroken )
19843 || ( $max_length[1] > $columns_if_unbroken )
19844 || ( $first_term_length > $columns_if_unbroken );
19847 # Specify if the list must have an even number of fields or not.
19848 # It is generally safest to assume an even number, because the
19849 # list items might be a hash list. But if we can be sure that
19850 # it is not a hash, then we can allow an odd number for more
19852 my $odd_or_even = 2; # 1 = odd field count ok, 2 = want even count
19854 if ( $identifier_count >= $item_count - 1
19855 || $is_assignment{$next_nonblank_type}
19856 || ( $list_type && $list_type ne '=>' && $list_type !~ /^[\:\?]$/ )
19862 # do we have a long first term which should be
19863 # left on a line by itself?
19864 my $use_separate_first_term = (
19865 $odd_or_even == 1 # only if we can use 1 field/line
19866 && $item_count > 3 # need several items
19867 && $first_term_length >
19868 2 * $max_length[0] - 2 # need long first term
19869 && $first_term_length >
19870 2 * $max_length[1] - 2 # need long first term
19873 # or do we know from the type of list that the first term should
19875 if ( !$use_separate_first_term ) {
19876 if ( $is_keyword_with_special_leading_term{$list_type} ) {
19877 $use_separate_first_term = 1;
19879 # should the container be broken open?
19880 if ( $item_count < 3 ) {
19881 if ( $i_first_comma - $i_opening_paren < 4 ) {
19882 ${$rdo_not_break_apart} = 1;
19885 elsif ($first_term_length < 20
19886 && $i_first_comma - $i_opening_paren < 4 )
19888 my $columns = table_columns_available($i_first_comma);
19889 if ( $first_term_length < $columns ) {
19890 ${$rdo_not_break_apart} = 1;
19897 if ($use_separate_first_term) {
19899 # ..set a break and update starting values
19900 $use_separate_first_term = 1;
19901 $self->set_forced_breakpoint($i_first_comma);
19902 $i_opening_paren = $i_first_comma;
19903 $i_first_comma = $rcomma_index->[1];
19905 return if $comma_count == 1;
19906 shift @item_lengths;
19907 shift @i_term_begin;
19909 shift @i_term_comma;
19912 # if not, update the metrics to include the first term
19914 if ( $first_term_length > $max_length[0] ) {
19915 $max_length[0] = $first_term_length;
19919 # Field width parameters
19920 my $pair_width = ( $max_length[0] + $max_length[1] );
19922 ( $max_length[0] > $max_length[1] ) ? $max_length[0] : $max_length[1];
19924 # Number of free columns across the page width for laying out tables
19925 my $columns = table_columns_available($i_first_comma);
19927 # Patch for b1210 and b1216-b1218 when -vmll is set. If we are unable
19928 # to break after an opening paren, then the maximum line length for the
19929 # first line could be less than the later lines. So we need to reduce
19930 # the line length. Normally, we will get a break after an opening
19931 # paren, but in some cases we might not.
19932 if ( $rOpts_variable_maximum_line_length
19933 && $tokens_to_go[$i_opening_paren] eq '('
19935 ##&& !$old_breakpoint_to_go[$i_opening_paren] ) ## in b1210 patch
19937 my $ib = $i_term_begin[0];
19938 my $type = $types_to_go[$ib];
19940 # So far, the only known instance of this problem is when
19941 # a bareword follows an opening paren with -vmll
19942 if ( $type eq 'w' ) {
19944 # If a line starts with paren+space+terms, then its max length
19945 # could be up to ci+2-i spaces less than if the term went out
19946 # on a line after the paren. So..
19948 2 + $rOpts_continuation_indentation -
19949 $rOpts_indent_columns );
19950 $columns = max( 0, $columns - $tol );
19952 ## Here is the original b1210 fix, but it failed on b1216-b1218
19953 ##my $columns2 = table_columns_available($i_opening_paren);
19954 ##$columns = min( $columns, $columns2 );
19958 # Estimated maximum number of fields which fit this space
19959 # This will be our first guess
19960 my $number_of_fields_max =
19961 maximum_number_of_fields( $columns, $odd_or_even, $max_width,
19963 my $number_of_fields = $number_of_fields_max;
19965 # Find the best-looking number of fields
19966 # and make this our second guess if possible
19967 my ( $number_of_fields_best, $ri_ragged_break_list,
19968 $new_identifier_count )
19969 = $self->study_list_complexity( \@i_term_begin, \@i_term_end,
19970 \@item_lengths, $max_width );
19972 if ( $number_of_fields_best != 0
19973 && $number_of_fields_best < $number_of_fields_max )
19975 $number_of_fields = $number_of_fields_best;
19978 # ----------------------------------------------------------------------
19979 # If we are crowded and the -lp option is being used, try to
19980 # undo some indentation
19981 # ----------------------------------------------------------------------
19985 $number_of_fields == 0
19986 || ( $number_of_fields == 1
19987 && $number_of_fields != $number_of_fields_best )
19991 my $available_spaces =
19992 $self->get_available_spaces_to_go($i_first_comma);
19993 if ( $available_spaces > 0 ) {
19995 my $spaces_wanted = $max_width - $columns; # for 1 field
19997 if ( $number_of_fields_best == 0 ) {
19998 $number_of_fields_best =
19999 get_maximum_fields_wanted( \@item_lengths );
20002 if ( $number_of_fields_best != 1 ) {
20003 my $spaces_wanted_2 =
20004 1 + $pair_width - $columns; # for 2 fields
20005 if ( $available_spaces > $spaces_wanted_2 ) {
20006 $spaces_wanted = $spaces_wanted_2;
20010 if ( $spaces_wanted > 0 ) {
20011 my $deleted_spaces =
20012 $self->reduce_lp_indentation( $i_first_comma,
20016 if ( $deleted_spaces > 0 ) {
20017 $columns = table_columns_available($i_first_comma);
20018 $number_of_fields_max =
20019 maximum_number_of_fields( $columns, $odd_or_even,
20020 $max_width, $pair_width );
20021 $number_of_fields = $number_of_fields_max;
20023 if ( $number_of_fields_best == 1
20024 && $number_of_fields >= 1 )
20026 $number_of_fields = $number_of_fields_best;
20033 # try for one column if two won't work
20034 if ( $number_of_fields <= 0 ) {
20035 $number_of_fields = int( $columns / $max_width );
20038 # The user can place an upper bound on the number of fields,
20039 # which can be useful for doing maintenance on tables
20040 if ( $rOpts_maximum_fields_per_table
20041 && $number_of_fields > $rOpts_maximum_fields_per_table )
20043 $number_of_fields = $rOpts_maximum_fields_per_table;
20046 # How many columns (characters) and lines would this container take
20047 # if no additional whitespace were added?
20048 my $packed_columns = token_sequence_length( $i_opening_paren + 1,
20049 $i_effective_last_comma + 1 );
20050 if ( $columns <= 0 ) { $columns = 1 } # avoid divide by zero
20051 my $packed_lines = 1 + int( $packed_columns / $columns );
20053 # are we an item contained in an outer list?
20054 my $in_hierarchical_list = $next_nonblank_type =~ /^[\}\,]$/;
20056 if ( $number_of_fields <= 0 ) {
20058 # #---------------------------------------------------------------
20059 # # We're in trouble. We can't find a single field width that works.
20060 # # There is no simple answer here; we may have a single long list
20062 # #---------------------------------------------------------------
20064 # In many cases, it may be best to not force a break if there is just one
20065 # comma, because the standard continuation break logic will do a better
20068 # In the common case that all but one of the terms can fit
20069 # on a single line, it may look better not to break open the
20070 # containing parens. Consider, for example
20074 # sort { $color_value{$::a} <=> $color_value{$::b}; }
20077 # which will look like this with the container broken:
20081 # sort { $color_value{$::a} <=> $color_value{$::b}; } keys %colors
20084 # Here is an example of this rule for a long last term:
20086 # log_message( 0, 256, 128,
20087 # "Number of routes in adj-RIB-in to be considered: $peercount" );
20089 # And here is an example with a long first term:
20092 # "%2d wallclock secs (%$f usr %$f sys + %$f cusr %$f csys = %$f CPU)",
20093 # $r, $pu, $ps, $cu, $cs, $tt
20095 # if $style eq 'all';
20097 my $i_last_comma = $rcomma_index->[ $comma_count - 1 ];
20098 my $long_last_term =
20099 $self->excess_line_length( 0, $i_last_comma ) <= 0;
20100 my $long_first_term =
20101 $self->excess_line_length( $i_first_comma + 1, $max_index_to_go )
20104 # break at every comma ...
20107 # if requested by user or is best looking
20108 $number_of_fields_best == 1
20110 # or if this is a sublist of a larger list
20111 || $in_hierarchical_list
20113 # or if multiple commas and we don't have a long first or last
20115 || ( $comma_count > 1
20116 && !( $long_last_term || $long_first_term ) )
20119 foreach ( 0 .. $comma_count - 1 ) {
20120 $self->set_forced_breakpoint( $rcomma_index->[$_] );
20123 elsif ($long_last_term) {
20125 $self->set_forced_breakpoint($i_last_comma);
20126 ${$rdo_not_break_apart} = 1 unless $must_break_open;
20128 elsif ($long_first_term) {
20130 $self->set_forced_breakpoint($i_first_comma);
20134 # let breaks be defined by default bond strength logic
20139 # --------------------------------------------------------
20140 # We have a tentative field count that seems to work.
20141 # How many lines will this require?
20142 # --------------------------------------------------------
20143 my $formatted_lines = $item_count / ($number_of_fields);
20144 if ( $formatted_lines != int $formatted_lines ) {
20145 $formatted_lines = 1 + int $formatted_lines;
20148 # So far we've been trying to fill out to the right margin. But
20149 # compact tables are easier to read, so let's see if we can use fewer
20150 # fields without increasing the number of lines.
20151 $number_of_fields =
20152 compactify_table( $item_count, $number_of_fields, $formatted_lines,
20155 # How many spaces across the page will we fill?
20156 my $columns_per_line =
20157 ( int $number_of_fields / 2 ) * $pair_width +
20158 ( $number_of_fields % 2 ) * $max_width;
20160 my $formatted_columns;
20162 if ( $number_of_fields > 1 ) {
20163 $formatted_columns =
20164 ( $pair_width * ( int( $item_count / 2 ) ) +
20165 ( $item_count % 2 ) * $max_width );
20168 $formatted_columns = $max_width * $item_count;
20170 if ( $formatted_columns < $packed_columns ) {
20171 $formatted_columns = $packed_columns;
20174 my $unused_columns = $formatted_columns - $packed_columns;
20176 # set some empirical parameters to help decide if we should try to
20177 # align; high sparsity does not look good, especially with few lines
20178 my $sparsity = ($unused_columns) / ($formatted_columns);
20179 my $max_allowed_sparsity =
20180 ( $item_count < 3 ) ? 0.1
20181 : ( $packed_lines == 1 ) ? 0.15
20182 : ( $packed_lines == 2 ) ? 0.4
20185 my $two_line_word_wrap_ok;
20186 if ( $opening_token eq '(' ) {
20188 # default is to allow wrapping of short paren lists
20189 $two_line_word_wrap_ok = 1;
20191 # but turn off word wrap where requested
20192 if ($rOpts_break_open_paren_list) {
20194 # This parameter is a one-character flag, as follows:
20195 # '0' matches no parens -> break open NOT OK -> word wrap OK
20196 # '1' matches all parens -> break open OK -> word wrap NOT OK
20197 # Other values are the same as used by the weld-exclusion-list
20198 my $flag = $rOpts_break_open_paren_list;
20202 $two_line_word_wrap_ok = 0;
20204 elsif ( $flag eq '0' ) {
20205 $two_line_word_wrap_ok = 1;
20208 my $KK = $K_to_go[$i_opening_paren];
20209 $two_line_word_wrap_ok =
20210 !$self->match_paren_flag( $KK, $flag );
20215 # Begin check for shortcut methods, which avoid treating a list
20216 # as a table for relatively small parenthesized lists. These
20217 # are usually easier to read if not formatted as tables.
20219 $packed_lines <= 2 # probably can fit in 2 lines
20220 && $item_count < 9 # doesn't have too many items
20221 && $opening_is_in_block # not a sub-container
20222 && $two_line_word_wrap_ok # ok to wrap this paren list
20223 ##&& $opening_token eq '(' # is paren list
20227 # Shortcut method 1: for -lp and just one comma:
20228 # This is a no-brainer, just break at the comma.
20230 $is_lp_formatting # -lp
20231 && $item_count == 2 # two items, one comma
20232 && !$must_break_open
20235 my $i_break = $rcomma_index->[0];
20236 $self->set_forced_breakpoint($i_break);
20237 ${$rdo_not_break_apart} = 1;
20242 # method 2 is for most small ragged lists which might look
20243 # best if not displayed as a table.
20245 ( $number_of_fields == 2 && $item_count == 3 )
20247 $new_identifier_count > 0 # isn't all quotes
20248 && $sparsity > 0.15
20249 ) # would be fairly spaced gaps if aligned
20253 my $break_count = $self->set_ragged_breakpoints( \@i_term_comma,
20254 $ri_ragged_break_list );
20255 ++$break_count if ($use_separate_first_term);
20257 # NOTE: we should really use the true break count here,
20258 # which can be greater if there are large terms and
20259 # little space, but usually this will work well enough.
20260 unless ($must_break_open) {
20262 if ( $break_count <= 1 ) {
20263 ${$rdo_not_break_apart} = 1;
20265 elsif ( $is_lp_formatting && !$need_lp_break_open ) {
20266 ${$rdo_not_break_apart} = 1;
20272 } ## end shortcut methods
20275 DEBUG_SPARSE && do {
20277 "SPARSE:cols=$columns commas=$comma_count items:$item_count ids=$identifier_count pairwidth=$pair_width fields=$number_of_fields lines packed: $packed_lines packed_cols=$packed_columns fmtd:$formatted_lines cols /line:$columns_per_line unused:$unused_columns fmtd:$formatted_columns sparsity=$sparsity allow=$max_allowed_sparsity\n";
20281 #---------------------------------------------------------------
20282 # Compound List Rule 2:
20283 # If this list is too long for one line, and it is an item of a
20284 # larger list, then we must format it, regardless of sparsity
20285 # (ian.t). One reason that we have to do this is to trigger
20286 # Compound List Rule 1, above, which causes breaks at all commas of
20287 # all outer lists. In this way, the structure will be properly
20289 #---------------------------------------------------------------
20291 # Decide if this list is too long for one line unless broken
20292 my $total_columns = table_columns_available($i_opening_paren);
20293 my $too_long = $packed_columns > $total_columns;
20295 # For a paren list, include the length of the token just before the
20296 # '(' because this is likely a sub call, and we would have to
20297 # include the sub name on the same line as the list. This is still
20298 # imprecise, but not too bad. (steve.t)
20299 if ( !$too_long && $i_opening_paren > 0 && $opening_token eq '(' ) {
20301 $too_long = $self->excess_line_length( $i_opening_minus,
20302 $i_effective_last_comma + 1 ) > 0;
20305 # FIXME: For an item after a '=>', try to include the length of the
20306 # thing before the '=>'. This is crude and should be improved by
20307 # actually looking back token by token.
20308 if ( !$too_long && $i_opening_paren > 0 && $list_type eq '=>' ) {
20309 my $i_opening_minus = $i_opening_paren - 4;
20310 if ( $i_opening_minus >= 0 ) {
20311 $too_long = $self->excess_line_length( $i_opening_minus,
20312 $i_effective_last_comma + 1 ) > 0;
20316 # Always break lists contained in '[' and '{' if too long for 1 line,
20317 # and always break lists which are too long and part of a more complex
20319 my $must_break_open_container = $must_break_open
20321 && ( $in_hierarchical_list || !$two_line_word_wrap_ok ) );
20323 #print "LISTX: next=$next_nonblank_type avail cols=$columns packed=$packed_columns must format = $must_break_open_container too-long=$too_long opening=$opening_token list_type=$list_type formatted_lines=$formatted_lines packed=$packed_lines max_sparsity= $max_allowed_sparsity sparsity=$sparsity \n";
20325 #---------------------------------------------------------------
20326 # The main decision:
20327 # Now decide if we will align the data into aligned columns. Do not
20328 # attempt to align columns if this is a tiny table or it would be
20329 # too spaced. It seems that the more packed lines we have, the
20330 # sparser the list that can be allowed and still look ok.
20331 #---------------------------------------------------------------
20333 if ( ( $formatted_lines < 3 && $packed_lines < $formatted_lines )
20334 || ( $formatted_lines < 2 )
20335 || ( $unused_columns > $max_allowed_sparsity * $formatted_columns )
20339 #---------------------------------------------------------------
20340 # too sparse: would look ugly if aligned in a table;
20341 #---------------------------------------------------------------
20343 # use old breakpoints if this is a 'big' list
20344 if ( $packed_lines > 2 && $item_count > 10 ) {
20345 write_logfile_entry("List sparse: using old breakpoints\n");
20346 $self->copy_old_breakpoints( $i_first_comma, $i_last_comma );
20349 # let the continuation logic handle it if 2 lines
20352 my $break_count = $self->set_ragged_breakpoints( \@i_term_comma,
20353 $ri_ragged_break_list );
20354 ++$break_count if ($use_separate_first_term);
20356 unless ($must_break_open_container) {
20357 if ( $break_count <= 1 ) {
20358 ${$rdo_not_break_apart} = 1;
20360 elsif ( $is_lp_formatting && !$need_lp_break_open ) {
20361 ${$rdo_not_break_apart} = 1;
20368 #---------------------------------------------------------------
20369 # go ahead and format as a table
20370 #---------------------------------------------------------------
20371 write_logfile_entry(
20372 "List: auto formatting with $number_of_fields fields/row\n");
20374 my $j_first_break =
20375 $use_separate_first_term ? $number_of_fields : $number_of_fields - 1;
20378 my $j = $j_first_break ;
20379 $j < $comma_count ;
20380 $j += $number_of_fields
20383 my $i = $rcomma_index->[$j];
20384 $self->set_forced_breakpoint($i);
20388 } ## end closure set_comma_breakpoints_do
20390 sub study_list_complexity {
20392 # Look for complex tables which should be formatted with one term per line.
20393 # Returns the following:
20395 # \@i_ragged_break_list = list of good breakpoints to avoid lines
20396 # which are hard to read
20397 # $number_of_fields_best = suggested number of fields based on
20398 # complexity; = 0 if any number may be used.
20400 my ( $self, $ri_term_begin, $ri_term_end, $ritem_lengths, $max_width ) = @_;
20401 my $item_count = @{$ri_term_begin};
20402 my $complex_item_count = 0;
20403 my $number_of_fields_best = $rOpts_maximum_fields_per_table;
20404 my $i_max = @{$ritem_lengths} - 1;
20405 ##my @item_complexity;
20407 my $i_last_last_break = -3;
20408 my $i_last_break = -2;
20409 my @i_ragged_break_list;
20411 my $definitely_complex = 30;
20412 my $definitely_simple = 12;
20413 my $quote_count = 0;
20415 for my $i ( 0 .. $i_max ) {
20416 my $ib = $ri_term_begin->[$i];
20417 my $ie = $ri_term_end->[$i];
20419 # define complexity: start with the actual term length
20420 my $weighted_length = ( $ritem_lengths->[$i] - 2 );
20422 ##TBD: join types here and check for variations
20423 ##my $str=join "", @tokens_to_go[$ib..$ie];
20426 if ( $types_to_go[$ib] =~ /^[qQ]$/ ) {
20430 elsif ( $types_to_go[$ib] =~ /^[w\-]$/ ) {
20434 if ( $ib eq $ie ) {
20435 if ( $is_quote && $tokens_to_go[$ib] =~ /\s/ ) {
20436 $complex_item_count++;
20437 $weighted_length *= 2;
20443 if ( grep { $_ eq 'b' } @types_to_go[ $ib .. $ie ] ) {
20444 $complex_item_count++;
20445 $weighted_length *= 2;
20447 if ( grep { $_ eq '..' } @types_to_go[ $ib .. $ie ] ) {
20448 $weighted_length += 4;
20452 # add weight for extra tokens.
20453 $weighted_length += 2 * ( $ie - $ib );
20455 ## my $BUB = join '', @tokens_to_go[$ib..$ie];
20456 ## print "# COMPLEXITY:$weighted_length $BUB\n";
20458 ##push @item_complexity, $weighted_length;
20460 # now mark a ragged break after this item it if it is 'long and
20462 if ( $weighted_length >= $definitely_complex ) {
20464 # if we broke after the previous term
20465 # then break before it too
20466 if ( $i_last_break == $i - 1
20468 && $i_last_last_break != $i - 2 )
20471 ## FIXME: don't strand a small term
20472 pop @i_ragged_break_list;
20473 push @i_ragged_break_list, $i - 2;
20474 push @i_ragged_break_list, $i - 1;
20477 push @i_ragged_break_list, $i;
20478 $i_last_last_break = $i_last_break;
20479 $i_last_break = $i;
20482 # don't break before a small last term -- it will
20483 # not look good on a line by itself.
20484 elsif ($i == $i_max
20485 && $i_last_break == $i - 1
20486 && $weighted_length <= $definitely_simple )
20488 pop @i_ragged_break_list;
20492 my $identifier_count = $i_max + 1 - $quote_count;
20494 # Need more tuning here..
20495 if ( $max_width > 12
20496 && $complex_item_count > $item_count / 2
20497 && $number_of_fields_best != 2 )
20499 $number_of_fields_best = 1;
20502 return ( $number_of_fields_best, \@i_ragged_break_list, $identifier_count );
20505 sub get_maximum_fields_wanted {
20507 # Not all tables look good with more than one field of items.
20508 # This routine looks at a table and decides if it should be
20509 # formatted with just one field or not.
20510 # This coding is still under development.
20511 my ($ritem_lengths) = @_;
20513 my $number_of_fields_best = 0;
20515 # For just a few items, we tentatively assume just 1 field.
20516 my $item_count = @{$ritem_lengths};
20517 if ( $item_count <= 5 ) {
20518 $number_of_fields_best = 1;
20521 # For larger tables, look at it both ways and see what looks best
20525 my @max_length = ( 0, 0 );
20526 my @last_length_2 = ( undef, undef );
20527 my @first_length_2 = ( undef, undef );
20528 my $last_length = undef;
20529 my $total_variation_1 = 0;
20530 my $total_variation_2 = 0;
20531 my @total_variation_2 = ( 0, 0 );
20533 foreach my $j ( 0 .. $item_count - 1 ) {
20535 $is_odd = 1 - $is_odd;
20536 my $length = $ritem_lengths->[$j];
20537 if ( $length > $max_length[$is_odd] ) {
20538 $max_length[$is_odd] = $length;
20541 if ( defined($last_length) ) {
20542 my $dl = abs( $length - $last_length );
20543 $total_variation_1 += $dl;
20545 $last_length = $length;
20547 my $ll = $last_length_2[$is_odd];
20548 if ( defined($ll) ) {
20549 my $dl = abs( $length - $ll );
20550 $total_variation_2[$is_odd] += $dl;
20553 $first_length_2[$is_odd] = $length;
20555 $last_length_2[$is_odd] = $length;
20557 $total_variation_2 = $total_variation_2[0] + $total_variation_2[1];
20559 my $factor = ( $item_count > 10 ) ? 1 : ( $item_count > 5 ) ? 0.75 : 0;
20560 unless ( $total_variation_2 < $factor * $total_variation_1 ) {
20561 $number_of_fields_best = 1;
20564 return ($number_of_fields_best);
20567 sub table_columns_available {
20568 my $i_first_comma = shift;
20570 $maximum_line_length_at_level[ $levels_to_go[$i_first_comma] ] -
20571 leading_spaces_to_go($i_first_comma);
20573 # Patch: the vertical formatter does not line up lines whose lengths
20574 # exactly equal the available line length because of allowances
20575 # that must be made for side comments. Therefore, the number of
20576 # available columns is reduced by 1 character.
20581 sub maximum_number_of_fields {
20583 # how many fields will fit in the available space?
20584 my ( $columns, $odd_or_even, $max_width, $pair_width ) = @_;
20585 my $max_pairs = int( $columns / $pair_width );
20586 my $number_of_fields = $max_pairs * 2;
20587 if ( $odd_or_even == 1
20588 && $max_pairs * $pair_width + $max_width <= $columns )
20590 $number_of_fields++;
20592 return $number_of_fields;
20595 sub compactify_table {
20597 # given a table with a certain number of fields and a certain number
20598 # of lines, see if reducing the number of fields will make it look
20600 my ( $item_count, $number_of_fields, $formatted_lines, $odd_or_even ) = @_;
20601 if ( $number_of_fields >= $odd_or_even * 2 && $formatted_lines > 0 ) {
20605 $min_fields = $number_of_fields ;
20606 $min_fields >= $odd_or_even
20607 && $min_fields * $formatted_lines >= $item_count ;
20608 $min_fields -= $odd_or_even
20611 $number_of_fields = $min_fields;
20614 return $number_of_fields;
20617 sub set_ragged_breakpoints {
20619 # Set breakpoints in a list that cannot be formatted nicely as a
20621 my ( $self, $ri_term_comma, $ri_ragged_break_list ) = @_;
20623 my $break_count = 0;
20624 foreach ( @{$ri_ragged_break_list} ) {
20625 my $j = $ri_term_comma->[$_];
20627 $self->set_forced_breakpoint($j);
20631 return $break_count;
20634 sub copy_old_breakpoints {
20635 my ( $self, $i_first_comma, $i_last_comma ) = @_;
20636 for my $i ( $i_first_comma .. $i_last_comma ) {
20637 if ( $old_breakpoint_to_go[$i] ) {
20638 $self->set_forced_breakpoint($i);
20645 my ( $self, $i, $j ) = @_;
20646 if ( $i >= 0 && $i <= $j && $j <= $max_index_to_go ) {
20649 my ( $a, $b, $c ) = caller();
20650 my $forced_breakpoint_count = get_forced_breakpoint_count();
20652 "NOBREAK: forced_breakpoint $forced_breakpoint_count from $a $c with i=$i max=$max_index_to_go type=$types_to_go[$i]\n";
20655 @nobreak_to_go[ $i .. $j ] = (1) x ( $j - $i + 1 );
20658 # shouldn't happen; non-critical error
20661 my ( $a, $b, $c ) = caller();
20663 "NOBREAK ERROR: from $a $c with i=$i j=$j max=$max_index_to_go\n";
20669 ###############################################
20670 # CODE SECTION 12: Code for setting indentation
20671 ###############################################
20673 sub token_sequence_length {
20675 # return length of tokens ($ibeg .. $iend) including $ibeg & $iend
20676 # returns 0 if $ibeg > $iend (shouldn't happen)
20677 my ( $ibeg, $iend ) = @_;
20678 return 0 if ( !defined($iend) || $iend < 0 || $ibeg > $iend );
20679 return $summed_lengths_to_go[ $iend + 1 ] if ( $ibeg < 0 );
20680 return $summed_lengths_to_go[ $iend + 1 ] - $summed_lengths_to_go[$ibeg];
20683 sub total_line_length {
20685 # return length of a line of tokens ($ibeg .. $iend)
20686 my ( $ibeg, $iend ) = @_;
20689 #return leading_spaces_to_go($ibeg) + token_sequence_length( $ibeg, $iend );
20691 # this is basically sub 'leading_spaces_to_go':
20692 my $indentation = $leading_spaces_to_go[$ibeg];
20693 if ( ref($indentation) ) { $indentation = $indentation->get_spaces() }
20695 return $indentation + $summed_lengths_to_go[ $iend + 1 ] -
20696 $summed_lengths_to_go[$ibeg];
20699 sub excess_line_length {
20701 # return number of characters by which a line of tokens ($ibeg..$iend)
20702 # exceeds the allowable line length.
20704 # NOTE: Profiling shows that this is a critical routine for efficiency.
20705 # Therefore I have eliminated additional calls to subs from it.
20706 my ( $self, $ibeg, $iend, $ignore_right_weld ) = @_;
20708 # Original expression for line length
20709 ##$length = leading_spaces_to_go($ibeg) + token_sequence_length( $ibeg, $iend );
20711 # This is basically sub 'leading_spaces_to_go':
20712 my $indentation = $leading_spaces_to_go[$ibeg];
20713 if ( ref($indentation) ) { $indentation = $indentation->get_spaces() }
20717 $summed_lengths_to_go[ $iend + 1 ] -
20718 $summed_lengths_to_go[$ibeg];
20720 # Include right weld lengths unless requested not to.
20721 if ( $total_weld_count
20722 && !$ignore_right_weld
20723 && $type_sequence_to_go[$iend] )
20725 my $wr = $self->[_rweld_len_right_at_K_]->{ $K_to_go[$iend] };
20726 $length += $wr if defined($wr);
20729 # return the excess
20730 return $length - $maximum_line_length_at_level[ $levels_to_go[$ibeg] ];
20735 # return the number of leading spaces associated with an indentation
20736 # variable $indentation is either a constant number of spaces or an object
20737 # with a get_spaces method.
20738 my $indentation = shift;
20739 return ref($indentation) ? $indentation->get_spaces() : $indentation;
20742 sub get_recoverable_spaces {
20744 # return the number of spaces (+ means shift right, - means shift left)
20745 # that we would like to shift a group of lines with the same indentation
20746 # to get them to line up with their opening parens
20747 my $indentation = shift;
20748 return ref($indentation) ? $indentation->get_recoverable_spaces() : 0;
20751 sub get_available_spaces_to_go {
20753 my ( $self, $ii ) = @_;
20754 my $item = $leading_spaces_to_go[$ii];
20756 # return the number of available leading spaces associated with an
20757 # indentation variable. $indentation is either a constant number of
20758 # spaces or an object with a get_available_spaces method.
20759 return ref($item) ? $item->get_available_spaces() : 0;
20762 { ## begin closure set_lp_indentation
20764 use constant DEBUG_LP => 0;
20766 # Stack of -lp index objects which survives between batches.
20770 # The predicted position of the next opening container which may start
20771 # an -lp indentation level. This survives between batches.
20772 my $lp_position_predictor;
20774 # A level at which the lp format becomes too highly stressed to continue
20775 my $lp_cutoff_level;
20779 # Index names for the -lp stack variables.
20780 # Do not combine with other BEGIN blocks (c101).
20784 _lp_ci_level_ => $i++,
20785 _lp_level_ => $i++,
20786 _lp_object_ => $i++,
20787 _lp_container_seqno_ => $i++,
20788 _lp_space_count_ => $i++,
20792 sub initialize_lp_vars {
20794 # initialize gnu variables for a new file;
20795 # must be called once at the start of a new file.
20797 $lp_position_predictor = 0;
20799 $lp_cutoff_level = min( $stress_level_alpha, $stress_level_beta + 2 );
20801 # we can turn off -lp if all levels will be at or above the cutoff
20802 if ( $lp_cutoff_level <= 1 ) {
20803 $rOpts_line_up_parentheses = 0;
20804 $rOpts_extended_line_up_parentheses = 0;
20809 # initialize the leading whitespace stack to negative levels
20810 # so that we can never run off the end of the stack
20811 $rLP->[$max_lp_stack]->[_lp_ci_level_] = -1;
20812 $rLP->[$max_lp_stack]->[_lp_level_] = -1;
20813 $rLP->[$max_lp_stack]->[_lp_object_] = undef;
20814 $rLP->[$max_lp_stack]->[_lp_container_seqno_] = SEQ_ROOT;
20815 $rLP->[$max_lp_stack]->[_lp_space_count_] = 0;
20820 # hashes for efficient testing
20826 my @q = qw< } ) ] >;
20827 @hash_test1{@q} = (1) x scalar(@q);
20830 @hash_test2{@q} = (1) x scalar(@q);
20831 @q = qw( . || && );
20832 @hash_test3{@q} = (1) x scalar(@q);
20835 sub set_lp_indentation {
20837 #------------------------------------------------------------------
20838 # Define the leading whitespace for all tokens in the current batch
20839 # when the -lp formatting is selected.
20840 #------------------------------------------------------------------
20844 return unless ($rOpts_line_up_parentheses);
20845 return unless ( defined($max_index_to_go) && $max_index_to_go >= 0 );
20847 # List of -lp indentation objects created in this batch
20848 my $rlp_object_list = [];
20849 my $max_lp_object_list = UNDEFINED_INDEX;
20851 my %last_lp_equals;
20852 my %lp_comma_count;
20853 my %lp_arrow_count;
20854 my $ii_begin_line = 0;
20856 my $rLL = $self->[_rLL_];
20857 my $Klimit = $self->[_Klimit_];
20858 my $rbreak_container = $self->[_rbreak_container_];
20859 my $rshort_nested = $self->[_rshort_nested_];
20860 my $ris_excluded_lp_container = $self->[_ris_excluded_lp_container_];
20861 my $rblock_type_of_seqno = $self->[_rblock_type_of_seqno_];
20862 my $starting_in_quote = $self->[_this_batch_]->[_starting_in_quote_];
20863 my $K_opening_container = $self->[_K_opening_container_]; ##TESTING
20864 my $K_closing_container = $self->[_K_closing_container_];
20865 my $rlp_object_by_seqno = $self->[_rlp_object_by_seqno_];
20866 my $radjusted_levels = $self->[_radjusted_levels_];
20867 my $rbreak_before_container_by_seqno =
20868 $self->[_rbreak_before_container_by_seqno_];
20869 my $rcollapsed_length_by_seqno = $self->[_rcollapsed_length_by_seqno_];
20871 my $nws = @{$radjusted_levels};
20874 # The 'starting_in_quote' flag means that the first token is the first
20875 # token of a line and it is also the continuation of some kind of
20876 # multi-line quote or pattern. It must have no added leading
20877 # whitespace, so we can skip it.
20878 if ($starting_in_quote) {
20882 my $K_last_nonblank;
20883 my $Kpnb = $K_to_go[0] - 1;
20884 if ( $Kpnb > 0 && $rLL->[$Kpnb]->[_TYPE_] eq 'b' ) {
20887 if ( $Kpnb >= 0 && $rLL->[$Kpnb]->[_TYPE_] ne 'b' ) {
20888 $K_last_nonblank = $Kpnb;
20891 my $last_nonblank_token = '';
20892 my $last_nonblank_type = '';
20893 my $last_last_nonblank_type = '';
20895 if ( defined($K_last_nonblank) ) {
20896 $last_nonblank_token = $rLL->[$K_last_nonblank]->[_TOKEN_];
20897 $last_nonblank_type = $rLL->[$K_last_nonblank]->[_TYPE_];
20900 my ( $space_count, $current_level, $current_ci_level, $in_lp_mode );
20901 my $stack_changed = 1;
20903 #-----------------------------------
20904 # Loop over all tokens in this batch
20905 #-----------------------------------
20906 foreach my $ii ( $imin .. $max_index_to_go ) {
20908 my $KK = $K_to_go[$ii];
20909 my $type = $types_to_go[$ii];
20910 my $token = $tokens_to_go[$ii];
20911 my $level = $levels_to_go[$ii];
20912 my $ci_level = $ci_levels_to_go[$ii];
20913 my $total_depth = $nesting_depth_to_go[$ii];
20915 #--------------------------------------------------
20916 # Adjust levels if necessary to recycle whitespace:
20917 #--------------------------------------------------
20918 if ( defined($radjusted_levels) && @{$radjusted_levels} == $Klimit )
20920 $level = $radjusted_levels->[$KK];
20921 if ( $level < 0 ) { $level = 0 } # note: this should not happen
20924 # get the top state from the stack if it has changed
20925 if ($stack_changed) {
20926 my $rLP_top = $rLP->[$max_lp_stack];
20927 my $lp_object = $rLP_top->[_lp_object_];
20929 ( $space_count, $current_level, $current_ci_level ) =
20930 @{ $lp_object->get_spaces_level_ci() };
20933 $current_ci_level = $rLP_top->[_lp_ci_level_];
20934 $current_level = $rLP_top->[_lp_level_];
20935 $space_count = $rLP_top->[_lp_space_count_];
20937 $stack_changed = 0;
20940 #------------------------------
20941 # update the position predictor
20942 #------------------------------
20943 if ( $type eq '{' || $type eq '(' ) {
20945 $lp_comma_count{ $total_depth + 1 } = 0;
20946 $lp_arrow_count{ $total_depth + 1 } = 0;
20948 # If we come to an opening token after an '=' token of some
20949 # type, see if it would be helpful to 'break' after the '=' to
20951 my $last_equals = $last_lp_equals{$total_depth};
20952 if ( $last_equals && $last_equals > $ii_begin_line ) {
20954 my $seqno = $type_sequence_to_go[$ii];
20956 # find the position if we break at the '='
20957 my $i_test = $last_equals;
20959 # Fix for issue b1229, check for break before
20960 if ( $want_break_before{ $types_to_go[$i_test] } ) {
20961 if ( $i_test > 0 ) { $i_test-- }
20963 elsif ( $types_to_go[ $i_test + 1 ] eq 'b' ) { $i_test++ }
20966 ##my $too_close = ($i_test==$ii-1);
20968 my $test_position = total_line_length( $i_test, $ii );
20970 $maximum_line_length_at_level[ $levels_to_go[$i_test] ];
20972 #------------------------------------------------------
20973 # Break if structure will reach the maximum line length
20974 #------------------------------------------------------
20976 # Historically, -lp just used one-half line length here
20977 my $len_increase = $rOpts_maximum_line_length / 2;
20979 # For -xlp, we can also use the pre-computed lengths
20980 my $min_len = $rcollapsed_length_by_seqno->{$seqno};
20981 if ( $min_len && $min_len > $len_increase ) {
20982 $len_increase = $min_len;
20987 # the equals is not just before an open paren (testing)
20990 # if we might exceed the maximum line length
20991 $lp_position_predictor + $len_increase > $mll
20993 # if a -bbx flag WANTS a break before this opening token
20995 && $rbreak_before_container_by_seqno->{$seqno} )
20997 # or we are beyond the 1/4 point and there was an old
20998 # break at an assignment (not '=>') [fix for b1035]
21000 $lp_position_predictor >
21001 $mll - $rOpts_maximum_line_length * 3 / 4
21002 && $types_to_go[$last_equals] ne '=>'
21004 $old_breakpoint_to_go[$last_equals]
21005 || ( $last_equals > 0
21006 && $old_breakpoint_to_go[ $last_equals - 1 ]
21008 || ( $last_equals > 1
21009 && $types_to_go[ $last_equals - 1 ] eq 'b'
21010 && $old_breakpoint_to_go[ $last_equals - 2 ]
21017 # then make the switch -- note that we do not set a
21018 # real breakpoint here because we may not really need
21019 # one; sub break_lists will do that if necessary.
21021 my $Kc = $K_closing_container->{$seqno};
21024 # For -lp, only if the closing token is in this
21025 # batch (c117). Otherwise it cannot be done by sub
21027 defined($Kc) && $Kc <= $K_to_go[$max_index_to_go]
21029 # For -xlp, we only need one nonblank token after
21030 # the opening token.
21031 || $rOpts_extended_line_up_parentheses
21034 $ii_begin_line = $i_test + 1;
21035 $lp_position_predictor = $test_position;
21037 #--------------------------------------------------
21038 # Fix for an opening container terminating a batch:
21039 #--------------------------------------------------
21040 # To get alignment of a -lp container with its
21041 # contents, we have to put a break after $i_test.
21042 # For $ii<$max_index_to_go, this will be done by
21043 # sub break_lists based on the indentation object.
21044 # But for $ii=$max_index_to_go, the indentation
21045 # object for this seqno will not be created until
21046 # the next batch, so we have to set a break at
21047 # $i_test right now in order to get one.
21048 if ( $ii == $max_index_to_go
21049 && !$block_type_to_go[$ii]
21052 && !$ris_excluded_lp_container->{$seqno} )
21054 $self->set_forced_lp_break( $ii_begin_line,
21060 } ## end update position predictor
21062 #------------------------
21063 # Handle decreasing depth
21064 #------------------------
21065 # Note that one token may have both decreasing and then increasing
21066 # depth. For example, (level, ci) can go from (1,1) to (2,0). So,
21067 # in this example we would first go back to (1,0) then up to (2,0)
21068 # in a single call.
21069 if ( $level < $current_level || $ci_level < $current_ci_level ) {
21071 # loop to find the first entry at or completely below this level
21072 my ( $lev, $ci_lev );
21074 if ($max_lp_stack) {
21076 # save index of token which closes this level
21077 if ( $rLP->[$max_lp_stack]->[_lp_object_] ) {
21079 $rLP->[$max_lp_stack]->[_lp_object_];
21081 $lp_object->set_closed($ii);
21083 my $comma_count = 0;
21084 my $arrow_count = 0;
21085 if ( $type eq '}' || $type eq ')' ) {
21086 $comma_count = $lp_comma_count{$total_depth};
21087 $arrow_count = $lp_arrow_count{$total_depth};
21088 $comma_count = 0 unless $comma_count;
21089 $arrow_count = 0 unless $arrow_count;
21092 $lp_object->set_comma_count($comma_count);
21093 $lp_object->set_arrow_count($arrow_count);
21095 # Undo any extra indentation if we saw no commas
21096 my $available_spaces =
21097 $lp_object->get_available_spaces();
21098 my $K_start = $lp_object->get_K_begin_line();
21100 if ( $available_spaces > 0
21101 && $K_start >= $K_to_go[0]
21102 && ( $comma_count <= 0 || $arrow_count > 0 ) )
21105 my $i = $lp_object->get_lp_item_index();
21107 # Safety check for a valid stack index. It
21108 # should be ok because we just checked that the
21109 # index K of the token associated with this
21110 # indentation is in this batch.
21111 if ( $i < 0 || $i > $max_lp_object_list ) {
21113 my $lno = $rLL->[$KK]->[_LINE_INDEX_];
21115 Program bug with -lp near line $lno. Stack index i=$i should be >=0 and <= max=$max_lp_object_list
21120 if ( $arrow_count == 0 ) {
21121 $rlp_object_list->[$i]
21122 ->permanently_decrease_available_spaces
21123 ($available_spaces);
21126 $rlp_object_list->[$i]
21127 ->tentatively_decrease_available_spaces
21128 ($available_spaces);
21131 my $j ( $i + 1 .. $max_lp_object_list )
21133 $rlp_object_list->[$j]
21134 ->decrease_SPACES($available_spaces);
21140 # go down one level
21143 my $rLP_top = $rLP->[$max_lp_stack];
21144 my $ci_lev = $rLP_top->[_lp_ci_level_];
21145 my $lev = $rLP_top->[_lp_level_];
21146 my $spaces = $rLP_top->[_lp_space_count_];
21147 if ( $rLP_top->[_lp_object_] ) {
21148 my $lp_obj = $rLP_top->[_lp_object_];
21149 ( $spaces, $lev, $ci_lev ) =
21150 @{ $lp_obj->get_spaces_level_ci() };
21153 # stop when we reach a level at or below the current
21155 if ( $lev <= $level && $ci_lev <= $ci_level ) {
21156 $space_count = $spaces;
21157 $current_level = $lev;
21158 $current_ci_level = $ci_lev;
21163 # reached bottom of stack .. should never happen because
21164 # only negative levels can get here, and $level was forced
21165 # to be positive above.
21168 # non-fatal, keep going except in DEVEL_MODE
21171 program bug with -lp: stack_error. level=$level; lev=$lev; ci_level=$ci_level; ci_lev=$ci_lev; rerun with -nlp
21177 } ## end decreasing depth
21179 #------------------------
21180 # handle increasing depth
21181 #------------------------
21182 if ( $level > $current_level || $ci_level > $current_ci_level ) {
21184 $stack_changed = 1;
21186 # Compute the standard incremental whitespace. This will be
21187 # the minimum incremental whitespace that will be used. This
21188 # choice results in a smooth transition between the gnu-style
21189 # and the standard style.
21190 my $standard_increment =
21191 ( $level - $current_level ) *
21192 $rOpts_indent_columns +
21193 ( $ci_level - $current_ci_level ) *
21194 $rOpts_continuation_indentation;
21196 # Now we have to define how much extra incremental space
21197 # ("$available_space") we want. This extra space will be
21198 # reduced as necessary when long lines are encountered or when
21199 # it becomes clear that we do not have a good list.
21200 my $available_spaces = 0;
21201 my $align_seqno = 0;
21204 my $last_nonblank_seqno;
21205 my $last_nonblank_block_type;
21206 if ( defined($K_last_nonblank) ) {
21207 $last_nonblank_seqno =
21208 $rLL->[$K_last_nonblank]->[_TYPE_SEQUENCE_];
21209 $last_nonblank_block_type =
21210 $last_nonblank_seqno
21211 ? $rblock_type_of_seqno->{$last_nonblank_seqno}
21215 $in_lp_mode = $rLP->[$max_lp_stack]->[_lp_object_];
21217 #-----------------------------------------------
21218 # Initialize indentation spaces on empty stack..
21219 #-----------------------------------------------
21220 if ( $max_lp_stack == 0 ) {
21221 $space_count = $level * $rOpts_indent_columns;
21224 #----------------------------------------
21225 # Add the standard space increment if ...
21226 #----------------------------------------
21229 # if this is a BLOCK, add the standard increment
21230 $last_nonblank_block_type
21232 # or if this is not a sequenced item
21233 || !$last_nonblank_seqno
21235 # or this continer is excluded by user rules
21236 # or contains here-docs or multiline qw text
21237 || defined($last_nonblank_seqno)
21238 && $ris_excluded_lp_container->{$last_nonblank_seqno}
21240 # or if last nonblank token was not structural indentation
21241 || $last_nonblank_type ne '{'
21243 # and do not start -lp under stress .. fixes b1244, b1255
21244 || !$in_lp_mode && $level >= $lp_cutoff_level
21249 # If we have entered lp mode, use the top lp object to get
21250 # the current indentation spaces because it may have
21251 # changed. Fixes b1285, b1286.
21253 $space_count = $in_lp_mode->get_spaces();
21255 $space_count += $standard_increment;
21258 #---------------------------------------------------------------
21259 # -lp mode: try to use space to the first non-blank level change
21260 #---------------------------------------------------------------
21263 # see how much space we have available
21264 my $test_space_count = $lp_position_predictor;
21267 $rcollapsed_length_by_seqno->{$last_nonblank_seqno};
21268 my $next_opening_too_far;
21270 if ( defined($min_len) ) {
21272 $test_space_count +
21274 $maximum_line_length_at_level[$level];
21275 if ( $excess > 0 ) {
21276 $test_space_count -= $excess;
21278 # will the next opening token be a long way out?
21279 $next_opening_too_far =
21280 $lp_position_predictor + $excess >
21281 $maximum_line_length_at_level[$level];
21285 my $rLP_top = $rLP->[$max_lp_stack];
21286 my $min_gnu_indentation = $rLP_top->[_lp_space_count_];
21287 if ( $rLP_top->[_lp_object_] ) {
21288 $min_gnu_indentation =
21289 $rLP_top->[_lp_object_]->get_spaces();
21291 $available_spaces =
21292 $test_space_count - $min_gnu_indentation;
21294 # Do not startup -lp indentation mode if no space ...
21295 # ... or if it puts the opening far to the right
21297 && ( $available_spaces <= 0 || $next_opening_too_far ) )
21299 $space_count += $standard_increment;
21300 $available_spaces = 0;
21305 $space_count = $test_space_count;
21308 if ( $available_spaces >= $standard_increment ) {
21309 $min_gnu_indentation += $standard_increment;
21311 elsif ( $available_spaces > 1 ) {
21312 $min_gnu_indentation += $available_spaces + 1;
21314 elsif ( $last_nonblank_token =~ /^[\{\[\(]$/ ) {
21315 if ( ( $tightness{$last_nonblank_token} < 2 ) ) {
21316 $min_gnu_indentation += 2;
21319 $min_gnu_indentation += 1;
21323 $min_gnu_indentation += $standard_increment;
21325 $available_spaces = $space_count - $min_gnu_indentation;
21327 if ( $available_spaces < 0 ) {
21328 $space_count = $min_gnu_indentation;
21329 $available_spaces = 0;
21331 $align_seqno = $last_nonblank_seqno;
21335 #-------------------------------------------
21336 # update the state, but not on a blank token
21337 #-------------------------------------------
21338 if ( $type ne 'b' ) {
21340 if ( $rLP->[$max_lp_stack]->[_lp_object_] ) {
21341 $rLP->[$max_lp_stack]->[_lp_object_]->set_have_child(1);
21345 #----------------------------------------
21346 # Create indentation object if in lp-mode
21347 #----------------------------------------
21352 # A negative level implies not to store the item in the
21354 my $lp_item_index = 0;
21355 if ( $level >= 0 ) {
21356 $lp_item_index = ++$max_lp_object_list;
21359 my $K_begin_line = 0;
21360 if ( $ii_begin_line >= 0
21361 && $ii_begin_line <= $max_index_to_go )
21363 $K_begin_line = $K_to_go[$ii_begin_line];
21366 # Minor Fix: when creating indentation at a side
21367 # comment we don't know what the space to the actual
21368 # next code token will be. We will allow a space for
21369 # sub correct_lp to move it in if necessary.
21371 && $max_index_to_go > 0
21374 $available_spaces += 1;
21377 $lp_object = Perl::Tidy::IndentationItem->new(
21378 spaces => $space_count,
21380 ci_level => $ci_level,
21381 available_spaces => $available_spaces,
21382 lp_item_index => $lp_item_index,
21383 align_seqno => $align_seqno,
21384 stack_depth => $max_lp_stack,
21385 K_begin_line => $K_begin_line,
21389 my $tok_beg = $rLL->[$K_begin_line]->[_TOKEN_];
21390 print STDERR <<EOM;
21391 DEBUG_LP: Created object at tok=$token type=$type for seqno $align_seqno level=$level ci=$ci_level spaces=$space_count avail=$available_spaces kbeg=$K_begin_line tokbeg=$tok_beg lp=$lp_position_predictor
21395 if ( $level >= 0 ) {
21396 $rlp_object_list->[$max_lp_object_list] =
21400 if ( $last_nonblank_token =~ /^[\{\[\(]$/
21401 && $last_nonblank_seqno )
21403 $rlp_object_by_seqno->{$last_nonblank_seqno} =
21408 #------------------------------------
21409 # Store this indentation on the stack
21410 #------------------------------------
21411 $rLP->[$max_lp_stack]->[_lp_ci_level_] = $ci_level;
21412 $rLP->[$max_lp_stack]->[_lp_level_] = $level;
21413 $rLP->[$max_lp_stack]->[_lp_object_] = $lp_object;
21414 $rLP->[$max_lp_stack]->[_lp_container_seqno_] =
21415 $last_nonblank_seqno;
21416 $rLP->[$max_lp_stack]->[_lp_space_count_] = $space_count;
21418 # If the opening paren is beyond the half-line length, then
21419 # we will use the minimum (standard) indentation. This will
21420 # help avoid problems associated with running out of space
21421 # near the end of a line. As a result, in deeply nested
21422 # lists, there will be some indentations which are limited
21423 # to this minimum standard indentation. But the most deeply
21424 # nested container will still probably be able to shift its
21425 # parameters to the right for proper alignment, so in most
21426 # cases this will not be noticeable.
21427 if ( $available_spaces > 0 && $lp_object ) {
21429 $maximum_line_length_at_level[$level] -
21430 $rOpts_maximum_line_length / 2;
21431 $lp_object->tentatively_decrease_available_spaces(
21433 if ( $space_count > $halfway );
21436 } ## end increasing depth
21438 #------------------
21439 # Handle all tokens
21440 #------------------
21441 if ( $type ne 'b' ) {
21443 # Count commas and look for non-list characters. Once we see a
21444 # non-list character, we give up and don't look for any more
21446 if ( $type eq '=>' ) {
21447 $lp_arrow_count{$total_depth}++;
21449 # remember '=>' like '=' for estimating breaks (but see
21450 # above note for b1035)
21451 $last_lp_equals{$total_depth} = $ii;
21454 elsif ( $type eq ',' ) {
21455 $lp_comma_count{$total_depth}++;
21458 elsif ( $is_assignment{$type} ) {
21459 $last_lp_equals{$total_depth} = $ii;
21462 # this token might start a new line if ..
21465 # this is the first nonblank token of the line
21466 $ii == 1 && $types_to_go[0] eq 'b'
21468 # or previous character was one of these:
21470 || $hash_test2{$last_nonblank_type}
21472 # or previous character was opening and this is not closing
21473 || ( $last_nonblank_type eq '{' && $type ne '}' )
21474 || ( $last_nonblank_type eq '(' and $type ne ')' )
21476 # or this token is one of these:
21477 # /^([\.]|\|\||\&\&)$/
21478 || $hash_test3{$type}
21480 # or this is a closing structure
21481 || ( $last_nonblank_type eq '}'
21482 && $last_nonblank_token eq $last_nonblank_type )
21484 # or previous token was keyword 'return'
21486 $last_nonblank_type eq 'k'
21487 && ( $last_nonblank_token eq 'return'
21491 # or starting a new line at certain keywords is fine
21493 && $is_if_unless_and_or_last_next_redo_return{$token} )
21495 # or this is after an assignment after a closing structure
21497 $is_assignment{$last_nonblank_type}
21500 $hash_test1{$last_last_nonblank_type}
21502 # and it is significantly to the right
21503 || $lp_position_predictor > (
21504 $maximum_line_length_at_level[$level] -
21505 $rOpts_maximum_line_length / 2
21511 check_for_long_gnu_style_lines( $ii, $rlp_object_list );
21512 $ii_begin_line = $ii;
21514 # back up 1 token if we want to break before that type
21515 # otherwise, we may strand tokens like '?' or ':' on a line
21516 if ( $ii_begin_line > 0 ) {
21517 if ( $last_nonblank_type eq 'k' ) {
21519 if ( $want_break_before{$last_nonblank_token} ) {
21523 elsif ( $want_break_before{$last_nonblank_type} ) {
21527 } ## end if ( $ii == 1 && $types_to_go...)
21529 $K_last_nonblank = $KK;
21531 $last_last_nonblank_type = $last_nonblank_type;
21532 $last_nonblank_type = $type;
21533 $last_nonblank_token = $token;
21535 } ## end if ( $type ne 'b' )
21537 # remember the predicted position of this token on the output line
21538 if ( $ii > $ii_begin_line ) {
21540 ## NOTE: this is a critical loop - the following call has been
21541 ## expanded for about 2x speedup:
21542 ## $lp_position_predictor =
21543 ## total_line_length( $ii_begin_line, $ii );
21545 my $indentation = $leading_spaces_to_go[$ii_begin_line];
21546 if ( ref($indentation) ) {
21547 $indentation = $indentation->get_spaces();
21549 $lp_position_predictor =
21551 $summed_lengths_to_go[ $ii + 1 ] -
21552 $summed_lengths_to_go[$ii_begin_line];
21555 $lp_position_predictor =
21556 $space_count + $token_lengths_to_go[$ii];
21559 # Store the indentation object for this token.
21560 # This allows us to manipulate the leading whitespace
21561 # (in case we have to reduce indentation to fit a line) without
21562 # having to change any token values.
21564 #---------------------------------------------------------------
21565 # replace leading whitespace with indentation objects where used
21566 #---------------------------------------------------------------
21567 if ( $rLP->[$max_lp_stack]->[_lp_object_] ) {
21568 my $lp_object = $rLP->[$max_lp_stack]->[_lp_object_];
21569 $leading_spaces_to_go[$ii] = $lp_object;
21570 if ( $max_lp_stack > 0
21572 && $rLP->[ $max_lp_stack - 1 ]->[_lp_object_] )
21574 $reduced_spaces_to_go[$ii] =
21575 $rLP->[ $max_lp_stack - 1 ]->[_lp_object_];
21578 $reduced_spaces_to_go[$ii] = $lp_object;
21581 } ## end loop over all tokens in this batch
21583 undo_incomplete_lp_indentation($rlp_object_list)
21584 if ( !$rOpts_extended_line_up_parentheses );
21589 sub check_for_long_gnu_style_lines {
21591 # look at the current estimated maximum line length, and
21592 # remove some whitespace if it exceeds the desired maximum
21593 my ( $mx_index_to_go, $rlp_object_list ) = @_;
21595 my $max_lp_object_list = @{$rlp_object_list} - 1;
21597 # nothing can be done if no stack items defined for this line
21598 return if ( $max_lp_object_list < 0 );
21600 # see if we have exceeded the maximum desired line length
21601 # keep 2 extra free because they are needed in some cases
21602 # (result of trial-and-error testing)
21603 my $spaces_needed =
21604 $lp_position_predictor -
21605 $maximum_line_length_at_level[ $levels_to_go[$mx_index_to_go] ] + 2;
21607 return if ( $spaces_needed <= 0 );
21609 # We are over the limit, so try to remove a requested number of
21610 # spaces from leading whitespace. We are only allowed to remove
21611 # from whitespace items created on this batch, since others have
21612 # already been used and cannot be undone.
21613 my @candidates = ();
21616 # loop over all whitespace items created for the current batch
21617 for ( $i = 0 ; $i <= $max_lp_object_list ; $i++ ) {
21618 my $item = $rlp_object_list->[$i];
21620 # item must still be open to be a candidate (otherwise it
21621 # cannot influence the current token)
21622 next if ( $item->get_closed() >= 0 );
21624 my $available_spaces = $item->get_available_spaces();
21626 if ( $available_spaces > 0 ) {
21627 push( @candidates, [ $i, $available_spaces ] );
21631 return unless (@candidates);
21633 # sort by available whitespace so that we can remove whitespace
21634 # from the maximum available first
21635 @candidates = sort { $b->[1] <=> $a->[1] } @candidates;
21637 # keep removing whitespace until we are done or have no more
21638 foreach my $candidate (@candidates) {
21639 my ( $i, $available_spaces ) = @{$candidate};
21640 my $deleted_spaces =
21641 ( $available_spaces > $spaces_needed )
21643 : $available_spaces;
21645 # remove the incremental space from this item
21646 $rlp_object_list->[$i]->decrease_available_spaces($deleted_spaces);
21650 # update the leading whitespace of this item and all items
21651 # that came after it
21652 for ( ; $i <= $max_lp_object_list ; $i++ ) {
21654 my $old_spaces = $rlp_object_list->[$i]->get_spaces();
21655 if ( $old_spaces >= $deleted_spaces ) {
21656 $rlp_object_list->[$i]->decrease_SPACES($deleted_spaces);
21659 # shouldn't happen except for code bug:
21661 # non-fatal, keep going except in DEVEL_MODE
21663 my $level = $rlp_object_list->[$i_debug]->get_level();
21665 $rlp_object_list->[$i_debug]->get_ci_level();
21666 my $old_level = $rlp_object_list->[$i]->get_level();
21668 $rlp_object_list->[$i]->get_ci_level();
21670 program bug with -lp: want to delete $deleted_spaces from item $i, but old=$old_spaces deleted: lev=$level ci=$ci_level deleted: level=$old_level ci=$ci_level
21675 $lp_position_predictor -= $deleted_spaces;
21676 $spaces_needed -= $deleted_spaces;
21677 last unless ( $spaces_needed > 0 );
21682 sub undo_incomplete_lp_indentation {
21684 #------------------------------------------------------------------
21685 # Undo indentation for all incomplete -lp indentation levels of the
21686 # current batch unless -xlp is set.
21687 #------------------------------------------------------------------
21689 # This routine is called once after each output stream batch is
21690 # finished to undo indentation for all incomplete -lp indentation
21691 # levels. If this routine is called then comments and blank lines will
21692 # disrupt this indentation style. In older versions of perltidy this
21693 # was always done because it could cause problems otherwise, but recent
21694 # improvements allow fairly good results to be obtained by skipping
21695 # this step with the -xlp flag.
21696 my ($rlp_object_list) = @_;
21698 my $max_lp_object_list = @{$rlp_object_list} - 1;
21700 # nothing to do if no stack items defined for this line
21701 return if ( $max_lp_object_list < 0 );
21703 # loop over all whitespace items created for the current batch
21704 foreach my $i ( 0 .. $max_lp_object_list ) {
21705 my $item = $rlp_object_list->[$i];
21707 # only look for open items
21708 next if ( $item->get_closed() >= 0 );
21710 # Tentatively remove all of the available space
21711 # (The vertical aligner will try to get it back later)
21712 my $available_spaces = $item->get_available_spaces();
21713 if ( $available_spaces > 0 ) {
21715 # delete incremental space for this item
21716 $rlp_object_list->[$i]
21717 ->tentatively_decrease_available_spaces($available_spaces);
21719 # Reduce the total indentation space of any nodes that follow
21720 # Note that any such nodes must necessarily be dependents
21722 foreach ( $i + 1 .. $max_lp_object_list ) {
21723 $rlp_object_list->[$_]->decrease_SPACES($available_spaces);
21729 } ## end closure set_lp_indentation
21731 #----------------------------------------------------------------------
21732 # sub to set a requested break before an opening container in -lp mode.
21733 #----------------------------------------------------------------------
21734 sub set_forced_lp_break {
21736 my ( $self, $i_begin_line, $i_opening ) = @_;
21739 # $i_begin_line = index of break in the _to_go arrays
21740 # $i_opening = index of the opening container
21742 # Set any requested break at a token before this opening container
21743 # token. This is often an '=' or '=>' but can also be things like
21744 # '.', ',', 'return'. It was defined by sub set_lp_indentation.
21747 # For intact containers, call this at the closing token.
21748 # For broken containers, call this at the opening token.
21749 # This will avoid needless breaks when it turns out that the
21750 # container does not actually get broken. This isn't known until
21751 # the closing container for intact blocks.
21754 if ( $i_begin_line < 0
21755 || $i_begin_line > $max_index_to_go );
21757 # Handle request to put a break break immediately before this token.
21758 # We may not want to do that since we are also breaking after it.
21759 if ( $i_begin_line == $i_opening ) {
21761 # The following rules should be reviewed. We may want to always
21762 # allow the break. If we do not do the break, the indentation
21765 # RULE: don't break before it unless it is welded to a qw.
21766 # This works well, but we may want to relax this to allow
21767 # breaks in additional cases.
21769 if ( !$self->[_rK_weld_right_]->{ $K_to_go[$i_opening] } );
21770 return unless ( $types_to_go[$max_index_to_go] eq 'q' );
21773 # Only break for breakpoints at the same
21774 # indentation level as the opening paren
21775 my $test1 = $nesting_depth_to_go[$i_opening];
21776 my $test2 = $nesting_depth_to_go[$i_begin_line];
21777 return if ( $test2 != $test1 );
21779 # Back up at a blank (fixes case b932)
21780 my $ibr = $i_begin_line - 1;
21782 && $types_to_go[$ibr] eq 'b' )
21787 my $i_nonblank = $self->set_forced_breakpoint($ibr);
21789 # Crude patch to prevent sub recombine_breakpoints from undoing
21790 # this break, especially after an '='. It will leave old
21791 # breakpoints alone. See c098/x045 for some examples.
21792 if ( defined($i_nonblank) ) {
21793 $old_breakpoint_to_go[$i_nonblank] = 1;
21799 sub reduce_lp_indentation {
21801 # reduce the leading whitespace at token $i if possible by $spaces_needed
21802 # (a large value of $spaces_needed will remove all excess space)
21803 # NOTE: to be called from break_lists only for a sequence of tokens
21804 # contained between opening and closing parens/braces/brackets
21806 my ( $self, $i, $spaces_wanted ) = @_;
21807 my $deleted_spaces = 0;
21809 my $item = $leading_spaces_to_go[$i];
21810 my $available_spaces = $item->get_available_spaces();
21813 $available_spaces > 0
21814 && ( ( $spaces_wanted <= $available_spaces )
21815 || !$item->get_have_child() )
21819 # we'll remove these spaces, but mark them as recoverable
21821 $item->tentatively_decrease_available_spaces($spaces_wanted);
21824 return $deleted_spaces;
21827 ###########################################################
21828 # CODE SECTION 13: Preparing batches for vertical alignment
21829 ###########################################################
21831 sub check_convey_batch_input {
21833 # Check for valid input to sub convey_batch_to_vertical_aligner. An
21834 # error here would most likely be due to an error in the calling
21835 # routine 'sub grind_batch_of_CODE'.
21836 my ( $self, $ri_first, $ri_last ) = @_;
21838 if ( !defined($ri_first) || !defined($ri_last) ) {
21840 Undefined line ranges ri_first and/r ri_last
21844 my $nmax = @{$ri_first} - 1;
21845 my $nmax_check = @{$ri_last} - 1;
21846 if ( $nmax < 0 || $nmax_check < 0 || $nmax != $nmax_check ) {
21848 Line range index error: nmax=$nmax but nmax_check=$nmax_check
21849 These should be equal and >=0
21852 my ( $ibeg, $iend );
21853 foreach my $n ( 0 .. $nmax ) {
21854 my $ibeg_m = $ibeg;
21855 my $iend_m = $iend;
21856 $ibeg = $ri_first->[$n];
21857 $iend = $ri_last->[$n];
21858 if ( $ibeg < 0 || $iend < $ibeg || $iend > $max_index_to_go ) {
21860 Bad line range at line index $n of $nmax: ibeg=$ibeg, iend=$iend
21861 These should have iend >= ibeg and be in the range (0..$max_index_to_go)
21864 next if ( $n == 0 );
21865 if ( $ibeg <= $iend_m ) {
21867 Line ranges overlap: iend=$iend_m at line $n-1 but ibeg=$ibeg for line $n
21874 sub convey_batch_to_vertical_aligner {
21878 # This routine receives a batch of code for which the final line breaks
21879 # have been defined. Here we prepare the lines for passing to the vertical
21880 # aligner. We do the following tasks:
21881 # - mark certain vertical alignment tokens, such as '=', in each line
21882 # - make minor indentation adjustments
21883 # - do logical padding: insert extra blank spaces to help display certain
21884 # logical constructions
21886 my $this_batch = $self->[_this_batch_];
21887 my $ri_first = $this_batch->[_ri_first_];
21888 my $ri_last = $this_batch->[_ri_last_];
21890 $self->check_convey_batch_input( $ri_first, $ri_last ) if (DEVEL_MODE);
21892 my $n_last_line = @{$ri_first} - 1;
21894 my $do_not_pad = $this_batch->[_do_not_pad_];
21895 my $peak_batch_size = $this_batch->[_peak_batch_size_];
21896 my $starting_in_quote = $this_batch->[_starting_in_quote_];
21897 my $ending_in_quote = $this_batch->[_ending_in_quote_];
21898 my $is_static_block_comment = $this_batch->[_is_static_block_comment_];
21899 my $rix_seqno_controlling_ci = $this_batch->[_rix_seqno_controlling_ci_];
21900 my $batch_CODE_type = $this_batch->[_batch_CODE_type_];
21902 my $rLL = $self->[_rLL_];
21903 my $Klimit = $self->[_Klimit_];
21904 my $rblock_type_of_seqno = $self->[_rblock_type_of_seqno_];
21905 my $ris_list_by_seqno = $self->[_ris_list_by_seqno_];
21907 my $ibeg_next = $ri_first->[0];
21908 my $iend_next = $ri_last->[0];
21910 my $type_beg_next = $types_to_go[$ibeg_next];
21911 my $type_end_next = $types_to_go[$iend_next];
21912 my $token_beg_next = $tokens_to_go[$ibeg_next];
21914 my $is_block_comment = $max_index_to_go == 0 && $types_to_go[0] eq '#';
21916 my $rindentation_list = [0]; # ref to indentations for each line
21917 my ( $cscw_block_comment, $closing_side_comment );
21918 if ($rOpts_closing_side_comments) {
21919 ( $closing_side_comment, $cscw_block_comment ) =
21920 $self->add_closing_side_comment( $ri_first, $ri_last );
21923 # flush before a long if statement to avoid unwanted alignment
21924 if ( $n_last_line > 0
21925 && $type_beg_next eq 'k'
21926 && $token_beg_next =~ /^(if|unless)$/ )
21928 $self->flush_vertical_aligner();
21931 $self->undo_ci( $ri_first, $ri_last, $rix_seqno_controlling_ci )
21932 if ( $n_last_line > 0 || $rOpts_extended_continuation_indentation );
21934 $self->set_logical_padding( $ri_first, $ri_last, $peak_batch_size,
21935 $starting_in_quote )
21936 if ( $n_last_line > 0 && $rOpts_logical_padding );
21938 if (DEVEL_MODE) { $self->check_batch_summed_lengths() }
21940 # ----------------------------------------------------------
21941 # define the vertical alignments for all lines of this batch
21942 # ----------------------------------------------------------
21943 my $rline_alignments =
21944 $self->make_vertical_alignments( $ri_first, $ri_last );
21946 # ----------------------------------------------
21947 # loop to send each line to the vertical aligner
21948 # ----------------------------------------------
21949 my ( $type_beg, $token_beg );
21951 my ( $ibeg, $iend );
21952 for my $n ( 0 .. $n_last_line ) {
21954 # ----------------------------------------------------------------
21955 # This hash will hold the args for vertical alignment of this line
21956 # We will populate it as we go.
21957 # ----------------------------------------------------------------
21958 my $rvao_args = {};
21960 my $type_beg_last = $type_beg;
21961 my $type_end_last = $type_end;
21963 my $ibeg = $ibeg_next;
21964 my $iend = $iend_next;
21965 my $Kbeg = $K_to_go[$ibeg];
21966 my $Kend = $K_to_go[$iend];
21968 $type_beg = $type_beg_next;
21969 $type_end = $type_end_next;
21970 $token_beg = $token_beg_next;
21972 # ---------------------------------------------------
21973 # Define the check value 'Kend' to send for this line
21974 # ---------------------------------------------------
21975 # The 'Kend' value is an integer for checking that lines come out of
21976 # the far end of the pipeline in the right order. It increases
21977 # linearly along the token stream. But we only send ending K values of
21978 # non-comments down the pipeline. This is equivalent to checking that
21979 # the last CODE_type is blank or equal to 'VER'. See also sub
21980 # resync_lines_and_tokens for related coding. Note that
21981 # '$batch_CODE_type' is the code type of the line to which the ending
21984 $batch_CODE_type && $batch_CODE_type ne 'VER' ? undef : $Kend;
21986 # $ljump is a level jump needed by 'sub final_indentation_adjustment'
21989 # Get some vars on line [n+1], if any:
21990 if ( $n < $n_last_line ) {
21991 $ibeg_next = $ri_first->[ $n + 1 ];
21992 $iend_next = $ri_last->[ $n + 1 ];
21994 $type_beg_next = $types_to_go[$ibeg_next];
21995 $type_end_next = $types_to_go[$iend_next];
21996 $token_beg_next = $tokens_to_go[$ibeg_next];
21998 my $Kbeg_next = $K_to_go[$ibeg_next];
21999 $ljump = $rLL->[$Kbeg_next]->[_LEVEL_] - $rLL->[$Kend]->[_LEVEL_];
22001 elsif ( !$is_block_comment && $Kend < $Klimit ) {
22003 # Patch for git #51, a bare closing qw paren was not outdented
22004 # if the flag '-nodelete-old-newlines is set
22005 # Note that we are just looking ahead for the next nonblank
22006 # character. We could scan past an arbitrary number of block
22007 # comments or hanging side comments by calling K_next_code, but it
22008 # could add significant run time with very little to be gained.
22009 my $Kbeg_next = $Kend + 1;
22010 if ( $Kbeg_next < $Klimit
22011 && $rLL->[$Kbeg_next]->[_TYPE_] eq 'b' )
22016 $rLL->[$Kbeg_next]->[_LEVEL_] - $rLL->[$Kend]->[_LEVEL_];
22019 # ---------------------------------------------
22020 # get the vertical alignment info for this line
22021 # ---------------------------------------------
22023 # The lines are broken into fields which can be spaced by the vertical
22024 # to achieve vertical alignment. These fields are the actual text
22025 # which will be output, so from here on no more changes can be made to
22027 my $rline_alignment = $rline_alignments->[$n];
22028 my ( $rtokens, $rfields, $rpatterns, $rfield_lengths ) =
22029 @{$rline_alignment};
22031 # Programming check: (shouldn't happen)
22032 # The number of tokens which separate the fields must always be
22033 # one less than the number of fields. If this is not true then
22034 # an error has been introduced in sub make_alignment_patterns.
22036 if ( @{$rfields} && ( @{$rtokens} != ( @{$rfields} - 1 ) ) ) {
22037 my $nt = @{$rtokens};
22038 my $nf = @{$rfields};
22040 Program bug in Perl::Tidy::Formatter, probably in sub 'make_alignment_patterns':
22041 The number of tokens = $nt should be one less than number of fields: $nf
22047 # --------------------------------------
22048 # get the final indentation of this line
22049 # --------------------------------------
22050 my ( $indentation, $lev, $level_end, $terminal_type,
22051 $terminal_block_type, $is_semicolon_terminated, $is_outdented_line )
22052 = $self->final_indentation_adjustment( $ibeg, $iend, $rfields,
22053 $rpatterns, $ri_first, $ri_last,
22054 $rindentation_list, $ljump, $starting_in_quote,
22055 $is_static_block_comment, );
22057 # --------------------------------
22058 # define flag 'outdent_long_lines'
22059 # --------------------------------
22061 # we will allow outdenting of long lines..
22062 # which are long quotes, if allowed
22063 ( $type_beg eq 'Q' && $rOpts_outdent_long_quotes )
22065 # which are long block comments, if allowed
22068 && $rOpts_outdent_long_comments
22070 # but not if this is a static block comment
22071 && !$is_static_block_comment
22075 $rvao_args->{outdent_long_lines} = 1;
22077 # convert -lp indentation objects to spaces to allow outdenting
22078 if ( ref($indentation) ) {
22079 $indentation = $indentation->get_spaces();
22083 # --------------------------------------------------
22084 # define flags 'break_alignment_before' and '_after'
22085 # --------------------------------------------------
22087 # These flags tell the vertical aligner to stop alignment before or
22089 if ($is_outdented_line) {
22090 $rvao_args->{break_alignment_before} = 1;
22091 $rvao_args->{break_alignment_after} = 1;
22093 elsif ($do_not_pad) {
22094 $rvao_args->{break_alignment_before} = 1;
22097 # flush at an 'if' which follows a line with (1) terminal semicolon
22098 # or (2) terminal block_type which is not an 'if'. This prevents
22099 # unwanted alignment between the lines.
22100 elsif ( $type_beg eq 'k' && $token_beg eq 'if' ) {
22105 my $Km = $Kbeg - 1;
22106 $type_m = $rLL->[$Km]->[_TYPE_];
22107 if ( $type_m eq 'b' && $Km > 0 ) {
22109 $type_m = $rLL->[$Km]->[_TYPE_];
22111 if ( $type_m eq '#' && $Km > 0 ) {
22113 $type_m = $rLL->[$Km]->[_TYPE_];
22114 if ( $type_m eq 'b' && $Km > 0 ) {
22116 $type_m = $rLL->[$Km]->[_TYPE_];
22120 my $seqno_m = $rLL->[$Km]->[_TYPE_SEQUENCE_];
22122 $block_type_m = $rblock_type_of_seqno->{$seqno_m};
22126 # break after anything that is not if-like
22129 || ( $type_m eq '}'
22131 && $block_type_m ne 'if'
22132 && $block_type_m ne 'unless'
22133 && $block_type_m ne 'elsif'
22134 && $block_type_m ne 'else' )
22137 $rvao_args->{break_alignment_before} = 1;
22141 # ----------------------------------
22142 # define 'rvertical_tightness_flags'
22143 # ----------------------------------
22144 # These flags tell the vertical aligner if/when to combine consecutive
22145 # lines, based on the user input parameters.
22146 $rvao_args->{rvertical_tightness_flags} =
22147 $self->set_vertical_tightness_flags( $n, $n_last_line, $ibeg, $iend,
22148 $ri_first, $ri_last, $ending_in_quote, $closing_side_comment )
22149 if ( !$is_block_comment );
22151 # ----------------------------------
22152 # define 'is_terminal_ternary' flag
22153 # ----------------------------------
22155 # This flag is set at the final ':' of a ternary chain to request
22156 # vertical alignment of the final term. Here is a slightly complex
22159 # $self->{_text} = (
22161 # : $type eq 'item' ? "the $section entry"
22162 # : "the section on $section"
22166 # ? ( $section ? ' in ' : '' ) . "the $page$page_ext manpage"
22167 # : ' elsewhere in this document'
22170 if ( $type_beg eq ':' || $n > 0 && $type_end_last eq ':' ) {
22172 my $is_terminal_ternary = 0;
22173 my $last_leading_type = $n > 0 ? $type_beg_last : ':';
22174 if ( $terminal_type ne ';'
22175 && $n_last_line > $n
22176 && $level_end == $lev )
22178 my $Kbeg_next = $K_to_go[$ibeg_next];
22179 $level_end = $rLL->[$Kbeg_next]->[_LEVEL_];
22180 $terminal_type = $rLL->[$Kbeg_next]->[_TYPE_];
22183 $last_leading_type eq ':'
22184 && ( ( $terminal_type eq ';' && $level_end <= $lev )
22185 || ( $terminal_type ne ':' && $level_end < $lev ) )
22189 # the terminal term must not contain any ternary terms, as in
22191 # $Is_MSWin32 ? ".\\echo$$"
22192 # : $Is_MacOS ? ":echo$$"
22193 # : ( $Is_NetWare ? "echo$$" : "./echo$$" )
22195 $is_terminal_ternary = 1;
22197 my $KP = $rLL->[$Kbeg]->[_KNEXT_SEQ_ITEM_];
22198 while ( defined($KP) && $KP <= $Kend ) {
22199 my $type_KP = $rLL->[$KP]->[_TYPE_];
22200 if ( $type_KP eq '?' || $type_KP eq ':' ) {
22201 $is_terminal_ternary = 0;
22204 $KP = $rLL->[$KP]->[_KNEXT_SEQ_ITEM_];
22207 $rvao_args->{is_terminal_ternary} = $is_terminal_ternary;
22210 # -------------------------------------------------
22211 # add any new closing side comment to the last line
22212 # -------------------------------------------------
22213 if ( $closing_side_comment && $n == $n_last_line && @{$rfields} ) {
22215 $rfields->[-1] .= " $closing_side_comment";
22217 # NOTE: Patch for csc. We can just use 1 for the length of the csc
22218 # because its length should not be a limiting factor from here on.
22219 $rfield_lengths->[-1] += 2;
22223 [ $rtokens, $rfields, $rpatterns, $rfield_lengths ];
22226 # ------------------------
22227 # define flag 'list_seqno'
22228 # ------------------------
22230 # This flag indicates if this line is contained in a multi-line list
22231 if ( !$is_block_comment ) {
22232 my $parent_seqno = $parent_seqno_to_go[$ibeg];
22233 $rvao_args->{list_seqno} = $ris_list_by_seqno->{$parent_seqno};
22236 # The alignment tokens have been marked with nesting_depths, so we need
22237 # to pass nesting depths to the vertical aligner. They remain invariant
22238 # under all formatting operations. Previously, level values were sent
22239 # to the aligner. But they can be altered in welding and other
22240 # opeartions, and this can lead to alignement errors.
22241 my $nesting_depth_beg = $nesting_depth_to_go[$ibeg];
22242 my $nesting_depth_end = $nesting_depth_to_go[$iend];
22244 # A quirk in the definition of nesting depths is that the closing token
22245 # has the same depth as internal tokens. The vertical aligner is
22246 # programmed to expect them to have the lower depth, so we fix this.
22247 if ( $is_closing_type{ $types_to_go[$ibeg] } ) { $nesting_depth_beg-- }
22248 if ( $is_closing_type{ $types_to_go[$iend] } ) { $nesting_depth_end-- }
22250 # Adjust nesting depths to keep -lp indentation for qw lists. This is
22251 # required because qw lists contained in brackets do not get nesting
22252 # depths, but the vertical aligner is watching nesting depth changes to
22253 # decide if a -lp block is intact. Without this patch, qw lists
22254 # enclosed in angle brackets will not get the correct -lp indentation.
22256 # Looking for line with isolated qw ...
22257 if ( $rOpts_line_up_parentheses
22258 && $type_beg eq 'q'
22259 && $ibeg == $iend )
22262 # ... which is part of a multiline qw
22263 my $Km = $self->K_previous_nonblank($Kbeg);
22264 my $Kp = $self->K_next_nonblank($Kbeg);
22265 if ( defined($Km) && $rLL->[$Km]->[_TYPE_] eq 'q'
22266 || defined($Kp) && $rLL->[$Kp]->[_TYPE_] eq 'q' )
22268 $nesting_depth_beg++;
22269 $nesting_depth_end++;
22273 # ---------------------------------
22274 # define flag 'forget_side_comment'
22275 # ---------------------------------
22277 # This flag tells the vertical aligner to reset the side comment
22278 # location if we are entering a new block from level 0. This is
22279 # intended to keep side comments from drifting too far to the right.
22280 if ( $terminal_block_type
22281 && $nesting_depth_end > $nesting_depth_beg )
22283 my $level_adj = $lev;
22284 my $radjusted_levels = $self->[_radjusted_levels_];
22285 if ( defined($radjusted_levels) && @{$radjusted_levels} == @{$rLL} )
22287 $level_adj = $radjusted_levels->[$Kbeg];
22288 if ( $level_adj < 0 ) { $level_adj = 0 }
22290 if ( $level_adj == 0 ) {
22291 $rvao_args->{forget_side_comment} = 1;
22295 # -----------------------------------
22296 # Store the remaining non-flag values
22297 # -----------------------------------
22298 $rvao_args->{Kend} = $Kend_code;
22299 $rvao_args->{ci_level} = $ci_levels_to_go[$ibeg];
22300 $rvao_args->{indentation} = $indentation;
22301 $rvao_args->{level_end} = $nesting_depth_end;
22302 $rvao_args->{level} = $nesting_depth_beg;
22303 $rvao_args->{rline_alignment} = $rline_alignment;
22304 $rvao_args->{maximum_line_length} =
22305 $maximum_line_length_at_level[ $levels_to_go[$ibeg] ];
22307 # --------------------------------------
22308 # send this line to the vertical aligner
22309 # --------------------------------------
22310 my $vao = $self->[_vertical_aligner_object_];
22311 $vao->valign_input($rvao_args);
22315 # Set flag indicating if this line ends in an opening
22316 # token and is very short, so that a blank line is not
22317 # needed if the subsequent line is a comment.
22318 # Examples of what we are looking for:
22324 $self->[_last_output_short_opening_token_]
22326 # line ends in opening token
22328 = $is_opening_type{$type_end}
22332 # line has either single opening token
22335 # or is a single token followed by opening token.
22336 # Note that sub identifiers have blanks like 'sub doit'
22337 # $token_beg !~ /\s+/
22338 || ( $Kend - $Kbeg <= 2 && index( $token_beg, ' ' ) < 0 )
22341 # and limit total to 10 character widths
22342 && token_sequence_length( $ibeg, $iend ) <= 10;
22344 } ## end of loop to output each line
22346 # remember indentation of lines containing opening containers for
22347 # later use by sub final_indentation_adjustment
22348 $self->save_opening_indentation( $ri_first, $ri_last, $rindentation_list )
22349 if ( !$is_block_comment );
22351 # output any new -cscw block comment
22352 if ($cscw_block_comment) {
22353 $self->flush_vertical_aligner();
22354 my $file_writer_object = $self->[_file_writer_object_];
22355 $file_writer_object->write_code_line( $cscw_block_comment . "\n" );
22360 sub check_batch_summed_lengths {
22362 my ( $self, $msg ) = @_;
22363 $msg = "" unless defined($msg);
22364 my $rLL = $self->[_rLL_];
22366 # Verify that the summed lengths are correct. We want to be sure that
22367 # errors have not been introduced by programming changes. Summed lengths
22368 # are defined in sub $store_token. Operations like padding and unmasking
22369 # semicolons can change token lengths, but those operations are expected to
22370 # update the summed lengths when they make changes. So the summed lengths
22371 # should always be correct.
22372 foreach my $i ( 0 .. $max_index_to_go ) {
22374 $summed_lengths_to_go[ $i + 1 ] - $summed_lengths_to_go[$i];
22375 my $len_tok_i = $token_lengths_to_go[$i];
22376 my $KK = $K_to_go[$i];
22378 if ( defined($KK) ) { $len_tok_K = $rLL->[$KK]->[_TOKEN_LENGTH_] }
22379 if ( $len_by_sum != $len_tok_i
22380 || defined($len_tok_K) && $len_by_sum != $len_tok_K )
22382 my $lno = defined($KK) ? $rLL->[$KK]->[_LINE_INDEX_] + 1 : "undef";
22383 $KK = 'undef' unless defined($KK);
22384 my $tok = $tokens_to_go[$i];
22385 my $type = $types_to_go[$i];
22387 Summed lengths are appear to be incorrect. $msg
22388 lengths disagree: token length by sum=$len_by_sum but token_length_to_go[$i] = $len_tok_i and rLL->[$KK]->[_TOKEN_LENGTH_]=$len_tok_K
22389 near line $lno starting with '$tokens_to_go[0]..' at token i=$i K=$KK token_type='$type' token='$tok'
22396 { ## begin closure set_vertical_alignment_markers
22397 my %is_vertical_alignment_type;
22398 my %is_not_vertical_alignment_token;
22399 my %is_vertical_alignment_keyword;
22400 my %is_terminal_alignment_type;
22401 my %is_low_level_alignment_token;
22407 # Replaced =~ and // in the list. // had been removed in RT 119588
22409 = **= += *= &= <<= &&= -= /= |= >>= ||= //= .= %= ^= x=
22410 { ? : => && || ~~ !~~ =~ !~ // <=> ->
22412 @is_vertical_alignment_type{@q} = (1) x scalar(@q);
22414 # These 'tokens' are not aligned. We need this to remove [
22415 # from the above list because it has type ='{'
22417 @is_not_vertical_alignment_token{@q} = (1) x scalar(@q);
22419 # these are the only types aligned at a line end
22421 @is_terminal_alignment_type{@q} = (1) x scalar(@q);
22423 # these tokens only align at line level
22425 @is_low_level_alignment_token{@q} = (1) x scalar(@q);
22427 # eq and ne were removed from this list to improve alignment chances
22428 @q = qw(if unless and or err for foreach while until);
22429 @is_vertical_alignment_keyword{@q} = (1) x scalar(@q);
22432 sub set_vertical_alignment_markers {
22434 # This routine takes the first step toward vertical alignment of the
22435 # lines of output text. It looks for certain tokens which can serve as
22436 # vertical alignment markers (such as an '=').
22438 # Method: We look at each token $i in this output batch and set
22439 # $ralignment_type_to_go->[$i] equal to those tokens at which we would
22440 # accept vertical alignment.
22442 my ( $self, $ri_first, $ri_last ) = @_;
22444 my $ralignment_type_to_go;
22445 my $ralignment_counts = [];
22446 my $ralignment_hash_by_line = [];
22448 # NOTE: closing side comments can insert up to 2 additional tokens
22449 # beyond the original $max_index_to_go, so we need to check ri_last for
22451 my $max_line = @{$ri_first} - 1;
22452 my $max_i = $ri_last->[$max_line];
22453 if ( $max_i < $max_index_to_go ) { $max_i = $max_index_to_go }
22455 # -----------------------------------------------------------------
22457 # - no alignments if there is only 1 token.
22458 # - and nothing to do if we aren't allowed to change whitespace.
22459 # -----------------------------------------------------------------
22460 if ( $max_i <= 0 || !$rOpts_add_whitespace ) {
22461 return ( $ralignment_type_to_go, $ralignment_counts,
22462 $ralignment_hash_by_line );
22465 my $rspecial_side_comment_type = $self->[_rspecial_side_comment_type_];
22466 my $ris_function_call_paren = $self->[_ris_function_call_paren_];
22467 my $rLL = $self->[_rLL_];
22469 # -------------------------------
22470 # First handle any side comment.
22471 # -------------------------------
22472 my $i_terminal = $max_i;
22473 if ( $types_to_go[$max_i] eq '#' ) {
22475 # We know $max_i > 0 if we get here.
22477 if ( $i_terminal > 0 && $types_to_go[$i_terminal] eq 'b' ) {
22481 my $token = $tokens_to_go[$max_i];
22482 my $KK = $K_to_go[$max_i];
22484 # Do not align various special side comments
22485 my $do_not_align = (
22487 # it is any specially marked side comment
22488 ( defined($KK) && $rspecial_side_comment_type->{$KK} )
22490 # or it is a static side comment
22491 || ( $rOpts->{'static-side-comments'}
22492 && $token =~ /$static_side_comment_pattern/ )
22494 # or a closing side comment
22495 || ( $types_to_go[$i_terminal] eq '}'
22496 && $tokens_to_go[$i_terminal] eq '}'
22497 && $token =~ /$closing_side_comment_prefix_pattern/ )
22500 # - For the specific combination -vc -nvsc, we put all side comments
22501 # at fixed locations. Note that we will lose hanging side comment
22502 # alignments. Otherwise, hsc's can move to strange locations.
22503 # - For -nvc -nvsc we make all side comments vertical alignments
22504 # because the vertical aligner will check for -nvsc and be able
22505 # to reduce the final padding to the side comments for long lines.
22506 # and keep hanging side comments aligned.
22507 if ( !$do_not_align
22508 && !$rOpts_valign_side_comments
22509 && $rOpts_valign_code )
22513 my $ipad = $max_i - 1;
22514 if ( $types_to_go[$ipad] eq 'b' ) {
22516 $rOpts->{'minimum-space-to-comment'} -
22517 $token_lengths_to_go[$ipad];
22518 $self->pad_token( $ipad, $pad_spaces );
22522 if ( !$do_not_align ) {
22523 $ralignment_type_to_go->[$max_i] = '#';
22524 $ralignment_hash_by_line->[$max_line]->{$max_i} = '#';
22525 $ralignment_counts->[$max_line]++;
22529 # ----------------------------------------------
22530 # Nothing more to do on this line if -nvc is set
22531 # ----------------------------------------------
22532 if ( !$rOpts_valign_code ) {
22533 return ( $ralignment_type_to_go, $ralignment_counts,
22534 $ralignment_hash_by_line );
22537 # -------------------------------------
22538 # Loop over each line of this batch ...
22539 # -------------------------------------
22540 my $last_vertical_alignment_BEFORE_index;
22541 my $vert_last_nonblank_type;
22542 my $vert_last_nonblank_token;
22543 my $vert_last_nonblank_block_type;
22545 foreach my $line ( 0 .. $max_line ) {
22547 my $ibeg = $ri_first->[$line];
22548 my $iend = $ri_last->[$line];
22550 next if ( $iend <= $ibeg );
22552 # back up before any side comment
22553 if ( $iend > $i_terminal ) { $iend = $i_terminal }
22555 my $level_beg = $levels_to_go[$ibeg];
22556 my $token_beg = $tokens_to_go[$ibeg];
22557 my $type_beg = $types_to_go[$ibeg];
22558 my $type_beg_special_char =
22559 ( $type_beg eq '.' || $type_beg eq ':' || $type_beg eq '?' );
22561 $last_vertical_alignment_BEFORE_index = -1;
22562 $vert_last_nonblank_type = $type_beg;
22563 $vert_last_nonblank_token = $token_beg;
22565 # ----------------------------------------------------------------
22566 # Initialization code merged from 'sub delete_needless_alignments'
22567 # ----------------------------------------------------------------
22568 my $i_good_paren = -1;
22569 my $i_elsif_close = $ibeg - 1;
22570 my $i_elsif_open = $iend + 1;
22572 if ( $type_beg eq 'k' ) {
22574 # Initialization for paren patch: mark a location of a paren we
22575 # should keep, such as one following something like a leading
22577 $i_good_paren = $ibeg + 1;
22578 if ( $types_to_go[$i_good_paren] eq 'b' ) {
22582 # Initializtion for 'elsif' patch: remember the paren range of
22583 # an elsif, and do not make alignments within them because this
22584 # can cause loss of padding and overall brace alignment in the
22585 # vertical aligner.
22586 if ( $token_beg eq 'elsif'
22587 && $i_good_paren < $iend
22588 && $tokens_to_go[$i_good_paren] eq '(' )
22590 $i_elsif_open = $i_good_paren;
22591 $i_elsif_close = $mate_index_to_go[$i_good_paren];
22593 } ## end if ( $type_beg eq 'k' )
22595 # --------------------------------------------
22596 # Loop over each token in this output line ...
22597 # --------------------------------------------
22598 foreach my $i ( $ibeg + 1 .. $iend ) {
22600 next if ( $types_to_go[$i] eq 'b' );
22602 my $type = $types_to_go[$i];
22603 my $token = $tokens_to_go[$i];
22604 my $alignment_type = '';
22606 # ----------------------------------------------
22607 # Check for 'paren patch' : Remove excess parens
22608 # ----------------------------------------------
22610 # Excess alignment of parens can prevent other good alignments.
22611 # For example, note the parens in the first two rows of the
22612 # following snippet. They would normally get marked for
22613 # alignment and aligned as follows:
22615 # my $w = $columns * $cell_w + ( $columns + 1 ) * $border;
22616 # my $h = $rows * $cell_h + ( $rows + 1 ) * $border;
22617 # my $img = new Gimp::Image( $w, $h, RGB );
22619 # This causes unnecessary paren alignment and prevents the
22620 # third equals from aligning. If we remove the unwanted
22621 # alignments we get:
22623 # my $w = $columns * $cell_w + ( $columns + 1 ) * $border;
22624 # my $h = $rows * $cell_h + ( $rows + 1 ) * $border;
22625 # my $img = new Gimp::Image( $w, $h, RGB );
22627 # A rule for doing this which works well is to remove alignment
22628 # of parens whose containers do not contain other aligning
22629 # tokens, with the exception that we always keep alignment of
22630 # the first opening paren on a line (for things like 'if' and
22631 # 'elsif' statements).
22632 if ( $token eq ')' && @imatch_list ) {
22634 # undo the corresponding opening paren if:
22635 # - it is at the top of the stack
22636 # - and not the first overall opening paren
22637 # - does not follow a leading keyword on this line
22638 my $imate = $mate_index_to_go[$i];
22639 if ( $imatch_list[-1] eq $imate
22640 && ( $ibeg > 1 || @imatch_list > 1 )
22641 && $imate > $i_good_paren )
22643 if ( $ralignment_type_to_go->[$imate] ) {
22644 $ralignment_type_to_go->[$imate] = '';
22645 $ralignment_counts->[$line]--;
22646 delete $ralignment_hash_by_line->[$line]->{$imate};
22652 # do not align tokens at lower level than start of line
22653 # except for side comments
22654 if ( $levels_to_go[$i] < $level_beg ) {
22658 #--------------------------------------------------------
22659 # First see if we want to align BEFORE this token
22660 #--------------------------------------------------------
22662 # The first possible token that we can align before
22663 # is index 2 because: 1) it doesn't normally make sense to
22664 # align before the first token and 2) the second
22665 # token must be a blank if we are to align before
22667 if ( $i < $ibeg + 2 ) { }
22669 # must follow a blank token
22670 elsif ( $types_to_go[ $i - 1 ] ne 'b' ) { }
22672 # otherwise, do not align two in a row to create a
22674 elsif ( $last_vertical_alignment_BEFORE_index == $i - 2 ) { }
22676 # align before one of these keywords
22677 # (within a line, since $i>1)
22678 elsif ( $type eq 'k' ) {
22680 # /^(if|unless|and|or|eq|ne)$/
22681 if ( $is_vertical_alignment_keyword{$token} ) {
22682 $alignment_type = $token;
22686 # align before one of these types..
22687 elsif ( $is_vertical_alignment_type{$type}
22688 && !$is_not_vertical_alignment_token{$token} )
22690 $alignment_type = $token;
22692 # Do not align a terminal token. Although it might
22693 # occasionally look ok to do this, this has been found to be
22694 # a good general rule. The main problems are:
22695 # (1) that the terminal token (such as an = or :) might get
22696 # moved far to the right where it is hard to see because
22697 # nothing follows it, and
22698 # (2) doing so may prevent other good alignments.
22699 # Current exceptions are && and || and =>
22700 if ( $i == $iend ) {
22701 $alignment_type = ""
22702 unless ( $is_terminal_alignment_type{$type} );
22705 # Do not align leading ': (' or '. ('. This would prevent
22706 # alignment in something like the following:
22708 # ( $input_line_number < 10 ) ? " "
22709 # : ( $input_line_number < 100 ) ? " "
22713 # ( $case_matters ? $accessor : " lc($accessor) " )
22714 # . ( $yesno ? " eq " : " ne " )
22716 # Also, do not align a ( following a leading ? so we can
22717 # align something like this:
22718 # $converter{$_}->{ushortok} =
22719 # $PDL::IO::Pic::biggrays
22720 # ? ( m/GIF/ ? 0 : 1 )
22721 # : ( m/GIF|RAST|IFF/ ? 0 : 1 );
22722 if ( $type_beg_special_char
22724 && $types_to_go[ $i - 1 ] eq 'b' )
22726 $alignment_type = "";
22729 # Certain tokens only align at the same level as the
22730 # initial line level
22731 if ( $is_low_level_alignment_token{$token}
22732 && $levels_to_go[$i] != $level_beg )
22734 $alignment_type = "";
22737 # For a paren after keyword, only align something like this:
22739 # elsif ( $b ) { &b }
22740 if ( $token eq '(' ) {
22742 if ( $vert_last_nonblank_type eq 'k' ) {
22743 $alignment_type = ""
22744 unless $vert_last_nonblank_token =~
22745 /^(if|unless|elsif)$/;
22748 # Do not align a spaced-function-paren if requested.
22749 # Issue git #53, #73.
22750 if ( !$rOpts_function_paren_vertical_alignment ) {
22751 my $seqno = $type_sequence_to_go[$i];
22752 if ( $ris_function_call_paren->{$seqno} ) {
22753 $alignment_type = "";
22758 # be sure the alignment tokens are unique
22759 # This didn't work well: reason not determined
22760 # if ($token ne $type) {$alignment_type .= $type}
22763 # NOTE: This is deactivated because it causes the previous
22764 # if/elsif alignment to fail
22765 #elsif ( $type eq '}' && $token eq '}' && $block_type_to_go[$i])
22766 #{ $alignment_type = $type; }
22768 if ($alignment_type) {
22769 $last_vertical_alignment_BEFORE_index = $i;
22772 #--------------------------------------------------------
22773 # Next see if we want to align AFTER the previous nonblank
22774 #--------------------------------------------------------
22776 # We want to line up ',' and interior ';' tokens, with the added
22777 # space AFTER these tokens. (Note: interior ';' is included
22778 # because it may occur in short blocks).
22781 # we haven't already set it
22784 # previous token IS one of these:
22786 $vert_last_nonblank_type eq ','
22787 || $vert_last_nonblank_type eq ';'
22790 # and its not the first token of the line
22793 # and it follows a blank
22794 && $types_to_go[ $i - 1 ] eq 'b'
22796 # and it's NOT one of these
22797 && !$is_closing_token{$type}
22799 # then go ahead and align
22803 $alignment_type = $vert_last_nonblank_type;
22806 #-----------------------
22807 # Set the alignment type
22808 #-----------------------
22809 if ($alignment_type) {
22811 # but do not align the opening brace of an anonymous sub
22813 && $block_type_to_go[$i] =~ /$ASUB_PATTERN/ )
22818 # and do not make alignments within 'elsif' parens
22819 elsif ( $i > $i_elsif_open && $i < $i_elsif_close ) {
22823 # and ignore any tokens which have leading padded spaces
22824 # example: perl527/lop.t
22825 elsif ( substr( $alignment_type, 0, 1 ) eq ' ' ) {
22830 $ralignment_type_to_go->[$i] = $alignment_type;
22831 $ralignment_hash_by_line->[$line]->{$i} =
22833 $ralignment_counts->[$line]++;
22834 push @imatch_list, $i;
22838 $vert_last_nonblank_type = $type;
22839 $vert_last_nonblank_token = $token;
22843 return ( $ralignment_type_to_go, $ralignment_counts,
22844 $ralignment_hash_by_line );
22845 } ## end sub set_vertical_alignment_markers
22846 } ## end closure set_vertical_alignment_markers
22848 sub make_vertical_alignments {
22849 my ( $self, $ri_first, $ri_last ) = @_;
22851 #----------------------------
22852 # Shortcut for a single token
22853 #----------------------------
22854 if ( $max_index_to_go == 0 ) {
22855 if ( @{$ri_first} == 1 && $ri_last->[0] == 0 ) {
22857 my $rfields = [ $tokens_to_go[0] ];
22858 my $rpatterns = [ $types_to_go[0] ];
22859 my $rfield_lengths =
22860 [ $summed_lengths_to_go[1] - $summed_lengths_to_go[0] ];
22861 return [ [ $rtokens, $rfields, $rpatterns, $rfield_lengths ] ];
22864 # Strange line packing, not fatal but should not happen
22865 elsif (DEVEL_MODE) {
22866 my $max_line = @{$ri_first} - 1;
22867 my $ibeg = $ri_first->[0];
22868 my $iend = $ri_last->[0];
22869 my $tok_b = $tokens_to_go[$ibeg];
22870 my $tok_e = $tokens_to_go[$iend];
22871 my $type_b = $types_to_go[$ibeg];
22872 my $type_e = $types_to_go[$iend];
22874 "Strange..max_index=0 but nlines=$max_line ibeg=$ibeg tok=$tok_b type=$type_b iend=$iend tok=$tok_e type=$type_e; please check\n"
22879 #---------------------------------------------------------
22880 # Step 1: Define the alignment tokens for the entire batch
22881 #---------------------------------------------------------
22882 my ( $ralignment_type_to_go, $ralignment_counts, $ralignment_hash_by_line )
22883 = $self->set_vertical_alignment_markers( $ri_first, $ri_last );
22885 #----------------------------------------------
22886 # Step 2: Break each line into alignment fields
22887 #----------------------------------------------
22888 my $rline_alignments = [];
22889 my $max_line = @{$ri_first} - 1;
22890 foreach my $line ( 0 .. $max_line ) {
22892 my $ibeg = $ri_first->[$line];
22893 my $iend = $ri_last->[$line];
22895 my $rtok_fld_pat_len = $self->make_alignment_patterns(
22896 $ibeg, $iend, $ralignment_type_to_go,
22897 $ralignment_counts->[$line],
22898 $ralignment_hash_by_line->[$line]
22900 push @{$rline_alignments}, $rtok_fld_pat_len;
22902 return $rline_alignments;
22903 } ## end sub make_vertical_alignments
22907 # get opening and closing sequence numbers of a token for the vertical
22908 # aligner. Assign qw quotes a value to allow qw opening and closing tokens
22909 # to be treated somewhat like opening and closing tokens for stacking
22910 # tokens by the vertical aligner.
22911 my ( $self, $ii, $ending_in_quote ) = @_;
22913 my $rLL = $self->[_rLL_];
22915 my $KK = $K_to_go[$ii];
22916 my $seqno = $rLL->[$KK]->[_TYPE_SEQUENCE_];
22918 if ( $rLL->[$KK]->[_TYPE_] eq 'q' ) {
22920 my $token = $rLL->[$KK]->[_TOKEN_];
22922 $seqno = $SEQ_QW if ( $token =~ /^qw\s*[\(\{\[]/ );
22925 if ( !$ending_in_quote ) {
22926 $seqno = $SEQ_QW if ( $token =~ /[\)\}\]]$/ );
22934 my %undo_extended_ci;
22936 sub initialize_undo_ci {
22937 %undo_extended_ci = ();
22943 # Undo continuation indentation in certain sequences
22944 my ( $self, $ri_first, $ri_last, $rix_seqno_controlling_ci ) = @_;
22945 my ( $line_1, $line_2, $lev_last );
22946 my $this_line_is_semicolon_terminated;
22947 my $max_line = @{$ri_first} - 1;
22949 my $rseqno_controlling_my_ci = $self->[_rseqno_controlling_my_ci_];
22951 # Prepare a list of controlling indexes for each line if required.
22952 # This is used for efficient processing below. Note: this is
22953 # critical for speed. In the initial implementation I just looped
22954 # through the @$rix_seqno_controlling_ci list below. Using NYT_prof, I
22955 # found that this routine was causing a huge run time in large lists.
22956 # On a very large list test case, this new coding dropped the run time
22957 # of this routine from 30 seconds to 169 milliseconds.
22958 my @i_controlling_ci;
22959 if ( @{$rix_seqno_controlling_ci} ) {
22960 my @tmp = reverse @{$rix_seqno_controlling_ci};
22961 my $ix_next = pop @tmp;
22962 foreach my $line ( 0 .. $max_line ) {
22963 my $iend = $ri_last->[$line];
22964 while ( defined($ix_next) && $ix_next <= $iend ) {
22965 push @{ $i_controlling_ci[$line] }, $ix_next;
22966 $ix_next = pop @tmp;
22971 # Loop over all lines of the batch ...
22973 # Workaround originally created for problem c007, in which the
22974 # combination -lp -xci could produce a "Program bug" message in unusual
22976 my $skip_SECTION_1;
22977 if ( $rOpts_line_up_parentheses
22978 && $rOpts_extended_continuation_indentation )
22981 # Only set this flag if -lp is actually used here
22982 foreach my $line ( 0 .. $max_line ) {
22983 my $ibeg = $ri_first->[$line];
22984 if ( ref( $leading_spaces_to_go[$ibeg] ) ) {
22985 $skip_SECTION_1 = 1;
22991 foreach my $line ( 0 .. $max_line ) {
22993 my $ibeg = $ri_first->[$line];
22994 my $iend = $ri_last->[$line];
22995 my $lev = $levels_to_go[$ibeg];
22997 #-----------------------------------
22998 # SECTION 1: Undo needless common CI
22999 #-----------------------------------
23001 # We are looking at leading tokens and looking for a sequence all
23002 # at the same level and all at a higher level than enclosing lines.
23004 # For example, we can undo continuation indentation in sort/map/grep
23007 # my $dat1 = pack( "n*",
23008 # map { $_, $lookup->{$_} }
23009 # sort { $a <=> $b }
23010 # grep { $lookup->{$_} ne $default } keys %$lookup );
23014 # my $dat1 = pack( "n*",
23015 # map { $_, $lookup->{$_} }
23016 # sort { $a <=> $b }
23017 # grep { $lookup->{$_} ne $default } keys %$lookup );
23019 if ( $line > 0 && !$skip_SECTION_1 ) {
23021 # if we have started a chain..
23024 # see if it continues..
23025 if ( $lev == $lev_last ) {
23026 if ( $types_to_go[$ibeg] eq 'k'
23027 && $is_sort_map_grep{ $tokens_to_go[$ibeg] } )
23030 # chain continues...
23031 # check for chain ending at end of a statement
23032 if ( $line == $max_line ) {
23034 # see of this line ends a statement
23035 $this_line_is_semicolon_terminated =
23036 $types_to_go[$iend] eq ';'
23038 # with possible side comment
23039 || ( $types_to_go[$iend] eq '#'
23040 && $iend - $ibeg >= 2
23041 && $types_to_go[ $iend - 2 ] eq ';'
23042 && $types_to_go[ $iend - 1 ] eq 'b' );
23045 if ($this_line_is_semicolon_terminated);
23053 elsif ( $lev < $lev_last ) {
23055 # chain ends with previous line
23056 $line_2 = $line - 1;
23058 elsif ( $lev > $lev_last ) {
23064 # undo the continuation indentation if a chain ends
23065 if ( defined($line_2) && defined($line_1) ) {
23066 my $continuation_line_count = $line_2 - $line_1 + 1;
23067 @ci_levels_to_go[ @{$ri_first}[ $line_1 .. $line_2 ] ]
23068 = (0) x ($continuation_line_count)
23069 if ( $continuation_line_count >= 0 );
23070 @leading_spaces_to_go[ @{$ri_first}
23071 [ $line_1 .. $line_2 ] ] =
23072 @reduced_spaces_to_go[ @{$ri_first}
23073 [ $line_1 .. $line_2 ] ];
23078 # not in a chain yet..
23081 # look for start of a new sort/map/grep chain
23082 if ( $lev > $lev_last ) {
23083 if ( $types_to_go[$ibeg] eq 'k'
23084 && $is_sort_map_grep{ $tokens_to_go[$ibeg] } )
23092 #-------------------------------------
23093 # SECTION 2: Undo ci at cuddled blocks
23094 #-------------------------------------
23096 # Note that sub final_indentation_adjustment will be called later to
23097 # actually do this, but for now we will tentatively mark cuddled
23098 # lines with ci=0 so that the the -xci loop which follows will be
23099 # correct at cuddles.
23101 $types_to_go[$ibeg] eq '}'
23102 && ( $nesting_depth_to_go[$iend] + 1 ==
23103 $nesting_depth_to_go[$ibeg] )
23106 my $terminal_type = $types_to_go[$iend];
23107 if ( $terminal_type eq '#' && $iend > $ibeg ) {
23108 $terminal_type = $types_to_go[ $iend - 1 ];
23109 if ( $terminal_type eq '#' && $iend - 1 > $ibeg ) {
23110 $terminal_type = $types_to_go[ $iend - 2 ];
23113 if ( $terminal_type eq '{' ) {
23114 my $Kbeg = $K_to_go[$ibeg];
23115 $ci_levels_to_go[$ibeg] = 0;
23119 #--------------------------------------------------------
23120 # SECTION 3: Undo ci set by sub extended_ci if not needed
23121 #--------------------------------------------------------
23123 # Undo the ci of the leading token if its controlling token
23124 # went out on a previous line without ci
23125 if ( $ci_levels_to_go[$ibeg] ) {
23126 my $Kbeg = $K_to_go[$ibeg];
23127 my $seqno = $rseqno_controlling_my_ci->{$Kbeg};
23128 if ( $seqno && $undo_extended_ci{$seqno} ) {
23130 # but do not undo ci set by the -lp flag
23131 if ( !ref( $reduced_spaces_to_go[$ibeg] ) ) {
23132 $ci_levels_to_go[$ibeg] = 0;
23133 $leading_spaces_to_go[$ibeg] =
23134 $reduced_spaces_to_go[$ibeg];
23139 # Flag any controlling opening tokens in lines without ci. This
23140 # will be used later in the above if statement to undo the ci which
23141 # they added. The array i_controlling_ci[$line] was prepared at
23142 # the top of this routine.
23143 if ( !$ci_levels_to_go[$ibeg]
23144 && defined( $i_controlling_ci[$line] ) )
23146 foreach my $i ( @{ $i_controlling_ci[$line] } ) {
23147 my $seqno = $type_sequence_to_go[$i];
23148 $undo_extended_ci{$seqno} = 1;
23159 { ## begin closure set_logical_padding
23164 my @q = qw( + - * / );
23165 @is_math_op{@q} = (1) x scalar(@q);
23168 sub set_logical_padding {
23170 # Look at a batch of lines and see if extra padding can improve the
23171 # alignment when there are certain leading operators. Here is an
23172 # example, in which some extra space is introduced before
23173 # '( $year' to make it line up with the subsequent lines:
23175 # if ( ( $Year < 1601 )
23176 # || ( $Year > 2899 )
23177 # || ( $EndYear < 1601 )
23178 # || ( $EndYear > 2899 ) )
23180 # &Error_OutOfRange;
23183 my ( $self, $ri_first, $ri_last, $peak_batch_size, $starting_in_quote )
23185 my $max_line = @{$ri_first} - 1;
23187 my ( $ibeg, $ibeg_next, $ibegm, $iend, $iendm, $ipad, $pad_spaces,
23188 $tok_next, $type_next, $has_leading_op_next, $has_leading_op );
23190 # Patch to produce padding in the first line of short code blocks.
23191 # This is part of an update to fix cases b562 .. b983.
23192 # This is needed to compensate for a change which was made in 'sub
23193 # starting_one_line_block' to prevent blinkers. Previously, that sub
23194 # would not look at the total block size and rely on sub
23195 # break_long_lines to break up long blocks. Consequently, the
23196 # first line of those batches would end in the opening block brace of a
23197 # sort/map/grep/eval block. When this was changed to immediately check
23198 # for blocks which were too long, the opening block brace would go out
23199 # in a single batch, and the block contents would go out as the next
23200 # batch. This caused the logic in this routine which decides if the
23201 # first line should be padded to be incorrect. To fix this, we set a
23202 # flag if the previous batch ended in an opening sort/map/grep/eval
23203 # block brace, and use it to adjust the logic to compensate.
23205 # For example, the following would have previously been a single batch
23206 # but now is two batches. We want to pad the line starting in '$dir':
23207 # my (@indices) = # batch n-1 (prev batch n)
23208 # sort { # batch n-1 (prev batch n)
23209 # $dir eq 'left' # batch n
23210 # ? $cells[$a] <=> $cells[$b] # batch n
23211 # : $cells[$b] <=> $cells[$a]; # batch n
23212 # } ( 0 .. $#cells ); # batch n
23214 my $rLL = $self->[_rLL_];
23215 my $rblock_type_of_seqno = $self->[_rblock_type_of_seqno_];
23217 my $is_short_block;
23218 if ( $K_to_go[0] > 0 ) {
23219 my $Kp = $K_to_go[0] - 1;
23220 if ( $Kp > 0 && $rLL->[$Kp]->[_TYPE_] eq 'b' ) {
23223 if ( $Kp > 0 && $rLL->[$Kp]->[_TYPE_] eq '#' ) {
23225 if ( $Kp > 0 && $rLL->[$Kp]->[_TYPE_] eq 'b' ) {
23229 my $seqno = $rLL->[$Kp]->[_TYPE_SEQUENCE_];
23231 my $block_type = $rblock_type_of_seqno->{$seqno};
23233 $is_short_block = $is_sort_map_grep_eval{$block_type};
23234 $is_short_block ||= $want_one_line_block{$block_type};
23239 # looking at each line of this batch..
23240 foreach my $line ( 0 .. $max_line - 1 ) {
23242 # see if the next line begins with a logical operator
23243 $ibeg = $ri_first->[$line];
23244 $iend = $ri_last->[$line];
23245 $ibeg_next = $ri_first->[ $line + 1 ];
23246 $tok_next = $tokens_to_go[$ibeg_next];
23247 $type_next = $types_to_go[$ibeg_next];
23249 $has_leading_op_next = ( $tok_next =~ /^\w/ )
23250 ? $is_chain_operator{$tok_next} # + - * / : ? && ||
23251 : $is_chain_operator{$type_next}; # and, or
23253 next unless ($has_leading_op_next);
23255 # next line must not be at lesser depth
23257 if ( $nesting_depth_to_go[$ibeg] >
23258 $nesting_depth_to_go[$ibeg_next] );
23260 # identify the token in this line to be padded on the left
23263 # handle lines at same depth...
23264 if ( $nesting_depth_to_go[$ibeg] ==
23265 $nesting_depth_to_go[$ibeg_next] )
23268 # if this is not first line of the batch ...
23271 # and we have leading operator..
23272 next if $has_leading_op;
23274 # Introduce padding if..
23275 # 1. the previous line is at lesser depth, or
23276 # 2. the previous line ends in an assignment
23277 # 3. the previous line ends in a 'return'
23278 # 4. the previous line ends in a comma
23279 # Example 1: previous line at lesser depth
23280 # if ( ( $Year < 1601 ) # <- we are here but
23281 # || ( $Year > 2899 ) # list has not yet
23282 # || ( $EndYear < 1601 ) # collapsed vertically
23283 # || ( $EndYear > 2899 ) )
23286 # Example 2: previous line ending in assignment:
23288 # $year % 4 ? 0 # <- We are here
23289 # : $year % 100 ? 1
23290 # : $year % 400 ? 0
23293 # Example 3: previous line ending in comma:
23300 # be sure levels agree (do not indent after an indented 'if')
23302 if ( $levels_to_go[$ibeg] ne $levels_to_go[$ibeg_next] );
23304 # allow padding on first line after a comma but only if:
23305 # (1) this is line 2 and
23306 # (2) there are at more than three lines and
23307 # (3) lines 3 and 4 have the same leading operator
23308 # These rules try to prevent padding within a long
23309 # comma-separated list.
23311 if ( $types_to_go[$iendm] eq ','
23315 my $ibeg_next_next = $ri_first->[ $line + 2 ];
23316 my $tok_next_next = $tokens_to_go[$ibeg_next_next];
23317 $ok_comma = $tok_next_next eq $tok_next;
23322 $is_assignment{ $types_to_go[$iendm] }
23324 || ( $nesting_depth_to_go[$ibegm] <
23325 $nesting_depth_to_go[$ibeg] )
23326 || ( $types_to_go[$iendm] eq 'k'
23327 && $tokens_to_go[$iendm] eq 'return' )
23330 # we will add padding before the first token
23334 # for first line of the batch..
23337 # WARNING: Never indent if first line is starting in a
23338 # continued quote, which would change the quote.
23339 next if $starting_in_quote;
23341 # if this is text after closing '}'
23342 # then look for an interior token to pad
23343 if ( $types_to_go[$ibeg] eq '}' ) {
23347 # otherwise, we might pad if it looks really good
23348 elsif ($is_short_block) {
23353 # we might pad token $ibeg, so be sure that it
23354 # is at the same depth as the next line.
23356 if ( $nesting_depth_to_go[$ibeg] !=
23357 $nesting_depth_to_go[$ibeg_next] );
23359 # We can pad on line 1 of a statement if at least 3
23360 # lines will be aligned. Otherwise, it
23361 # can look very confusing.
23363 # We have to be careful not to pad if there are too few
23364 # lines. The current rule is:
23365 # (1) in general we require at least 3 consecutive lines
23366 # with the same leading chain operator token,
23367 # (2) but an exception is that we only require two lines
23368 # with leading colons if there are no more lines. For example,
23369 # the first $i in the following snippet would get padding
23370 # by the second rule:
23372 # $i == 1 ? ( "First", "Color" )
23373 # : $i == 2 ? ( "Then", "Rarity" )
23374 # : ( "Then", "Name" );
23376 if ( $max_line > 1 ) {
23377 my $leading_token = $tokens_to_go[$ibeg_next];
23380 # never indent line 1 of a '.' series because
23381 # previous line is most likely at same level.
23382 # TODO: we should also look at the leading_spaces
23383 # of the last output line and skip if it is same
23385 next if ( $leading_token eq '.' );
23388 foreach my $l ( 2 .. 3 ) {
23389 last if ( $line + $l > $max_line );
23390 my $ibeg_next_next = $ri_first->[ $line + $l ];
23391 if ( $tokens_to_go[$ibeg_next_next] ne
23394 $tokens_differ = 1;
23399 next if ($tokens_differ);
23400 next if ( $count < 3 && $leading_token ne ':' );
23410 # find interior token to pad if necessary
23411 if ( !defined($ipad) ) {
23413 for ( my $i = $ibeg ; ( $i < $iend ) && !$ipad ; $i++ ) {
23415 # find any unclosed container
23417 unless ( $type_sequence_to_go[$i]
23418 && $mate_index_to_go[$i] > $iend );
23420 # find next nonblank token to pad
23421 $ipad = $inext_to_go[$i];
23422 last if ( $ipad > $iend );
23427 # We cannot pad the first leading token of a file because
23428 # it could cause a bug in which the starting indentation
23429 # level is guessed incorrectly each time the code is run
23430 # though perltidy, thus causing the code to march off to
23431 # the right. For example, the following snippet would have
23434 ## ov_method mycan( $package, '(""' ), $package
23435 ## or ov_method mycan( $package, '(0+' ), $package
23436 ## or ov_method mycan( $package, '(bool' ), $package
23437 ## or ov_method mycan( $package, '(nomethod' ), $package;
23439 # If this snippet is within a block this won't happen
23440 # unless the user just processes the snippet alone within
23441 # an editor. In that case either the user will see and
23442 # fix the problem or it will be corrected next time the
23443 # entire file is processed with perltidy.
23444 next if ( $ipad == 0 && $peak_batch_size <= 1 );
23446 ## THIS PATCH REMOVES THE FOLLOWING POOR PADDING (math.t) with -pbp, BUT
23447 ## IT DID MORE HARM THAN GOOD
23449 ## $font->{'loca'}->{'glyphs'}[$x]->read->{'xMin'} * 1000
23452 ##? # do not put leading padding for just 2 lines of math
23453 ##? if ( $ipad == $ibeg
23455 ##? && $levels_to_go[$ipad] > $levels_to_go[ $ipad - 1 ]
23456 ##? && $is_math_op{$type_next}
23457 ##? && $line + 2 <= $max_line )
23459 ##? my $ibeg_next_next = $ri_first->[ $line + 2 ];
23460 ##? my $type_next_next = $types_to_go[$ibeg_next_next];
23461 ##? next if !$is_math_op{$type_next_next};
23464 # next line must not be at greater depth
23465 my $iend_next = $ri_last->[ $line + 1 ];
23467 if ( $nesting_depth_to_go[ $iend_next + 1 ] >
23468 $nesting_depth_to_go[$ipad] );
23470 # lines must be somewhat similar to be padded..
23471 my $inext_next = $inext_to_go[$ibeg_next];
23472 my $type = $types_to_go[$ipad];
23473 my $type_next = $types_to_go[ $ipad + 1 ];
23475 # see if there are multiple continuation lines
23476 my $logical_continuation_lines = 1;
23477 if ( $line + 2 <= $max_line ) {
23478 my $leading_token = $tokens_to_go[$ibeg_next];
23479 my $ibeg_next_next = $ri_first->[ $line + 2 ];
23480 if ( $tokens_to_go[$ibeg_next_next] eq $leading_token
23481 && $nesting_depth_to_go[$ibeg_next] eq
23482 $nesting_depth_to_go[$ibeg_next_next] )
23484 $logical_continuation_lines++;
23488 # see if leading types match
23489 my $types_match = $types_to_go[$inext_next] eq $type;
23490 my $matches_without_bang;
23492 # if first line has leading ! then compare the following token
23493 if ( !$types_match && $type eq '!' ) {
23494 $types_match = $matches_without_bang =
23495 $types_to_go[$inext_next] eq $types_to_go[ $ipad + 1 ];
23499 # either we have multiple continuation lines to follow
23500 # and we are not padding the first token
23502 $logical_continuation_lines > 1
23503 && ( $ipad > 0 || $is_short_block )
23512 # and keywords must match if keyword
23515 && $tokens_to_go[$ipad] ne $tokens_to_go[$inext_next]
23521 #----------------------begin special checks--------------
23524 # A check is needed before we can make the pad.
23525 # If we are in a list with some long items, we want each
23526 # item to stand out. So in the following example, the
23527 # first line beginning with '$casefold->' would look good
23528 # padded to align with the next line, but then it
23529 # would be indented more than the last line, so we
23533 # $casefold->{code} eq '0041'
23534 # && $casefold->{status} eq 'C'
23535 # && $casefold->{mapping} eq '0061',
23540 # It would be faster, and almost as good, to use a comma
23541 # count, and not pad if comma_count > 1 and the previous
23542 # line did not end with a comma.
23546 my $ibg = $ri_first->[ $line + 1 ];
23547 my $depth = $nesting_depth_to_go[ $ibg + 1 ];
23549 # just use simplified formula for leading spaces to avoid
23550 # needless sub calls
23551 my $lsp = $levels_to_go[$ibg] + $ci_levels_to_go[$ibg];
23553 # look at each line beyond the next ..
23555 foreach my $ltest ( $line + 2 .. $max_line ) {
23557 my $ibg = $ri_first->[$l];
23559 # quit looking at the end of this container
23561 if ( $nesting_depth_to_go[ $ibg + 1 ] < $depth )
23562 || ( $nesting_depth_to_go[$ibg] < $depth );
23564 # cannot do the pad if a later line would be
23566 if ( $levels_to_go[$ibg] + $ci_levels_to_go[$ibg] < $lsp ) {
23572 # don't pad if we end in a broken list
23573 if ( $l == $max_line ) {
23574 my $i2 = $ri_last->[$l];
23575 if ( $types_to_go[$i2] eq '#' ) {
23576 my $i1 = $ri_first->[$l];
23577 next if terminal_type_i( $i1, $i2 ) eq ',';
23582 # a minus may introduce a quoted variable, and we will
23583 # add the pad only if this line begins with a bare word,
23584 # such as for the word 'Button' here:
23586 # Button => "Print letter \"~$_\"",
23587 # -command => [ sub { print "$_[0]\n" }, $_ ],
23588 # -accelerator => "Meta+$_"
23591 # On the other hand, if 'Button' is quoted, it looks best
23594 # 'Button' => "Print letter \"~$_\"",
23595 # -command => [ sub { print "$_[0]\n" }, $_ ],
23596 # -accelerator => "Meta+$_"
23598 if ( $types_to_go[$ibeg_next] eq 'm' ) {
23599 $ok_to_pad = 0 if $types_to_go[$ibeg] eq 'Q';
23602 next unless $ok_to_pad;
23604 #----------------------end special check---------------
23606 my $length_1 = total_line_length( $ibeg, $ipad - 1 );
23607 my $length_2 = total_line_length( $ibeg_next, $inext_next - 1 );
23608 $pad_spaces = $length_2 - $length_1;
23610 # If the first line has a leading ! and the second does
23611 # not, then remove one space to try to align the next
23612 # leading characters, which are often the same. For example:
23614 # || $ts == $self->Holder
23615 # || $self->Holder->Type eq "Arena" )
23617 # This usually helps readability, but if there are subsequent
23618 # ! operators things will still get messed up. For example:
23620 # if ( !exists $Net::DNS::typesbyname{$qtype}
23621 # && exists $Net::DNS::classesbyname{$qtype}
23622 # && !exists $Net::DNS::classesbyname{$qclass}
23623 # && exists $Net::DNS::typesbyname{$qclass} )
23624 # We can't fix that.
23625 if ($matches_without_bang) { $pad_spaces-- }
23627 # make sure this won't change if -lp is used
23628 my $indentation_1 = $leading_spaces_to_go[$ibeg];
23629 if ( ref($indentation_1)
23630 && $indentation_1->get_recoverable_spaces() == 0 )
23632 my $indentation_2 = $leading_spaces_to_go[$ibeg_next];
23633 if ( ref($indentation_2)
23634 && $indentation_2->get_recoverable_spaces() != 0 )
23640 # we might be able to handle a pad of -1 by removing a blank
23642 if ( $pad_spaces < 0 ) {
23644 # Deactivated for -kpit due to conflict. This block deletes
23645 # a space in an attempt to improve alignment in some cases,
23646 # but it may conflict with user spacing requests. For now
23647 # it is just deactivated if the -kpit option is used.
23648 if ( $pad_spaces == -1 ) {
23650 && $types_to_go[ $ipad - 1 ] eq 'b'
23651 && !%keyword_paren_inner_tightness )
23653 $self->pad_token( $ipad - 1, $pad_spaces );
23659 # now apply any padding for alignment
23660 if ( $ipad >= 0 && $pad_spaces ) {
23662 my $length_t = total_line_length( $ibeg, $iend );
23663 if ( $pad_spaces + $length_t <=
23664 $maximum_line_length_at_level[ $levels_to_go[$ibeg] ] )
23666 $self->pad_token( $ipad, $pad_spaces );
23674 $has_leading_op = $has_leading_op_next;
23675 } ## end of loop over lines
23678 } ## end closure set_logical_padding
23682 # insert $pad_spaces before token number $ipad
23683 my ( $self, $ipad, $pad_spaces ) = @_;
23684 my $rLL = $self->[_rLL_];
23685 my $KK = $K_to_go[$ipad];
23686 my $tok = $rLL->[$KK]->[_TOKEN_];
23687 my $tok_len = $rLL->[$KK]->[_TOKEN_LENGTH_];
23689 if ( $pad_spaces > 0 ) {
23690 $tok = ' ' x $pad_spaces . $tok;
23691 $tok_len += $pad_spaces;
23693 elsif ( $pad_spaces == -1 && $tokens_to_go[$ipad] eq ' ' ) {
23703 $tok = $rLL->[$KK]->[_TOKEN_] = $tok;
23704 $tok_len = $rLL->[$KK]->[_TOKEN_LENGTH_] = $tok_len;
23706 $token_lengths_to_go[$ipad] += $pad_spaces;
23707 $tokens_to_go[$ipad] = $tok;
23709 foreach my $i ( $ipad .. $max_index_to_go ) {
23710 $summed_lengths_to_go[ $i + 1 ] += $pad_spaces;
23715 { ## begin closure make_alignment_patterns
23720 my %is_my_local_our;
23723 my %is_binary_type;
23724 my %is_binary_keyword;
23729 # Note: %block_type_map is now global to enable the -gal=s option
23731 # map certain keywords to the same 'if' class to align
23732 # long if/elsif sequences. [elsif.pl]
23738 'default' => 'given',
23739 'case' => 'switch',
23741 # treat an 'undef' similar to numbers and quotes
23745 # map certain operators to the same class for pattern matching
23760 # leading keywords which to skip for efficiency when making parenless
23762 my @q = qw( my local our return );
23763 @{is_my_local_our}{@q} = (1) x scalar(@q);
23765 # leading keywords where we should just join one token to form
23768 @{is_use_like}{@q} = (1) x scalar(@q);
23770 # leading token types which may be used to make a container name
23772 @{is_kwU}{@q} = (1) x scalar(@q);
23774 # token types which prevent using leading word as a container name
23776 x / : % . | ^ < = > || >= != *= => !~ == && |= .= -= =~ += <= %= ^= x= ~~ ** << /=
23777 &= // >> ~. &. |. ^.
23778 **= <<= >>= &&= ||= //= <=> !~~ &.= |.= ^.= <<~
23781 @{is_binary_type}{@q} = (1) x scalar(@q);
23783 # token keywords which prevent using leading word as a container name
23784 @_ = qw(and or err eq ne cmp);
23785 @is_binary_keyword{@_} = (1) x scalar(@_);
23787 # Some common function calls whose args can be aligned. These do not
23788 # give good alignments if the lengths differ significantly.
23790 'unlike' => 'like',
23792 ##'is_deeply' => 'is', # poor; names lengths too different
23797 sub make_alignment_patterns {
23799 # Here we do some important preliminary work for the
23800 # vertical aligner. We create four arrays for one
23801 # output line. These arrays contain strings that can
23802 # be tested by the vertical aligner to see if
23803 # consecutive lines can be aligned vertically.
23805 # The four arrays are indexed on the vertical
23806 # alignment fields and are:
23807 # @tokens - a list of any vertical alignment tokens for this line.
23808 # These are tokens, such as '=' '&&' '#' etc which
23809 # we want to might align vertically. These are
23810 # decorated with various information such as
23811 # nesting depth to prevent unwanted vertical
23812 # alignment matches.
23813 # @fields - the actual text of the line between the vertical alignment
23815 # @patterns - a modified list of token types, one for each alignment
23816 # field. These should normally each match before alignment is
23817 # allowed, even when the alignment tokens match.
23818 # @field_lengths - the display width of each field
23820 my ( $self, $ibeg, $iend, $ralignment_type_to_go, $alignment_count,
23824 # The var $ralignment_hash contains all of the alignments for this
23825 # line. It is not yet used but is available for future coding in case
23826 # there is a need to do a preliminary scan of the alignment tokens.
23829 if ( defined($ralignment_hash) ) {
23830 $new_count = keys %{$ralignment_hash};
23832 my $old_count = $alignment_count;
23833 $old_count = 0 unless ($old_count);
23834 if ( $new_count != $old_count ) {
23835 my $K = $K_to_go[$ibeg];
23836 my $rLL = $self->[_rLL_];
23837 my $lnl = $rLL->[$K]->[_LINE_INDEX_];
23839 "alignment hash token count gives count=$new_count but old count is $old_count near line=$lnl\n"
23844 # -------------------------------------
23845 # Shortcut for lines without alignments
23846 # -------------------------------------
23847 if ( !$alignment_count ) {
23849 my $rfield_lengths = [ $summed_lengths_to_go[ $iend + 1 ] -
23850 $summed_lengths_to_go[$ibeg] ];
23853 if ( $ibeg == $iend ) {
23854 $rfields = [ $tokens_to_go[$ibeg] ];
23855 $rpatterns = [ $types_to_go[$ibeg] ];
23858 $rfields = [ join( '', @tokens_to_go[ $ibeg .. $iend ] ) ];
23859 $rpatterns = [ join( '', @types_to_go[ $ibeg .. $iend ] ) ];
23861 return [ $rtokens, $rfields, $rpatterns, $rfield_lengths ];
23864 my $i_start = $ibeg;
23866 my %container_name = ( 0 => "" );
23871 my @field_lengths = ();
23873 #-------------------------------------------------------------
23874 # Make a container name for any uncontained commas, issue c089
23875 #-------------------------------------------------------------
23876 # This is a generalization of the fix for rt136416 which was a
23877 # specialized patch just for 'use Module' statements.
23878 # We restrict this to semicolon-terminated statements; that way
23879 # we know that the top level commas are not in a list container.
23880 if ( $ibeg == 0 && $iend == $max_index_to_go ) {
23881 my $iterm = $max_index_to_go;
23882 if ( $types_to_go[$iterm] eq '#' ) {
23883 $iterm = $iprev_to_go[$iterm];
23886 # Alignment lines ending like '=> sub {'; fixes issue c093
23887 my $term_type_ok = $types_to_go[$iterm] eq ';';
23889 $tokens_to_go[$iterm] eq '{' && $block_type_to_go[$iterm];
23891 if ( $iterm > $ibeg
23893 && !$is_my_local_our{ $tokens_to_go[$ibeg] }
23894 && $levels_to_go[$ibeg] eq $levels_to_go[$iterm] )
23897 # Make a container name by combining all leading barewords,
23898 # keywords and functions.
23904 for ( $ibeg .. $iterm ) {
23905 my $type = $types_to_go[$_];
23907 if ( $type eq 'b' ) {
23912 my $token = $tokens_to_go[$_];
23914 # Give up if we find an opening paren, binary operator or
23915 # comma within or after the proposed container name.
23917 || $is_binary_type{$type}
23918 || $type eq 'k' && $is_binary_keyword{$token} )
23924 # The container name is only built of certain types:
23925 last if ( !$is_kwU{$type} );
23927 # Normally it is made of one word, but two words for 'use'
23928 if ( $count == 0 ) {
23930 && $is_use_like{ $tokens_to_go[$_] } )
23938 elsif ( defined($count_max) && $count >= $count_max ) {
23942 if ( defined( $name_map{$token} ) ) {
23943 $token = $name_map{$token};
23946 $name .= ' ' . $token;
23951 # Require a space after the container name token(s)
23953 && defined($ilast_blank)
23954 && $ilast_blank > $iname_end )
23956 $name = substr( $name, 1 );
23957 $container_name{'0'} = $name;
23962 # --------------------
23963 # Loop over all tokens
23964 # --------------------
23965 my $j = 0; # field index
23969 for my $i ( $ibeg .. $iend ) {
23971 # Keep track of containers balanced on this line only.
23972 # These are used below to prevent unwanted cross-line alignments.
23973 # Unbalanced containers already avoid aligning across
23974 # container boundaries.
23976 my $type = $types_to_go[$i];
23977 my $token = $tokens_to_go[$i];
23978 my $depth_last = $depth;
23979 if ( $type_sequence_to_go[$i] ) {
23980 if ( $is_opening_token{$token} ) {
23982 # if container is balanced on this line...
23983 my $i_mate = $mate_index_to_go[$i];
23984 if ( $i_mate > $i && $i_mate <= $iend ) {
23987 # Append the previous token name to make the container name
23988 # more unique. This name will also be given to any commas
23989 # within this container, and it helps avoid undesirable
23990 # alignments of different types of containers.
23992 # Containers beginning with { and [ are given those names
23993 # for uniqueness. That way commas in different containers
23994 # will not match. Here is an example of what this prevents:
23995 # a => [ 1, 2, 3 ],
23996 # b => { b1 => 4, b2 => 5 },
23997 # Here is another example of what we avoid by labeling the
24000 # is_d( [ $a, $a ], [ $b, $c ] );
24001 # is_d( { foo => $a, bar => $a }, { foo => $b, bar => $c } );
24002 # is_d( [ \$a, \$a ], [ \$b, \$c ] );
24005 if ( $token eq '(' ) {
24006 $name = $self->make_paren_name($i);
24009 # name cannot be '.', so change to something else if so
24010 if ( $name eq '.' ) { $name = 'dot' }
24012 $container_name{$depth} = "+" . $name;
24014 # Make the container name even more unique if necessary.
24015 # If we are not vertically aligning this opening paren,
24016 # append a character count to avoid bad alignment since
24017 # it usually looks bad to align commas within containers
24018 # for which the opening parens do not align. Here
24019 # is an example very BAD alignment of commas (because
24020 # the atan2 functions are not all aligned):
24022 # $X * $RTYSQP1 * atan2( $X, $RTYSQP1 ) +
24023 # $Y * $RTXSQP1 * atan2( $Y, $RTXSQP1 ) -
24024 # $X * atan2( $X, 1 ) -
24025 # $Y * atan2( $Y, 1 );
24027 # On the other hand, it is usually okay to align commas
24028 # if opening parens align, such as:
24029 # glVertex3d( $cx + $s * $xs, $cy, $z );
24030 # glVertex3d( $cx, $cy + $s * $ys, $z );
24031 # glVertex3d( $cx - $s * $xs, $cy, $z );
24032 # glVertex3d( $cx, $cy - $s * $ys, $z );
24034 # To distinguish between these situations, we append
24035 # the length of the line from the previous matching
24036 # token, or beginning of line, to the function name.
24037 # This will allow the vertical aligner to reject
24038 # undesirable matches.
24040 # if we are not aligning on this paren...
24041 if ( !$ralignment_type_to_go->[$i] ) {
24043 # Sum length from previous alignment
24044 my $len = token_sequence_length( $i_start, $i - 1 );
24046 # Minor patch: do not include the length of any '!'.
24047 # Otherwise, commas in the following line will not
24049 # ok( 20, tapprox( ( pdl 2, 3 ), ( pdl 2, 3 ) ) );
24050 # ok( 21, !tapprox( ( pdl 2, 3 ), ( pdl 2, 4 ) ) );
24051 if ( grep { $_ eq '!' }
24052 @types_to_go[ $i_start .. $i - 1 ] )
24057 if ( $i_start == $ibeg ) {
24059 # For first token, use distance from start of
24060 # line but subtract off the indentation due to
24061 # level. Otherwise, results could vary with
24064 leading_spaces_to_go($ibeg) -
24065 $levels_to_go[$i_start] *
24066 $rOpts_indent_columns;
24067 if ( $len < 0 ) { $len = 0 }
24070 # tack this length onto the container name to try
24071 # to make a unique token name
24072 $container_name{$depth} .= "-" . $len;
24073 } ## end if ( !$ralignment_type_to_go...)
24074 } ## end if ( $i_mate > $i && $i_mate...)
24075 } ## end if ( $is_opening_token...)
24077 elsif ( $is_closing_type{$token} ) {
24078 $depth-- if $depth > 0;
24080 } ## end if ( $type_sequence_to_go...)
24082 # if we find a new synchronization token, we are done with
24084 if ( $i > $i_start && $ralignment_type_to_go->[$i] ) {
24086 my $tok = my $raw_tok = $ralignment_type_to_go->[$i];
24088 # map similar items
24089 my $tok_map = $operator_map{$tok};
24090 $tok = $tok_map if ($tok_map);
24092 # make separators in different nesting depths unique
24093 # by appending the nesting depth digit.
24094 if ( $raw_tok ne '#' ) {
24095 $tok .= "$nesting_depth_to_go[$i]";
24098 # also decorate commas with any container name to avoid
24099 # unwanted cross-line alignments.
24100 if ( $raw_tok eq ',' || $raw_tok eq '=>' ) {
24102 # If we are at an opening token which increased depth, we have
24103 # to use the name from the previous depth.
24105 ( $depth_last < $depth ? $depth_last : $depth );
24106 if ( $container_name{$depth_p} ) {
24107 $tok .= $container_name{$depth_p};
24111 # Patch to avoid aligning leading and trailing if, unless.
24112 # Mark trailing if, unless statements with container names.
24113 # This makes them different from leading if, unless which
24114 # are not so marked at present. If we ever need to name
24115 # them too, we could use ci to distinguish them.
24116 # Example problem to avoid:
24117 # return ( 2, "DBERROR" )
24118 # if ( $retval == 2 );
24119 # if ( scalar @_ ) {
24120 # my ( $a, $b, $c, $d, $e, $f ) = @_;
24122 if ( $raw_tok eq '(' ) {
24123 if ( $ci_levels_to_go[$ibeg]
24124 && $container_name{$depth} =~ /^\+(if|unless)/ )
24126 $tok .= $container_name{$depth};
24130 # Decorate block braces with block types to avoid
24131 # unwanted alignments such as the following:
24132 # foreach ( @{$routput_array} ) { $fh->print($_) }
24133 # eval { $fh->close() };
24134 if ( $raw_tok eq '{' && $block_type_to_go[$i] ) {
24135 my $block_type = $block_type_to_go[$i];
24137 # map certain related block types to allow
24138 # else blocks to align
24139 $block_type = $block_type_map{$block_type}
24140 if ( defined( $block_type_map{$block_type} ) );
24142 # remove sub names to allow one-line sub braces to align
24143 # regardless of name
24144 if ( $block_type =~ /$SUB_PATTERN/ ) { $block_type = 'sub' }
24146 # allow all control-type blocks to align
24147 if ( $block_type =~ /^[A-Z]+$/ ) { $block_type = 'BEGIN' }
24149 $tok .= $block_type;
24152 # Mark multiple copies of certain tokens with the copy number
24153 # This will allow the aligner to decide if they are matched.
24154 # For now, only do this for equals. For example, the two
24155 # equals on the next line will be labeled '=0' and '=0.2'.
24156 # Later, the '=0.2' will be ignored in alignment because it
24159 # $| = $debug = 1 if $opt_d;
24160 # $full_index = 1 if $opt_i;
24162 if ( $raw_tok eq '=' || $raw_tok eq '=>' ) {
24163 $token_count{$tok}++;
24164 if ( $token_count{$tok} > 1 ) {
24165 $tok .= '.' . $token_count{$tok};
24169 # concatenate the text of the consecutive tokens to form
24172 join( '', @tokens_to_go[ $i_start .. $i - 1 ] ) );
24174 push @field_lengths,
24175 $summed_lengths_to_go[$i] - $summed_lengths_to_go[$i_start];
24177 # store the alignment token for this field
24178 push( @tokens, $tok );
24180 # get ready for the next batch
24183 $patterns[$j] = "";
24184 } ## end if ( new synchronization token
24186 # continue accumulating tokens
24188 # for keywords we have to use the actual text
24189 if ( $type eq 'k' ) {
24191 my $tok_fix = $tokens_to_go[$i];
24193 # but map certain keywords to a common string to allow
24195 $tok_fix = $keyword_map{$tok_fix}
24196 if ( defined( $keyword_map{$tok_fix} ) );
24197 $patterns[$j] .= $tok_fix;
24200 elsif ( $type eq 'b' ) {
24201 $patterns[$j] .= $type;
24204 # Mark most things before arrows as a quote to
24205 # get them to line up. Testfile: mixed.pl.
24207 # handle $type =~ /^[wnC]$/
24208 elsif ( $is_w_n_C{$type} ) {
24210 my $type_fix = $type;
24212 if ( $i < $iend - 1 ) {
24213 my $next_type = $types_to_go[ $i + 1 ];
24214 my $i_next_nonblank =
24215 ( ( $next_type eq 'b' ) ? $i + 2 : $i + 1 );
24217 if ( $types_to_go[$i_next_nonblank] eq '=>' ) {
24220 # Patch to ignore leading minus before words,
24221 # by changing pattern 'mQ' into just 'Q',
24222 # so that we can align things like this:
24223 # Button => "Print letter \"~$_\"",
24224 # -command => [ sub { print "$_[0]\n" }, $_ ],
24225 if ( $patterns[$j] eq 'm' ) { $patterns[$j] = "" }
24229 # Convert a bareword within braces into a quote for
24230 # matching. This will allow alignment of expressions like
24232 # local ( $SIG{'INT'} ) = IGNORE;
24233 # local ( $SIG{ALRM} ) = 'POSTMAN';
24237 && $types_to_go[ $i - 1 ] eq 'L'
24238 && $types_to_go[ $i + 1 ] eq 'R' )
24243 # patch to make numbers and quotes align
24244 if ( $type eq 'n' ) { $type_fix = 'Q' }
24246 $patterns[$j] .= $type_fix;
24247 } ## end elsif ( $is_w_n_C{$type} )
24249 # ignore any ! in patterns
24250 elsif ( $type eq '!' ) { }
24254 $patterns[$j] .= $type;
24257 # remove any zero-level name at first fat comma
24258 if ( $depth == 0 && $type eq '=>' ) {
24259 $container_name{$depth} = "";
24261 } ## end for my $i ( $ibeg .. $iend)
24263 # done with this line .. join text of tokens to make the last field
24264 push( @fields, join( '', @tokens_to_go[ $i_start .. $iend ] ) );
24265 push @field_lengths,
24266 $summed_lengths_to_go[ $iend + 1 ] - $summed_lengths_to_go[$i_start];
24268 return [ \@tokens, \@fields, \@patterns, \@field_lengths ];
24269 } ## end sub make_alignment_patterns
24271 } ## end closure make_alignment_patterns
24273 sub make_paren_name {
24274 my ( $self, $i ) = @_;
24276 # The token at index $i is a '('.
24277 # Create an alignment name for it to avoid incorrect alignments.
24279 # Start with the name of the previous nonblank token...
24282 return "" if ( $im < 0 );
24283 if ( $types_to_go[$im] eq 'b' ) { $im--; }
24284 return "" if ( $im < 0 );
24285 $name = $tokens_to_go[$im];
24287 # Prepend any sub name to an isolated -> to avoid unwanted alignments
24288 # [test case is test8/penco.pl]
24289 if ( $name eq '->' ) {
24291 if ( $im >= 0 && $types_to_go[$im] ne 'b' ) {
24292 $name = $tokens_to_go[$im] . $name;
24296 # Finally, remove any leading arrows
24297 if ( substr( $name, 0, 2 ) eq '->' ) {
24298 $name = substr( $name, 2 );
24303 { ## begin closure final_indentation_adjustment
24305 my ( $last_indentation_written, $last_unadjusted_indentation,
24306 $last_leading_token );
24308 sub initialize_final_indentation_adjustment {
24309 $last_indentation_written = 0;
24310 $last_unadjusted_indentation = 0;
24311 $last_leading_token = "";
24315 sub final_indentation_adjustment {
24317 #--------------------------------------------------------------------
24318 # This routine sets the final indentation of a line in the Formatter.
24319 #--------------------------------------------------------------------
24321 # It starts with the basic indentation which has been defined for the
24322 # leading token, and then takes into account any options that the user
24323 # has set regarding special indenting and outdenting.
24325 # This routine has to resolve a number of complex interacting issues,
24327 # 1. The various -cti=n type flags, which contain the desired change in
24328 # indentation for lines ending in commas and semicolons, should be
24330 # 2. qw quotes require special processing and do not fit perfectly
24331 # with normal containers,
24332 # 3. formatting with -wn can complicate things, especially with qw
24334 # 4. formatting with the -lp option is complicated, and does not
24335 # work well with qw quotes and with -wn formatting.
24336 # 5. a number of special situations, such as 'cuddled' formatting.
24337 # 6. This routine is mainly concerned with outdenting closing tokens
24338 # but note that there is some overlap with the functions of sub
24339 # undo_ci, which was processed earlier, so care has to be taken to
24340 # keep them coordinated.
24345 $rpatterns, $ri_first,
24346 $ri_last, $rindentation_list,
24347 $level_jump, $starting_in_quote,
24348 $is_static_block_comment,
24351 my $rLL = $self->[_rLL_];
24352 my $Klimit = $self->[_Klimit_];
24353 my $ris_bli_container = $self->[_ris_bli_container_];
24354 my $rseqno_controlling_my_ci = $self->[_rseqno_controlling_my_ci_];
24355 my $rwant_reduced_ci = $self->[_rwant_reduced_ci_];
24356 my $rK_weld_left = $self->[_rK_weld_left_];
24358 # Find the last code token of this line
24359 my $i_terminal = $iend;
24360 my $terminal_type = $types_to_go[$iend];
24361 if ( $terminal_type eq '#' && $i_terminal > $ibeg ) {
24363 $terminal_type = $types_to_go[$i_terminal];
24364 if ( $terminal_type eq 'b' && $i_terminal > $ibeg ) {
24366 $terminal_type = $types_to_go[$i_terminal];
24370 my $terminal_block_type = $block_type_to_go[$i_terminal];
24371 my $is_outdented_line = 0;
24373 my $type_beg = $types_to_go[$ibeg];
24374 my $token_beg = $tokens_to_go[$ibeg];
24375 my $block_type_beg = $block_type_to_go[$ibeg];
24376 my $level_beg = $levels_to_go[$ibeg];
24377 my $leading_spaces_beg = $leading_spaces_to_go[$ibeg];
24378 my $K_beg = $K_to_go[$ibeg];
24379 my $seqno_beg = $type_sequence_to_go[$ibeg];
24380 my $ibeg_weld_fix = $ibeg;
24381 my $is_closing_type_beg = $is_closing_type{$type_beg};
24382 my $is_bli_beg = $seqno_beg ? $ris_bli_container->{$seqno_beg} : 0;
24384 # QW INDENTATION PATCH 3:
24385 my $seqno_qw_closing;
24386 if ( $type_beg eq 'q' && $ibeg == 0 ) {
24387 my $KK = $K_to_go[$ibeg];
24388 $seqno_qw_closing =
24389 $self->[_rending_multiline_qw_seqno_by_K_]->{$KK};
24392 my $is_semicolon_terminated = $terminal_type eq ';'
24393 && ( $nesting_depth_to_go[$iend] < $nesting_depth_to_go[$ibeg]
24394 || $seqno_qw_closing );
24396 # NOTE: A future improvement would be to make it semicolon terminated
24397 # even if it does not have a semicolon but is followed by a closing
24398 # block brace. This would undo ci even for something like the
24399 # following, in which the final paren does not have a semicolon because
24400 # it is a possible weld location:
24402 # if ($BOLD_MATH) {
24404 # $labels, $comment,
24405 # join( '', '<B>', &make_math( $mode, '', '', $_ ), '</B>' )
24410 # MOJO: Set a flag if this lines begins with ')->'
24411 my $leading_paren_arrow = (
24412 $is_closing_type_beg
24413 && $token_beg eq ')'
24415 ( $ibeg < $i_terminal && $types_to_go[ $ibeg + 1 ] eq '->' )
24416 || ( $ibeg < $i_terminal - 1
24417 && $types_to_go[ $ibeg + 1 ] eq 'b'
24418 && $types_to_go[ $ibeg + 2 ] eq '->' )
24422 #---------------------------------------------------------
24423 # Section 1: set a flag and a default indentation
24425 # Most lines are indented according to the initial token.
24426 # But it is common to outdent to the level just after the
24427 # terminal token in certain cases...
24428 # adjust_indentation flag:
24429 # 0 - do not adjust
24431 # 2 - vertically align with opening token
24433 #---------------------------------------------------------
24434 my $adjust_indentation = 0;
24435 my $default_adjust_indentation = $adjust_indentation;
24438 $opening_indentation, $opening_offset,
24439 $is_leading, $opening_exists
24442 # Honor any flag to reduce -ci set by the -bbxi=n option
24443 if ( $seqno_beg && $rwant_reduced_ci->{$seqno_beg} ) {
24445 # if this is an opening, it must be alone on the line ...
24446 if ( $is_closing_type{$type_beg} || $ibeg == $i_terminal ) {
24447 $adjust_indentation = 1;
24450 # ... or a single welded unit (fix for b1173)
24451 elsif ($total_weld_count) {
24452 my $Kterm = $K_to_go[$i_terminal];
24453 my $Kterm_test = $rK_weld_left->{$Kterm};
24454 if ( defined($Kterm_test) && $Kterm_test >= $K_beg ) {
24455 $Kterm = $Kterm_test;
24457 if ( $Kterm == $K_beg ) { $adjust_indentation = 1 }
24461 # Update the $is_bli flag as we go. It is initially 1.
24462 # We note seeing a leading opening brace by setting it to 2.
24463 # If we get to the closing brace without seeing the opening then we
24464 # turn it off. This occurs if the opening brace did not get output
24465 # at the start of a line, so we will then indent the closing brace
24466 # in the default way.
24467 if ( $is_bli_beg && $is_bli_beg == 1 ) {
24468 my $K_opening_container = $self->[_K_opening_container_];
24469 my $K_opening = $K_opening_container->{$seqno_beg};
24470 if ( $K_beg eq $K_opening ) {
24471 $ris_bli_container->{$seqno_beg} = $is_bli_beg = 2;
24473 else { $is_bli_beg = 0 }
24476 # QW PATCH for the combination -lp -wn
24477 # For -lp formatting use $ibeg_weld_fix to get around the problem
24478 # that with -lp type formatting the opening and closing tokens to not
24479 # have sequence numbers.
24480 if ( $seqno_qw_closing && $total_weld_count ) {
24481 my $i_plus = $inext_to_go[$ibeg];
24482 if ( $i_plus <= $max_index_to_go ) {
24483 my $K_plus = $K_to_go[$i_plus];
24484 if ( defined( $rK_weld_left->{$K_plus} ) ) {
24485 $ibeg_weld_fix = $i_plus;
24490 # if we are at a closing token of some type..
24491 if ( $is_closing_type_beg || $seqno_qw_closing ) {
24493 # get the indentation of the line containing the corresponding
24496 $opening_indentation, $opening_offset,
24497 $is_leading, $opening_exists
24499 = $self->get_opening_indentation( $ibeg_weld_fix, $ri_first,
24500 $ri_last, $rindentation_list, $seqno_qw_closing );
24502 my $terminal_is_in_list = $self->is_in_list_by_i($i_terminal);
24504 # First set the default behavior:
24507 # default behavior is to outdent closing lines
24508 # of the form: "); }; ]; )->xxx;"
24509 $is_semicolon_terminated
24511 # and 'cuddled parens' of the form: ")->pack("
24512 # Bug fix for RT #123749]: the types here were
24513 # incorrectly '(' and ')'. Corrected to be '{' and '}'
24515 $terminal_type eq '{'
24516 && $type_beg eq '}'
24517 && ( $nesting_depth_to_go[$iend] + 1 ==
24518 $nesting_depth_to_go[$ibeg] )
24521 # remove continuation indentation for any line like
24523 # or without ending '{' and unbalanced, such as
24524 # such as '}->{$operator}'
24528 && ( $types_to_go[$iend] eq '{'
24529 || $levels_to_go[$iend] < $level_beg )
24532 # and when the next line is at a lower indentation level...
24534 # PATCH #1: and only if the style allows undoing continuation
24535 # for all closing token types. We should really wait until
24536 # the indentation of the next line is known and then make
24537 # a decision, but that would require another pass.
24539 # PATCH #2: and not if this token is under -xci control
24540 || ( $level_jump < 0
24541 && !$some_closing_token_indentation
24542 && !$rseqno_controlling_my_ci->{$K_beg} )
24544 # Patch for -wn=2, multiple welded closing tokens
24545 || ( $i_terminal > $ibeg
24546 && $is_closing_type{ $types_to_go[$iend] } )
24548 # Alternate Patch for git #51, isolated closing qw token not
24549 # outdented if no-delete-old-newlines is set. This works, but
24550 # a more general patch elsewhere fixes the real problem: ljump.
24551 # || ( $seqno_qw_closing && $ibeg == $i_terminal )
24555 $adjust_indentation = 1;
24558 # outdent something like '),'
24560 $terminal_type eq ','
24562 # Removed this constraint for -wn
24563 # OLD: allow just one character before the comma
24564 # && $i_terminal == $ibeg + 1
24566 # require LIST environment; otherwise, we may outdent too much -
24567 # this can happen in calls without parentheses (overload.t);
24568 && $terminal_is_in_list
24571 $adjust_indentation = 1;
24574 # undo continuation indentation of a terminal closing token if
24575 # it is the last token before a level decrease. This will allow
24576 # a closing token to line up with its opening counterpart, and
24577 # avoids an indentation jump larger than 1 level.
24578 if ( $i_terminal == $ibeg
24579 && $is_closing_type_beg
24581 && $K_beg < $Klimit )
24583 my $K_plus = $K_beg + 1;
24584 my $type_plus = $rLL->[$K_plus]->[_TYPE_];
24586 if ( $type_plus eq 'b' && $K_plus < $Klimit ) {
24587 $type_plus = $rLL->[ ++$K_plus ]->[_TYPE_];
24590 if ( $type_plus eq '#' && $K_plus < $Klimit ) {
24591 $type_plus = $rLL->[ ++$K_plus ]->[_TYPE_];
24592 if ( $type_plus eq 'b' && $K_plus < $Klimit ) {
24593 $type_plus = $rLL->[ ++$K_plus ]->[_TYPE_];
24596 # Note: we have skipped past just one comment (perhaps a
24597 # side comment). There could be more, and we could easily
24598 # skip past all the rest with the following code, or with a
24599 # while loop. It would be rare to have to do this, and
24600 # those block comments would still be indented, so it would
24601 # to leave them indented. So it seems best to just stop at
24602 # a maximum of one comment.
24603 ##if ($type_plus eq '#') {
24604 ## $K_plus = $self->K_next_code($K_plus);
24608 if ( !$is_bli_beg && defined($K_plus) ) {
24609 my $lev = $level_beg;
24610 my $level_next = $rLL->[$K_plus]->[_LEVEL_];
24612 # and do not undo ci if it was set by the -xci option
24613 $adjust_indentation = 1
24614 if ( $level_next < $lev
24615 && !$rseqno_controlling_my_ci->{$K_beg} );
24618 # Patch for RT #96101, in which closing brace of anonymous subs
24619 # was not outdented. We should look ahead and see if there is
24620 # a level decrease at the next token (i.e., a closing token),
24621 # but right now we do not have that information. For now
24622 # we see if we are in a list, and this works well.
24623 # See test files 'sub*.t' for good test cases.
24624 if ( $terminal_is_in_list
24625 && !$rOpts_indent_closing_brace
24627 && $block_type_beg =~ /$ASUB_PATTERN/ )
24630 $opening_indentation, $opening_offset,
24631 $is_leading, $opening_exists
24633 = $self->get_opening_indentation( $ibeg, $ri_first,
24634 $ri_last, $rindentation_list );
24635 my $indentation = $leading_spaces_beg;
24636 if ( defined($opening_indentation)
24637 && get_spaces($indentation) >
24638 get_spaces($opening_indentation) )
24640 $adjust_indentation = 1;
24645 # YVES patch 1 of 2:
24646 # Undo ci of line with leading closing eval brace,
24647 # but not beyond the indention of the line with
24648 # the opening brace.
24650 $block_type_beg eq 'eval'
24651 ##&& !$rOpts_line_up_parentheses
24652 && !ref($leading_spaces_beg)
24653 && !$rOpts_indent_closing_brace
24657 $opening_indentation, $opening_offset,
24658 $is_leading, $opening_exists
24660 = $self->get_opening_indentation( $ibeg, $ri_first, $ri_last,
24661 $rindentation_list );
24662 my $indentation = $leading_spaces_beg;
24663 if ( defined($opening_indentation)
24664 && get_spaces($indentation) >
24665 get_spaces($opening_indentation) )
24667 $adjust_indentation = 1;
24671 # patch for issue git #40: -bli setting has priority
24672 $adjust_indentation = 0 if ($is_bli_beg);
24674 $default_adjust_indentation = $adjust_indentation;
24676 # Now modify default behavior according to user request:
24677 # handle option to indent non-blocks of the form ); }; ];
24678 # But don't do special indentation to something like ')->pack('
24679 if ( !$block_type_beg ) {
24681 # Note that logical padding has already been applied, so we may
24682 # need to remove some spaces to get a valid hash key.
24683 my $tok = $token_beg;
24684 my $cti = $closing_token_indentation{$tok};
24686 # Fix the value of 'cti' for an isloated non-welded closing qw
24688 if ( $seqno_qw_closing && $ibeg_weld_fix == $ibeg ) {
24690 # A quote delimiter which is not a container will not have
24691 # a cti value defined. In this case use the style of a
24692 # paren. For example
24700 if ( !defined($cti) && length($tok) == 1 ) {
24702 # something other than ')', '}', ']' ; use flag for ')'
24703 $cti = $closing_token_indentation{')'};
24705 # But for now, do not outdent non-container qw
24706 # delimiters because it would would change existing
24708 if ( $tok ne '>' ) { $cti = 3 }
24711 # A non-welded closing qw cannot currently use -cti=1
24712 # because that option requires a sequence number to find
24713 # the opening indentation, and qw quote delimiters are not
24715 if ( defined($cti) && $cti == 1 ) { $cti = 0 }
24718 if ( !defined($cti) ) {
24720 # $cti may not be defined for several reasons.
24721 # -padding may have been applied so the character
24723 # - we may have welded to a closing quote token.
24724 # Here is an example (perltidy -wn):
24725 # __PACKAGE__->load_components( qw(
24729 $adjust_indentation = 0;
24732 elsif ( $cti == 1 ) {
24733 if ( $i_terminal <= $ibeg + 1
24734 || $is_semicolon_terminated )
24736 $adjust_indentation = 2;
24739 $adjust_indentation = 0;
24742 elsif ( $cti == 2 ) {
24743 if ($is_semicolon_terminated) {
24744 $adjust_indentation = 3;
24747 $adjust_indentation = 0;
24750 elsif ( $cti == 3 ) {
24751 $adjust_indentation = 3;
24755 # handle option to indent blocks
24758 $rOpts_indent_closing_brace
24760 $i_terminal == $ibeg # isolated terminal '}'
24761 || $is_semicolon_terminated
24765 $adjust_indentation = 3;
24770 # if at ');', '};', '>;', and '];' of a terminal qw quote
24772 substr( $rpatterns->[0], 0, 2 ) eq 'qb'
24773 && substr( $rfields->[0], -1, 1 ) eq ';'
24774 ##&& $rpatterns->[0] =~ /^qb*;$/
24775 && $rfields->[0] =~ /^([\)\}\]\>]);$/
24778 if ( $closing_token_indentation{$1} == 0 ) {
24779 $adjust_indentation = 1;
24782 $adjust_indentation = 3;
24786 # if line begins with a ':', align it with any
24787 # previous line leading with corresponding ?
24788 elsif ( $type_beg eq ':' ) {
24790 $opening_indentation, $opening_offset,
24791 $is_leading, $opening_exists
24793 = $self->get_opening_indentation( $ibeg, $ri_first, $ri_last,
24794 $rindentation_list );
24795 if ($is_leading) { $adjust_indentation = 2; }
24798 #---------------------------------------------------------
24799 # Section 2: set indentation according to flag set above
24801 # Select the indentation object to define leading
24802 # whitespace. If we are outdenting something like '} } );'
24803 # then we want to use one level below the last token
24804 # ($i_terminal) in order to get it to fully outdent through
24806 #---------------------------------------------------------
24809 my $level_end = $levels_to_go[$iend];
24811 if ( $adjust_indentation == 0 ) {
24812 $indentation = $leading_spaces_beg;
24815 elsif ( $adjust_indentation == 1 ) {
24817 # Change the indentation to be that of a different token on the line
24818 # Previously, the indentation of the terminal token was used:
24820 # $indentation = $reduced_spaces_to_go[$i_terminal];
24821 # $lev = $levels_to_go[$i_terminal];
24823 # Generalization for MOJO:
24824 # Use the lowest level indentation of the tokens on the line.
24825 # For example, here we can use the indentation of the ending ';':
24826 # } until ($selection > 0 and $selection < 10); # ok to use ';'
24827 # But this will not outdent if we use the terminal indentation:
24828 # )->then( sub { # use indentation of the ->, not the {
24829 # Warning: reduced_spaces_to_go[] may be a reference, do not
24830 # do numerical checks with it
24833 $indentation = $reduced_spaces_to_go[$i_ind];
24834 $lev = $levels_to_go[$i_ind];
24835 while ( $i_ind < $i_terminal ) {
24837 if ( $levels_to_go[$i_ind] < $lev ) {
24838 $indentation = $reduced_spaces_to_go[$i_ind];
24839 $lev = $levels_to_go[$i_ind];
24844 # handle indented closing token which aligns with opening token
24845 elsif ( $adjust_indentation == 2 ) {
24847 # handle option to align closing token with opening token
24850 # calculate spaces needed to align with opening token
24852 get_spaces($opening_indentation) + $opening_offset;
24854 # Indent less than the previous line.
24856 # Problem: For -lp we don't exactly know what it was if there
24857 # were recoverable spaces sent to the aligner. A good solution
24858 # would be to force a flush of the vertical alignment buffer, so
24859 # that we would know. For now, this rule is used for -lp:
24861 # When the last line did not start with a closing token we will
24862 # be optimistic that the aligner will recover everything wanted.
24864 # This rule will prevent us from breaking a hierarchy of closing
24865 # tokens, and in a worst case will leave a closing paren too far
24866 # indented, but this is better than frequently leaving it not
24868 my $last_spaces = get_spaces($last_indentation_written);
24870 if ( ref($last_indentation_written)
24871 && !$is_closing_token{$last_leading_token} )
24874 get_recoverable_spaces($last_indentation_written);
24877 # reset the indentation to the new space count if it works
24878 # only options are all or none: nothing in-between looks good
24881 my $diff = $last_spaces - $space_count;
24883 $indentation = $space_count;
24887 # We need to fix things ... but there is no good way to do it.
24888 # The best solution is for the user to use a longer maximum
24889 # line length. We could get a smooth variation if we just move
24890 # the paren in using
24891 # $space_count -= ( 1 - $diff );
24892 # But unfortunately this can give a rather unbalanced look.
24894 # For -xlp we currently allow a tolerance of one indentation
24895 # level and then revert to a simpler default. This will jump
24896 # suddenly but keeps a balanced look.
24897 if ( $rOpts_extended_line_up_parentheses
24898 && $diff >= -$rOpts_indent_columns
24899 && $space_count > $leading_spaces_beg )
24901 $indentation = $space_count;
24904 # Otherwise revert to defaults
24905 elsif ( $default_adjust_indentation == 0 ) {
24906 $indentation = $leading_spaces_beg;
24908 elsif ( $default_adjust_indentation == 1 ) {
24909 $indentation = $reduced_spaces_to_go[$i_terminal];
24910 $lev = $levels_to_go[$i_terminal];
24915 # Full indentaion of closing tokens (-icb and -icp or -cti=2)
24918 # handle -icb (indented closing code block braces)
24919 # Updated method for indented block braces: indent one full level if
24920 # there is no continuation indentation. This will occur for major
24921 # structures such as sub, if, else, but not for things like map
24924 # Note: only code blocks without continuation indentation are
24925 # handled here (if, else, unless, ..). In the following snippet,
24926 # the terminal brace of the sort block will have continuation
24927 # indentation as shown so it will not be handled by the coding
24928 # here. We would have to undo the continuation indentation to do
24929 # this, but it probably looks ok as is. This is a possible future
24930 # update for semicolon terminated lines.
24932 # if ($sortby eq 'date' or $sortby eq 'size') {
24934 # $file_data{$a}{$sortby} <=> $file_data{$b}{$sortby}
24939 if ( $block_type_beg
24940 && $ci_levels_to_go[$i_terminal] == 0 )
24942 my $spaces = get_spaces( $leading_spaces_to_go[$i_terminal] );
24943 $indentation = $spaces + $rOpts_indent_columns;
24945 # NOTE: for -lp we could create a new indentation object, but
24946 # there is probably no need to do it
24949 # handle -icp and any -icb block braces which fall through above
24950 # test such as the 'sort' block mentioned above.
24953 # There are currently two ways to handle -icp...
24954 # One way is to use the indentation of the previous line:
24955 # $indentation = $last_indentation_written;
24957 # The other way is to use the indentation that the previous line
24958 # would have had if it hadn't been adjusted:
24959 $indentation = $last_unadjusted_indentation;
24961 # Current method: use the minimum of the two. This avoids
24962 # inconsistent indentation.
24963 if ( get_spaces($last_indentation_written) <
24964 get_spaces($indentation) )
24966 $indentation = $last_indentation_written;
24970 # use previous indentation but use own level
24971 # to cause list to be flushed properly
24975 # remember indentation except for multi-line quotes, which get
24977 unless ( $ibeg == 0 && $starting_in_quote ) {
24978 $last_indentation_written = $indentation;
24979 $last_unadjusted_indentation = $leading_spaces_beg;
24980 $last_leading_token = $token_beg;
24982 # Patch to make a line which is the end of a qw quote work with the
24983 # -lp option. Make $token_beg look like a closing token as some
24984 # type even if it is not. This veriable will become
24985 # $last_leading_token at the end of this loop. Then, if the -lp
24986 # style is selected, and the next line is also a
24987 # closing token, it will not get more indentation than this line.
24988 # We need to do this because qw quotes (at present) only get
24989 # continuation indentation, not one level of indentation, so we
24990 # need to turn off the -lp indentation.
24992 # ... a picture is worth a thousand words:
24994 # perltidy -wn -gnu (Without this patch):
24996 # $seqio = $gb->get_Stream_by_batch([qw(J00522 AF303112
25000 # perltidy -wn -gnu (With this patch):
25002 # $seqio = $gb->get_Stream_by_batch([qw(J00522 AF303112
25005 if ( $seqno_qw_closing
25006 && ( length($token_beg) > 1 || $token_beg eq '>' ) )
25008 $last_leading_token = ')';
25012 # be sure lines with leading closing tokens are not outdented more
25013 # than the line which contained the corresponding opening token.
25015 #--------------------------------------------------------
25016 # updated per bug report in alex_bug.pl: we must not
25017 # mess with the indentation of closing logical braces so
25018 # we must treat something like '} else {' as if it were
25019 # an isolated brace
25020 #--------------------------------------------------------
25021 my $is_isolated_block_brace = $block_type_beg
25022 && ( $i_terminal == $ibeg
25023 || $is_if_elsif_else_unless_while_until_for_foreach{$block_type_beg}
25026 # only do this for a ':; which is aligned with its leading '?'
25027 my $is_unaligned_colon = $type_beg eq ':' && !$is_leading;
25030 defined($opening_indentation)
25031 && !$leading_paren_arrow # MOJO
25032 && !$is_isolated_block_brace
25033 && !$is_unaligned_colon
25036 if ( get_spaces($opening_indentation) > get_spaces($indentation) ) {
25037 $indentation = $opening_indentation;
25041 # remember the indentation of each line of this batch
25042 push @{$rindentation_list}, $indentation;
25044 # outdent lines with certain leading tokens...
25047 # must be first word of this batch
25053 # certain leading keywords if requested
25054 $rOpts_outdent_keywords
25055 && $type_beg eq 'k'
25056 && $outdent_keyword{$token_beg}
25058 # or labels if requested
25059 || $rOpts_outdent_labels && $type_beg eq 'J'
25061 # or static block comments if requested
25062 || $is_static_block_comment
25063 && $rOpts_outdent_static_block_comments
25067 my $space_count = leading_spaces_to_go($ibeg);
25068 if ( $space_count > 0 ) {
25069 $space_count -= $rOpts_continuation_indentation;
25070 $is_outdented_line = 1;
25071 if ( $space_count < 0 ) { $space_count = 0 }
25073 # do not promote a spaced static block comment to non-spaced;
25074 # this is not normally necessary but could be for some
25075 # unusual user inputs (such as -ci = -i)
25076 if ( $type_beg eq '#' && $space_count == 0 ) {
25080 $indentation = $space_count;
25084 return ( $indentation, $lev, $level_end, $terminal_type,
25085 $terminal_block_type, $is_semicolon_terminated,
25086 $is_outdented_line );
25088 } ## end closure final_indentation_adjustment
25090 sub get_opening_indentation {
25092 # get the indentation of the line which output the opening token
25093 # corresponding to a given closing token in the current output batch.
25096 # $i_closing - index in this line of a closing token ')' '}' or ']'
25098 # $ri_first - reference to list of the first index $i for each output
25099 # line in this batch
25100 # $ri_last - reference to list of the last index $i for each output line
25102 # $rindentation_list - reference to a list containing the indentation
25103 # used for each line.
25104 # $qw_seqno - optional sequence number to use if normal seqno not defined
25105 # (TODO: would be more general to just look this up from index i)
25108 # -the indentation of the line which contained the opening token
25109 # which matches the token at index $i_opening
25110 # -and its offset (number of columns) from the start of the line
25112 my ( $self, $i_closing, $ri_first, $ri_last, $rindentation_list, $qw_seqno )
25115 # first, see if the opening token is in the current batch
25116 my $i_opening = $mate_index_to_go[$i_closing];
25117 my ( $indent, $offset, $is_leading, $exists );
25119 if ( defined($i_opening) && $i_opening >= 0 ) {
25121 # it is..look up the indentation
25122 ( $indent, $offset, $is_leading ) =
25123 lookup_opening_indentation( $i_opening, $ri_first, $ri_last,
25124 $rindentation_list );
25127 # if not, it should have been stored in the hash by a previous batch
25129 my $seqno = $type_sequence_to_go[$i_closing];
25130 $seqno = $qw_seqno unless ($seqno);
25131 ( $indent, $offset, $is_leading, $exists ) =
25132 get_saved_opening_indentation($seqno);
25134 return ( $indent, $offset, $is_leading, $exists );
25137 sub set_vertical_tightness_flags {
25139 my ( $self, $n, $n_last_line, $ibeg, $iend, $ri_first, $ri_last,
25140 $ending_in_quote, $closing_side_comment )
25143 # Define vertical tightness controls for the nth line of a batch.
25145 # These parameters are passed to the vertical aligner to indicated
25146 # if we should combine this line with the next line to achieve the
25147 # desired vertical tightness. This was previously an array but
25148 # has been converted to a hash:
25153 # 0 _vt_type: 1=opening non-block 2=closing non-block
25154 # 3=opening block brace 4=closing block brace
25156 # 1a _vt_opening_flag: 1=no multiple steps, 2=multiple steps ok
25157 # 1b _vt_closing_flag: spaces of padding to use if closing
25158 # 2 _vt_seqno: sequence number of container
25159 # 3 _vt_valid flag: do not append if this flag is false. Will be
25160 # true if appropriate -vt flag is set. Otherwise, Will be
25161 # made true only for 2 line container in parens with -lp
25162 # 4 _vt_seqno_beg: sequence number of first token of line
25163 # 5 _vt_seqno_end: sequence number of last token of line
25164 # 6 _vt_min_lines: min number of lines for joining opening cache,
25166 # 7 _vt_max_lines: max number of lines for joining opening cache,
25169 # The vertical tightness mechanism can add whitespace, so whitespace can
25170 # continually increase if we allowed it when the -fws flag is set.
25171 # See case b499 for an example.
25173 # Speedup: just return for a comment
25174 if ( $max_index_to_go == 0 && $types_to_go[0] eq '#' ) {
25178 # Define these values...
25180 my $vt_opening_flag = 0;
25181 my $vt_closing_flag = 0;
25183 my $vt_valid_flag = 0;
25184 my $vt_seqno_beg = 0;
25185 my $vt_seqno_end = 0;
25186 my $vt_min_lines = 0;
25187 my $vt_max_lines = 0;
25190 if ($rOpts_freeze_whitespace);
25192 # Uses these global parameters:
25193 # $rOpts_block_brace_tightness
25194 # $rOpts_block_brace_vertical_tightness
25195 # $rOpts_stack_closing_block_brace
25196 # %opening_vertical_tightness
25197 # %closing_vertical_tightness
25198 # %opening_token_right
25199 # %stack_closing_token
25200 # %stack_opening_token
25202 #--------------------------------------------------------------
25203 # Vertical Tightness Flags Section 1:
25204 # Handle Lines 1 .. n-1 but not the last line
25205 # For non-BLOCK tokens, we will need to examine the next line
25206 # too, so we won't consider the last line.
25207 #--------------------------------------------------------------
25208 if ( $n < $n_last_line ) {
25210 #--------------------------------------------------------------
25211 # Vertical Tightness Flags Section 1a:
25212 # Look for Type 1, last token of this line is a non-block opening token
25213 #--------------------------------------------------------------
25214 my $ibeg_next = $ri_first->[ $n + 1 ];
25215 my $token_end = $tokens_to_go[$iend];
25216 my $iend_next = $ri_last->[ $n + 1 ];
25219 $type_sequence_to_go[$iend]
25220 && !$block_type_to_go[$iend]
25221 && $is_opening_token{$token_end}
25223 $opening_vertical_tightness{$token_end} > 0
25225 # allow 2-line method call to be closed up
25226 || ( $rOpts_line_up_parentheses
25227 && $token_end eq '('
25228 && $self->[_rlp_object_by_seqno_]
25229 ->{ $type_sequence_to_go[$iend] }
25231 && $types_to_go[ $iend - 1 ] ne 'b' )
25235 # avoid multiple jumps in nesting depth in one line if
25237 my $ovt = $opening_vertical_tightness{$token_end};
25238 my $iend_next = $ri_last->[ $n + 1 ];
25240 # Turn off the -vt flag if the next line ends in a weld.
25241 # This avoids an instability with one-line welds (fixes b1183).
25242 my $type_end_next = $types_to_go[$iend_next];
25244 if ( $self->[_rK_weld_left_]->{ $K_to_go[$iend_next] }
25245 && $is_closing_type{$type_end_next} );
25247 # Avoid conflict of -bom and -pt=1 or -pt=2, fixes b1270
25248 # See similar patch above for $cvt.
25249 my $seqno = $type_sequence_to_go[$iend];
25250 if ( $ovt && $self->[_rwant_container_open_]->{$seqno} ) {
25256 && ( $nesting_depth_to_go[ $iend_next + 1 ] !=
25257 $nesting_depth_to_go[$ibeg_next] )
25261 # If -vt flag has not been set, mark this as invalid
25262 # and aligner will validate it if it sees the closing paren
25264 my $valid_flag = $ovt;
25267 $vt_opening_flag = $ovt;
25268 $vt_seqno = $type_sequence_to_go[$iend];
25269 $vt_valid_flag = $valid_flag;
25273 #--------------------------------------------------------------
25274 # Vertical Tightness Flags Section 1b:
25275 # Look for Type 2, first token of next line is a non-block closing
25276 # token .. and be sure this line does not have a side comment
25277 #--------------------------------------------------------------
25278 my $token_next = $tokens_to_go[$ibeg_next];
25279 if ( $type_sequence_to_go[$ibeg_next]
25280 && !$block_type_to_go[$ibeg_next]
25281 && $is_closing_token{$token_next}
25282 && $types_to_go[$iend] ne '#' ) # for safety, shouldn't happen!
25284 my $ovt = $opening_vertical_tightness{$token_next};
25285 my $cvt = $closing_vertical_tightness{$token_next};
25287 # Avoid conflict of -bom and -pvt=1 or -pvt=2, fixes b977, b1303
25288 # See similar patch above for $ovt.
25289 my $seqno = $type_sequence_to_go[$ibeg_next];
25290 if ( $cvt && $self->[_rwant_container_open_]->{$seqno} ) {
25294 # Implement cvt=3: like cvt=0 for assigned structures, like cvt=1
25295 # otherwise. Added for rt136417.
25297 my $seqno = $type_sequence_to_go[$ibeg_next];
25298 $cvt = $self->[_ris_assigned_structure_]->{$seqno} ? 0 : 1;
25301 # The unusual combination -pvtc=2 -dws -naws can be unstable.
25302 # This fixes b1282, b1283. This can be moved to set_options.
25304 && $rOpts_delete_old_whitespace
25305 && !$rOpts_add_whitespace )
25312 # Never append a trailing line like ')->pack(' because it
25313 # will throw off later alignment. So this line must start at a
25314 # deeper level than the next line (fix1 for welding, git #45).
25316 $nesting_depth_to_go[$ibeg_next] >=
25317 $nesting_depth_to_go[ $iend_next + 1 ] + 1
25322 !$self->is_in_list_by_i($ibeg_next)
25326 # allow closing up 2-line method calls
25327 || ( $rOpts_line_up_parentheses
25328 && $token_next eq ')'
25329 && $self->[_rlp_object_by_seqno_]
25330 ->{ $type_sequence_to_go[$ibeg_next] } )
25337 # decide which trailing closing tokens to append..
25339 if ( $cvt == 2 || $iend_next == $ibeg_next ) { $ok = 1 }
25341 my $str = join( '',
25342 @types_to_go[ $ibeg_next + 1 .. $ibeg_next + 2 ] );
25344 # append closing token if followed by comment or ';'
25345 # or another closing token (fix2 for welding, git #45)
25346 if ( $str =~ /^b?[\)\]\}R#;]/ ) { $ok = 1 }
25350 my $valid_flag = $cvt;
25354 # Fix for b1187 and b1188: Blinking can occur if we allow
25355 # welded tokens to re-form into one-line blocks during
25356 # vertical alignment when -lp used. So for this case we
25357 # set the minimum number of lines to be 1 instead of 0.
25358 # The maximum should be 1 if -vtc is not used. If -vtc is
25359 # used, we turn the valid
25360 # flag off and set the maximum to 0. This is equivalent to
25361 # using a large number.
25362 my $seqno_ibeg_next = $type_sequence_to_go[$ibeg_next];
25363 if ( $rOpts_line_up_parentheses
25364 && $total_weld_count
25365 && $self->[_rlp_object_by_seqno_]->{$seqno_ibeg_next}
25366 && $self->is_welded_at_seqno($seqno_ibeg_next) )
25369 $max_lines = $cvt ? 0 : 1;
25374 $vt_closing_flag = $tightness{$token_next} == 2 ? 0 : 1;
25375 $vt_seqno = $type_sequence_to_go[$ibeg_next];
25376 $vt_valid_flag = $valid_flag;
25377 $vt_min_lines = $min_lines;
25378 $vt_max_lines = $max_lines;
25383 #--------------------------------------------------------------
25384 # Vertical Tightness Flags Section 1c:
25385 # Implement the Opening Token Right flag (Type 2)..
25386 # If requested, move an isolated trailing opening token to the end of
25387 # the previous line which ended in a comma. We could do this
25388 # in sub recombine_breakpoints but that would cause problems
25389 # with -lp formatting. The problem is that indentation will
25390 # quickly move far to the right in nested expressions. By
25391 # doing it after indentation has been set, we avoid changes
25392 # to the indentation. Actual movement of the token takes place
25393 # in sub valign_output_step_B.
25395 # Note added 4 May 2021: the man page suggests that the -otr flags
25396 # are mainly for opening tokens following commas. But this seems
25397 # to have been generalized long ago to include other situations.
25398 # I checked the coding back to 2012 and it is essentially the same
25399 # as here, so it is best to leave this unchanged for now.
25400 #--------------------------------------------------------------
25402 $opening_token_right{ $tokens_to_go[$ibeg_next] }
25404 # previous line is not opening
25405 # (use -sot to combine with it)
25406 && !$is_opening_token{$token_end}
25408 # previous line ended in one of these
25409 # (add other cases if necessary; '=>' and '.' are not necessary
25410 && !$block_type_to_go[$ibeg_next]
25412 # this is a line with just an opening token
25413 && ( $iend_next == $ibeg_next
25414 || $iend_next == $ibeg_next + 2
25415 && $types_to_go[$iend_next] eq '#' )
25417 # Fix for case b1060 when both -baoo and -otr are set:
25418 # to avoid blinking, honor the -baoo flag over the -otr flag.
25419 && $token_end ne '||' && $token_end ne '&&'
25421 # Keep break after '=' if -lp. Fixes b964 b1040 b1062 b1083 b1089.
25424 && $rOpts_line_up_parentheses
25425 && $self->[_rlp_object_by_seqno_]
25426 ->{ $type_sequence_to_go[$ibeg_next] }
25429 # looks bad if we align vertically with the wrong container
25430 && $tokens_to_go[$ibeg] ne $tokens_to_go[$ibeg_next]
25433 my $spaces = ( $types_to_go[ $ibeg_next - 1 ] eq 'b' ) ? 1 : 0;
25436 $vt_closing_flag = $spaces;
25437 $vt_seqno = $type_sequence_to_go[$ibeg_next];
25438 $vt_valid_flag = 1;
25441 #--------------------------------------------------------------
25442 # Vertical Tightness Flags Section 1d:
25443 # Stacking of opening and closing tokens (Type 2)
25444 #--------------------------------------------------------------
25446 my $token_beg_next = $tokens_to_go[$ibeg_next];
25448 # patch to make something like 'qw(' behave like an opening paren
25450 if ( $types_to_go[$ibeg_next] eq 'q' ) {
25451 if ( $token_beg_next =~ /^qw\s*([\[\(\{])$/ ) {
25452 $token_beg_next = $1;
25456 if ( $is_closing_token{$token_end}
25457 && $is_closing_token{$token_beg_next} )
25460 # avoid instability of combo -bom and -sct; b1179
25461 my $seq_next = $type_sequence_to_go[$ibeg_next];
25462 $stackable = $stack_closing_token{$token_beg_next}
25463 unless ( $block_type_to_go[$ibeg_next]
25464 || $seq_next && $self->[_rwant_container_open_]->{$seq_next} );
25466 elsif ($is_opening_token{$token_end}
25467 && $is_opening_token{$token_beg_next} )
25469 $stackable = $stack_opening_token{$token_beg_next}
25470 unless ( $block_type_to_go[$ibeg_next] )
25471 ; # shouldn't happen; just checking
25476 my $is_semicolon_terminated;
25477 if ( $n + 1 == $n_last_line ) {
25478 my ( $terminal_type, $i_terminal ) =
25479 terminal_type_i( $ibeg_next, $iend_next );
25480 $is_semicolon_terminated = $terminal_type eq ';'
25481 && $nesting_depth_to_go[$iend_next] <
25482 $nesting_depth_to_go[$ibeg_next];
25485 # this must be a line with just an opening token
25486 # or end in a semicolon
25488 $is_semicolon_terminated
25489 || ( $iend_next == $ibeg_next
25490 || $iend_next == $ibeg_next + 2
25491 && $types_to_go[$iend_next] eq '#' )
25494 my $spaces = ( $types_to_go[ $ibeg_next - 1 ] eq 'b' ) ? 1 : 0;
25497 $vt_closing_flag = $spaces;
25498 $vt_seqno = $type_sequence_to_go[$ibeg_next];
25499 $vt_valid_flag = 1;
25505 #--------------------------------------------------------------
25506 # Vertical Tightness Flags Section 2:
25507 # Handle type 3, opening block braces on last line of the batch
25508 # Check for a last line with isolated opening BLOCK curly
25509 #--------------------------------------------------------------
25510 elsif ($rOpts_block_brace_vertical_tightness
25512 && $types_to_go[$iend] eq '{'
25513 && $block_type_to_go[$iend] =~
25514 /$block_brace_vertical_tightness_pattern/ )
25517 $vt_opening_flag = $rOpts_block_brace_vertical_tightness;
25519 $vt_valid_flag = 1;
25522 #--------------------------------------------------------------
25523 # Vertical Tightness Flags Section 3:
25524 # Handle type 4, a closing block brace on the last line of the batch Check
25525 # for a last line with isolated closing BLOCK curly
25526 # Patch: added a check for any new closing side comment which the
25527 # -csc option may generate. If it exists, there will be a side comment
25528 # so we cannot combine with a brace on the next line. This issue
25529 # occurs for the combination -scbb and -csc is used.
25530 #--------------------------------------------------------------
25531 elsif ($rOpts_stack_closing_block_brace
25533 && $block_type_to_go[$iend]
25534 && $types_to_go[$iend] eq '}'
25535 && ( !$closing_side_comment || $n < $n_last_line ) )
25537 my $spaces = $rOpts_block_brace_tightness == 2 ? 0 : 1;
25540 $vt_closing_flag = $spaces;
25541 $vt_seqno = $type_sequence_to_go[$iend];
25542 $vt_valid_flag = 1;
25546 # get the sequence numbers of the ends of this line
25547 $vt_seqno_beg = $type_sequence_to_go[$ibeg];
25548 if ( !$vt_seqno_beg && $types_to_go[$ibeg] eq 'q' ) {
25549 $vt_seqno_beg = $self->get_seqno( $ibeg, $ending_in_quote );
25552 $vt_seqno_end = $type_sequence_to_go[$iend];
25553 if ( !$vt_seqno_end && $types_to_go[$iend] eq 'q' ) {
25554 $vt_seqno_end = $self->get_seqno( $iend, $ending_in_quote );
25559 my $rvertical_tightness_flags = {
25560 _vt_type => $vt_type,
25561 _vt_opening_flag => $vt_opening_flag,
25562 _vt_closing_flag => $vt_closing_flag,
25563 _vt_seqno => $vt_seqno,
25564 _vt_valid_flag => $vt_valid_flag,
25565 _vt_seqno_beg => $vt_seqno_beg,
25566 _vt_seqno_end => $vt_seqno_end,
25567 _vt_min_lines => $vt_min_lines,
25568 _vt_max_lines => $vt_max_lines,
25571 return ($rvertical_tightness_flags);
25574 ##########################################################
25575 # CODE SECTION 14: Code for creating closing side comments
25576 ##########################################################
25578 { ## begin closure accumulate_csc_text
25580 # These routines are called once per batch when the --closing-side-comments flag
25583 my %block_leading_text;
25584 my %block_opening_line_number;
25585 my $csc_new_statement_ok;
25586 my $csc_last_label;
25587 my %csc_block_label;
25588 my $accumulating_text_for_block;
25589 my $leading_block_text;
25590 my $rleading_block_if_elsif_text;
25591 my $leading_block_text_level;
25592 my $leading_block_text_length_exceeded;
25593 my $leading_block_text_line_length;
25594 my $leading_block_text_line_number;
25596 sub initialize_csc_vars {
25597 %block_leading_text = ();
25598 %block_opening_line_number = ();
25599 $csc_new_statement_ok = 1;
25600 $csc_last_label = "";
25601 %csc_block_label = ();
25602 $rleading_block_if_elsif_text = [];
25603 $accumulating_text_for_block = "";
25604 reset_block_text_accumulator();
25608 sub reset_block_text_accumulator {
25610 # save text after 'if' and 'elsif' to append after 'else'
25611 if ($accumulating_text_for_block) {
25613 if ( $accumulating_text_for_block =~ /^(if|elsif)$/ ) {
25614 push @{$rleading_block_if_elsif_text}, $leading_block_text;
25617 $accumulating_text_for_block = "";
25618 $leading_block_text = "";
25619 $leading_block_text_level = 0;
25620 $leading_block_text_length_exceeded = 0;
25621 $leading_block_text_line_number = 0;
25622 $leading_block_text_line_length = 0;
25626 sub set_block_text_accumulator {
25627 my ( $self, $i ) = @_;
25628 $accumulating_text_for_block = $tokens_to_go[$i];
25629 if ( $accumulating_text_for_block !~ /^els/ ) {
25630 $rleading_block_if_elsif_text = [];
25632 $leading_block_text = "";
25633 $leading_block_text_level = $levels_to_go[$i];
25634 $leading_block_text_line_number = $self->get_output_line_number();
25635 $leading_block_text_length_exceeded = 0;
25637 # this will contain the column number of the last character
25638 # of the closing side comment
25639 $leading_block_text_line_length =
25640 length($csc_last_label) +
25641 length($accumulating_text_for_block) +
25642 length( $rOpts->{'closing-side-comment-prefix'} ) +
25643 $leading_block_text_level * $rOpts_indent_columns + 3;
25647 sub accumulate_block_text {
25648 my ( $self, $i ) = @_;
25650 # accumulate leading text for -csc, ignoring any side comments
25651 if ( $accumulating_text_for_block
25652 && !$leading_block_text_length_exceeded
25653 && $types_to_go[$i] ne '#' )
25656 my $added_length = $token_lengths_to_go[$i];
25657 $added_length += 1 if $i == 0;
25658 my $new_line_length =
25659 $leading_block_text_line_length + $added_length;
25661 # we can add this text if we don't exceed some limits..
25664 # we must not have already exceeded the text length limit
25665 length($leading_block_text) <
25666 $rOpts_closing_side_comment_maximum_text
25669 # the new total line length must be below the line length limit
25670 # or the new length must be below the text length limit
25671 # (ie, we may allow one token to exceed the text length limit)
25674 $maximum_line_length_at_level[$leading_block_text_level]
25676 || length($leading_block_text) + $added_length <
25677 $rOpts_closing_side_comment_maximum_text
25680 # UNLESS: we are adding a closing paren before the brace we seek.
25681 # This is an attempt to avoid situations where the ... to be
25682 # added are longer than the omitted right paren, as in:
25684 # foreach my $item (@a_rather_long_variable_name_here) {
25686 # } ## end foreach my $item (@a_rather_long_variable_name_here...
25689 $tokens_to_go[$i] eq ')'
25692 $i + 1 <= $max_index_to_go
25693 && $block_type_to_go[ $i + 1 ] eq
25694 $accumulating_text_for_block
25696 || ( $i + 2 <= $max_index_to_go
25697 && $block_type_to_go[ $i + 2 ] eq
25698 $accumulating_text_for_block )
25704 # add an extra space at each newline
25705 if ( $i == 0 && $types_to_go[$i] ne 'b' ) {
25706 $leading_block_text .= ' ';
25709 # add the token text
25710 $leading_block_text .= $tokens_to_go[$i];
25711 $leading_block_text_line_length = $new_line_length;
25714 # show that text was truncated if necessary
25715 elsif ( $types_to_go[$i] ne 'b' ) {
25716 $leading_block_text_length_exceeded = 1;
25717 $leading_block_text .= '...';
25723 sub accumulate_csc_text {
25727 # called once per output buffer when -csc is used. Accumulates
25728 # the text placed after certain closing block braces.
25729 # Defines and returns the following for this buffer:
25731 my $block_leading_text = ""; # the leading text of the last '}'
25732 my $rblock_leading_if_elsif_text;
25733 my $i_block_leading_text =
25734 -1; # index of token owning block_leading_text
25735 my $block_line_count = 100; # how many lines the block spans
25736 my $terminal_type = 'b'; # type of last nonblank token
25737 my $i_terminal = 0; # index of last nonblank token
25738 my $terminal_block_type = "";
25740 # update most recent statement label
25741 $csc_last_label = "" unless ($csc_last_label);
25742 if ( $types_to_go[0] eq 'J' ) { $csc_last_label = $tokens_to_go[0] }
25743 my $block_label = $csc_last_label;
25745 # Loop over all tokens of this batch
25746 for my $i ( 0 .. $max_index_to_go ) {
25747 my $type = $types_to_go[$i];
25748 my $block_type = $block_type_to_go[$i];
25749 my $token = $tokens_to_go[$i];
25751 # remember last nonblank token type
25752 if ( $type ne '#' && $type ne 'b' ) {
25753 $terminal_type = $type;
25754 $terminal_block_type = $block_type;
25758 my $type_sequence = $type_sequence_to_go[$i];
25759 if ( $block_type && $type_sequence ) {
25761 if ( $token eq '}' ) {
25763 # restore any leading text saved when we entered this block
25764 if ( defined( $block_leading_text{$type_sequence} ) ) {
25765 ( $block_leading_text, $rblock_leading_if_elsif_text )
25766 = @{ $block_leading_text{$type_sequence} };
25767 $i_block_leading_text = $i;
25768 delete $block_leading_text{$type_sequence};
25769 $rleading_block_if_elsif_text =
25770 $rblock_leading_if_elsif_text;
25773 if ( defined( $csc_block_label{$type_sequence} ) ) {
25774 $block_label = $csc_block_label{$type_sequence};
25775 delete $csc_block_label{$type_sequence};
25778 # if we run into a '}' then we probably started accumulating
25779 # at something like a trailing 'if' clause..no harm done.
25780 if ( $accumulating_text_for_block
25781 && $levels_to_go[$i] <= $leading_block_text_level )
25783 my $lev = $levels_to_go[$i];
25784 reset_block_text_accumulator();
25787 if ( defined( $block_opening_line_number{$type_sequence} ) )
25789 my $output_line_number =
25790 $self->get_output_line_number();
25791 $block_line_count =
25792 $output_line_number -
25793 $block_opening_line_number{$type_sequence} + 1;
25794 delete $block_opening_line_number{$type_sequence};
25798 # Error: block opening line undefined for this line..
25799 # This shouldn't be possible, but it is not a
25800 # significant problem.
25804 elsif ( $token eq '{' ) {
25806 my $line_number = $self->get_output_line_number();
25807 $block_opening_line_number{$type_sequence} = $line_number;
25809 # set a label for this block, except for
25810 # a bare block which already has the label
25811 # A label can only be used on the next {
25812 if ( $block_type =~ /:$/ ) { $csc_last_label = "" }
25813 $csc_block_label{$type_sequence} = $csc_last_label;
25814 $csc_last_label = "";
25816 if ( $accumulating_text_for_block
25817 && $levels_to_go[$i] == $leading_block_text_level )
25820 if ( $accumulating_text_for_block eq $block_type ) {
25822 # save any leading text before we enter this block
25823 $block_leading_text{$type_sequence} = [
25824 $leading_block_text,
25825 $rleading_block_if_elsif_text
25827 $block_opening_line_number{$type_sequence} =
25828 $leading_block_text_line_number;
25829 reset_block_text_accumulator();
25833 # shouldn't happen, but not a serious error.
25834 # We were accumulating -csc text for block type
25835 # $accumulating_text_for_block and unexpectedly
25836 # encountered a '{' for block type $block_type.
25843 && $csc_new_statement_ok
25844 && $is_if_elsif_else_unless_while_until_for_foreach{$token}
25845 && $token =~ /$closing_side_comment_list_pattern/ )
25847 $self->set_block_text_accumulator($i);
25851 # note: ignoring type 'q' because of tricks being played
25852 # with 'q' for hanging side comments
25853 if ( $type ne 'b' && $type ne '#' && $type ne 'q' ) {
25854 $csc_new_statement_ok =
25855 ( $block_type || $type eq 'J' || $type eq ';' );
25858 && $accumulating_text_for_block
25859 && $levels_to_go[$i] == $leading_block_text_level )
25861 reset_block_text_accumulator();
25864 $self->accumulate_block_text($i);
25869 # Treat an 'else' block specially by adding preceding 'if' and
25870 # 'elsif' text. Otherwise, the 'end else' is not helpful,
25871 # especially for cuddled-else formatting.
25872 if ( $terminal_block_type =~ /^els/ && $rblock_leading_if_elsif_text ) {
25873 $block_leading_text =
25874 $self->make_else_csc_text( $i_terminal, $terminal_block_type,
25875 $block_leading_text, $rblock_leading_if_elsif_text );
25878 # if this line ends in a label then remember it for the next pass
25879 $csc_last_label = "";
25880 if ( $terminal_type eq 'J' ) {
25881 $csc_last_label = $tokens_to_go[$i_terminal];
25884 return ( $terminal_type, $i_terminal, $i_block_leading_text,
25885 $block_leading_text, $block_line_count, $block_label );
25888 sub make_else_csc_text {
25890 # create additional -csc text for an 'else' and optionally 'elsif',
25891 # depending on the value of switch
25893 # = 0 add 'if' text to trailing else
25894 # = 1 same as 0 plus:
25895 # add 'if' to 'elsif's if can fit in line length
25896 # add last 'elsif' to trailing else if can fit in one line
25897 # = 2 same as 1 but do not check if exceed line length
25899 # $rif_elsif_text = a reference to a list of all previous closing
25900 # side comments created for this if block
25902 my ( $self, $i_terminal, $block_type, $block_leading_text,
25905 my $csc_text = $block_leading_text;
25907 if ( $block_type eq 'elsif'
25908 && $rOpts_closing_side_comment_else_flag == 0 )
25913 my $count = @{$rif_elsif_text};
25914 return $csc_text unless ($count);
25916 my $if_text = '[ if' . $rif_elsif_text->[0];
25918 # always show the leading 'if' text on 'else'
25919 if ( $block_type eq 'else' ) {
25920 $csc_text .= $if_text;
25923 # see if that's all
25924 if ( $rOpts_closing_side_comment_else_flag == 0 ) {
25928 my $last_elsif_text = "";
25929 if ( $count > 1 ) {
25930 $last_elsif_text = ' [elsif' . $rif_elsif_text->[ $count - 1 ];
25931 if ( $count > 2 ) { $last_elsif_text = ' [...' . $last_elsif_text; }
25934 # tentatively append one more item
25935 my $saved_text = $csc_text;
25936 if ( $block_type eq 'else' ) {
25937 $csc_text .= $last_elsif_text;
25940 $csc_text .= ' ' . $if_text;
25943 # all done if no length checks requested
25944 if ( $rOpts_closing_side_comment_else_flag == 2 ) {
25948 # undo it if line length exceeded
25950 length($csc_text) +
25951 length($block_type) +
25952 length( $rOpts->{'closing-side-comment-prefix'} ) +
25953 $levels_to_go[$i_terminal] * $rOpts_indent_columns + 3;
25955 $length > $maximum_line_length_at_level[$leading_block_text_level] )
25957 $csc_text = $saved_text;
25961 } ## end closure accumulate_csc_text
25963 { ## begin closure balance_csc_text
25965 # Some additional routines for handling the --closing-side-comments option
25980 sub balance_csc_text {
25982 # Append characters to balance a closing side comment so that editors
25983 # such as vim can correctly jump through code.
25985 # input = ## end foreach my $foo ( sort { $b ...
25986 # output = ## end foreach my $foo ( sort { $b ...})
25988 # NOTE: This routine does not currently filter out structures within
25989 # quoted text because the bounce algorithms in text editors do not
25990 # necessarily do this either (a version of vim was checked and
25991 # did not do this).
25993 # Some complex examples which will cause trouble for some editors:
25994 # while ( $mask_string =~ /\{[^{]*?\}/g ) {
25995 # if ( $mask_str =~ /\}\s*els[^\{\}]+\{$/ ) {
25996 # if ( $1 eq '{' ) {
25997 # test file test1/braces.pl has many such examples.
26001 # loop to examine characters one-by-one, RIGHT to LEFT and
26002 # build a balancing ending, LEFT to RIGHT.
26003 for ( my $pos = length($csc) - 1 ; $pos >= 0 ; $pos-- ) {
26005 my $char = substr( $csc, $pos, 1 );
26007 # ignore everything except structural characters
26008 next unless ( $matching_char{$char} );
26010 # pop most recently appended character
26011 my $top = chop($csc);
26013 # push it back plus the mate to the newest character
26014 # unless they balance each other.
26015 $csc = $csc . $top . $matching_char{$char} unless $top eq $char;
26018 # return the balanced string
26021 } ## end closure balance_csc_text
26023 sub add_closing_side_comment {
26025 my ( $self, $ri_first, $ri_last ) = @_;
26026 my $rLL = $self->[_rLL_];
26028 # add closing side comments after closing block braces if -csc used
26029 my ( $closing_side_comment, $cscw_block_comment );
26031 #---------------------------------------------------------------
26032 # Step 1: loop through all tokens of this line to accumulate
26033 # the text needed to create the closing side comments. Also see
26034 # how the line ends.
26035 #---------------------------------------------------------------
26037 my ( $terminal_type, $i_terminal, $i_block_leading_text,
26038 $block_leading_text, $block_line_count, $block_label )
26039 = $self->accumulate_csc_text();
26041 #---------------------------------------------------------------
26042 # Step 2: make the closing side comment if this ends a block
26043 #---------------------------------------------------------------
26044 my $have_side_comment = $types_to_go[$max_index_to_go] eq '#';
26046 # if this line might end in a block closure..
26048 $terminal_type eq '}'
26050 # Fix 1 for c091, this is only for blocks
26051 && $block_type_to_go[$i_terminal]
26056 # the block is long enough
26057 ( $block_line_count >= $rOpts->{'closing-side-comment-interval'} )
26059 # or there is an existing comment to check
26060 || ( $have_side_comment
26061 && $rOpts->{'closing-side-comment-warnings'} )
26064 # .. and if this is one of the types of interest
26065 && $block_type_to_go[$i_terminal] =~
26066 /$closing_side_comment_list_pattern/
26068 # .. but not an anonymous sub
26069 # These are not normally of interest, and their closing braces are
26070 # often followed by commas or semicolons anyway. This also avoids
26071 # possible erratic output due to line numbering inconsistencies
26072 # in the cases where their closing braces terminate a line.
26073 && $block_type_to_go[$i_terminal] ne 'sub'
26075 # ..and the corresponding opening brace must is not in this batch
26076 # (because we do not need to tag one-line blocks, although this
26077 # should also be caught with a positive -csci value)
26078 && $mate_index_to_go[$i_terminal] < 0
26083 # this is the last token (line doesn't have a side comment)
26084 !$have_side_comment
26086 # or the old side comment is a closing side comment
26087 || $tokens_to_go[$max_index_to_go] =~
26088 /$closing_side_comment_prefix_pattern/
26093 # then make the closing side comment text
26094 if ($block_label) { $block_label .= " " }
26096 "$rOpts->{'closing-side-comment-prefix'} $block_label$block_type_to_go[$i_terminal]";
26098 # append any extra descriptive text collected above
26099 if ( $i_block_leading_text == $i_terminal ) {
26100 $token .= $block_leading_text;
26103 $token = balance_csc_text($token)
26104 if $rOpts->{'closing-side-comments-balanced'};
26106 $token =~ s/\s*$//; # trim any trailing whitespace
26108 # handle case of existing closing side comment
26109 if ($have_side_comment) {
26111 # warn if requested and tokens differ significantly
26112 if ( $rOpts->{'closing-side-comment-warnings'} ) {
26113 my $old_csc = $tokens_to_go[$max_index_to_go];
26114 my $new_csc = $token;
26115 $new_csc =~ s/\s+//g; # trim all whitespace
26116 $old_csc =~ s/\s+//g; # trim all whitespace
26117 $new_csc =~ s/[\]\)\}\s]*$//; # trim trailing structures
26118 $old_csc =~ s/[\]\)\}\s]*$//; # trim trailing structures
26119 $new_csc =~ s/(\.\.\.)$//; # trim trailing '...'
26120 my $new_trailing_dots = $1;
26121 $old_csc =~ s/(\.\.\.)\s*$//; # trim trailing '...'
26123 # Patch to handle multiple closing side comments at
26124 # else and elsif's. These have become too complicated
26125 # to check, so if we see an indication of
26126 # '[ if' or '[ # elsif', then assume they were made
26128 if ( $block_type_to_go[$i_terminal] eq 'else' ) {
26129 if ( $old_csc =~ /\[\s*elsif/ ) { $old_csc = $new_csc }
26131 elsif ( $block_type_to_go[$i_terminal] eq 'elsif' ) {
26132 if ( $old_csc =~ /\[\s*if/ ) { $old_csc = $new_csc }
26135 # if old comment is contained in new comment,
26136 # only compare the common part.
26137 if ( length($new_csc) > length($old_csc) ) {
26138 $new_csc = substr( $new_csc, 0, length($old_csc) );
26141 # if the new comment is shorter and has been limited,
26142 # only compare the common part.
26143 if ( length($new_csc) < length($old_csc)
26144 && $new_trailing_dots )
26146 $old_csc = substr( $old_csc, 0, length($new_csc) );
26149 # any remaining difference?
26150 if ( $new_csc ne $old_csc ) {
26152 # just leave the old comment if we are below the threshold
26153 # for creating side comments
26154 if ( $block_line_count <
26155 $rOpts->{'closing-side-comment-interval'} )
26160 # otherwise we'll make a note of it
26164 "perltidy -cscw replaced: $tokens_to_go[$max_index_to_go]\n"
26167 # save the old side comment in a new trailing block
26169 my $timestamp = "";
26170 if ( $rOpts->{'timestamp'} ) {
26171 my ( $day, $month, $year ) = (localtime)[ 3, 4, 5 ];
26174 $timestamp = "$year-$month-$day";
26176 $cscw_block_comment =
26177 "## perltidy -cscw $timestamp: $tokens_to_go[$max_index_to_go]";
26178 ## "## perltidy -cscw $year-$month-$day: $tokens_to_go[$max_index_to_go]";
26183 # No differences.. we can safely delete old comment if we
26184 # are below the threshold
26185 if ( $block_line_count <
26186 $rOpts->{'closing-side-comment-interval'} )
26188 # Since the line breaks have already been set, we have
26189 # to remove the token from the _to_go array and also
26190 # from the line range (this fixes issue c081).
26191 # Note that we can only get here if -cscw has been set
26192 # because otherwise the old comment is already deleted.
26194 my $ibeg = $ri_first->[-1];
26195 my $iend = $ri_last->[-1];
26197 && $iend == $max_index_to_go
26198 && $types_to_go[$max_index_to_go] eq '#' )
26201 $max_index_to_go--;
26203 && $types_to_go[$max_index_to_go] eq 'b' )
26206 $max_index_to_go--;
26208 $ri_last->[-1] = $iend;
26214 # switch to the new csc (unless we deleted it!)
26217 my $len_tok = length($token); # NOTE: length no longer important
26219 $len_tok - $token_lengths_to_go[$max_index_to_go];
26221 $tokens_to_go[$max_index_to_go] = $token;
26222 $token_lengths_to_go[$max_index_to_go] = $len_tok;
26223 my $K = $K_to_go[$max_index_to_go];
26224 $rLL->[$K]->[_TOKEN_] = $token;
26225 $rLL->[$K]->[_TOKEN_LENGTH_] = $len_tok;
26226 $summed_lengths_to_go[ $max_index_to_go + 1 ] += $added_len;
26230 # handle case of NO existing closing side comment
26233 # To avoid inserting a new token in the token arrays, we
26234 # will just return the new side comment so that it can be
26235 # inserted just before it is needed in the call to the
26236 # vertical aligner.
26237 $closing_side_comment = $token;
26240 return ( $closing_side_comment, $cscw_block_comment );
26243 ############################
26244 # CODE SECTION 15: Summarize
26245 ############################
26249 # This is the last routine called when a file is formatted.
26250 # Flush buffer and write any informative messages
26254 my $file_writer_object = $self->[_file_writer_object_];
26255 $file_writer_object->decrement_output_line_number()
26256 ; # fix up line number since it was incremented
26257 we_are_at_the_last_line();
26259 my $max_depth = $self->[_maximum_BLOCK_level_];
26260 my $at_line = $self->[_maximum_BLOCK_level_at_line_];
26261 write_logfile_entry(
26262 "Maximum leading structural depth is $max_depth in input at line $at_line\n"
26265 my $added_semicolon_count = $self->[_added_semicolon_count_];
26266 my $first_added_semicolon_at = $self->[_first_added_semicolon_at_];
26267 my $last_added_semicolon_at = $self->[_last_added_semicolon_at_];
26269 if ( $added_semicolon_count > 0 ) {
26270 my $first = ( $added_semicolon_count > 1 ) ? "First" : "";
26272 ( $added_semicolon_count > 1 ) ? "semicolons were" : "semicolon was";
26273 write_logfile_entry("$added_semicolon_count $what added:\n");
26274 write_logfile_entry(
26275 " $first at input line $first_added_semicolon_at\n");
26277 if ( $added_semicolon_count > 1 ) {
26278 write_logfile_entry(
26279 " Last at input line $last_added_semicolon_at\n");
26281 write_logfile_entry(" (Use -nasc to prevent semicolon addition)\n");
26282 write_logfile_entry("\n");
26285 my $deleted_semicolon_count = $self->[_deleted_semicolon_count_];
26286 my $first_deleted_semicolon_at = $self->[_first_deleted_semicolon_at_];
26287 my $last_deleted_semicolon_at = $self->[_last_deleted_semicolon_at_];
26288 if ( $deleted_semicolon_count > 0 ) {
26289 my $first = ( $deleted_semicolon_count > 1 ) ? "First" : "";
26291 ( $deleted_semicolon_count > 1 )
26292 ? "semicolons were"
26294 write_logfile_entry(
26295 "$deleted_semicolon_count unnecessary $what deleted:\n");
26296 write_logfile_entry(
26297 " $first at input line $first_deleted_semicolon_at\n");
26299 if ( $deleted_semicolon_count > 1 ) {
26300 write_logfile_entry(
26301 " Last at input line $last_deleted_semicolon_at\n");
26303 write_logfile_entry(" (Use -ndsm to prevent semicolon deletion)\n");
26304 write_logfile_entry("\n");
26307 my $embedded_tab_count = $self->[_embedded_tab_count_];
26308 my $first_embedded_tab_at = $self->[_first_embedded_tab_at_];
26309 my $last_embedded_tab_at = $self->[_last_embedded_tab_at_];
26310 if ( $embedded_tab_count > 0 ) {
26311 my $first = ( $embedded_tab_count > 1 ) ? "First" : "";
26313 ( $embedded_tab_count > 1 )
26314 ? "quotes or patterns"
26315 : "quote or pattern";
26316 write_logfile_entry("$embedded_tab_count $what had embedded tabs:\n");
26317 write_logfile_entry(
26318 "This means the display of this script could vary with device or software\n"
26320 write_logfile_entry(" $first at input line $first_embedded_tab_at\n");
26322 if ( $embedded_tab_count > 1 ) {
26323 write_logfile_entry(
26324 " Last at input line $last_embedded_tab_at\n");
26326 write_logfile_entry("\n");
26329 my $first_tabbing_disagreement = $self->[_first_tabbing_disagreement_];
26330 my $last_tabbing_disagreement = $self->[_last_tabbing_disagreement_];
26331 my $tabbing_disagreement_count = $self->[_tabbing_disagreement_count_];
26332 my $in_tabbing_disagreement = $self->[_in_tabbing_disagreement_];
26334 if ($first_tabbing_disagreement) {
26335 write_logfile_entry(
26336 "First indentation disagreement seen at input line $first_tabbing_disagreement\n"
26340 my $first_btd = $self->[_first_brace_tabbing_disagreement_];
26343 "First closing brace indentation disagreement started at input line $first_btd\n";
26344 write_logfile_entry($msg);
26346 # leave a hint in the .ERR file if there was a brace error
26347 if ( get_saw_brace_error() ) { warning("NOTE: $msg") }
26350 my $in_btd = $self->[_in_brace_tabbing_disagreement_];
26353 "Ending with brace indentation disagreement which started at input line $in_btd\n";
26354 write_logfile_entry($msg);
26356 # leave a hint in the .ERR file if there was a brace error
26357 if ( get_saw_brace_error() ) { warning("NOTE: $msg") }
26360 if ($in_tabbing_disagreement) {
26362 "Ending with indentation disagreement which started at input line $in_tabbing_disagreement\n";
26363 write_logfile_entry($msg);
26367 if ($last_tabbing_disagreement) {
26369 write_logfile_entry(
26370 "Last indentation disagreement seen at input line $last_tabbing_disagreement\n"
26374 write_logfile_entry("No indentation disagreement seen\n");
26378 if ($first_tabbing_disagreement) {
26379 write_logfile_entry(
26380 "Note: Indentation disagreement detection is not accurate for outdenting and -lp.\n"
26383 write_logfile_entry("\n");
26385 my $vao = $self->[_vertical_aligner_object_];
26386 $vao->report_anything_unusual();
26388 $file_writer_object->report_line_length_errors();
26390 $self->[_converged_] = $file_writer_object->get_convergence_check()
26391 || $rOpts->{'indent-only'};
26396 } ## end package Perl::Tidy::Formatter