1 #####################################################################
3 # The Perl::Tidy::Formatter package adds indentation, whitespace, and
4 # line breaks to the token stream
6 #####################################################################
9 # CODE SECTION 1: Preliminary code, global definitions and sub new
11 # CODE SECTION 2: Some Basic Utilities
12 # CODE SECTION 3: Check and process options
14 # CODE SECTION 4: Receive lines from the tokenizer
16 # CODE SECTION 5: Pre-process the entire file
17 # sub finish_formatting
18 # CODE SECTION 6: Process line-by-line
19 # sub process_all_lines
20 # CODE SECTION 7: Process lines of code
21 # process_line_of_CODE
22 # CODE SECTION 8: Utilities for setting breakpoints
23 # sub set_forced_breakpoint
24 # CODE SECTION 9: Process batches of code
25 # sub grind_batch_of_CODE
26 # CODE SECTION 10: Code to break long statments
27 # sub set_continuation_breaks
28 # CODE SECTION 11: Code to break long lists
30 # CODE SECTION 12: Code for setting indentation
31 # CODE SECTION 13: Preparing batches for vertical alignment
32 # sub send_lines_to_vertical_aligner
33 # CODE SECTION 14: Code for creating closing side comments
34 # sub add_closing_side_comment
35 # CODE SECTION 15: Summarize
38 #######################################################################
39 # CODE SECTION 1: Preliminary code and global definitions up to sub new
40 #######################################################################
42 package Perl::Tidy::Formatter;
46 # this can be turned on for extra checking during development
47 use constant DEVEL_MODE => 0;
49 { #<<< A non-indenting brace to contain all lexical variables
52 our $VERSION = '20210717';
54 # The Tokenizer will be loaded with the Formatter
55 ##use Perl::Tidy::Tokenizer; # for is_keyword()
59 # Catch any undefined sub calls so that we are sure to get
60 # some diagnostic information. This sub should never be called
61 # except for a programming error.
63 return if ( $AUTOLOAD =~ /\bDESTROY$/ );
64 my ( $pkg, $fname, $lno ) = caller();
65 my $my_package = __PACKAGE__;
67 ======================================================================
68 Error detected in package '$my_package', version $VERSION
69 Received unexpected AUTOLOAD call for sub '$AUTOLOAD'
70 Called from package: '$pkg'
71 Called from File '$fname' at line '$lno'
72 This error is probably due to a recent programming change
73 ======================================================================
80 $self->_decrement_count();
86 Perl::Tidy::Die($msg);
87 croak "unexpected return from Perl::Tidy::Die";
92 Perl::Tidy::Warn($msg);
99 # This routine is called for errors that really should not occur
100 # except if there has been a bug introduced by a recent program change.
101 # Please add comments at calls to Fault to explain why the call
102 # should not occur, and where to look to fix it.
103 my ( $package0, $filename0, $line0, $subroutine0 ) = caller(0);
104 my ( $package1, $filename1, $line1, $subroutine1 ) = caller(1);
105 my ( $package2, $filename2, $line2, $subroutine2 ) = caller(2);
106 my $input_stream_name = get_input_stream_name();
109 ==============================================================================
110 While operating on input stream with name: '$input_stream_name'
111 A fault was detected at line $line0 of sub '$subroutine1'
113 which was called from line $line1 of sub '$subroutine2'
115 This is probably an error introduced by a recent programming change.
116 Perl::Tidy::Formatter.pm reports VERSION='$VERSION'.
117 ==============================================================================
120 # We shouldn't get here, but this return is to keep Perl-Critic from
127 Perl::Tidy::Exit($msg);
128 croak "unexpected return from Perl::Tidy::Exit";
131 # Global variables ...
134 ##################################################################
135 # Section 1: Global variables which are either always constant or
136 # are constant after being configured by user-supplied
137 # parameters. They remain constant as a file is being processed.
138 ##################################################################
140 # user parameters and shortcuts
142 $rOpts_closing_side_comment_maximum_text,
143 $rOpts_continuation_indentation,
144 $rOpts_indent_columns,
145 $rOpts_line_up_parentheses,
146 $rOpts_maximum_line_length,
147 $rOpts_variable_maximum_line_length,
148 $rOpts_block_brace_tightness,
149 $rOpts_block_brace_vertical_tightness,
150 $rOpts_stack_closing_block_brace,
151 $rOpts_maximum_consecutive_blank_lines,
155 $rOpts_break_at_old_comma_breakpoints,
156 $rOpts_ignore_old_breakpoints,
158 $rOpts_keep_interior_semicolons,
159 $rOpts_comma_arrow_breakpoints,
160 $rOpts_maximum_fields_per_table,
161 $rOpts_one_line_block_semicolons,
162 $rOpts_break_at_old_semicolon_breakpoints,
164 $rOpts_tee_side_comments,
165 $rOpts_tee_block_comments,
167 $rOpts_delete_side_comments,
168 $rOpts_delete_closing_side_comments,
169 $rOpts_format_skipping,
171 $rOpts_static_block_comments,
173 $rOpts_add_whitespace,
174 $rOpts_delete_old_whitespace,
175 $rOpts_freeze_whitespace,
176 $rOpts_function_paren_vertical_alignment,
177 $rOpts_whitespace_cycle,
178 $rOpts_ignore_side_comment_lengths,
180 $rOpts_break_at_old_attribute_breakpoints,
181 $rOpts_break_at_old_keyword_breakpoints,
182 $rOpts_break_at_old_logical_breakpoints,
183 $rOpts_break_at_old_ternary_breakpoints,
184 $rOpts_short_concatenation_item_length,
185 $rOpts_closing_side_comment_else_flag,
186 $rOpts_fuzzy_line_length,
188 # Static hashes initialized in a BEGIN block
190 %is_keyword_returning_list,
191 %is_if_unless_and_or_last_next_redo_return,
192 %is_if_elsif_else_unless_while_until_for_foreach,
193 %is_if_unless_while_until_for,
194 %is_last_next_redo_return,
196 %is_sort_map_grep_eval,
200 %is_block_without_semicolon,
201 %ok_to_add_semicolon_for_block_type,
206 %is_equal_or_fat_comma,
209 %is_opening_sequence_token,
210 %is_closing_sequence_token,
211 %is_container_label_type,
215 # Initialized in check_options. These are constants and could
216 # just as well be initialized in a BEGIN block.
218 %is_if_brace_follower,
219 %is_else_brace_follower,
220 %is_anon_sub_brace_follower,
221 %is_anon_sub_1_brace_follower,
222 %is_other_brace_follower,
224 # Initialized in sub initialize_whitespace_hashes;
225 # Some can be modified according to user parameters.
230 # Configured in sub initialize_bond_strength_hashes
231 %right_bond_strength,
234 # Hashes for -kbb=s and -kba=s
235 %keep_break_before_type,
236 %keep_break_after_type,
238 # Initialized in check_options, modified by prepare_cuddled_block_types:
239 %want_one_line_block,
240 %is_braces_left_exclude_block,
242 # Initialized in sub prepare_cuddled_block_types
243 $rcuddled_block_types,
245 # Initialized and configured in check_optioms
247 %keyword_paren_inner_tightness,
251 %break_before_container_types,
252 %container_indentation_options,
254 %space_after_keyword,
259 %opening_vertical_tightness,
260 %closing_vertical_tightness,
261 %closing_token_indentation,
262 $some_closing_token_indentation,
264 %opening_token_right,
265 %stack_opening_token,
266 %stack_closing_token,
268 %weld_nested_exclusion_rules,
269 %line_up_parentheses_exclusion_rules,
271 # regex patterns for text identification.
272 # Most are initialized in a sub make_**_pattern during configuration.
273 # Most can be configured by user parameters.
277 $static_block_comment_pattern,
278 $static_side_comment_pattern,
279 $format_skipping_pattern_begin,
280 $format_skipping_pattern_end,
281 $non_indenting_brace_pattern,
283 $block_brace_vertical_tightness_pattern,
284 $blank_lines_after_opening_block_pattern,
285 $blank_lines_before_closing_block_pattern,
286 $keyword_group_list_pattern,
287 $keyword_group_list_comment_pattern,
288 $closing_side_comment_prefix_pattern,
289 $closing_side_comment_list_pattern,
291 # Table to efficiently find indentation and max line length
293 @maximum_line_length_at_level,
294 @maximum_text_length_at_level,
296 # Total number of sequence items in a weld, for quick checks
299 #########################################################
300 # Section 2: Work arrays for the current batch of tokens.
301 #########################################################
303 # These are re-initialized for each batch of code
304 # in sub initialize_batch_variables.
307 @type_sequence_to_go,
308 @bond_strength_to_go,
309 @forced_breakpoint_to_go,
310 @token_lengths_to_go,
311 @summed_lengths_to_go,
313 @leading_spaces_to_go,
314 @reduced_spaces_to_go,
317 @nesting_depth_to_go,
319 @old_breakpoint_to_go,
331 # Initialize constants...
333 # Array index names for token variables
336 _BLOCK_TYPE_ => $i++,
338 _CUMULATIVE_LENGTH_ => $i++,
339 _LINE_INDEX_ => $i++,
340 _KNEXT_SEQ_ITEM_ => $i++,
344 _TOKEN_LENGTH_ => $i++,
346 _TYPE_SEQUENCE_ => $i++,
348 # Number of token variables; must be last in list:
352 # Array index names for $self (which is an array ref)
356 _rlines_new_ => $i++,
359 _K_opening_container_ => $i++,
360 _K_closing_container_ => $i++,
361 _K_opening_ternary_ => $i++,
362 _K_closing_ternary_ => $i++,
363 _K_first_seq_item_ => $i++,
364 _rK_phantom_semicolons_ => $i++,
365 _rtype_count_by_seqno_ => $i++,
366 _ris_function_call_paren_ => $i++,
367 _rlec_count_by_seqno_ => $i++,
368 _ris_broken_container_ => $i++,
369 _ris_permanently_broken_ => $i++,
371 _rhas_broken_list_ => $i++,
372 _rhas_broken_list_with_lec_ => $i++,
373 _rhas_code_block_ => $i++,
374 _rhas_broken_code_block_ => $i++,
375 _rhas_ternary_ => $i++,
376 _ris_excluded_lp_container_ => $i++,
377 _rwant_reduced_ci_ => $i++,
378 _rno_xci_by_seqno_ => $i++,
379 _ris_bli_container_ => $i++,
380 _rparent_of_seqno_ => $i++,
381 _rchildren_of_seqno_ => $i++,
382 _ris_list_by_seqno_ => $i++,
383 _rbreak_container_ => $i++,
384 _rshort_nested_ => $i++,
385 _length_function_ => $i++,
386 _is_encoded_data_ => $i++,
388 _sink_object_ => $i++,
389 _file_writer_object_ => $i++,
390 _vertical_aligner_object_ => $i++,
391 _logger_object_ => $i++,
392 _radjusted_levels_ => $i++,
393 _this_batch_ => $i++,
395 _last_output_short_opening_token_ => $i++,
397 _last_line_leading_type_ => $i++,
398 _last_line_leading_level_ => $i++,
399 _last_last_line_leading_level_ => $i++,
401 _added_semicolon_count_ => $i++,
402 _first_added_semicolon_at_ => $i++,
403 _last_added_semicolon_at_ => $i++,
405 _deleted_semicolon_count_ => $i++,
406 _first_deleted_semicolon_at_ => $i++,
407 _last_deleted_semicolon_at_ => $i++,
409 _embedded_tab_count_ => $i++,
410 _first_embedded_tab_at_ => $i++,
411 _last_embedded_tab_at_ => $i++,
413 _first_tabbing_disagreement_ => $i++,
414 _last_tabbing_disagreement_ => $i++,
415 _tabbing_disagreement_count_ => $i++,
416 _in_tabbing_disagreement_ => $i++,
417 _first_brace_tabbing_disagreement_ => $i++,
418 _in_brace_tabbing_disagreement_ => $i++,
420 _saw_VERSION_in_this_file_ => $i++,
421 _saw_END_or_DATA_ => $i++,
423 _rK_weld_left_ => $i++,
424 _rK_weld_right_ => $i++,
425 _rweld_len_right_at_K_ => $i++,
427 _rspecial_side_comment_type_ => $i++,
429 _rseqno_controlling_my_ci_ => $i++,
430 _ris_seqno_controlling_ci_ => $i++,
431 _save_logfile_ => $i++,
432 _maximum_level_ => $i++,
434 _rKrange_code_without_comments_ => $i++,
435 _rbreak_before_Kfirst_ => $i++,
436 _rbreak_after_Klast_ => $i++,
437 _rwant_container_open_ => $i++,
440 _rstarting_multiline_qw_seqno_by_K_ => $i++,
441 _rending_multiline_qw_seqno_by_K_ => $i++,
442 _rKrange_multiline_qw_by_seqno_ => $i++,
443 _rmultiline_qw_has_extra_level_ => $i++,
444 _rbreak_before_container_by_seqno_ => $i++,
445 _ris_essential_old_breakpoint_ => $i++,
446 _roverride_cab3_ => $i++,
447 _ris_assigned_structure_ => $i++,
450 # Array index names for _this_batch_ (in above list)
451 # So _this_batch_ is a sub-array of $self for
452 # holding the batches of tokens being processed.
455 _starting_in_quote_ => $i++,
456 _ending_in_quote_ => $i++,
457 _is_static_block_comment_ => $i++,
459 _do_not_pad_ => $i++,
461 _peak_batch_size_ => $i++,
462 _max_index_to_go_ => $i++,
464 _batch_count_ => $i++,
465 _rix_seqno_controlling_ci_ => $i++,
466 _batch_CODE_type_ => $i++,
469 # Sequence number assigned to the root of sequence tree.
470 # The minimum of the actual sequences numbers is 4, so we can use 1
471 use constant SEQ_ROOT => 1;
473 # Codes for insertion and deletion of blanks
474 use constant DELETE => 0;
475 use constant STABLE => 1;
476 use constant INSERT => 2;
479 use constant WS_YES => 1;
480 use constant WS_OPTIONAL => 0;
481 use constant WS_NO => -1;
483 # Token bond strengths.
484 use constant NO_BREAK => 10000;
485 use constant VERY_STRONG => 100;
486 use constant STRONG => 2.1;
487 use constant NOMINAL => 1.1;
488 use constant WEAK => 0.8;
489 use constant VERY_WEAK => 0.55;
491 # values for testing indexes in output array
492 use constant UNDEFINED_INDEX => -1;
494 # Maximum number of little messages; probably need not be changed.
495 use constant MAX_NAG_MESSAGES => 6;
497 # increment between sequence numbers for each type
498 # For example, ?: pairs might have numbers 7,11,15,...
499 use constant TYPE_SEQUENCE_INCREMENT => 4;
501 # Initialize constant hashes ...
505 = **= += *= &= <<= &&=
510 @is_assignment{@q} = (1) x scalar(@q);
520 @is_keyword_returning_list{@q} = (1) x scalar(@q);
522 @q = qw(is if unless and or err last next redo return);
523 @is_if_unless_and_or_last_next_redo_return{@q} = (1) x scalar(@q);
525 # These block types may have text between the keyword and opening
526 # curly. Note: 'else' does not, but must be included to allow trailing
527 # if/elsif text to be appended.
528 # patch for SWITCH/CASE: added 'case' and 'when'
529 @q = qw(if elsif else unless while until for foreach case when catch);
530 @is_if_elsif_else_unless_while_until_for_foreach{@q} =
533 @q = qw(if unless while until for);
534 @is_if_unless_while_until_for{@q} =
537 @q = qw(last next redo return);
538 @is_last_next_redo_return{@q} = (1) x scalar(@q);
540 @q = qw(sort map grep);
541 @is_sort_map_grep{@q} = (1) x scalar(@q);
543 @q = qw(sort map grep eval);
544 @is_sort_map_grep_eval{@q} = (1) x scalar(@q);
547 @is_if_unless{@q} = (1) x scalar(@q);
550 @is_and_or{@q} = (1) x scalar(@q);
552 # Identify certain operators which often occur in chains.
553 # Note: the minus (-) causes a side effect of padding of the first line in
554 # something like this (by sub set_logical_padding):
555 # Checkbutton => 'Transmission checked',
556 # -variable => \$TRANS
557 # This usually improves appearance so it seems ok.
558 @q = qw(&& || and or : ? . + - * /);
559 @is_chain_operator{@q} = (1) x scalar(@q);
561 # Operators that the user can request break before or after.
562 # Note that some are keywords
563 @all_operators = qw(% + - * / x != == >= <= =~ !~ < > | &
564 = **= += *= &= <<= &&= -= /= |= >>= ||= //= .= %= ^= x=
565 . : ? && || and or err xor
568 # We can remove semicolons after blocks preceded by these keywords
570 qw(BEGIN END CHECK INIT AUTOLOAD DESTROY UNITCHECK continue if elsif else
571 unless while until for foreach given when default);
572 @is_block_without_semicolon{@q} = (1) x scalar(@q);
574 # We will allow semicolons to be added within these block types
575 # as well as sub and package blocks.
577 # 1. Note that these keywords are omitted:
578 # switch case given when default sort map grep
579 # 2. It is also ok to add for sub and package blocks and a labeled block
580 # 3. But not okay for other perltidy types including:
582 # 4. Test files: blktype.t, blktype1.t, semicolon.t
584 qw( BEGIN END CHECK INIT AUTOLOAD DESTROY UNITCHECK continue if elsif else
585 unless do while until eval for foreach );
586 @ok_to_add_semicolon_for_block_type{@q} = (1) x scalar(@q);
588 # 'L' is token for opening { at hash key
590 @is_opening_type{@q} = (1) x scalar(@q);
592 # 'R' is token for closing } at hash key
594 @is_closing_type{@q} = (1) x scalar(@q);
597 @is_opening_token{@q} = (1) x scalar(@q);
600 @is_closing_token{@q} = (1) x scalar(@q);
603 @is_opening_sequence_token{@q} = (1) x scalar(@q);
606 @is_closing_sequence_token{@q} = (1) x scalar(@q);
608 # a hash needed by sub scan_list for labeling containers
609 @q = qw( k => && || ? : . );
610 @is_container_label_type{@q} = (1) x scalar(@q);
612 # Braces -bbht etc must follow these. Note: experimentation with
613 # including a simple comma shows that it adds little and can lead
614 # to poor formatting in complex lists.
616 @is_equal_or_fat_comma{@q} = (1) x scalar(@q);
620 @is_counted_type{@q} = (1) x scalar(@q);
622 # These block types can take ci. This is used by the -xci option.
623 # Note that the 'sub' in this list is an anonymous sub. To be more correct
624 # we could remove sub and use ASUB pattern to also handle a
625 # prototype/signature. But that would slow things down and would probably
627 @q = qw( do sub eval sort map grep );
628 @is_block_with_ci{@q} = (1) x scalar(@q);
632 { ## begin closure to count instanes
634 # methods to count instances
636 sub get_count { return $_count; }
637 sub _increment_count { return ++$_count }
638 sub _decrement_count { return --$_count }
639 } ## end closure to count instanes
643 my ( $class, @args ) = @_;
645 # we are given an object with a write_line() method to take lines
647 sink_object => undef,
648 diagnostics_object => undef,
649 logger_object => undef,
650 length_function => sub { return length( $_[0] ) },
651 is_encoded_data => "",
654 my %args = ( %defaults, @args );
656 my $length_function = $args{length_function};
657 my $is_encoded_data = $args{is_encoded_data};
658 my $fh_tee = $args{fh_tee};
659 my $logger_object = $args{logger_object};
660 my $diagnostics_object = $args{diagnostics_object};
662 # we create another object with a get_line() and peek_ahead() method
663 my $sink_object = $args{sink_object};
664 my $file_writer_object =
665 Perl::Tidy::FileWriter->new( $sink_object, $rOpts, $logger_object );
667 # initialize closure variables...
668 set_logger_object($logger_object);
669 set_diagnostics_object($diagnostics_object);
670 initialize_gnu_vars();
671 initialize_csc_vars();
672 initialize_scan_list();
673 initialize_saved_opening_indentation();
674 initialize_undo_ci();
675 initialize_process_line_of_CODE();
676 initialize_grind_batch_of_CODE();
677 initialize_adjusted_indentation();
678 initialize_postponed_breakpoint();
679 initialize_batch_variables();
680 initialize_forced_breakpoint_vars();
681 initialize_gnu_batch_vars();
682 initialize_write_line();
684 my $vertical_aligner_object = Perl::Tidy::VerticalAligner->new(
686 file_writer_object => $file_writer_object,
687 logger_object => $logger_object,
688 diagnostics_object => $diagnostics_object,
689 length_function => $length_function
692 write_logfile_entry("\nStarting tokenization pass...\n");
694 if ( $rOpts->{'entab-leading-whitespace'} ) {
696 "Leading whitespace will be entabbed with $rOpts->{'entab-leading-whitespace'} spaces per tab\n"
699 elsif ( $rOpts->{'tabs'} ) {
700 write_logfile_entry("Indentation will be with a tab character\n");
704 "Indentation will be with $rOpts->{'indent-columns'} spaces\n");
707 # Initialize the $self array reference.
708 # To add an item, first add a constant index in the BEGIN block above.
711 # Basic data structures...
712 $self->[_rlines_] = []; # = ref to array of lines of the file
713 $self->[_rlines_new_] = []; # = ref to array of output lines
715 # 'rLL' = reference to the liner array of all tokens in the file.
716 # 'LL' stands for 'Linked List'. Using a linked list was a disaster, but
717 # 'LL' stuck because it is easy to type.
719 $self->[_Klimit_] = undef; # = maximum K index for rLL.
721 # Arrays for quickly traversing the structures
722 $self->[_K_opening_container_] = {};
723 $self->[_K_closing_container_] = {};
724 $self->[_K_opening_ternary_] = {};
725 $self->[_K_closing_ternary_] = {};
726 $self->[_K_first_seq_item_] = undef; # K of first token with a sequence #
728 # Array of phantom semicolons, in case we ever need to undo them
729 $self->[_rK_phantom_semicolons_] = undef;
731 # Mostly list characteristics and processing flags
732 $self->[_rtype_count_by_seqno_] = {};
733 $self->[_ris_function_call_paren_] = {};
734 $self->[_rlec_count_by_seqno_] = {};
735 $self->[_ris_broken_container_] = {};
736 $self->[_ris_permanently_broken_] = {};
737 $self->[_rhas_list_] = {};
738 $self->[_rhas_broken_list_] = {};
739 $self->[_rhas_broken_list_with_lec_] = {};
740 $self->[_rhas_code_block_] = {};
741 $self->[_rhas_broken_code_block_] = {};
742 $self->[_rhas_ternary_] = {};
743 $self->[_ris_excluded_lp_container_] = {};
744 $self->[_rwant_reduced_ci_] = {};
745 $self->[_rno_xci_by_seqno_] = {};
746 $self->[_ris_bli_container_] = {};
747 $self->[_rparent_of_seqno_] = {};
748 $self->[_rchildren_of_seqno_] = {};
749 $self->[_ris_list_by_seqno_] = {};
751 $self->[_rbreak_container_] = {}; # prevent one-line blocks
752 $self->[_rshort_nested_] = {}; # blocks not forced open
753 $self->[_length_function_] = $length_function;
754 $self->[_is_encoded_data_] = $is_encoded_data;
757 $self->[_fh_tee_] = $fh_tee;
758 $self->[_sink_object_] = $sink_object;
759 $self->[_file_writer_object_] = $file_writer_object;
760 $self->[_vertical_aligner_object_] = $vertical_aligner_object;
761 $self->[_logger_object_] = $logger_object;
763 # Reference to the batch being processed
764 $self->[_this_batch_] = [];
766 # Memory of processed text...
767 $self->[_last_last_line_leading_level_] = 0;
768 $self->[_last_line_leading_level_] = 0;
769 $self->[_last_line_leading_type_] = '#';
770 $self->[_last_output_short_opening_token_] = 0;
771 $self->[_added_semicolon_count_] = 0;
772 $self->[_first_added_semicolon_at_] = 0;
773 $self->[_last_added_semicolon_at_] = 0;
774 $self->[_deleted_semicolon_count_] = 0;
775 $self->[_first_deleted_semicolon_at_] = 0;
776 $self->[_last_deleted_semicolon_at_] = 0;
777 $self->[_embedded_tab_count_] = 0;
778 $self->[_first_embedded_tab_at_] = 0;
779 $self->[_last_embedded_tab_at_] = 0;
780 $self->[_first_tabbing_disagreement_] = 0;
781 $self->[_last_tabbing_disagreement_] = 0;
782 $self->[_tabbing_disagreement_count_] = 0;
783 $self->[_in_tabbing_disagreement_] = 0;
784 $self->[_saw_VERSION_in_this_file_] = !$rOpts->{'pass-version-line'};
785 $self->[_saw_END_or_DATA_] = 0;
787 # Hashes related to container welding...
788 $self->[_radjusted_levels_] = [];
790 # Weld data structures
791 $self->[_rK_weld_left_] = {};
792 $self->[_rK_weld_right_] = {};
793 $self->[_rweld_len_right_at_K_] = {};
796 $self->[_rseqno_controlling_my_ci_] = {};
797 $self->[_ris_seqno_controlling_ci_] = {};
799 $self->[_rspecial_side_comment_type_] = {};
800 $self->[_maximum_level_] = 0;
802 $self->[_rKrange_code_without_comments_] = [];
803 $self->[_rbreak_before_Kfirst_] = {};
804 $self->[_rbreak_after_Klast_] = {};
805 $self->[_rwant_container_open_] = {};
806 $self->[_converged_] = 0;
809 $self->[_rstarting_multiline_qw_seqno_by_K_] = {};
810 $self->[_rending_multiline_qw_seqno_by_K_] = {};
811 $self->[_rKrange_multiline_qw_by_seqno_] = {};
812 $self->[_rmultiline_qw_has_extra_level_] = {};
814 $self->[_rbreak_before_container_by_seqno_] = {};
815 $self->[_ris_essential_old_breakpoint_] = {};
816 $self->[_roverride_cab3_] = {};
817 $self->[_ris_assigned_structure_] = {};
819 # This flag will be updated later by a call to get_save_logfile()
820 $self->[_save_logfile_] = defined($logger_object);
824 # Safety check..this is not a class yet
825 if ( _increment_count() > 1 ) {
827 "Attempt to create more than 1 object in $class, which is not a true class yet\n";
832 ######################################
833 # CODE SECTION 2: Some Basic Utilities
834 ######################################
836 { ## begin closure for logger routines
839 # Called once per file to initialize the logger object
840 sub set_logger_object {
841 $logger_object = shift;
845 sub get_logger_object {
846 return $logger_object;
849 sub get_input_stream_name {
850 my $input_stream_name = "";
851 if ($logger_object) {
852 $input_stream_name = $logger_object->get_input_stream_name();
854 return $input_stream_name;
857 # interface to Perl::Tidy::Logger routines
860 if ($logger_object) { $logger_object->warning($msg); }
866 if ($logger_object) {
867 $logger_object->complain($msg);
872 sub write_logfile_entry {
874 if ($logger_object) {
875 $logger_object->write_logfile_entry(@msg);
880 sub report_definite_bug {
881 if ($logger_object) {
882 $logger_object->report_definite_bug();
887 sub get_saw_brace_error {
888 if ($logger_object) {
889 return $logger_object->get_saw_brace_error();
894 sub we_are_at_the_last_line {
895 if ($logger_object) {
896 $logger_object->we_are_at_the_last_line();
901 } ## end closure for logger routines
903 { ## begin closure for diagnostics routines
904 my $diagnostics_object;
906 # Called once per file to initialize the diagnostics object
907 sub set_diagnostics_object {
908 $diagnostics_object = shift;
912 sub write_diagnostics {
914 if ($diagnostics_object) {
915 $diagnostics_object->write_diagnostics($msg);
919 } ## end closure for diagnostics routines
921 sub get_convergence_check {
923 return $self->[_converged_];
926 sub get_added_semicolon_count {
928 return $self->[_added_semicolon_count_];
931 sub get_output_line_number {
933 my $vao = $self->[_vertical_aligner_object_];
934 return $vao->get_output_line_number();
937 sub check_token_array {
940 # Check for errors in the array of tokens. This is only called now
941 # when the DEVEL_MODE flag is set, so this Fault will only occur
942 # during code development.
943 my $rLL = $self->[_rLL_];
944 for ( my $KK = 0 ; $KK < @{$rLL} ; $KK++ ) {
945 my $nvars = @{ $rLL->[$KK] };
946 if ( $nvars != _NVARS ) {
948 my $type = $rLL->[$KK]->[_TYPE_];
949 $type = '*' unless defined($type);
951 # The number of variables per token node is _NVARS and was set when
952 # the array indexes were generated. So if the number of variables
953 # is different we have done something wrong, like not store all of
954 # them in sub 'write_line' when they were received from the
957 "number of vars for node $KK, type '$type', is $nvars but should be $NVARS"
960 foreach my $var ( _TOKEN_, _TYPE_ ) {
961 if ( !defined( $rLL->[$KK]->[$var] ) ) {
962 my $iline = $rLL->[$KK]->[_LINE_INDEX_];
964 # This is a simple check that each token has some basic
965 # variables. In other words, that there are no holes in the
966 # array of tokens. Sub 'write_line' pushes tokens into the
967 # $rLL array, so this should guarantee no gaps.
968 Fault("Undefined variable $var for K=$KK, line=$iline\n");
975 sub want_blank_line {
978 my $file_writer_object = $self->[_file_writer_object_];
979 $file_writer_object->want_blank_line();
983 sub write_unindented_line {
984 my ( $self, $line ) = @_;
986 my $file_writer_object = $self->[_file_writer_object_];
987 $file_writer_object->write_line($line);
991 sub consecutive_nonblank_lines {
993 my $file_writer_object = $self->[_file_writer_object_];
994 my $vao = $self->[_vertical_aligner_object_];
995 return $file_writer_object->get_consecutive_nonblank_lines() +
996 $vao->get_cached_line_count();
1001 # trim leading and trailing whitespace from a string
1010 my $max = shift @vals;
1011 for (@vals) { $max = $_ > $max ? $_ : $max }
1017 my $min = shift @vals;
1018 for (@vals) { $min = $_ < $min ? $_ : $min }
1024 # given a string containing words separated by whitespace,
1025 # return the list of words
1030 return split( /\s+/, $str );
1033 ###########################################
1034 # CODE SECTION 3: Check and process options
1035 ###########################################
1039 # This routine is called to check the user-supplied run parameters
1040 # and to configure the control hashes to them.
1043 initialize_whitespace_hashes();
1044 initialize_bond_strength_hashes();
1046 # Make needed regex patterns for matching text.
1047 # NOTE: sub_matching_patterns must be made first because later patterns use
1048 # them; see RT #133130.
1049 make_sub_matching_pattern();
1050 make_static_block_comment_pattern();
1051 make_static_side_comment_pattern();
1052 make_closing_side_comment_prefix();
1053 make_closing_side_comment_list_pattern();
1054 $format_skipping_pattern_begin =
1055 make_format_skipping_pattern( 'format-skipping-begin', '#<<<' );
1056 $format_skipping_pattern_end =
1057 make_format_skipping_pattern( 'format-skipping-end', '#>>>' );
1058 make_non_indenting_brace_pattern();
1060 # If closing side comments ARE selected, then we can safely
1061 # delete old closing side comments unless closing side comment
1062 # warnings are requested. This is a good idea because it will
1063 # eliminate any old csc's which fall below the line count threshold.
1064 # We cannot do this if warnings are turned on, though, because we
1065 # might delete some text which has been added. So that must
1066 # be handled when comments are created. And we cannot do this
1067 # with -io because -csc will be skipped altogether.
1068 if ( $rOpts->{'closing-side-comments'} ) {
1069 if ( !$rOpts->{'closing-side-comment-warnings'}
1070 && !$rOpts->{'indent-only'} )
1072 $rOpts->{'delete-closing-side-comments'} = 1;
1076 # If closing side comments ARE NOT selected, but warnings ARE
1077 # selected and we ARE DELETING csc's, then we will pretend to be
1078 # adding with a huge interval. This will force the comments to be
1079 # generated for comparison with the old comments, but not added.
1080 elsif ( $rOpts->{'closing-side-comment-warnings'} ) {
1081 if ( $rOpts->{'delete-closing-side-comments'} ) {
1082 $rOpts->{'delete-closing-side-comments'} = 0;
1083 $rOpts->{'closing-side-comments'} = 1;
1084 $rOpts->{'closing-side-comment-interval'} = 100000000;
1089 make_block_brace_vertical_tightness_pattern();
1090 make_blank_line_pattern();
1091 make_keyword_group_list_pattern();
1093 # Make initial list of desired one line block types
1094 # They will be modified by 'prepare_cuddled_block_types'
1095 %want_one_line_block = %is_sort_map_grep_eval;
1097 # Default is to exclude one-line block types from -bl formatting
1098 # FIXME: Eventually a flag should be added to modify this.
1099 %is_braces_left_exclude_block = %is_sort_map_grep_eval;
1101 prepare_cuddled_block_types();
1102 if ( $rOpts->{'dump-cuddled-block-list'} ) {
1103 dump_cuddled_block_list(*STDOUT);
1107 if ( $rOpts->{'line-up-parentheses'} ) {
1109 if ( $rOpts->{'indent-only'}
1110 || !$rOpts->{'add-newlines'}
1111 || !$rOpts->{'delete-old-newlines'} )
1114 -----------------------------------------------------------------------
1115 Conflict: -lp conflicts with -io, -fnl, -nanl, or -ndnl; ignoring -lp
1117 The -lp indentation logic requires that perltidy be able to coordinate
1118 arbitrarily large numbers of line breakpoints. This isn't possible
1120 -----------------------------------------------------------------------
1122 $rOpts->{'line-up-parentheses'} = 0;
1125 if ( $rOpts->{'whitespace-cycle'} ) {
1127 Conflict: -wc cannot currently be used with the -lp option; ignoring -wc
1129 $rOpts->{'whitespace-cycle'} = 0;
1133 # At present, tabs are not compatible with the line-up-parentheses style
1134 # (it would be possible to entab the total leading whitespace
1135 # just prior to writing the line, if desired).
1136 if ( $rOpts->{'line-up-parentheses'} && $rOpts->{'tabs'} ) {
1138 Conflict: -t (tabs) cannot be used with the -lp option; ignoring -t; see -et.
1140 $rOpts->{'tabs'} = 0;
1143 # Likewise, tabs are not compatible with outdenting..
1144 if ( $rOpts->{'outdent-keywords'} && $rOpts->{'tabs'} ) {
1146 Conflict: -t (tabs) cannot be used with the -okw options; ignoring -t; see -et.
1148 $rOpts->{'tabs'} = 0;
1151 if ( $rOpts->{'outdent-labels'} && $rOpts->{'tabs'} ) {
1153 Conflict: -t (tabs) cannot be used with the -ola option; ignoring -t; see -et.
1155 $rOpts->{'tabs'} = 0;
1158 if ( !$rOpts->{'space-for-semicolon'} ) {
1159 $want_left_space{'f'} = -1;
1162 if ( $rOpts->{'space-terminal-semicolon'} ) {
1163 $want_left_space{';'} = 1;
1166 # We should put an upper bound on any -sil=n value. Otherwise enormous
1167 # files could be created by mistake.
1168 for ( $rOpts->{'starting-indentation-level'} ) {
1169 if ( $_ && $_ > 100 ) {
1171 The value --starting-indentation-level=$_ is very large; a mistake? resetting to 0;
1177 # implement outdenting preferences for keywords
1178 %outdent_keyword = ();
1179 my @okw = split_words( $rOpts->{'outdent-keyword-list'} );
1181 @okw = qw(next last redo goto return); # defaults
1184 # FUTURE: if not a keyword, assume that it is an identifier
1186 if ( $Perl::Tidy::Tokenizer::is_keyword{$_} ) {
1187 $outdent_keyword{$_} = 1;
1190 Warn("ignoring '$_' in -okwl list; not a perl keyword");
1194 # setup hash for -kpit option
1195 %keyword_paren_inner_tightness = ();
1196 my $kpit_value = $rOpts->{'keyword-paren-inner-tightness'};
1197 if ( defined($kpit_value) && $kpit_value != 1 ) {
1199 split_words( $rOpts->{'keyword-paren-inner-tightness-list'} );
1201 @kpit = qw(if elsif unless while until for foreach); # defaults
1204 # we will allow keywords and user-defined identifiers
1206 $keyword_paren_inner_tightness{$_} = $kpit_value;
1210 # implement user whitespace preferences
1211 if ( my @q = split_words( $rOpts->{'want-left-space'} ) ) {
1212 @want_left_space{@q} = (1) x scalar(@q);
1215 if ( my @q = split_words( $rOpts->{'want-right-space'} ) ) {
1216 @want_right_space{@q} = (1) x scalar(@q);
1219 if ( my @q = split_words( $rOpts->{'nowant-left-space'} ) ) {
1220 @want_left_space{@q} = (-1) x scalar(@q);
1223 if ( my @q = split_words( $rOpts->{'nowant-right-space'} ) ) {
1224 @want_right_space{@q} = (-1) x scalar(@q);
1226 if ( $rOpts->{'dump-want-left-space'} ) {
1227 dump_want_left_space(*STDOUT);
1231 if ( $rOpts->{'dump-want-right-space'} ) {
1232 dump_want_right_space(*STDOUT);
1236 # default keywords for which space is introduced before an opening paren
1237 # (at present, including them messes up vertical alignment)
1238 my @sak = qw(my local our and or xor err eq ne if else elsif until
1239 unless while for foreach return switch case given when catch);
1240 %space_after_keyword = map { $_ => 1 } @sak;
1242 # first remove any or all of these if desired
1243 if ( my @q = split_words( $rOpts->{'nospace-after-keyword'} ) ) {
1245 # -nsak='*' selects all the above keywords
1246 if ( @q == 1 && $q[0] eq '*' ) { @q = keys(%space_after_keyword) }
1247 @space_after_keyword{@q} = (0) x scalar(@q);
1250 # then allow user to add to these defaults
1251 if ( my @q = split_words( $rOpts->{'space-after-keyword'} ) ) {
1252 @space_after_keyword{@q} = (1) x scalar(@q);
1255 # implement user break preferences
1256 my $break_after = sub {
1258 foreach my $tok (@toks) {
1259 if ( $tok eq '?' ) { $tok = ':' } # patch to coordinate ?/:
1260 my $lbs = $left_bond_strength{$tok};
1261 my $rbs = $right_bond_strength{$tok};
1262 if ( defined($lbs) && defined($rbs) && $lbs < $rbs ) {
1263 ( $right_bond_strength{$tok}, $left_bond_strength{$tok} ) =
1269 my $break_before = sub {
1271 foreach my $tok (@toks) {
1272 my $lbs = $left_bond_strength{$tok};
1273 my $rbs = $right_bond_strength{$tok};
1274 if ( defined($lbs) && defined($rbs) && $rbs < $lbs ) {
1275 ( $right_bond_strength{$tok}, $left_bond_strength{$tok} ) =
1281 $break_after->(@all_operators) if ( $rOpts->{'break-after-all-operators'} );
1282 $break_before->(@all_operators)
1283 if ( $rOpts->{'break-before-all-operators'} );
1285 $break_after->( split_words( $rOpts->{'want-break-after'} ) );
1286 $break_before->( split_words( $rOpts->{'want-break-before'} ) );
1288 # make note if breaks are before certain key types
1289 %want_break_before = ();
1290 foreach my $tok ( @all_operators, ',' ) {
1291 $want_break_before{$tok} =
1292 $left_bond_strength{$tok} < $right_bond_strength{$tok};
1295 # Coordinate ?/: breaks, which must be similar
1296 if ( !$want_break_before{':'} ) {
1297 $want_break_before{'?'} = $want_break_before{':'};
1298 $right_bond_strength{'?'} = $right_bond_strength{':'} + 0.01;
1299 $left_bond_strength{'?'} = NO_BREAK;
1302 # Only make a hash entry for the next parameters if values are defined.
1303 # That allows a quick check to be made later.
1304 %break_before_container_types = ();
1305 for ( $rOpts->{'break-before-hash-brace'} ) {
1306 $break_before_container_types{'{'} = $_ if $_ && $_ > 0;
1308 for ( $rOpts->{'break-before-square-bracket'} ) {
1309 $break_before_container_types{'['} = $_ if $_ && $_ > 0;
1311 for ( $rOpts->{'break-before-paren'} ) {
1312 $break_before_container_types{'('} = $_ if $_ && $_ > 0;
1315 %container_indentation_options = ();
1317 [ 'break-before-hash-brace-and-indent', '{' ],
1318 [ 'break-before-square-bracket-and-indent', '[' ],
1319 [ 'break-before-paren-and-indent', '(' ],
1322 my ( $key, $tok ) = @{$pair};
1323 my $opt = $rOpts->{$key};
1324 if ( defined($opt) && $opt > 0 && $break_before_container_types{$tok} )
1327 # (1) -lp is not compatable with opt=2, silently set to opt=0
1328 # (2) opt=0 and 2 give same result if -i=-ci; but opt=0 is faster
1330 if ( $rOpts->{'line-up-parentheses'}
1331 || $rOpts->{'indent-columns'} ==
1332 $rOpts->{'continuation-indentation'} )
1337 $container_indentation_options{$tok} = $opt;
1341 # Define here tokens which may follow the closing brace of a do statement
1342 # on the same line, as in:
1343 # } while ( $something);
1344 my @dof = qw(until while unless if ; : );
1346 @is_do_follower{@dof} = (1) x scalar(@dof);
1348 # What tokens may follow the closing brace of an if or elsif block?
1349 # Not used. Previously used for cuddled else, but no longer needed.
1350 %is_if_brace_follower = ();
1352 # nothing can follow the closing curly of an else { } block:
1353 %is_else_brace_follower = ();
1355 # what can follow a multi-line anonymous sub definition closing curly:
1356 my @asf = qw# ; : => or and && || ~~ !~~ ) #;
1358 @is_anon_sub_brace_follower{@asf} = (1) x scalar(@asf);
1360 # what can follow a one-line anonymous sub closing curly:
1361 # one-line anonymous subs also have ']' here...
1362 # see tk3.t and PP.pm
1363 my @asf1 = qw# ; : => or and && || ) ] ~~ !~~ #;
1365 @is_anon_sub_1_brace_follower{@asf1} = (1) x scalar(@asf1);
1367 # What can follow a closing curly of a block
1368 # which is not an if/elsif/else/do/sort/map/grep/eval/sub
1369 # Testfiles: 'Toolbar.pm', 'Menubar.pm', bless.t, '3rules.pl'
1370 my @obf = qw# ; : => or and && || ) #;
1372 @is_other_brace_follower{@obf} = (1) x scalar(@obf);
1374 $right_bond_strength{'{'} = WEAK;
1375 $left_bond_strength{'{'} = VERY_STRONG;
1377 # make -l=0 equal to -l=infinite
1378 if ( !$rOpts->{'maximum-line-length'} ) {
1379 $rOpts->{'maximum-line-length'} = 1000000;
1382 # make -lbl=0 equal to -lbl=infinite
1383 if ( !$rOpts->{'long-block-line-count'} ) {
1384 $rOpts->{'long-block-line-count'} = 1000000;
1387 my $ole = $rOpts->{'output-line-ending'};
1396 # Patch for RT #99514, a memoization issue.
1397 # Normally, the user enters one of 'dos', 'win', etc, and we change the
1398 # value in the options parameter to be the corresponding line ending
1399 # character. But, if we are using memoization, on later passes through
1400 # here the option parameter will already have the desired ending
1401 # character rather than the keyword 'dos', 'win', etc. So
1402 # we must check to see if conversion has already been done and, if so,
1403 # bypass the conversion step.
1404 my %endings_inverted = (
1405 "\015\012" => 'dos',
1406 "\015\012" => 'win',
1411 if ( defined( $endings_inverted{$ole} ) ) {
1413 # we already have valid line ending, nothing more to do
1417 unless ( $rOpts->{'output-line-ending'} = $endings{$ole} ) {
1418 my $str = join " ", keys %endings;
1420 Unrecognized line ending '$ole'; expecting one of: $str
1423 if ( $rOpts->{'preserve-line-endings'} ) {
1424 Warn("Ignoring -ple; conflicts with -ole\n");
1425 $rOpts->{'preserve-line-endings'} = undef;
1430 # hashes used to simplify setting whitespace
1432 '{' => $rOpts->{'brace-tightness'},
1433 '}' => $rOpts->{'brace-tightness'},
1434 '(' => $rOpts->{'paren-tightness'},
1435 ')' => $rOpts->{'paren-tightness'},
1436 '[' => $rOpts->{'square-bracket-tightness'},
1437 ']' => $rOpts->{'square-bracket-tightness'},
1446 # note any requested old line breaks to keep
1447 %keep_break_before_type = ();
1448 %keep_break_after_type = ();
1449 if ( !$rOpts->{'ignore-old-breakpoints'} ) {
1451 # FIXME: could check for valid types here.
1452 # Invalid types are harmless but probably not intended.
1454 @types = ( split_words( $rOpts->{'keep-old-breakpoints-before'} ) );
1455 @keep_break_before_type{@types} = (1) x scalar(@types);
1456 @types = ( split_words( $rOpts->{'keep-old-breakpoints-after'} ) );
1457 @keep_break_after_type{@types} = (1) x scalar(@types);
1460 if ( $rOpts->{'break-at-old-method-breakpoints'} ) {
1461 Warn("Conflicting parameters: -iob and -bom; -bom will be ignored\n"
1463 $rOpts->{'break-at-old-method-breakpoints'} = 0;
1465 if ( $rOpts->{'break-at-old-comma-breakpoints'} ) {
1466 Warn("Conflicting parameters: -iob and -boc; -boc will be ignored\n"
1468 $rOpts->{'break-at-old-comma-breakpoints'} = 0;
1470 if ( $rOpts->{'break-at-old-semicolon-breakpoints'} ) {
1471 Warn("Conflicting parameters: -iob and -bos; -bos will be ignored\n"
1473 $rOpts->{'break-at-old-semicolon-breakpoints'} = 0;
1475 if ( $rOpts->{'keep-old-breakpoints-before'} ) {
1476 Warn("Conflicting parameters: -iob and -kbb; -kbb will be ignored\n"
1478 $rOpts->{'keep-old-breakpoints-before'} = "";
1480 if ( $rOpts->{'keep-old-breakpoints-after'} ) {
1481 Warn("Conflicting parameters: -iob and -kba; -kba will be ignored\n"
1483 $rOpts->{'keep-old-breakpoints-after'} = "";
1486 # Note: These additional parameters are made inactive by -iob.
1487 # They are silently turned off here because they are on by default.
1488 # We would generate unexpected warnings if we issued a warning.
1489 $rOpts->{'break-at-old-keyword-breakpoints'} = 0;
1490 $rOpts->{'break-at-old-logical-breakpoints'} = 0;
1491 $rOpts->{'break-at-old-ternary-breakpoints'} = 0;
1492 $rOpts->{'break-at-old-attribute-breakpoints'} = 0;
1495 #############################################################
1496 # Make global vars for frequently used options for efficiency
1497 #############################################################
1499 $rOpts_closing_side_comment_maximum_text =
1500 $rOpts->{'closing-side-comment-maximum-text'};
1501 $rOpts_continuation_indentation = $rOpts->{'continuation-indentation'};
1502 $rOpts_indent_columns = $rOpts->{'indent-columns'};
1503 $rOpts_line_up_parentheses = $rOpts->{'line-up-parentheses'};
1504 $rOpts_maximum_line_length = $rOpts->{'maximum-line-length'};
1505 $rOpts_variable_maximum_line_length =
1506 $rOpts->{'variable-maximum-line-length'};
1507 $rOpts_block_brace_tightness = $rOpts->{'block-brace-tightness'};
1508 $rOpts_block_brace_vertical_tightness =
1509 $rOpts->{'block-brace-vertical-tightness'};
1510 $rOpts_stack_closing_block_brace = $rOpts->{'stack-closing-block-brace'};
1511 $rOpts_maximum_consecutive_blank_lines =
1512 $rOpts->{'maximum-consecutive-blank-lines'};
1513 $rOpts_recombine = $rOpts->{'recombine'};
1514 $rOpts_add_newlines = $rOpts->{'add-newlines'};
1515 $rOpts_break_at_old_comma_breakpoints =
1516 $rOpts->{'break-at-old-comma-breakpoints'};
1517 $rOpts_ignore_old_breakpoints = $rOpts->{'ignore-old-breakpoints'};
1518 $rOpts_keep_interior_semicolons = $rOpts->{'keep-interior-semicolons'};
1519 $rOpts_comma_arrow_breakpoints = $rOpts->{'comma-arrow-breakpoints'};
1520 $rOpts_maximum_fields_per_table = $rOpts->{'maximum-fields-per-table'};
1521 $rOpts_one_line_block_semicolons = $rOpts->{'one-line-block-semicolons'};
1522 $rOpts_break_at_old_semicolon_breakpoints =
1523 $rOpts->{'break-at-old-semicolon-breakpoints'};
1525 $rOpts_tee_side_comments = $rOpts->{'tee-side-comments'};
1526 $rOpts_tee_block_comments = $rOpts->{'tee-block-comments'};
1527 $rOpts_tee_pod = $rOpts->{'tee-pod'};
1528 $rOpts_delete_side_comments = $rOpts->{'delete-side-comments'};
1529 $rOpts_delete_closing_side_comments =
1530 $rOpts->{'delete-closing-side-comments'};
1531 $rOpts_format_skipping = $rOpts->{'format-skipping'};
1532 $rOpts_indent_only = $rOpts->{'indent-only'};
1533 $rOpts_static_block_comments = $rOpts->{'static-block-comments'};
1535 $rOpts_add_whitespace = $rOpts->{'add-whitespace'};
1536 $rOpts_delete_old_whitespace = $rOpts->{'delete-old-whitespace'};
1537 $rOpts_freeze_whitespace = $rOpts->{'freeze-whitespace'};
1539 $rOpts_function_paren_vertical_alignment =
1540 $rOpts->{'function-paren-vertical-alignment'};
1541 $rOpts_ignore_side_comment_lengths =
1542 $rOpts->{'ignore-side-comment-lengths'};
1544 $rOpts_break_at_old_attribute_breakpoints =
1545 $rOpts->{'break-at-old-attribute-breakpoints'};
1546 $rOpts_break_at_old_keyword_breakpoints =
1547 $rOpts->{'break-at-old-keyword-breakpoints'};
1548 $rOpts_break_at_old_logical_breakpoints =
1549 $rOpts->{'break-at-old-logical-breakpoints'};
1550 $rOpts_break_at_old_ternary_breakpoints =
1551 $rOpts->{'break-at-old-ternary-breakpoints'};
1552 $rOpts_short_concatenation_item_length =
1553 $rOpts->{'short-concatenation-item-length'};
1554 $rOpts_closing_side_comment_else_flag =
1555 $rOpts->{'closing-side-comment-else-flag'};
1556 $rOpts_fuzzy_line_length = $rOpts->{'fuzzy-line-length'};
1558 # Note that both opening and closing tokens can access the opening
1559 # and closing flags of their container types.
1560 %opening_vertical_tightness = (
1561 '(' => $rOpts->{'paren-vertical-tightness'},
1562 '{' => $rOpts->{'brace-vertical-tightness'},
1563 '[' => $rOpts->{'square-bracket-vertical-tightness'},
1564 ')' => $rOpts->{'paren-vertical-tightness'},
1565 '}' => $rOpts->{'brace-vertical-tightness'},
1566 ']' => $rOpts->{'square-bracket-vertical-tightness'},
1569 %closing_vertical_tightness = (
1570 '(' => $rOpts->{'paren-vertical-tightness-closing'},
1571 '{' => $rOpts->{'brace-vertical-tightness-closing'},
1572 '[' => $rOpts->{'square-bracket-vertical-tightness-closing'},
1573 ')' => $rOpts->{'paren-vertical-tightness-closing'},
1574 '}' => $rOpts->{'brace-vertical-tightness-closing'},
1575 ']' => $rOpts->{'square-bracket-vertical-tightness-closing'},
1578 # assume flag for '>' same as ')' for closing qw quotes
1579 %closing_token_indentation = (
1580 ')' => $rOpts->{'closing-paren-indentation'},
1581 '}' => $rOpts->{'closing-brace-indentation'},
1582 ']' => $rOpts->{'closing-square-bracket-indentation'},
1583 '>' => $rOpts->{'closing-paren-indentation'},
1586 # flag indicating if any closing tokens are indented
1587 $some_closing_token_indentation =
1588 $rOpts->{'closing-paren-indentation'}
1589 || $rOpts->{'closing-brace-indentation'}
1590 || $rOpts->{'closing-square-bracket-indentation'}
1591 || $rOpts->{'indent-closing-brace'};
1593 %opening_token_right = (
1594 '(' => $rOpts->{'opening-paren-right'},
1595 '{' => $rOpts->{'opening-hash-brace-right'},
1596 '[' => $rOpts->{'opening-square-bracket-right'},
1599 %stack_opening_token = (
1600 '(' => $rOpts->{'stack-opening-paren'},
1601 '{' => $rOpts->{'stack-opening-hash-brace'},
1602 '[' => $rOpts->{'stack-opening-square-bracket'},
1605 %stack_closing_token = (
1606 ')' => $rOpts->{'stack-closing-paren'},
1607 '}' => $rOpts->{'stack-closing-hash-brace'},
1608 ']' => $rOpts->{'stack-closing-square-bracket'},
1611 # Create a table of maximum line length vs level for later efficient use.
1612 # We will make the tables very long to be sure it will not be exceeded.
1613 # But we have to choose a fixed length. A check will be made at the start
1614 # of sub 'finish_formatting' to be sure it is not exceeded. Note, some of
1615 # my standard test problems have indentation levels of about 150, so this
1616 # should be fairly large. If the choice of a maximum level ever becomes
1617 # an issue then these table values could be returned in a sub with a simple
1618 # memoization scheme.
1620 # Also create a table of the maximum spaces available for text due to the
1621 # level only. If a line has continuation indentation, then that space must
1622 # be subtracted from the table value. This table is used for preliminary
1623 # estimates in welding, extended_ci, BBX, and marking short blocks.
1624 my $level_max = 1000;
1627 foreach my $level ( 0 .. $level_max ) {
1628 my $indent = $level * $rOpts_indent_columns;
1629 $maximum_line_length_at_level[$level] = $rOpts_maximum_line_length;
1630 $maximum_text_length_at_level[$level] =
1631 $rOpts_maximum_line_length - $indent;
1634 # Correct the maximum_text_length table if the -wc=n flag is used
1635 $rOpts_whitespace_cycle = $rOpts->{'whitespace-cycle'};
1636 if ($rOpts_whitespace_cycle) {
1637 if ( $rOpts_whitespace_cycle > 0 ) {
1638 foreach my $level ( 0 .. $level_max ) {
1639 my $level_mod = $level % $rOpts_whitespace_cycle;
1640 my $indent = $level_mod * $rOpts_indent_columns;
1641 $maximum_text_length_at_level[$level] =
1642 $rOpts_maximum_line_length - $indent;
1646 $rOpts_whitespace_cycle = $rOpts->{'whitespace-cycle'} = 0;
1650 # Correct the tables if the -vmll flag is used. These values override the
1652 if ($rOpts_variable_maximum_line_length) {
1653 foreach my $level ( 0 .. $level_max ) {
1654 $maximum_text_length_at_level[$level] = $rOpts_maximum_line_length;
1655 $maximum_line_length_at_level[$level] =
1656 $rOpts_maximum_line_length + $level * $rOpts_indent_columns;
1660 initialize_weld_nested_exclusion_rules($rOpts);
1661 initialize_line_up_parentheses_exclusion_rules($rOpts);
1665 sub initialize_weld_nested_exclusion_rules {
1667 %weld_nested_exclusion_rules = ();
1669 my $opt_name = 'weld-nested-exclusion-list';
1670 my $str = $rOpts->{$opt_name};
1671 return unless ($str);
1674 return unless ($str);
1676 # There are four container tokens.
1684 # We are parsing an exclusion list for nested welds. The list is a string
1685 # with spaces separating any number of items. Each item consists of three
1686 # pieces of information:
1687 # <optional position> <optional type> <type of container>
1688 # < ^ or . > < k or K > < ( [ { >
1690 # The last character is the required container type and must be one of:
1692 # [ = square bracket
1695 # An optional leading position indicator:
1696 # ^ means the leading token position in the weld
1697 # . means a secondary token position in the weld
1698 # no position indicator means all positions match
1700 # An optional alphanumeric character between the position and container
1701 # token selects to which the rule applies:
1703 # K = any non-keyword
1705 # F = not a function call
1706 # w = function or keyword
1707 # W = not a function or keyword
1708 # no letter means any preceding type matches
1711 # ^( - the weld must not start with a paren
1712 # .( - the second and later tokens may not be parens
1713 # ( - no parens in weld
1714 # ^K( - exclude a leading paren not preceded by a keyword
1715 # .k( - exclude a secondary paren preceded by a keyword
1716 # [ { - exclude all brackets and braces
1718 my @items = split /\s+/, $str;
1721 foreach my $item (@items) {
1722 my $item_save = $item;
1723 my $tok = chop($item);
1724 my $key = $token_keys{$tok};
1725 if ( !defined($key) ) {
1726 $msg1 .= " '$item_save'";
1729 if ( !defined( $weld_nested_exclusion_rules{$key} ) ) {
1730 $weld_nested_exclusion_rules{$key} = [];
1732 my $rflags = $weld_nested_exclusion_rules{$key};
1734 # A 'q' means do not weld quotes
1735 if ( $tok eq 'q' ) {
1744 if ( $item =~ /^([\^\.])?([kKfFwW])?$/ ) {
1746 $select = $2 if ($2);
1749 $msg1 .= " '$item_save'";
1755 if ( $pos eq '^' || $pos eq '*' ) {
1756 if ( defined( $rflags->[0] ) && $rflags->[0] ne $select ) {
1759 $rflags->[0] = $select;
1761 if ( $pos eq '.' || $pos eq '*' ) {
1762 if ( defined( $rflags->[1] ) && $rflags->[1] ne $select ) {
1765 $rflags->[1] = $select;
1767 if ($err) { $msg2 .= " '$item_save'"; }
1771 Unexpecting symbol(s) encountered in --$opt_name will be ignored:
1777 Multiple specifications were encountered in the --weld-nested-exclusion-list for:
1779 Only the last will be used.
1785 sub initialize_line_up_parentheses_exclusion_rules {
1787 %line_up_parentheses_exclusion_rules = ();
1788 my $opt_name = 'line-up-parentheses-exclusion-list';
1789 my $str = $rOpts->{$opt_name};
1790 return unless ($str);
1793 return unless ($str);
1795 # The format is space separated items, where each item must consist of a
1796 # string with a token type preceded by an optional text token and followed
1800 # = (flag1)(key)(flag2), where
1805 my @items = split /\s+/, $str;
1808 foreach my $item (@items) {
1809 my $item_save = $item;
1810 my ( $flag1, $key, $flag2 );
1811 if ( $item =~ /^([^\(\]\{]*)?([\(\{\[])(\d)?$/ ) {
1817 $msg1 .= " '$item_save'";
1821 if ( !defined($key) ) {
1822 $msg1 .= " '$item_save'";
1826 # Check for valid flag1
1827 if ( !defined($flag1) ) { $flag1 = '*' }
1828 elsif ( $flag1 !~ /^[kKfFwW\*]$/ ) {
1829 $msg1 .= " '$item_save'";
1833 # Check for valid flag2
1834 # 0 or blank: ignore container contents
1835 # 1 all containers with sublists match
1836 # 2 all containers with sublists, code blocks or ternary operators match
1837 # ... this could be extended in the future
1838 if ( !defined($flag2) ) { $flag2 = 0 }
1839 elsif ( $flag2 !~ /^[012]$/ ) {
1840 $msg1 .= " '$item_save'";
1844 if ( !defined( $line_up_parentheses_exclusion_rules{$key} ) ) {
1845 $line_up_parentheses_exclusion_rules{$key} = [ $flag1, $flag2 ];
1849 # check for multiple conflicting specifications
1850 my $rflags = $line_up_parentheses_exclusion_rules{$key};
1852 if ( defined( $rflags->[0] ) && $rflags->[0] ne $flag1 ) {
1854 $rflags->[0] = $flag1;
1856 if ( defined( $rflags->[1] ) && $rflags->[1] ne $flag2 ) {
1858 $rflags->[1] = $flag2;
1860 $msg2 .= " '$item_save'" if ($err);
1865 Unexpecting symbol(s) encountered in --$opt_name will be ignored:
1871 Multiple specifications were encountered in the $opt_name at:
1873 Only the last will be used.
1877 # Possible speedup: we could turn off -lp if it is not actually used
1879 foreach my $key (qw# ( { [ #) {
1880 my $rflags = $line_up_parentheses_exclusion_rules{$key};
1881 if ( defined($rflags) ) {
1882 my ( $flag1, $flag2 ) = @{$rflags};
1883 if ( $flag1 && $flag1 ne '*' ) { $all_off = 0; last }
1884 if ($flag2) { $all_off = 0; last }
1889 # FIXME: This speedup works but is currently deactivated because at
1890 # present users of -lp could see some discontinuities in formatting,
1891 # such as those involving the choice of breaks at '='. Only if/when
1892 # these issues have been checked and resolved it should be reactivated
1894 ## $rOpts->{'line-up-parentheses'} = "";
1900 sub initialize_whitespace_hashes {
1902 # This is called once before formatting begins to initialize these global
1903 # hashes, which control the use of whitespace around tokens:
1908 # %space_after_keyword
1910 # Many token types are identical to the tokens themselves.
1911 # See the tokenizer for a complete list. Here are some special types:
1913 # f = semicolon in for statement
1916 # Note that :: is excluded since it should be contained in an identifier
1917 # Note that '->' is excluded because it never gets space
1918 # parentheses and brackets are excluded since they are handled specially
1919 # curly braces are included but may be overridden by logic, such as
1922 # NEW_TOKENS: create a whitespace rule here. This can be as
1923 # simple as adding your new letter to @spaces_both_sides, for
1926 my @opening_type = qw< L { ( [ >;
1927 @is_opening_type{@opening_type} = (1) x scalar(@opening_type);
1929 my @closing_type = qw< R } ) ] >;
1930 @is_closing_type{@closing_type} = (1) x scalar(@closing_type);
1932 my @spaces_both_sides = qw#
1933 + - * / % ? = . : x < > | & ^ .. << >> ** && .. || // => += -=
1934 .= %= x= &= |= ^= *= <> <= >= == =~ !~ /= != ... <<= >>= ~~ !~~
1935 &&= ||= //= <=> A k f w F n C Y U G v
1938 my @spaces_left_side = qw<
1939 t ! ~ m p { \ h pp mm Z j
1941 push( @spaces_left_side, '#' ); # avoids warning message
1943 my @spaces_right_side = qw<
1944 ; } ) ] R J ++ -- **=
1946 push( @spaces_right_side, ',' ); # avoids warning message
1948 %want_left_space = ();
1949 %want_right_space = ();
1950 %binary_ws_rules = ();
1952 # Note that we setting defaults here. Later in processing
1953 # the values of %want_left_space and %want_right_space
1954 # may be overridden by any user settings specified by the
1955 # -wls and -wrs parameters. However the binary_whitespace_rules
1956 # are hardwired and have priority.
1957 @want_left_space{@spaces_both_sides} =
1958 (1) x scalar(@spaces_both_sides);
1959 @want_right_space{@spaces_both_sides} =
1960 (1) x scalar(@spaces_both_sides);
1961 @want_left_space{@spaces_left_side} =
1962 (1) x scalar(@spaces_left_side);
1963 @want_right_space{@spaces_left_side} =
1964 (-1) x scalar(@spaces_left_side);
1965 @want_left_space{@spaces_right_side} =
1966 (-1) x scalar(@spaces_right_side);
1967 @want_right_space{@spaces_right_side} =
1968 (1) x scalar(@spaces_right_side);
1969 $want_left_space{'->'} = WS_NO;
1970 $want_right_space{'->'} = WS_NO;
1971 $want_left_space{'**'} = WS_NO;
1972 $want_right_space{'**'} = WS_NO;
1973 $want_right_space{'CORE::'} = WS_NO;
1975 # These binary_ws_rules are hardwired and have priority over the above
1976 # settings. It would be nice to allow adjustment by the user,
1977 # but it would be complicated to specify.
1979 # hash type information must stay tightly bound
1981 $binary_ws_rules{'i'}{'L'} = WS_NO;
1982 $binary_ws_rules{'i'}{'{'} = WS_YES;
1983 $binary_ws_rules{'k'}{'{'} = WS_YES;
1984 $binary_ws_rules{'U'}{'{'} = WS_YES;
1985 $binary_ws_rules{'i'}{'['} = WS_NO;
1986 $binary_ws_rules{'R'}{'L'} = WS_NO;
1987 $binary_ws_rules{'R'}{'{'} = WS_NO;
1988 $binary_ws_rules{'t'}{'L'} = WS_NO;
1989 $binary_ws_rules{'t'}{'{'} = WS_NO;
1990 $binary_ws_rules{'t'}{'='} = WS_OPTIONAL; # for signatures; fixes b1123
1991 $binary_ws_rules{'}'}{'L'} = WS_NO;
1992 $binary_ws_rules{'}'}{'{'} = WS_OPTIONAL; # RT#129850; was WS_NO
1993 $binary_ws_rules{'$'}{'L'} = WS_NO;
1994 $binary_ws_rules{'$'}{'{'} = WS_NO;
1995 $binary_ws_rules{'@'}{'L'} = WS_NO;
1996 $binary_ws_rules{'@'}{'{'} = WS_NO;
1997 $binary_ws_rules{'='}{'L'} = WS_YES;
1998 $binary_ws_rules{'J'}{'J'} = WS_YES;
2000 # the following includes ') {'
2001 # as in : if ( xxx ) { yyy }
2002 $binary_ws_rules{']'}{'L'} = WS_NO;
2003 $binary_ws_rules{']'}{'{'} = WS_NO;
2004 $binary_ws_rules{')'}{'{'} = WS_YES;
2005 $binary_ws_rules{')'}{'['} = WS_NO;
2006 $binary_ws_rules{']'}{'['} = WS_NO;
2007 $binary_ws_rules{']'}{'{'} = WS_NO;
2008 $binary_ws_rules{'}'}{'['} = WS_NO;
2009 $binary_ws_rules{'R'}{'['} = WS_NO;
2011 $binary_ws_rules{']'}{'++'} = WS_NO;
2012 $binary_ws_rules{']'}{'--'} = WS_NO;
2013 $binary_ws_rules{')'}{'++'} = WS_NO;
2014 $binary_ws_rules{')'}{'--'} = WS_NO;
2016 $binary_ws_rules{'R'}{'++'} = WS_NO;
2017 $binary_ws_rules{'R'}{'--'} = WS_NO;
2019 $binary_ws_rules{'i'}{'Q'} = WS_YES;
2020 $binary_ws_rules{'n'}{'('} = WS_YES; # occurs in 'use package n ()'
2022 $binary_ws_rules{'i'}{'('} = WS_NO;
2024 $binary_ws_rules{'w'}{'('} = WS_NO;
2025 $binary_ws_rules{'w'}{'{'} = WS_YES;
2028 } ## end initialize_whitespace_hashes
2030 sub set_whitespace_flags {
2032 # This routine is called once per file to set whitespace flags for that
2033 # file. This routine examines each pair of nonblank tokens and sets a flag
2034 # indicating if white space is needed.
2036 # $rwhitespace_flags->[$j] is a flag indicating whether a white space
2037 # BEFORE token $j is needed, with the following values:
2039 # WS_NO = -1 do not want a space BEFORE token $j
2040 # WS_OPTIONAL= 0 optional space or $j is a whitespace
2041 # WS_YES = 1 want a space BEFORE token $j
2045 my $rLL = $self->[_rLL_];
2046 use constant DEBUG_WHITE => 0;
2048 my $rOpts_space_keyword_paren = $rOpts->{'space-keyword-paren'};
2049 my $rOpts_space_backslash_quote = $rOpts->{'space-backslash-quote'};
2050 my $rOpts_space_function_paren = $rOpts->{'space-function-paren'};
2052 my $rwhitespace_flags = [];
2053 my $ris_function_call_paren = {};
2055 my %is_for_foreach = ( 'for' => 1, 'foreach' => 1 );
2057 my ( $token, $type, $block_type, $seqno, $input_line_no );
2059 $last_token, $last_type, $last_block_type,
2060 $last_seqno, $last_input_line_no
2063 my $j_tight_closing_paren = -1;
2072 $last_block_type = '';
2074 $last_input_line_no = 0;
2076 my $jmax = @{$rLL} - 1;
2080 # This is some logic moved to a sub to avoid deep nesting of if stmts
2081 my $ws_in_container = sub {
2085 if ( $j + 1 > $jmax ) { return (WS_NO) }
2087 # Patch to count '-foo' as single token so that
2088 # each of $a{-foo} and $a{foo} and $a{'foo'} do
2089 # not get spaces with default formatting.
2093 && $last_token eq '{'
2094 && $rLL->[ $j + 1 ]->[_TYPE_] eq 'w' );
2096 # Patch to count a sign separated from a number as a single token, as
2097 # in the following line. Otherwise, it takes two steps to converge:
2099 if ( ( $type eq 'm' || $type eq 'p' )
2101 && $rLL->[ $j + 1 ]->[_TYPE_] eq 'b'
2102 && $rLL->[ $j + 2 ]->[_TYPE_] eq 'n'
2103 && $rLL->[ $j + 2 ]->[_TOKEN_] =~ /^\d/ )
2108 # $j_next is where a closing token should be if
2109 # the container has a single token
2110 if ( $j_here + 1 > $jmax ) { return (WS_NO) }
2112 ( $rLL->[ $j_here + 1 ]->[_TYPE_] eq 'b' )
2116 if ( $j_next > $jmax ) { return WS_NO }
2117 my $tok_next = $rLL->[$j_next]->[_TOKEN_];
2118 my $type_next = $rLL->[$j_next]->[_TYPE_];
2120 # for tightness = 1, if there is just one token
2121 # within the matching pair, we will keep it tight
2123 $tok_next eq $matching_token{$last_token}
2125 # but watch out for this: [ [ ] (misc.t)
2126 && $last_token ne $token
2128 # double diamond is usually spaced
2134 # remember where to put the space for the closing paren
2135 $j_tight_closing_paren = $j_next;
2141 # Local hashes to set spaces around container tokens according to their
2142 # sequence numbers. These are set as keywords are examined.
2143 # They are controlled by the -kpit and -kpitl flags.
2144 my %opening_container_inside_ws;
2145 my %closing_container_inside_ws;
2146 my $set_container_ws_by_keyword = sub {
2148 return unless (%keyword_paren_inner_tightness);
2150 my ( $word, $sequence_number ) = @_;
2152 # We just saw a keyword (or other function name) followed by an opening
2153 # paren. Now check to see if the following paren should have special
2154 # treatment for its inside space. If so we set a hash value using the
2155 # sequence number as key.
2156 if ( $word && $sequence_number ) {
2157 my $tightness = $keyword_paren_inner_tightness{$word};
2158 if ( defined($tightness) && $tightness != 1 ) {
2159 my $ws_flag = $tightness == 0 ? WS_YES : WS_NO;
2160 $opening_container_inside_ws{$sequence_number} = $ws_flag;
2161 $closing_container_inside_ws{$sequence_number} = $ws_flag;
2166 my $ws_opening_container_override = sub {
2167 my ( $ws, $sequence_number ) = @_;
2168 return $ws unless (%opening_container_inside_ws);
2169 if ($sequence_number) {
2170 my $ws_override = $opening_container_inside_ws{$sequence_number};
2171 if ($ws_override) { $ws = $ws_override }
2176 my $ws_closing_container_override = sub {
2177 my ( $ws, $sequence_number ) = @_;
2178 return $ws unless (%closing_container_inside_ws);
2179 if ($sequence_number) {
2180 my $ws_override = $closing_container_inside_ws{$sequence_number};
2181 if ($ws_override) { $ws = $ws_override }
2186 # main loop over all tokens to define the whitespace flags
2187 for ( my $j = 0 ; $j <= $jmax ; $j++ ) {
2189 my $rtokh = $rLL->[$j];
2192 $rwhitespace_flags->[$j] = WS_OPTIONAL;
2194 if ( $rtokh->[_TYPE_] eq 'b' ) {
2198 # set a default value, to be changed as needed
2200 $last_token = $token;
2202 $last_block_type = $block_type;
2203 $last_seqno = $seqno;
2204 $last_input_line_no = $input_line_no;
2205 $token = $rtokh->[_TOKEN_];
2206 $type = $rtokh->[_TYPE_];
2207 $block_type = $rtokh->[_BLOCK_TYPE_];
2208 $seqno = $rtokh->[_TYPE_SEQUENCE_];
2209 $input_line_no = $rtokh->[_LINE_INDEX_];
2211 #---------------------------------------------------------------
2212 # Whitespace Rules Section 1:
2213 # Handle space on the inside of opening braces.
2214 #---------------------------------------------------------------
2217 if ( $is_opening_type{$last_type} ) {
2219 $j_tight_closing_paren = -1;
2221 # let us keep empty matched braces together: () {} []
2223 if ( $token eq $matching_token{$last_token} ) {
2233 # we're considering the right of an opening brace
2234 # tightness = 0 means always pad inside with space
2235 # tightness = 1 means pad inside if "complex"
2236 # tightness = 2 means never pad inside with space
2239 if ( $last_type eq '{'
2240 && $last_token eq '{'
2241 && $last_block_type )
2243 $tightness = $rOpts_block_brace_tightness;
2245 else { $tightness = $tightness{$last_token} }
2247 #=============================================================
2248 # Patch for test problem <<snippets/fabrice_bug.in>>
2249 # We must always avoid spaces around a bare word beginning
2251 # my $before = ${^PREMATCH};
2252 # Because all of the following cause an error in perl:
2253 # my $before = ${ ^PREMATCH };
2254 # my $before = ${ ^PREMATCH};
2255 # my $before = ${^PREMATCH };
2256 # So if brace tightness flag is -bt=0 we must temporarily reset
2257 # to bt=1. Note that here we must set tightness=1 and not 2 so
2258 # that the closing space
2259 # is also avoided (via the $j_tight_closing_paren flag in coding)
2260 if ( $type eq 'w' && $token =~ /^\^/ ) { $tightness = 1 }
2262 #=============================================================
2264 if ( $tightness <= 0 ) {
2267 elsif ( $tightness > 1 ) {
2271 $ws = $ws_in_container->($j);
2275 # check for special cases which override the above rules
2276 $ws = $ws_opening_container_override->( $ws, $last_seqno );
2278 } # end setting space flag inside opening tokens
2283 #---------------------------------------------------------------
2284 # Whitespace Rules Section 2:
2285 # Handle space on inside of closing brace pairs.
2286 #---------------------------------------------------------------
2289 if ( $is_closing_type{$type} ) {
2291 if ( $j == $j_tight_closing_paren ) {
2293 $j_tight_closing_paren = -1;
2298 if ( !defined($ws) ) {
2301 if ( $type eq '}' && $token eq '}' && $block_type ) {
2302 $tightness = $rOpts_block_brace_tightness;
2304 else { $tightness = $tightness{$token} }
2306 $ws = ( $tightness > 1 ) ? WS_NO : WS_YES;
2310 # check for special cases which override the above rules
2311 $ws = $ws_closing_container_override->( $ws, $seqno );
2313 } # end setting space flag inside closing tokens
2319 #---------------------------------------------------------------
2320 # Whitespace Rules Section 3:
2321 # Use the binary rule table.
2322 #---------------------------------------------------------------
2323 if ( !defined($ws) ) {
2324 $ws = $binary_ws_rules{$last_type}{$type};
2330 #---------------------------------------------------------------
2331 # Whitespace Rules Section 4:
2332 # Handle some special cases.
2333 #---------------------------------------------------------------
2334 if ( $token eq '(' ) {
2336 # This will have to be tweaked as tokenization changes.
2337 # We usually want a space at '} (', for example:
2338 # <<snippets/space1.in>>
2339 # map { 1 * $_; } ( $y, $M, $w, $d, $h, $m, $s );
2342 # &{ $_->[1] }( delete $_[$#_]{ $_->[0] } );
2343 # At present, the above & block is marked as type L/R so this case
2344 # won't go through here.
2345 if ( $last_type eq '}' && $last_token ne ')' ) { $ws = WS_YES }
2347 # NOTE: some older versions of Perl had occasional problems if
2348 # spaces are introduced between keywords or functions and opening
2349 # parens. So the default is not to do this except is certain
2350 # cases. The current Perl seems to tolerate spaces.
2352 # Space between keyword and '('
2353 elsif ( $last_type eq 'k' ) {
2355 unless ( $rOpts_space_keyword_paren
2356 || $space_after_keyword{$last_token} );
2358 # Set inside space flag if requested
2359 $set_container_ws_by_keyword->( $last_token, $seqno );
2362 # Space between function and '('
2363 # -----------------------------------------------------
2364 # 'w' and 'i' checks for something like:
2365 # myfun( &myfun( ->myfun(
2366 # -----------------------------------------------------
2368 # Note that at this point an identifier may still have a leading
2369 # arrow, but the arrow will be split off during token respacing.
2370 # After that, the token may become a bare word without leading
2371 # arrow. The point is, it is best to mark function call parens
2372 # right here before that happens.
2373 # Patch: added 'C' to prevent blinker, case b934, i.e. 'pi()'
2374 # NOTE: this would be the place to allow spaces between repeated
2375 # parens, like () () (), as in case c017, but I decided that would
2376 # not be a good idea.
2377 elsif (( $last_type =~ /^[wCUG]$/ )
2378 || ( $last_type =~ /^[wi]$/ && $last_token =~ /^([\&]|->)/ ) )
2380 $ws = $rOpts_space_function_paren ? WS_YES : WS_NO;
2381 $set_container_ws_by_keyword->( $last_token, $seqno );
2382 $ris_function_call_paren->{$seqno} = 1;
2385 # space between something like $i and ( in <<snippets/space2.in>>
2386 # for $i ( 0 .. 20 ) {
2387 # FIXME: eventually, type 'i' could be split into multiple
2388 # token types so this can be a hardwired rule.
2389 elsif ( $last_type eq 'i' && $last_token =~ /^[\$\%\@]/ ) {
2393 # allow constant function followed by '()' to retain no space
2394 elsif ($last_type eq 'C'
2395 && $rLL->[ $j + 1 ]->[_TOKEN_] eq ')' )
2401 # patch for SWITCH/CASE: make space at ']{' optional
2402 # since the '{' might begin a case or when block
2403 elsif ( ( $token eq '{' && $type ne 'L' ) && $last_token eq ']' ) {
2407 # keep space between 'sub' and '{' for anonymous sub definition
2408 if ( $type eq '{' ) {
2409 if ( $last_token eq 'sub' ) {
2413 # this is needed to avoid no space in '){'
2414 if ( $last_token eq ')' && $token eq '{' ) { $ws = WS_YES }
2416 # avoid any space before the brace or bracket in something like
2417 # @opts{'a','b',...}
2418 if ( $last_type eq 'i' && $last_token =~ /^\@/ ) {
2423 elsif ( $type eq 'i' ) {
2425 # never a space before ->
2426 if ( substr( $token, 0, 2 ) eq '->' ) {
2431 # retain any space between '-' and bare word
2432 elsif ( $type eq 'w' || $type eq 'C' ) {
2433 $ws = WS_OPTIONAL if $last_type eq '-';
2435 # never a space before ->
2436 if ( substr( $token, 0, 2 ) eq '->' ) {
2441 # retain any space between '-' and bare word; for example
2442 # avoid space between 'USER' and '-' here: <<snippets/space2.in>>
2443 # $myhash{USER-NAME}='steve';
2444 elsif ( $type eq 'm' || $type eq '-' ) {
2445 $ws = WS_OPTIONAL if ( $last_type eq 'w' );
2448 # always space before side comment
2449 elsif ( $type eq '#' ) { $ws = WS_YES if $j > 0 }
2451 # always preserver whatever space was used after a possible
2452 # filehandle (except _) or here doc operator
2455 && ( ( $last_type eq 'Z' && $last_token ne '_' )
2456 || $last_type eq 'h' )
2462 # space_backslash_quote; RT #123774 <<snippets/rt123774.in>>
2463 # allow a space between a backslash and single or double quote
2464 # to avoid fooling html formatters
2465 elsif ( $last_type eq '\\' && $type eq 'Q' && $token =~ /^[\"\']/ ) {
2466 if ($rOpts_space_backslash_quote) {
2467 if ( $rOpts_space_backslash_quote == 1 ) {
2470 elsif ( $rOpts_space_backslash_quote == 2 ) { $ws = WS_YES }
2471 else { } # shouldnt happen
2477 elsif ( $type eq 'k' ) {
2479 # Keywords 'for', 'foreach' are special cases for -kpit since the
2480 # opening paren does not always immediately follow the keyword. So
2481 # we have to search forward for the paren in this case. I have
2482 # limited the search to 10 tokens ahead, just in case somebody
2483 # has a big file and no opening paren. This should be enough for
2485 if ( $is_for_foreach{$token}
2486 && %keyword_paren_inner_tightness
2487 && defined( $keyword_paren_inner_tightness{$token} )
2491 for ( my $inc = 1 ; $inc < 10 ; $inc++ ) {
2493 last if ( $jp > $jmax );
2494 next unless ( $rLL->[$jp]->[_TOKEN_] eq '(' );
2495 my $seqno = $rLL->[$jp]->[_TYPE_SEQUENCE_];
2496 $set_container_ws_by_keyword->( $token, $seqno );
2506 #---------------------------------------------------------------
2507 # Whitespace Rules Section 5:
2508 # Apply default rules not covered above.
2509 #---------------------------------------------------------------
2511 # If we fall through to here, look at the pre-defined hash tables for
2512 # the two tokens, and:
2513 # if (they are equal) use the common value
2514 # if (either is zero or undef) use the other
2515 # if (either is -1) use it
2529 if ( !defined($ws) ) {
2530 my $wl = $want_left_space{$type};
2531 my $wr = $want_right_space{$last_type};
2532 if ( !defined($wl) ) { $wl = 0 }
2533 if ( !defined($wr) ) { $wr = 0 }
2534 $ws = ( ( $wl == $wr ) || ( $wl == -1 ) || !$wr ) ? $wl : $wr;
2537 if ( !defined($ws) ) {
2540 "WS flag is undefined for tokens $last_token $token\n");
2543 # Treat newline as a whitespace. Otherwise, we might combine
2544 # 'Send' and '-recipients' here according to the above rules:
2545 # <<snippets/space3.in>>
2546 # my $msg = new Fax::Send
2547 # -recipients => $to,
2549 if ( $ws == 0 && $input_line_no != $last_input_line_no ) { $ws = 1 }
2551 $rwhitespace_flags->[$j] = $ws;
2554 my $str = substr( $last_token, 0, 15 );
2555 $str .= ' ' x ( 16 - length($str) );
2556 if ( !defined($ws_1) ) { $ws_1 = "*" }
2557 if ( !defined($ws_2) ) { $ws_2 = "*" }
2558 if ( !defined($ws_3) ) { $ws_3 = "*" }
2559 if ( !defined($ws_4) ) { $ws_4 = "*" }
2561 "NEW WHITE: i=$j $str $last_type $type $ws_1 : $ws_2 : $ws_3 : $ws_4 : $ws \n";
2565 if ( $rOpts->{'tight-secret-operators'} ) {
2566 new_secret_operator_whitespace( $rLL, $rwhitespace_flags );
2568 $self->[_ris_function_call_paren_] = $ris_function_call_paren;
2569 return $rwhitespace_flags;
2571 } ## end sub set_whitespace_flags
2573 sub dump_want_left_space {
2577 These values are the main control of whitespace to the left of a token type;
2578 They may be altered with the -wls parameter.
2579 For a list of token types, use perltidy --dump-token-types (-dtt)
2580 1 means the token wants a space to its left
2581 -1 means the token does not want a space to its left
2582 ------------------------------------------------------------------------
2584 foreach my $key ( sort keys %want_left_space ) {
2585 $fh->print("$key\t$want_left_space{$key}\n");
2590 sub dump_want_right_space {
2594 These values are the main control of whitespace to the right of a token type;
2595 They may be altered with the -wrs parameter.
2596 For a list of token types, use perltidy --dump-token-types (-dtt)
2597 1 means the token wants a space to its right
2598 -1 means the token does not want a space to its right
2599 ------------------------------------------------------------------------
2601 foreach my $key ( sort keys %want_right_space ) {
2602 $fh->print("$key\t$want_right_space{$key}\n");
2607 { ## begin closure is_essential_whitespace
2609 my %is_sort_grep_map;
2613 my %essential_whitespace_filter_l1;
2614 my %essential_whitespace_filter_r1;
2615 my %essential_whitespace_filter_l2;
2616 my %essential_whitespace_filter_r2;
2617 my %is_type_with_space_before_bareword;
2622 @q = qw(sort grep map);
2623 @is_sort_grep_map{@q} = (1) x scalar(@q);
2625 @q = qw(for foreach);
2626 @is_for_foreach{@q} = (1) x scalar(@q);
2629 .. :: << >> ** && .. || // -> => += -= .= %= &= |= ^= *= <>
2630 <= >= == =~ !~ != ++ -- /= x= ~~ ~. |. &. ^.
2632 @is_digraph{@q} = (1) x scalar(@q);
2634 @q = qw( ... **= <<= >>= &&= ||= //= <=> !~~ &.= |.= ^.= <<~);
2635 @is_trigraph{@q} = (1) x scalar(@q);
2637 # These are used as a speedup filters for sub is_essential_whitespace.
2640 # These left side token types USUALLY do not require a space:
2641 @q = qw( ; { } [ ] L R );
2645 @essential_whitespace_filter_l1{@q} = (1) x scalar(@q);
2647 # BUT some might if followed by these right token types
2648 @q = qw( pp mm << <<= h );
2649 @essential_whitespace_filter_r1{@q} = (1) x scalar(@q);
2652 # These right side filters usually do not require a space
2656 @essential_whitespace_filter_r2{@q} = (1) x scalar(@q);
2658 # BUT some might if followed by these left token types
2660 @essential_whitespace_filter_l2{@q} = (1) x scalar(@q);
2662 # Keep a space between certain types and any bareword:
2663 # Q: keep a space between a quote and a bareword to prevent the
2664 # bareword from becoming a quote modifier.
2665 # &: do not remove space between an '&' and a bare word because
2666 # it may turn into a function evaluation, like here
2667 # between '&' and 'O_ACCMODE', producing a syntax error [File.pm]
2668 # $opts{rdonly} = (($opts{mode} & O_ACCMODE) == O_RDONLY);
2670 @is_type_with_space_before_bareword{@q} = (1) x scalar(@q);
2674 sub is_essential_whitespace {
2676 # Essential whitespace means whitespace which cannot be safely deleted
2677 # without risking the introduction of a syntax error.
2678 # We are given three tokens and their types:
2679 # ($tokenl, $typel) is the token to the left of the space in question
2680 # ($tokenr, $typer) is the token to the right of the space in question
2681 # ($tokenll, $typell) is previous nonblank token to the left of $tokenl
2683 # Note1: This routine should almost never need to be changed. It is
2684 # for avoiding syntax problems rather than for formatting.
2686 # Note2: The -mangle option causes large numbers of calls to this
2687 # routine and therefore is a good test. So if a change is made, be sure
2688 # to run a large number of files with the -mangle option and check for
2691 my ( $tokenll, $typell, $tokenl, $typel, $tokenr, $typer ) = @_;
2693 # This is potentially a very slow routine but the following quick
2694 # filters typically catch and handle over 90% of the calls.
2696 # Filter 1: usually no space required after common types ; , [ ] { } ( )
2698 if ( $essential_whitespace_filter_l1{$typel}
2699 && !$essential_whitespace_filter_r1{$typer} );
2701 # Filter 2: usually no space before common types ; ,
2703 if ( $essential_whitespace_filter_r2{$typer}
2704 && !$essential_whitespace_filter_l2{$typel} );
2706 # Filter 3: Handle side comments: a space is only essential if the left
2707 # token ends in '$' For example, we do not want to create $#foo below:
2716 # Also, I prefer not to put a ? and # together because ? used to be
2717 # a pattern delmiter and spacing was used if guessing was needed.
2719 if ( $typer eq '#' ) {
2723 && ( $typel eq '?' || substr( $tokenl, -1 ) eq '$' ) );
2727 my $tokenr_is_bareword = $tokenr =~ /^\w/ && $tokenr !~ /^\d/;
2728 my $tokenr_is_open_paren = $tokenr eq '(';
2729 my $token_joined = $tokenl . $tokenr;
2730 my $tokenl_is_dash = $tokenl eq '-';
2734 # never combine two bare words or numbers
2735 # examples: and ::ok(1)
2737 # for bla::bla:: abc
2738 # example is "%overload:: and" in files Dumpvalue.pm or colonbug.pl
2739 # $input eq"quit" to make $inputeq"quit"
2740 # my $size=-s::SINK if $file; <==OK but we won't do it
2741 # don't join something like: for bla::bla:: abc
2742 # example is "%overload:: and" in files Dumpvalue.pm or colonbug.pl
2743 ( ( $tokenl =~ /([\'\w]|\:\:)$/ && $typel ne 'CORE::' )
2744 && ( $tokenr =~ /^([\'\w]|\:\:)/ ) )
2746 # do not combine a number with a concatenation dot
2747 # example: pom.caputo:
2748 # $vt100_compatible ? "\e[0;0H" : ('-' x 78 . "\n");
2749 || $typel eq 'n' && $tokenr eq '.'
2753 # cases of a space before a bareword...
2755 $tokenr_is_bareword && (
2757 # do not join a minus with a bare word, because you might form
2758 # a file test operator. Example from Complex.pm:
2759 # if (CORE::abs($z - i) < $eps);
2760 # "z-i" would be taken as a file test.
2761 $tokenl_is_dash && length($tokenr) == 1
2763 # and something like this could become ambiguous without space
2765 # use constant III=>1;
2769 || $tokenl_is_dash && $typer =~ /^[wC]$/
2771 # keep space between types Q & and a bareword
2772 || $is_type_with_space_before_bareword{$typel}
2774 # +-: binary plus and minus before a bareword could get
2775 # converted into unary plus and minus on next pass through the
2776 # tokenizer. This can lead to blinkers: cases b660 b670 b780
2777 # b781 b787 b788 b790 So we keep a space unless the +/- clearly
2778 # follows an operator
2779 || ( ( $typel eq '+' || $typel eq '-' )
2780 && $typell !~ /^[niC\)\}\]R]$/ )
2782 # keep a space between a token ending in '$' and any word;
2783 # this caused trouble: "die @$ if $@"
2784 || $typel eq 'i' && $tokenl =~ /\$$/
2786 # don't combine $$ or $# with any alphanumeric
2787 # (testfile mangle.t with --mangle)
2788 || $tokenl =~ /^\$[\$\#]$/
2791 ) ## end $tokenr_is_bareword
2794 # '= -' should not become =- or you will get a warning
2796 # || ($tokenr eq '-')
2798 # do not join a bare word with a minus, like between 'Send' and
2799 # '-recipients' here <<snippets/space3.in>>
2800 # my $msg = new Fax::Send
2801 # -recipients => $to,
2803 # This is the safest thing to do. If we had the token to the right of
2804 # the minus we could do a better check.
2806 # And do not combine a bareword and a quote, like this:
2807 # oops "Your login, $Bad_Login, is not valid";
2808 # It can cause a syntax error if oops is a sub
2809 || $typel eq 'w' && ( $tokenr eq '-' || $typer eq 'Q' )
2811 # perl is very fussy about spaces before <<
2812 || $tokenr =~ /^\<\</
2814 # avoid combining tokens to create new meanings. Example:
2815 # $a+ +$b must not become $a++$b
2816 || ( $is_digraph{$token_joined} )
2817 || $is_trigraph{$token_joined}
2819 # another example: do not combine these two &'s:
2820 # allow_options & &OPT_EXECCGI
2821 || $is_digraph{ $tokenl . substr( $tokenr, 0, 1 ) }
2823 # retain any space after possible filehandle
2824 # (testfiles prnterr1.t with --extrude and mangle.t with --mangle)
2827 # Added 'Y' here 16 Jan 2021 to prevent -mangle option from removing
2828 # space after type Y. Otherwise, it will get parsed as type 'Z' later
2829 # and any space would have to be added back manually if desired.
2832 # Perl is sensitive to whitespace after the + here:
2833 # $b = xvals $a + 0.1 * yvals $a;
2834 || $typell eq 'Z' && $typel =~ /^[\/\?\+\-\*]$/
2837 $tokenr_is_open_paren && (
2839 # keep paren separate in 'use Foo::Bar ()'
2840 ( $typel eq 'w' && $typell eq 'k' && $tokenll eq 'use' )
2842 # OLD: keep any space between filehandle and paren:
2843 # file mangle.t with --mangle:
2844 # NEW: this test is no longer necessary here (moved above)
2847 # must have space between grep and left paren; "grep(" will fail
2848 || $is_sort_grep_map{$tokenl}
2850 # don't stick numbers next to left parens, as in:
2851 #use Mail::Internet 1.28 (); (see Entity.pm, Head.pm, Test.pm)
2854 ) ## end $tokenr_is_open_paren
2856 # retain any space after here doc operator ( hereerr.t)
2859 # be careful with a space around ++ and --, to avoid ambiguity as to
2860 # which token it applies
2861 || $typer =~ /^(pp|mm)$/ && $tokenl !~ /^[\;\{\(\[]/
2862 || $typel =~ /^(\+\+|\-\-)$/
2863 && $tokenr !~ /^[\;\}\)\]]/
2865 # need space after foreach my; for example, this will fail in
2866 # older versions of Perl:
2867 # foreach my$ft(@filetypes)...
2872 && $is_for_foreach{$tokenll}
2876 # We must be sure that a space between a ? and a quoted string
2877 # remains if the space before the ? remains. [Loca.pm, lockarea]
2879 # $b=join $comma ? ',' : ':', @_; # ok
2880 # $b=join $comma?',' : ':', @_; # ok!
2881 # $b=join $comma ?',' : ':', @_; # error!
2882 # Not really required:
2883 ## || ( ( $typel eq '?' ) && ( $typer eq 'Q' ) )
2885 # Space stacked labels...
2886 # Not really required: Perl seems to accept non-spaced labels.
2887 ## || $typel eq 'J' && $typer eq 'J'
2889 ; # the value of this long logic sequence is the result we want
2892 } ## end closure is_essential_whitespace
2894 { ## begin closure new_secret_operator_whitespace
2896 my %secret_operators;
2897 my %is_leading_secret_token;
2901 # token lists for perl secret operators as compiled by Philippe Bruhat
2902 # at: https://metacpan.org/module/perlsecret
2903 %secret_operators = (
2904 'Goatse' => [qw#= ( ) =#], #=( )=
2905 'Venus1' => [qw#0 +#], # 0+
2906 'Venus2' => [qw#+ 0#], # +0
2907 'Enterprise' => [qw#) x ! !#], # ()x!!
2908 'Kite1' => [qw#~ ~ <>#], # ~~<>
2909 'Kite2' => [qw#~~ <>#], # ~~<>
2910 'Winking Fat Comma' => [ ( ',', '=>' ) ], # ,=>
2911 'Bang bang ' => [qw#! !#], # !!
2914 # The following operators and constants are not included because they
2915 # are normally kept tight by perltidy:
2919 # Make a lookup table indexed by the first token of each operator:
2920 # first token => [list, list, ...]
2921 foreach my $value ( values(%secret_operators) ) {
2922 my $tok = $value->[0];
2923 push @{ $is_leading_secret_token{$tok} }, $value;
2927 sub new_secret_operator_whitespace {
2929 my ( $rlong_array, $rwhitespace_flags ) = @_;
2931 # Loop over all tokens in this line
2932 my ( $token, $type );
2933 my $jmax = @{$rlong_array} - 1;
2934 foreach my $j ( 0 .. $jmax ) {
2936 $token = $rlong_array->[$j]->[_TOKEN_];
2937 $type = $rlong_array->[$j]->[_TYPE_];
2939 # Skip unless this token might start a secret operator
2940 next if ( $type eq 'b' );
2941 next unless ( $is_leading_secret_token{$token} );
2943 # Loop over all secret operators with this leading token
2944 foreach my $rpattern ( @{ $is_leading_secret_token{$token} } ) {
2946 foreach my $tok ( @{$rpattern} ) {
2951 && $rlong_array->[$jend]->[_TYPE_] eq 'b' );
2953 || $tok ne $rlong_array->[$jend]->[_TOKEN_] )
2962 # set flags to prevent spaces within this operator
2963 foreach my $jj ( $j + 1 .. $jend ) {
2964 $rwhitespace_flags->[$jj] = WS_NO;
2969 } ## End Loop over all operators
2970 } ## End loop over all tokens
2973 } ## end closure new_secret_operator_whitespace
2975 { ## begin closure set_bond_strengths
2977 # These routines and variables are involved in deciding where to break very
2980 my %is_good_keyword_breakpoint;
2982 my %is_container_token;
2984 my %binary_bond_strength_nospace;
2985 my %binary_bond_strength;
2994 sub initialize_bond_strength_hashes {
2997 @q = qw(if unless while until for foreach);
2998 @is_good_keyword_breakpoint{@q} = (1) x scalar(@q);
3000 @q = qw(lt gt le ge);
3001 @is_lt_gt_le_ge{@q} = (1) x scalar(@q);
3003 @q = qw/ ( [ { } ] ) /;
3004 @is_container_token{@q} = (1) x scalar(@q);
3006 # The decision about where to break a line depends upon a "bond
3007 # strength" between tokens. The LOWER the bond strength, the MORE
3008 # likely a break. A bond strength may be any value but to simplify
3009 # things there are several pre-defined strength levels:
3011 # NO_BREAK => 10000;
3012 # VERY_STRONG => 100;
3016 # VERY_WEAK => 0.55;
3018 # The strength values are based on trial-and-error, and need to be
3019 # tweaked occasionally to get desired results. Some comments:
3021 # 1. Only relative strengths are important. small differences
3022 # in strengths can make big formatting differences.
3023 # 2. Each indentation level adds one unit of bond strength.
3024 # 3. A value of NO_BREAK makes an unbreakable bond
3025 # 4. A value of VERY_WEAK is the strength of a ','
3026 # 5. Values below NOMINAL are considered ok break points.
3027 # 6. Values above NOMINAL are considered poor break points.
3029 # The bond strengths should roughly follow precedence order where
3030 # possible. If you make changes, please check the results very
3031 # carefully on a variety of scripts. Testing with the -extrude
3032 # options is particularly helpful in exercising all of the rules.
3034 # Wherever possible, bond strengths are defined in the following
3035 # tables. There are two main stages to setting bond strengths and
3036 # two types of tables:
3038 # The first stage involves looking at each token individually and
3039 # defining left and right bond strengths, according to if we want
3040 # to break to the left or right side, and how good a break point it
3041 # is. For example tokens like =, ||, && make good break points and
3042 # will have low strengths, but one might want to break on either
3043 # side to put them at the end of one line or beginning of the next.
3045 # The second stage involves looking at certain pairs of tokens and
3046 # defining a bond strength for that particular pair. This second
3047 # stage has priority.
3049 #---------------------------------------------------------------
3050 # Bond Strength BEGIN Section 1.
3051 # Set left and right bond strengths of individual tokens.
3052 #---------------------------------------------------------------
3054 # NOTE: NO_BREAK's set in this section first are HINTS which will
3055 # probably not be honored. Essential NO_BREAKS's should be set in
3056 # BEGIN Section 2 or hardwired in the NO_BREAK coding near the end
3057 # of this subroutine.
3059 # Note that we are setting defaults in this section. The user
3060 # cannot change bond strengths but can cause the left and right
3061 # bond strengths of any token type to be swapped through the use of
3062 # the -wba and -wbb flags. In this way the user can determine if a
3063 # breakpoint token should appear at the end of one line or the
3064 # beginning of the next line.
3066 %right_bond_strength = ();
3067 %left_bond_strength = ();
3068 %binary_bond_strength_nospace = ();
3069 %binary_bond_strength = ();
3073 # The hash keys in this section are token types, plus the text of
3074 # certain keywords like 'or', 'and'.
3076 # no break around possible filehandle
3077 $left_bond_strength{'Z'} = NO_BREAK;
3078 $right_bond_strength{'Z'} = NO_BREAK;
3080 # never put a bare word on a new line:
3081 # example print (STDERR, "bla"); will fail with break after (
3082 $left_bond_strength{'w'} = NO_BREAK;
3084 # blanks always have infinite strength to force breaks after
3086 $right_bond_strength{'b'} = NO_BREAK;
3088 # try not to break on exponentation
3089 @q = qw# ** .. ... <=> #;
3090 @left_bond_strength{@q} = (STRONG) x scalar(@q);
3091 @right_bond_strength{@q} = (STRONG) x scalar(@q);
3093 # The comma-arrow has very low precedence but not a good break point
3094 $left_bond_strength{'=>'} = NO_BREAK;
3095 $right_bond_strength{'=>'} = NOMINAL;
3097 # ok to break after label
3098 $left_bond_strength{'J'} = NO_BREAK;
3099 $right_bond_strength{'J'} = NOMINAL;
3100 $left_bond_strength{'j'} = STRONG;
3101 $right_bond_strength{'j'} = STRONG;
3102 $left_bond_strength{'A'} = STRONG;
3103 $right_bond_strength{'A'} = STRONG;
3105 $left_bond_strength{'->'} = STRONG;
3106 $right_bond_strength{'->'} = VERY_STRONG;
3108 $left_bond_strength{'CORE::'} = NOMINAL;
3109 $right_bond_strength{'CORE::'} = NO_BREAK;
3111 # breaking AFTER modulus operator is ok:
3113 @left_bond_strength{@q} = (STRONG) x scalar(@q);
3114 @right_bond_strength{@q} =
3115 ( 0.1 * NOMINAL + 0.9 * STRONG ) x scalar(@q);
3117 # Break AFTER math operators * and /
3119 @left_bond_strength{@q} = (STRONG) x scalar(@q);
3120 @right_bond_strength{@q} = (NOMINAL) x scalar(@q);
3122 # Break AFTER weakest math operators + and -
3123 # Make them weaker than * but a bit stronger than '.'
3125 @left_bond_strength{@q} = (STRONG) x scalar(@q);
3126 @right_bond_strength{@q} =
3127 ( 0.91 * NOMINAL + 0.09 * WEAK ) x scalar(@q);
3129 # Define left strength of unary plus and minus (fixes case b511)
3130 $left_bond_strength{p} = $left_bond_strength{'+'};
3131 $left_bond_strength{m} = $left_bond_strength{'-'};
3133 # And make right strength of unary plus and minus very high.
3134 # Fixes cases b670 b790
3135 $right_bond_strength{p} = NO_BREAK;
3136 $right_bond_strength{m} = NO_BREAK;
3138 # breaking BEFORE these is just ok:
3140 @right_bond_strength{@q} = (STRONG) x scalar(@q);
3141 @left_bond_strength{@q} = (NOMINAL) x scalar(@q);
3143 # breaking before the string concatenation operator seems best
3144 # because it can be hard to see at the end of a line
3145 $right_bond_strength{'.'} = STRONG;
3146 $left_bond_strength{'.'} = 0.9 * NOMINAL + 0.1 * WEAK;
3149 @left_bond_strength{@q} = (STRONG) x scalar(@q);
3150 @right_bond_strength{@q} = (NOMINAL) x scalar(@q);
3152 # make these a little weaker than nominal so that they get
3153 # favored for end-of-line characters
3154 @q = qw< != == =~ !~ ~~ !~~ >;
3155 @left_bond_strength{@q} = (STRONG) x scalar(@q);
3156 @right_bond_strength{@q} =
3157 ( 0.9 * NOMINAL + 0.1 * WEAK ) x scalar(@q);
3160 @q = qw# < > | & >= <= #;
3161 @left_bond_strength{@q} = (VERY_STRONG) x scalar(@q);
3162 @right_bond_strength{@q} =
3163 ( 0.8 * NOMINAL + 0.2 * WEAK ) x scalar(@q);
3165 # breaking either before or after a quote is ok
3166 # but bias for breaking before a quote
3167 $left_bond_strength{'Q'} = NOMINAL;
3168 $right_bond_strength{'Q'} = NOMINAL + 0.02;
3169 $left_bond_strength{'q'} = NOMINAL;
3170 $right_bond_strength{'q'} = NOMINAL;
3172 # starting a line with a keyword is usually ok
3173 $left_bond_strength{'k'} = NOMINAL;
3175 # we usually want to bond a keyword strongly to what immediately
3176 # follows, rather than leaving it stranded at the end of a line
3177 $right_bond_strength{'k'} = STRONG;
3179 $left_bond_strength{'G'} = NOMINAL;
3180 $right_bond_strength{'G'} = STRONG;
3182 # assignment operators
3184 = **= += *= &= <<= &&=
3185 -= /= |= >>= ||= //=
3190 # Default is to break AFTER various assignment operators
3191 @left_bond_strength{@q} = (STRONG) x scalar(@q);
3192 @right_bond_strength{@q} =
3193 ( 0.4 * WEAK + 0.6 * VERY_WEAK ) x scalar(@q);
3195 # Default is to break BEFORE '&&' and '||' and '//'
3196 # set strength of '||' to same as '=' so that chains like
3197 # $a = $b || $c || $d will break before the first '||'
3198 $right_bond_strength{'||'} = NOMINAL;
3199 $left_bond_strength{'||'} = $right_bond_strength{'='};
3201 # same thing for '//'
3202 $right_bond_strength{'//'} = NOMINAL;
3203 $left_bond_strength{'//'} = $right_bond_strength{'='};
3205 # set strength of && a little higher than ||
3206 $right_bond_strength{'&&'} = NOMINAL;
3207 $left_bond_strength{'&&'} = $left_bond_strength{'||'} + 0.1;
3209 $left_bond_strength{';'} = VERY_STRONG;
3210 $right_bond_strength{';'} = VERY_WEAK;
3211 $left_bond_strength{'f'} = VERY_STRONG;
3213 # make right strength of for ';' a little less than '='
3214 # to make for contents break after the ';' to avoid this:
3215 # for ( $j = $number_of_fields - 1 ; $j < $item_count ; $j +=
3216 # $number_of_fields )
3217 # and make it weaker than ',' and 'and' too
3218 $right_bond_strength{'f'} = VERY_WEAK - 0.03;
3220 # The strengths of ?/: should be somewhere between
3221 # an '=' and a quote (NOMINAL),
3222 # make strength of ':' slightly less than '?' to help
3223 # break long chains of ? : after the colons
3224 $left_bond_strength{':'} = 0.4 * WEAK + 0.6 * NOMINAL;
3225 $right_bond_strength{':'} = NO_BREAK;
3226 $left_bond_strength{'?'} = $left_bond_strength{':'} + 0.01;
3227 $right_bond_strength{'?'} = NO_BREAK;
3229 $left_bond_strength{','} = VERY_STRONG;
3230 $right_bond_strength{','} = VERY_WEAK;
3232 # remaining digraphs and trigraphs not defined above
3233 @q = qw( :: <> ++ --);
3234 @left_bond_strength{@q} = (WEAK) x scalar(@q);
3235 @right_bond_strength{@q} = (STRONG) x scalar(@q);
3237 # Set bond strengths of certain keywords
3238 # make 'or', 'err', 'and' slightly weaker than a ','
3239 $left_bond_strength{'and'} = VERY_WEAK - 0.01;
3240 $left_bond_strength{'or'} = VERY_WEAK - 0.02;
3241 $left_bond_strength{'err'} = VERY_WEAK - 0.02;
3242 $left_bond_strength{'xor'} = VERY_WEAK - 0.01;
3243 $right_bond_strength{'and'} = NOMINAL;
3244 $right_bond_strength{'or'} = NOMINAL;
3245 $right_bond_strength{'err'} = NOMINAL;
3246 $right_bond_strength{'xor'} = NOMINAL;
3248 #---------------------------------------------------------------
3249 # Bond Strength BEGIN Section 2.
3250 # Set binary rules for bond strengths between certain token types.
3251 #---------------------------------------------------------------
3253 # We have a little problem making tables which apply to the
3254 # container tokens. Here is a list of container tokens and
3257 # type tokens // meaning
3258 # { {, [, ( // indent
3259 # } }, ], ) // outdent
3260 # [ [ // left non-structural [ (enclosing an array index)
3261 # ] ] // right non-structural square bracket
3262 # ( ( // left non-structural paren
3263 # ) ) // right non-structural paren
3264 # L { // left non-structural curly brace (enclosing a key)
3265 # R } // right non-structural curly brace
3267 # Some rules apply to token types and some to just the token
3268 # itself. We solve the problem by combining type and token into a
3269 # new hash key for the container types.
3271 # If a rule applies to a token 'type' then we need to make rules
3272 # for each of these 'type.token' combinations:
3283 # If a rule applies to a token then we need to make rules for
3284 # these 'type.token' combinations:
3293 # allow long lines before final { in an if statement, as in:
3298 # Otherwise, the line before the { tends to be too short.
3300 $binary_bond_strength{'))'}{'{{'} = VERY_WEAK + 0.03;
3301 $binary_bond_strength{'(('}{'{{'} = NOMINAL;
3303 # break on something like '} (', but keep this stronger than a ','
3304 # example is in 'howe.pl'
3305 $binary_bond_strength{'R}'}{'(('} = 0.8 * VERY_WEAK + 0.2 * WEAK;
3306 $binary_bond_strength{'}}'}{'(('} = 0.8 * VERY_WEAK + 0.2 * WEAK;
3308 # keep matrix and hash indices together
3309 # but make them a little below STRONG to allow breaking open
3310 # something like {'some-word'}{'some-very-long-word'} at the }{
3312 $binary_bond_strength{']]'}{'[['} = 0.9 * STRONG + 0.1 * NOMINAL;
3313 $binary_bond_strength{']]'}{'L{'} = 0.9 * STRONG + 0.1 * NOMINAL;
3314 $binary_bond_strength{'R}'}{'[['} = 0.9 * STRONG + 0.1 * NOMINAL;
3315 $binary_bond_strength{'R}'}{'L{'} = 0.9 * STRONG + 0.1 * NOMINAL;
3317 # increase strength to the point where a break in the following
3318 # will be after the opening paren rather than at the arrow:
3320 $binary_bond_strength{'i'}{'->'} = 1.45 * STRONG;
3322 # Note that the following alternative strength would make the break at the
3323 # '->' rather than opening the '('. Both have advantages and disadvantages.
3324 # $binary_bond_strength{'i'}{'->'} = 0.5*STRONG + 0.5 * NOMINAL; #
3326 $binary_bond_strength{'))'}{'->'} = 0.1 * STRONG + 0.9 * NOMINAL;
3327 $binary_bond_strength{']]'}{'->'} = 0.1 * STRONG + 0.9 * NOMINAL;
3328 $binary_bond_strength{'})'}{'->'} = 0.1 * STRONG + 0.9 * NOMINAL;
3329 $binary_bond_strength{'}]'}{'->'} = 0.1 * STRONG + 0.9 * NOMINAL;
3330 $binary_bond_strength{'}}'}{'->'} = 0.1 * STRONG + 0.9 * NOMINAL;
3331 $binary_bond_strength{'R}'}{'->'} = 0.1 * STRONG + 0.9 * NOMINAL;
3333 $binary_bond_strength{'))'}{'[['} = 0.2 * STRONG + 0.8 * NOMINAL;
3334 $binary_bond_strength{'})'}{'[['} = 0.2 * STRONG + 0.8 * NOMINAL;
3335 $binary_bond_strength{'))'}{'{['} = 0.2 * STRONG + 0.8 * NOMINAL;
3336 $binary_bond_strength{'})'}{'{['} = 0.2 * STRONG + 0.8 * NOMINAL;
3338 #---------------------------------------------------------------
3339 # Binary NO_BREAK rules
3340 #---------------------------------------------------------------
3342 # use strict requires that bare word and => not be separated
3343 $binary_bond_strength{'C'}{'=>'} = NO_BREAK;
3344 $binary_bond_strength{'U'}{'=>'} = NO_BREAK;
3346 # Never break between a bareword and a following paren because
3347 # perl may give an error. For example, if a break is placed
3348 # between 'to_filehandle' and its '(' the following line will
3349 # give a syntax error [Carp.pm]: my( $no) =fileno(
3350 # to_filehandle( $in)) ;
3351 $binary_bond_strength{'C'}{'(('} = NO_BREAK;
3352 $binary_bond_strength{'C'}{'{('} = NO_BREAK;
3353 $binary_bond_strength{'U'}{'(('} = NO_BREAK;
3354 $binary_bond_strength{'U'}{'{('} = NO_BREAK;
3356 # use strict requires that bare word within braces not start new
3358 $binary_bond_strength{'L{'}{'w'} = NO_BREAK;
3360 $binary_bond_strength{'w'}{'R}'} = NO_BREAK;
3362 # The following two rules prevent a syntax error caused by breaking up
3363 # a construction like '{-y}'. The '-' quotes the 'y' and prevents
3364 # it from being taken as a transliteration. We have to keep
3365 # token types 'L m w' together to prevent this error.
3366 $binary_bond_strength{'L{'}{'m'} = NO_BREAK;
3367 $binary_bond_strength_nospace{'m'}{'w'} = NO_BREAK;
3369 # keep 'bareword-' together, but only if there is no space between
3370 # the word and dash. Do not keep together if there is a space.
3371 # example 'use perl6-alpha'
3372 $binary_bond_strength_nospace{'w'}{'m'} = NO_BREAK;
3374 # use strict requires that bare word and => not be separated
3375 $binary_bond_strength{'w'}{'=>'} = NO_BREAK;
3377 # use strict does not allow separating type info from trailing { }
3378 # testfile is readmail.pl
3379 $binary_bond_strength{'t'}{'L{'} = NO_BREAK;
3380 $binary_bond_strength{'i'}{'L{'} = NO_BREAK;
3382 # As a defensive measure, do not break between a '(' and a
3383 # filehandle. In some cases, this can cause an error. For
3384 # example, the following program works:
3391 # But this program fails:
3399 # This is normally only a problem with the 'extrude' option
3400 $binary_bond_strength{'(('}{'Y'} = NO_BREAK;
3401 $binary_bond_strength{'{('}{'Y'} = NO_BREAK;
3403 # never break between sub name and opening paren
3404 $binary_bond_strength{'w'}{'(('} = NO_BREAK;
3405 $binary_bond_strength{'w'}{'{('} = NO_BREAK;
3407 # keep '}' together with ';'
3408 $binary_bond_strength{'}}'}{';'} = NO_BREAK;
3410 # Breaking before a ++ can cause perl to guess wrong. For
3411 # example the following line will cause a syntax error
3412 # with -extrude if we break between '$i' and '++' [fixstyle2]
3413 # print( ( $i++ & 1 ) ? $_ : ( $change{$_} || $_ ) );
3414 $nobreak_lhs{'++'} = NO_BREAK;
3416 # Do not break before a possible file handle
3417 $nobreak_lhs{'Z'} = NO_BREAK;
3419 # use strict hates bare words on any new line. For
3420 # example, a break before the underscore here provokes the
3421 # wrath of use strict:
3422 # if ( -r $fn && ( -s _ || $AllowZeroFilesize)) {
3423 $nobreak_rhs{'F'} = NO_BREAK;
3424 $nobreak_rhs{'CORE::'} = NO_BREAK;
3426 # To prevent the tokenizer from switching between types 'w' and 'G' we
3427 # need to avoid breaking between type 'G' and the following code block
3428 # brace. Fixes case b929.
3429 $nobreak_rhs{G} = NO_BREAK;
3431 #---------------------------------------------------------------
3432 # Bond Strength BEGIN Section 3.
3433 # Define tables and values for applying a small bias to the above
3435 #---------------------------------------------------------------
3436 # Adding a small 'bias' to strengths is a simple way to make a line
3437 # break at the first of a sequence of identical terms. For
3438 # example, to force long string of conditional operators to break
3439 # with each line ending in a ':', we can add a small number to the
3440 # bond strength of each ':' (colon.t)
3441 @bias_tokens = qw( : && || f and or . ); # tokens which get bias
3442 %bias_hash = map { $_ => 0 } @bias_tokens;
3443 $delta_bias = 0.0001; # a very small strength level
3446 } ## end sub initialize_bond_strength_hashes
3448 use constant DEBUG_BOND => 0;
3450 sub set_bond_strengths {
3454 my $rK_weld_right = $self->[_rK_weld_right_];
3455 my $rK_weld_left = $self->[_rK_weld_left_];
3457 # patch-its always ok to break at end of line
3458 $nobreak_to_go[$max_index_to_go] = 0;
3460 # we start a new set of bias values for each line
3463 my $code_bias = -.01; # bias for closing block braces
3467 my $token_length = 1;
3469 my $last_nonblank_type = $type;
3470 my $last_nonblank_token = $token;
3471 my $list_str = $left_bond_strength{'?'};
3473 my ( $block_type, $i_next, $i_next_nonblank, $next_nonblank_token,
3474 $next_nonblank_type, $next_token, $next_type,
3475 $total_nesting_depth, );
3477 # main loop to compute bond strengths between each pair of tokens
3478 foreach my $i ( 0 .. $max_index_to_go ) {
3480 if ( $type ne 'b' ) {
3481 $last_nonblank_type = $type;
3482 $last_nonblank_token = $token;
3484 $type = $types_to_go[$i];
3486 # strength on both sides of a blank is the same
3487 if ( $type eq 'b' && $last_type ne 'b' ) {
3488 $bond_strength_to_go[$i] = $bond_strength_to_go[ $i - 1 ];
3492 $token = $tokens_to_go[$i];
3493 $token_length = $token_lengths_to_go[$i];
3494 $block_type = $block_type_to_go[$i];
3496 $next_type = $types_to_go[$i_next];
3497 $next_token = $tokens_to_go[$i_next];
3498 $total_nesting_depth = $nesting_depth_to_go[$i_next];
3499 $i_next_nonblank = ( ( $next_type eq 'b' ) ? $i + 2 : $i + 1 );
3500 $next_nonblank_type = $types_to_go[$i_next_nonblank];
3501 $next_nonblank_token = $tokens_to_go[$i_next_nonblank];
3503 my $seqno = $type_sequence_to_go[$i];
3504 my $next_nonblank_seqno = $type_sequence_to_go[$i_next_nonblank];
3506 # We are computing the strength of the bond between the current
3507 # token and the NEXT token.
3509 #---------------------------------------------------------------
3510 # Bond Strength Section 1:
3511 # First Approximation.
3512 # Use minimum of individual left and right tabulated bond
3514 #---------------------------------------------------------------
3515 my $bsr = $right_bond_strength{$type};
3516 my $bsl = $left_bond_strength{$next_nonblank_type};
3518 # define right bond strengths of certain keywords
3519 if ( $type eq 'k' && defined( $right_bond_strength{$token} ) ) {
3520 $bsr = $right_bond_strength{$token};
3522 elsif ( $token eq 'ne' or $token eq 'eq' ) {
3526 # set terminal bond strength to the nominal value
3527 # this will cause good preceding breaks to be retained
3528 if ( $i_next_nonblank > $max_index_to_go ) {
3532 # define right bond strengths of certain keywords
3533 if ( $next_nonblank_type eq 'k'
3534 && defined( $left_bond_strength{$next_nonblank_token} ) )
3536 $bsl = $left_bond_strength{$next_nonblank_token};
3538 elsif ($next_nonblank_token eq 'ne'
3539 or $next_nonblank_token eq 'eq' )
3543 elsif ( $is_lt_gt_le_ge{$next_nonblank_token} ) {
3544 $bsl = 0.9 * NOMINAL + 0.1 * STRONG;
3547 # Use the minimum of the left and right strengths. Note: it might
3548 # seem that we would want to keep a NO_BREAK if either token has
3549 # this value. This didn't work, for example because in an arrow
3550 # list, it prevents the comma from separating from the following
3551 # bare word (which is probably quoted by its arrow). So necessary
3552 # NO_BREAK's have to be handled as special cases in the final
3554 if ( !defined($bsr) ) { $bsr = VERY_STRONG }
3555 if ( !defined($bsl) ) { $bsl = VERY_STRONG }
3556 my $bond_str = ( $bsr < $bsl ) ? $bsr : $bsl;
3557 my $bond_str_1 = $bond_str;
3559 #---------------------------------------------------------------
3560 # Bond Strength Section 2:
3561 # Apply hardwired rules..
3562 #---------------------------------------------------------------
3564 # Patch to put terminal or clauses on a new line: Weaken the bond
3565 # at an || followed by die or similar keyword to make the terminal
3566 # or clause fall on a new line, like this:
3569 # || die "Cannot add broadcast: No class identifier found";
3571 # Otherwise the break will be at the previous '=' since the || and
3572 # = have the same starting strength and the or is biased, like
3576 # shift || die "Cannot add broadcast: No class identifier found";
3578 # In any case if the user places a break at either the = or the ||
3579 # it should remain there.
3580 if ( $type eq '||' || $type eq 'k' && $token eq 'or' ) {
3581 if ( $next_nonblank_token =~ /^(die|confess|croak|warn)$/ ) {
3582 if ( $want_break_before{$token} && $i > 0 ) {
3583 $bond_strength_to_go[ $i - 1 ] -= $delta_bias;
3585 # keep bond strength of a token and its following blank
3587 if ( $types_to_go[ $i - 1 ] eq 'b' && $i > 2 ) {
3588 $bond_strength_to_go[ $i - 2 ] -= $delta_bias;
3592 $bond_str -= $delta_bias;
3597 # good to break after end of code blocks
3598 if ( $type eq '}' && $block_type && $next_nonblank_type ne ';' ) {
3600 $bond_str = 0.5 * WEAK + 0.5 * VERY_WEAK + $code_bias;
3601 $code_bias += $delta_bias;
3604 if ( $type eq 'k' ) {
3606 # allow certain control keywords to stand out
3607 if ( $next_nonblank_type eq 'k'
3608 && $is_last_next_redo_return{$token} )
3610 $bond_str = 0.45 * WEAK + 0.55 * VERY_WEAK;
3613 # Don't break after keyword my. This is a quick fix for a
3614 # rare problem with perl. An example is this line from file
3617 # foreach my $question( Debian::DebConf::ConfigDb::gettree(
3618 # $this->{'question'} ) )
3620 if ( $token eq 'my' ) {
3621 $bond_str = NO_BREAK;
3626 # good to break before 'if', 'unless', etc
3627 if ( $is_if_brace_follower{$next_nonblank_token} ) {
3628 $bond_str = VERY_WEAK;
3631 if ( $next_nonblank_type eq 'k' && $type ne 'CORE::' ) {
3633 if ( $is_keyword_returning_list{$next_nonblank_token} ) {
3634 $bond_str = $list_str if ( $bond_str > $list_str );
3637 # keywords like 'unless', 'if', etc, within statements
3639 if ( $is_good_keyword_breakpoint{$next_nonblank_token} ) {
3640 $bond_str = VERY_WEAK / 1.05;
3644 # try not to break before a comma-arrow
3645 elsif ( $next_nonblank_type eq '=>' ) {
3646 if ( $bond_str < STRONG ) { $bond_str = STRONG }
3649 #---------------------------------------------------------------
3650 # Additional hardwired NOBREAK rules
3651 #---------------------------------------------------------------
3653 # map1.t -- correct for a quirk in perl
3655 && $next_nonblank_type eq 'i'
3656 && $last_nonblank_type eq 'k'
3657 && $is_sort_map_grep{$last_nonblank_token} )
3659 # /^(sort|map|grep)$/ )
3661 $bond_str = NO_BREAK;
3664 # extrude.t: do not break before paren at:
3666 if ( $last_nonblank_type eq 'F' && $next_nonblank_token eq '(' ) {
3667 $bond_str = NO_BREAK;
3670 # in older version of perl, use strict can cause problems with
3671 # breaks before bare words following opening parens. For example,
3672 # this will fail under older versions if a break is made between
3673 # '(' and 'MAIL': use strict; open( MAIL, "a long filename or
3674 # command"); close MAIL;
3675 if ( $type eq '{' ) {
3677 if ( $token eq '(' && $next_nonblank_type eq 'w' ) {
3679 # but it's fine to break if the word is followed by a '=>'
3680 # or if it is obviously a sub call
3681 my $i_next_next_nonblank = $i_next_nonblank + 1;
3682 my $next_next_type = $types_to_go[$i_next_next_nonblank];
3683 if ( $next_next_type eq 'b'
3684 && $i_next_nonblank < $max_index_to_go )
3686 $i_next_next_nonblank++;
3687 $next_next_type = $types_to_go[$i_next_next_nonblank];
3690 # We'll check for an old breakpoint and keep a leading
3691 # bareword if it was that way in the input file.
3692 # Presumably it was ok that way. For example, the
3693 # following would remain unchanged:
3696 # January, February, March, April,
3697 # May, June, July, August,
3698 # September, October, November, December,
3701 # This should be sufficient:
3703 !$old_breakpoint_to_go[$i]
3704 && ( $next_next_type eq ','
3705 || $next_next_type eq '}' )
3708 $bond_str = NO_BREAK;
3713 # Do not break between a possible filehandle and a ? or / and do
3714 # not introduce a break after it if there is no blank
3716 elsif ( $type eq 'Z' ) {
3721 # if there is no blank and we do not want one. Examples:
3722 # print $x++ # do not break after $x
3723 # print HTML"HELLO" # break ok after HTML
3726 && defined( $want_left_space{$next_type} )
3727 && $want_left_space{$next_type} == WS_NO
3730 # or we might be followed by the start of a quote,
3731 # and this is not an existing breakpoint; fixes c039.
3732 || !$old_breakpoint_to_go[$i]
3733 && substr( $next_nonblank_token, 0, 1 ) eq '/'
3737 $bond_str = NO_BREAK;
3741 # Breaking before a ? before a quote can cause trouble if
3742 # they are not separated by a blank.
3743 # Example: a syntax error occurs if you break before the ? here
3744 # my$logic=join$all?' && ':' || ',@regexps;
3745 # From: Professional_Perl_Programming_Code/multifind.pl
3746 if ( $next_nonblank_type eq '?' ) {
3747 $bond_str = NO_BREAK
3748 if ( $types_to_go[ $i_next_nonblank + 1 ] eq 'Q' );
3751 # Breaking before a . followed by a number
3752 # can cause trouble if there is no intervening space
3753 # Example: a syntax error occurs if you break before the .2 here
3754 # $str .= pack($endian.2, ensurrogate($ord));
3755 # From: perl58/Unicode.pm
3756 elsif ( $next_nonblank_type eq '.' ) {
3757 $bond_str = NO_BREAK
3758 if ( $types_to_go[ $i_next_nonblank + 1 ] eq 'n' );
3762 elsif ( $type eq 'w' ) {
3763 $bond_str = NO_BREAK
3764 if ( !$old_breakpoint_to_go[$i]
3765 && substr( $next_nonblank_token, 0, 1 ) eq '/' );
3768 my $bond_str_2 = $bond_str;
3770 #---------------------------------------------------------------
3771 # End of hardwired rules
3772 #---------------------------------------------------------------
3774 #---------------------------------------------------------------
3775 # Bond Strength Section 3:
3776 # Apply table rules. These have priority over the above
3778 #---------------------------------------------------------------
3780 my $tabulated_bond_str;
3782 my $rtype = $next_nonblank_type;
3783 if ( $seqno && $is_container_token{$token} ) {
3784 $ltype = $type . $token;
3787 if ( $next_nonblank_seqno
3788 && $is_container_token{$next_nonblank_token} )
3790 $rtype = $next_nonblank_type . $next_nonblank_token;
3793 # apply binary rules which apply regardless of space between tokens
3794 if ( $binary_bond_strength{$ltype}{$rtype} ) {
3795 $bond_str = $binary_bond_strength{$ltype}{$rtype};
3796 $tabulated_bond_str = $bond_str;
3799 # apply binary rules which apply only if no space between tokens
3800 if ( $binary_bond_strength_nospace{$ltype}{$next_type} ) {
3801 $bond_str = $binary_bond_strength{$ltype}{$next_type};
3802 $tabulated_bond_str = $bond_str;
3805 if ( $nobreak_rhs{$ltype} || $nobreak_lhs{$rtype} ) {
3806 $bond_str = NO_BREAK;
3807 $tabulated_bond_str = $bond_str;
3809 my $bond_str_3 = $bond_str;
3811 # If the hardwired rules conflict with the tabulated bond
3812 # strength then there is an inconsistency that should be fixed
3814 && $tabulated_bond_str
3816 && $bond_str_1 != $bond_str_2
3817 && $bond_str_2 != $tabulated_bond_str
3820 "BOND_TABLES: ltype=$ltype rtype=$rtype $bond_str_1->$bond_str_2->$bond_str_3\n";
3823 #-----------------------------------------------------------------
3824 # Bond Strength Section 4:
3825 # Modify strengths of certain tokens which often occur in sequence
3826 # by adding a small bias to each one in turn so that the breaks
3827 # occur from left to right.
3829 # Note that we only changing strengths by small amounts here,
3830 # and usually increasing, so we should not be altering any NO_BREAKs.
3831 # Other routines which check for NO_BREAKs will use a tolerance
3832 # of one to avoid any problem.
3833 #-----------------------------------------------------------------
3835 # The bias tables use special keys:
3836 # $type - if not keyword
3837 # $token - if keyword, but map some keywords together
3839 $type eq 'k' ? $token eq 'err' ? 'or' : $token : $type;
3841 $next_nonblank_type eq 'k'
3842 ? $next_nonblank_token eq 'err'
3844 : $next_nonblank_token
3845 : $next_nonblank_type;
3847 if ( $type eq ',' ) {
3849 # add any bias set by sub scan_list at old comma break points
3850 $bond_str += $bond_strength_to_go[$i];
3855 elsif ( defined( $bias{$left_key} ) ) {
3856 if ( !$want_break_before{$left_key} ) {
3857 $bias{$left_key} += $delta_bias;
3858 $bond_str += $bias{$left_key};
3863 if ( defined( $bias{$right_key} ) ) {
3864 if ( $want_break_before{$right_key} ) {
3866 # for leading '.' align all but 'short' quotes; the idea
3867 # is to not place something like "\n" on a single line.
3868 if ( $right_key eq '.' ) {
3870 $last_nonblank_type eq '.'
3871 && ( $token_length <=
3872 $rOpts_short_concatenation_item_length )
3873 && ( !$is_closing_token{$token} )
3876 $bias{$right_key} += $delta_bias;
3880 $bias{$right_key} += $delta_bias;
3882 $bond_str += $bias{$right_key};
3885 my $bond_str_4 = $bond_str;
3887 #---------------------------------------------------------------
3888 # Bond Strength Section 5:
3889 # Fifth Approximation.
3890 # Take nesting depth into account by adding the nesting depth
3891 # to the bond strength.
3892 #---------------------------------------------------------------
3895 if ( defined($bond_str) && !$nobreak_to_go[$i] ) {
3896 if ( $total_nesting_depth > 0 ) {
3897 $strength = $bond_str + $total_nesting_depth;
3900 $strength = $bond_str;
3904 $strength = NO_BREAK;
3906 # For critical code such as lines with here targets we must
3907 # be absolutely sure that we do not allow a break. So for
3908 # these the nobreak flag exceeds 1 as a signal. Otherwise we
3909 # can run into trouble when small tolerances are added.
3910 $strength += 1 if ( $nobreak_to_go[$i] > 1 );
3913 #---------------------------------------------------------------
3914 # Bond Strength Section 6:
3915 # Sixth Approximation. Welds.
3916 #---------------------------------------------------------------
3918 # Do not allow a break within welds
3919 if ( $total_weld_count && $seqno ) {
3920 my $KK = $K_to_go[$i];
3921 if ( $rK_weld_right->{$KK} ) {
3922 $strength = NO_BREAK;
3925 # But encourage breaking after opening welded tokens
3926 elsif ($rK_weld_left->{$KK}
3927 && $is_opening_token{$token} )
3933 # always break after side comment
3934 if ( $type eq '#' ) { $strength = 0 }
3936 $bond_strength_to_go[$i] = $strength;
3938 # Fix for case c001: be sure NO_BREAK's are enforced by later
3939 # routines, except at a '?' because '?' as quote delimiter is
3941 if ( $strength >= NO_BREAK && $next_nonblank_type ne '?' ) {
3942 $nobreak_to_go[$i] ||= 1;
3946 my $str = substr( $token, 0, 15 );
3947 $str .= ' ' x ( 16 - length($str) );
3949 "BOND: i=$i $str $type $next_nonblank_type depth=$total_nesting_depth strength=$bond_str_1 -> $bond_str_2 -> $bond_str_3 -> $bond_str_4 $bond_str -> $strength \n";
3953 } ## end sub set_bond_strengths
3954 } ## end closure set_bond_strengths
3958 # See if a pattern will compile. We have to use a string eval here,
3959 # but it should be safe because the pattern has been constructed
3962 eval "'##'=~/$pattern/";
3966 { ## begin closure prepare_cuddled_block_types
3970 # Add keywords here which really should not be cuddled
3972 my @q = qw(if unless for foreach while);
3973 @no_cuddle{@q} = (1) x scalar(@q);
3976 sub prepare_cuddled_block_types {
3978 # the cuddled-else style, if used, is controlled by a hash that
3981 # Include keywords here which should not be cuddled
3983 my $cuddled_string = "";
3984 if ( $rOpts->{'cuddled-else'} ) {
3987 $cuddled_string = 'elsif else continue catch finally'
3988 unless ( $rOpts->{'cuddled-block-list-exclusive'} );
3990 # This is the old equivalent but more complex version
3991 # $cuddled_string = 'if-elsif-else unless-elsif-else -continue ';
3993 # Add users other blocks to be cuddled
3994 my $cuddled_block_list = $rOpts->{'cuddled-block-list'};
3995 if ($cuddled_block_list) {
3996 $cuddled_string .= " " . $cuddled_block_list;
4001 # If we have a cuddled string of the form
4002 # 'try-catch-finally'
4004 # we want to prepare a hash of the form
4006 # $rcuddled_block_types = {
4013 # use -dcbl to dump this hash
4015 # Multiple such strings are input as a space or comma separated list
4017 # If we get two lists with the same leading type, such as
4018 # -cbl = "-try-catch-finally -try-catch-otherwise"
4019 # then they will get merged as follows:
4020 # $rcuddled_block_types = {
4027 # This will allow either type of chain to be followed.
4029 $cuddled_string =~ s/,/ /g; # allow space or comma separated lists
4030 my @cuddled_strings = split /\s+/, $cuddled_string;
4032 $rcuddled_block_types = {};
4034 # process each dash-separated string...
4035 my $string_count = 0;
4036 foreach my $string (@cuddled_strings) {
4037 next unless $string;
4038 my @words = split /-+/, $string; # allow multiple dashes
4040 # we could look for and report possible errors here...
4041 next unless ( @words > 0 );
4043 # allow either '-continue' or *-continue' for arbitrary starting type
4046 # a single word without dashes is a secondary block type
4048 $start = shift @words;
4051 # always make an entry for the leading word. If none follow, this
4052 # will still prevent a wildcard from matching this word.
4053 if ( !defined( $rcuddled_block_types->{$start} ) ) {
4054 $rcuddled_block_types->{$start} = {};
4057 # The count gives the original word order in case we ever want it.
4060 foreach my $word (@words) {
4062 if ( $no_cuddle{$word} ) {
4064 "## Ignoring keyword '$word' in -cbl; does not seem right\n"
4069 $rcuddled_block_types->{$start}->{$word} =
4070 1; #"$string_count.$word_count";
4072 # git#9: Remove this word from the list of desired one-line
4074 $want_one_line_block{$word} = 0;
4079 } ## begin closure prepare_cuddled_block_types
4081 sub dump_cuddled_block_list {
4084 # ORIGINAL METHOD: Here is the format of the cuddled block type hash
4085 # which controls this routine
4086 # my $rcuddled_block_types = {
4097 # SIMPLFIED METHOD: the simplified method uses a wildcard for
4098 # the starting block type and puts all cuddled blocks together:
4099 # my $rcuddled_block_types = {
4108 # Both methods work, but the simplified method has proven to be adequate and
4111 my $cuddled_string = $rOpts->{'cuddled-block-list'};
4112 $cuddled_string = '' unless $cuddled_string;
4115 $flags .= "-ce" if ( $rOpts->{'cuddled-else'} );
4116 $flags .= " -cbl='$cuddled_string'";
4118 unless ( $rOpts->{'cuddled-else'} ) {
4119 $flags .= "\nNote: You must specify -ce to generate a cuddled hash";
4123 ------------------------------------------------------------------------
4124 Hash of cuddled block types prepared for a run with these parameters:
4126 ------------------------------------------------------------------------
4130 $fh->print( Dumper($rcuddled_block_types) );
4133 ------------------------------------------------------------------------
4138 sub make_static_block_comment_pattern {
4140 # create the pattern used to identify static block comments
4141 $static_block_comment_pattern = '^\s*##';
4143 # allow the user to change it
4144 if ( $rOpts->{'static-block-comment-prefix'} ) {
4145 my $prefix = $rOpts->{'static-block-comment-prefix'};
4146 $prefix =~ s/^\s*//;
4147 my $pattern = $prefix;
4149 # user may give leading caret to force matching left comments only
4150 if ( $prefix !~ /^\^#/ ) {
4151 if ( $prefix !~ /^#/ ) {
4153 "ERROR: the -sbcp prefix is '$prefix' but must begin with '#' or '^#'\n"
4156 $pattern = '^\s*' . $prefix;
4158 if ( bad_pattern($pattern) ) {
4160 "ERROR: the -sbc prefix '$prefix' causes the invalid regex '$pattern'\n"
4163 $static_block_comment_pattern = $pattern;
4168 sub make_format_skipping_pattern {
4169 my ( $opt_name, $default ) = @_;
4170 my $param = $rOpts->{$opt_name};
4171 unless ($param) { $param = $default }
4173 if ( $param !~ /^#/ ) {
4174 Die("ERROR: the $opt_name parameter '$param' must begin with '#'\n");
4176 my $pattern = '^' . $param . '\s';
4177 if ( bad_pattern($pattern) ) {
4179 "ERROR: the $opt_name parameter '$param' causes the invalid regex '$pattern'\n"
4185 sub make_non_indenting_brace_pattern {
4187 # Create the pattern used to identify static side comments.
4188 # Note that we are ending the pattern in a \s. This will allow
4189 # the pattern to be followed by a space and some text, or a newline.
4190 # The pattern is used in sub 'non_indenting_braces'
4191 $non_indenting_brace_pattern = '^#<<<\s';
4193 # allow the user to change it
4194 if ( $rOpts->{'non-indenting-brace-prefix'} ) {
4195 my $prefix = $rOpts->{'non-indenting-brace-prefix'};
4196 $prefix =~ s/^\s*//;
4197 if ( $prefix !~ /^#/ ) {
4198 Die("ERROR: the -nibp parameter '$prefix' must begin with '#'\n");
4200 my $pattern = '^' . $prefix . '\s';
4201 if ( bad_pattern($pattern) ) {
4203 "ERROR: the -nibp prefix '$prefix' causes the invalid regex '$pattern'\n"
4206 $non_indenting_brace_pattern = $pattern;
4211 sub make_closing_side_comment_list_pattern {
4213 # turn any input list into a regex for recognizing selected block types
4214 $closing_side_comment_list_pattern = '^\w+';
4215 if ( defined( $rOpts->{'closing-side-comment-list'} )
4216 && $rOpts->{'closing-side-comment-list'} )
4218 $closing_side_comment_list_pattern =
4219 make_block_pattern( '-cscl', $rOpts->{'closing-side-comment-list'} );
4224 sub make_sub_matching_pattern {
4226 # Patterns for standardizing matches to block types for regular subs and
4227 # anonymous subs. Examples
4228 # 'sub process' is a named sub
4229 # 'sub ::m' is a named sub
4230 # 'sub' is an anonymous sub
4231 # 'sub:' is a label, not a sub
4232 # 'substr' is a keyword
4233 $SUB_PATTERN = '^sub\s+(::|\w)'; # match normal sub
4234 $ASUB_PATTERN = '^sub$'; # match anonymous sub
4235 $ANYSUB_PATTERN = '^sub\b'; # match either type of sub
4237 # Note (see also RT #133130): These patterns are used by
4238 # sub make_block_pattern, which is used for making most patterns.
4239 # So this sub needs to be called before other pattern-making routines.
4241 if ( $rOpts->{'sub-alias-list'} ) {
4243 # Note that any 'sub-alias-list' has been preprocessed to
4244 # be a trimmed, space-separated list which includes 'sub'
4245 # for example, it might be 'sub method fun'
4246 my $sub_alias_list = $rOpts->{'sub-alias-list'};
4247 $sub_alias_list =~ s/\s+/\|/g;
4248 $SUB_PATTERN =~ s/sub/\($sub_alias_list\)/;
4249 $ASUB_PATTERN =~ s/sub/\($sub_alias_list\)/;
4250 $ANYSUB_PATTERN =~ s/sub/\($sub_alias_list\)/;
4255 sub make_bli_pattern {
4257 # default list of block types for which -bli would apply
4258 my $bli_list_string = 'if else elsif unless while for foreach do : sub';
4260 if ( defined( $rOpts->{'brace-left-and-indent-list'} )
4261 && $rOpts->{'brace-left-and-indent-list'} )
4263 $bli_list_string = $rOpts->{'brace-left-and-indent-list'};
4266 $bli_pattern = make_block_pattern( '-blil', $bli_list_string );
4270 sub make_keyword_group_list_pattern {
4272 # turn any input list into a regex for recognizing selected block types.
4273 # Here are the defaults:
4274 $keyword_group_list_pattern = '^(our|local|my|use|require|)$';
4275 $keyword_group_list_comment_pattern = '';
4276 if ( defined( $rOpts->{'keyword-group-blanks-list'} )
4277 && $rOpts->{'keyword-group-blanks-list'} )
4279 my @words = split /\s+/, $rOpts->{'keyword-group-blanks-list'};
4282 foreach my $word (@words) {
4283 if ( $word =~ /^(BC|SBC)$/ ) {
4284 push @comment_list, $word;
4285 if ( $word eq 'SBC' ) { push @comment_list, 'SBCX' }
4288 push @keyword_list, $word;
4291 $keyword_group_list_pattern =
4292 make_block_pattern( '-kgbl', $rOpts->{'keyword-group-blanks-list'} );
4293 $keyword_group_list_comment_pattern =
4294 make_block_pattern( '-kgbl', join( ' ', @comment_list ) );
4299 sub make_block_brace_vertical_tightness_pattern {
4301 # turn any input list into a regex for recognizing selected block types
4302 $block_brace_vertical_tightness_pattern =
4303 '^((if|else|elsif|unless|while|for|foreach|do|\w+:)$|sub)';
4304 if ( defined( $rOpts->{'block-brace-vertical-tightness-list'} )
4305 && $rOpts->{'block-brace-vertical-tightness-list'} )
4307 $block_brace_vertical_tightness_pattern =
4308 make_block_pattern( '-bbvtl',
4309 $rOpts->{'block-brace-vertical-tightness-list'} );
4314 sub make_blank_line_pattern {
4316 $blank_lines_before_closing_block_pattern = $SUB_PATTERN;
4317 my $key = 'blank-lines-before-closing-block-list';
4318 if ( defined( $rOpts->{$key} ) && $rOpts->{$key} ) {
4319 $blank_lines_before_closing_block_pattern =
4320 make_block_pattern( '-blbcl', $rOpts->{$key} );
4323 $blank_lines_after_opening_block_pattern = $SUB_PATTERN;
4324 $key = 'blank-lines-after-opening-block-list';
4325 if ( defined( $rOpts->{$key} ) && $rOpts->{$key} ) {
4326 $blank_lines_after_opening_block_pattern =
4327 make_block_pattern( '-blaol', $rOpts->{$key} );
4332 sub make_block_pattern {
4334 # given a string of block-type keywords, return a regex to match them
4335 # The only tricky part is that labels are indicated with a single ':'
4336 # and the 'sub' token text may have additional text after it (name of
4341 # input string: "if else elsif unless while for foreach do : sub";
4342 # pattern: '^((if|else|elsif|unless|while|for|foreach|do|\w+:)$|sub)';
4346 # To distinguish between anonymous subs and named subs, use 'sub' to
4347 # indicate a named sub, and 'asub' to indicate an anonymous sub
4349 my ( $abbrev, $string ) = @_;
4350 my @list = split_words($string);
4354 if ( $i eq '*' ) { my $pattern = '^.*'; return $pattern }
4357 if ( $i eq 'sub' ) {
4359 elsif ( $i eq 'asub' ) {
4361 elsif ( $i eq ';' ) {
4364 elsif ( $i eq '{' ) {
4367 elsif ( $i eq ':' ) {
4368 push @words, '\w+:';
4370 elsif ( $i =~ /^\w/ ) {
4374 Warn("unrecognized block type $i after $abbrev, ignoring\n");
4377 my $pattern = '(' . join( '|', @words ) . ')$';
4378 my $sub_patterns = "";
4379 if ( $seen{'sub'} ) {
4380 $sub_patterns .= '|' . $SUB_PATTERN;
4382 if ( $seen{'asub'} ) {
4383 $sub_patterns .= '|' . $ASUB_PATTERN;
4385 if ($sub_patterns) {
4386 $pattern = '(' . $pattern . $sub_patterns . ')';
4388 $pattern = '^' . $pattern;
4392 sub make_static_side_comment_pattern {
4394 # create the pattern used to identify static side comments
4395 $static_side_comment_pattern = '^##';
4397 # allow the user to change it
4398 if ( $rOpts->{'static-side-comment-prefix'} ) {
4399 my $prefix = $rOpts->{'static-side-comment-prefix'};
4400 $prefix =~ s/^\s*//;
4401 my $pattern = '^' . $prefix;
4402 if ( bad_pattern($pattern) ) {
4404 "ERROR: the -sscp prefix '$prefix' causes the invalid regex '$pattern'\n"
4407 $static_side_comment_pattern = $pattern;
4412 sub make_closing_side_comment_prefix {
4414 # Be sure we have a valid closing side comment prefix
4415 my $csc_prefix = $rOpts->{'closing-side-comment-prefix'};
4416 my $csc_prefix_pattern;
4417 if ( !defined($csc_prefix) ) {
4418 $csc_prefix = '## end';
4419 $csc_prefix_pattern = '^##\s+end';
4422 my $test_csc_prefix = $csc_prefix;
4423 if ( $test_csc_prefix !~ /^#/ ) {
4424 $test_csc_prefix = '#' . $test_csc_prefix;
4427 # make a regex to recognize the prefix
4428 my $test_csc_prefix_pattern = $test_csc_prefix;
4430 # escape any special characters
4431 $test_csc_prefix_pattern =~ s/([^#\s\w])/\\$1/g;
4433 $test_csc_prefix_pattern = '^' . $test_csc_prefix_pattern;
4435 # allow exact number of intermediate spaces to vary
4436 $test_csc_prefix_pattern =~ s/\s+/\\s\+/g;
4438 # make sure we have a good pattern
4439 # if we fail this we probably have an error in escaping
4442 if ( bad_pattern($test_csc_prefix_pattern) ) {
4444 # shouldn't happen..must have screwed up escaping, above
4445 report_definite_bug();
4447 "Program Error: the -cscp prefix '$csc_prefix' caused the invalid regex '$csc_prefix_pattern'\n"
4450 # just warn and keep going with defaults
4451 Warn("Please consider using a simpler -cscp prefix\n");
4452 Warn("Using default -cscp instead; please check output\n");
4455 $csc_prefix = $test_csc_prefix;
4456 $csc_prefix_pattern = $test_csc_prefix_pattern;
4459 $rOpts->{'closing-side-comment-prefix'} = $csc_prefix;
4460 $closing_side_comment_prefix_pattern = $csc_prefix_pattern;
4464 ##################################################
4465 # CODE SECTION 4: receive lines from the tokenizer
4466 ##################################################
4468 { ## begin closure write_line
4470 my $Last_line_had_side_comment;
4471 my $In_format_skipping_section;
4472 my $Saw_VERSION_in_this_file;
4474 sub initialize_write_line {
4476 $Last_line_had_side_comment = 0;
4477 $In_format_skipping_section = 0;
4478 $Saw_VERSION_in_this_file = 0;
4485 # This routine originally received lines of code and immediately processed
4486 # them. That was efficient when memory was limited, but now it just saves
4487 # the lines it receives. They get processed all together after the last
4490 # As tokenized lines are received they are converted to the format needed
4491 # for the final formatting.
4492 my ( $self, $line_of_tokens_old ) = @_;
4493 my $rLL = $self->[_rLL_];
4494 my $Klimit = $self->[_Klimit_];
4495 my $rlines_new = $self->[_rlines_];
4496 my $maximum_level = $self->[_maximum_level_];
4499 my $line_of_tokens = {};
4504 _guessed_indentation_level
4510 _square_bracket_depth
4515 $line_of_tokens->{$key} = $line_of_tokens_old->{$key};
4518 # Data needed by Logger
4519 $line_of_tokens->{_level_0} = 0;
4520 $line_of_tokens->{_ci_level_0} = 0;
4521 $line_of_tokens->{_nesting_blocks_0} = "";
4522 $line_of_tokens->{_nesting_tokens_0} = "";
4524 # Needed to avoid trimming quotes
4525 $line_of_tokens->{_ended_in_blank_token} = undef;
4527 my $line_type = $line_of_tokens_old->{_line_type};
4528 my $input_line_no = $line_of_tokens_old->{_line_number};
4532 # Handle line of non-code
4533 if ( $line_type ne 'CODE' ) {
4534 $tee_output ||= $rOpts_tee_pod
4535 && substr( $line_type, 0, 3 ) eq 'POD';
4538 # Handle line of code
4541 my $rtokens = $line_of_tokens_old->{_rtokens};
4542 my $rtoken_type = $line_of_tokens_old->{_rtoken_type};
4543 my $rblock_type = $line_of_tokens_old->{_rblock_type};
4544 my $rcontainer_type = $line_of_tokens_old->{_rcontainer_type};
4545 my $rcontainer_environment =
4546 $line_of_tokens_old->{_rcontainer_environment};
4547 my $rtype_sequence = $line_of_tokens_old->{_rtype_sequence};
4548 my $rlevels = $line_of_tokens_old->{_rlevels};
4549 my $rslevels = $line_of_tokens_old->{_rslevels};
4550 my $rci_levels = $line_of_tokens_old->{_rci_levels};
4551 my $rnesting_blocks = $line_of_tokens_old->{_rnesting_blocks};
4552 my $rnesting_tokens = $line_of_tokens_old->{_rnesting_tokens};
4554 my $jmax = @{$rtokens} - 1;
4556 $Kfirst = defined($Klimit) ? $Klimit + 1 : 0;
4557 foreach my $j ( 0 .. $jmax ) {
4559 # Clip negative nesting depths to zero to avoid problems.
4560 # Negative values can occur in files with unbalanced containers
4561 my $slevel = $rslevels->[$j];
4562 if ( $slevel < 0 ) { $slevel = 0 }
4564 if ( $rlevels->[$j] > $maximum_level ) {
4565 $maximum_level = $rlevels->[$j];
4568 # But do not clip the 'level' variable yet. We will do this
4569 # later, in sub 'store_token_to_go'. The reason is that in
4570 # files with level errors, the logic in 'weld_cuddled_else'
4571 # uses a stack logic that will give bad welds if we clip
4573 ## if ( $rlevels->[$j] < 0 ) { $rlevels->[$j] = 0 }
4577 _TOKEN_, _TYPE_, _BLOCK_TYPE_,
4578 _TYPE_SEQUENCE_, _LEVEL_, _SLEVEL_,
4579 _CI_LEVEL_, _LINE_INDEX_,
4582 $rtokens->[$j], $rtoken_type->[$j],
4583 $rblock_type->[$j], $rtype_sequence->[$j],
4584 $rlevels->[$j], $slevel,
4585 $rci_levels->[$j], $input_line_no - 1,
4587 push @{$rLL}, \@tokary;
4588 } ## end foreach my $j ( 0 .. $jmax )
4590 $Klimit = @{$rLL} - 1;
4592 # Need to remember if we can trim the input line
4593 $line_of_tokens->{_ended_in_blank_token} =
4594 $rtoken_type->[$jmax] eq 'b';
4596 $line_of_tokens->{_level_0} = $rlevels->[0];
4597 $line_of_tokens->{_ci_level_0} = $rci_levels->[0];
4598 $line_of_tokens->{_nesting_blocks_0} = $rnesting_blocks->[0];
4599 $line_of_tokens->{_nesting_tokens_0} = $rnesting_tokens->[0];
4600 } ## end if ( $jmax >= 0 )
4603 $self->get_CODE_type( $line_of_tokens, $Kfirst, $Klimit,
4607 $rOpts_tee_block_comments
4609 && $rLL->[$Kfirst]->[_TYPE_] eq '#';
4612 $rOpts_tee_side_comments
4614 && $Klimit > $Kfirst
4615 && $rLL->[$Klimit]->[_TYPE_] eq '#';
4617 # Handle any requested side comment deletions. It is easier to get
4618 # this done here rather than farther down the pipeline because IO
4619 # lines take a different route, and because lines with deleted HSC
4620 # become BL lines. An since we are deleting now, we have to also
4621 # handle any tee- requests before the side comments vanish.
4622 my $delete_side_comment =
4623 $rOpts_delete_side_comments
4625 && $rLL->[$Klimit]->[_TYPE_] eq '#'
4626 && ( $Klimit > $Kfirst || $CODE_type eq 'HSC' )
4628 || $CODE_type eq 'HSC'
4629 || $CODE_type eq 'IO'
4630 || $CODE_type eq 'NIN' );
4633 $rOpts_delete_closing_side_comments
4634 && !$delete_side_comment
4636 && $Klimit > $Kfirst
4637 && $rLL->[$Klimit]->[_TYPE_] eq '#'
4639 || $CODE_type eq 'HSC'
4640 || $CODE_type eq 'IO'
4641 || $CODE_type eq 'NIN' )
4644 my $token = $rLL->[$Klimit]->[_TOKEN_];
4645 my $K_m = $Klimit - 1;
4646 my $type_m = $rLL->[$K_m]->[_TYPE_];
4647 if ( $type_m eq 'b' && $K_m > $Kfirst ) { $K_m-- }
4648 my $last_nonblank_block_type = $rLL->[$K_m]->[_BLOCK_TYPE_];
4649 if ( $token =~ /$closing_side_comment_prefix_pattern/
4650 && $last_nonblank_block_type =~
4651 /$closing_side_comment_list_pattern/ )
4653 $delete_side_comment = 1;
4655 } ## end if ( $rOpts_delete_closing_side_comments...)
4657 if ($delete_side_comment) {
4660 if ( $Klimit > $Kfirst
4661 && $rLL->[$Klimit]->[_TYPE_] eq 'b' )
4667 # The -io option outputs the line text, so we have to update
4668 # the line text so that the comment does not reappear.
4669 if ( $CODE_type eq 'IO' ) {
4671 foreach my $KK ( $Kfirst .. $Klimit ) {
4672 $line .= $rLL->[$KK]->[_TOKEN_];
4674 $line_of_tokens->{_line_text} = $line . "\n";
4677 # If we delete a hanging side comment the line becomes blank.
4678 if ( $CODE_type eq 'HSC' ) { $CODE_type = 'BL' }
4681 } ## end if ( $line_type eq 'CODE')
4683 # Finish storing line variables
4685 my $fh_tee = $self->[_fh_tee_];
4686 my $line_text = $line_of_tokens_old->{_line_text};
4687 $fh_tee->print($line_text) if ($fh_tee);
4690 $line_of_tokens->{_rK_range} = [ $Kfirst, $Klimit ];
4691 $line_of_tokens->{_code_type} = $CODE_type;
4692 $self->[_Klimit_] = $Klimit;
4693 $self->[_maximum_level_] = $maximum_level;
4695 push @{$rlines_new}, $line_of_tokens;
4700 my ( $self, $line_of_tokens, $Kfirst, $Klast, $input_line_no ) = @_;
4702 # We are looking at a line of code and setting a flag to
4703 # describe any special processing that it requires
4705 # Possible CODE_types
4706 # 'VB' = Verbatim - line goes out verbatim (a quote)
4707 # 'FS' = Format Skipping - line goes out verbatim
4709 # 'HSC' = Hanging Side Comment - fix this hanging side comment
4710 # 'SBCX'= Static Block Comment Without Leading Space
4711 # 'SBC' = Static Block Comment
4712 # 'BC' = Block Comment - an ordinary full line comment
4713 # 'IO' = Indent Only - line goes out unchanged except for indentation
4714 # 'NIN' = No Internal Newlines - line does not get broken
4715 # 'VER' = VERSION statement
4716 # '' = ordinary line of code with no restructions
4718 my $rLL = $self->[_rLL_];
4721 my $input_line = $line_of_tokens->{_line_text};
4722 my $jmax = defined($Kfirst) ? $Klast - $Kfirst : -1;
4724 my $is_block_comment = 0;
4725 my $has_side_comment = 0;
4727 if ( $jmax >= 0 && $rLL->[$Klast]->[_TYPE_] eq '#' ) {
4728 if ( $jmax == 0 ) { $is_block_comment = 1; }
4729 else { $has_side_comment = 1 }
4732 # Write line verbatim if we are in a formatting skip section
4733 if ($In_format_skipping_section) {
4735 # Note: extra space appended to comment simplifies pattern matching
4736 if ( $is_block_comment
4737 && ( $rLL->[$Kfirst]->[_TOKEN_] . " " ) =~
4738 /$format_skipping_pattern_end/ )
4740 $In_format_skipping_section = 0;
4741 write_logfile_entry(
4742 "Line $input_line_no: Exiting format-skipping section\n");
4748 # Check for a continued quote..
4749 if ( $line_of_tokens->{_starting_in_quote} ) {
4751 # A line which is entirely a quote or pattern must go out
4752 # verbatim. Note: the \n is contained in $input_line.
4754 if ( ( $input_line =~ "\t" ) ) {
4755 my $input_line_number = $line_of_tokens->{_line_number};
4756 $self->note_embedded_tab($input_line_number);
4763 # See if we are entering a formatting skip section
4764 if ( $rOpts_format_skipping
4765 && $is_block_comment
4766 && ( $rLL->[$Kfirst]->[_TOKEN_] . " " ) =~
4767 /$format_skipping_pattern_begin/ )
4769 $In_format_skipping_section = 1;
4770 write_logfile_entry(
4771 "Line $input_line_no: Entering format-skipping section\n");
4776 # ignore trailing blank tokens (they will get deleted later)
4777 if ( $jmax > 0 && $rLL->[$Klast]->[_TYPE_] eq 'b' ) {
4787 # see if this is a static block comment (starts with ## by default)
4788 my $is_static_block_comment = 0;
4789 my $is_static_block_comment_without_leading_space = 0;
4790 if ( $is_block_comment
4791 && $rOpts->{'static-block-comments'}
4792 && $input_line =~ /$static_block_comment_pattern/ )
4794 $is_static_block_comment = 1;
4795 $is_static_block_comment_without_leading_space =
4796 substr( $input_line, 0, 1 ) eq '#';
4799 # Check for comments which are line directives
4800 # Treat exactly as static block comments without leading space
4801 # reference: perlsyn, near end, section Plain Old Comments (Not!)
4802 # example: '# line 42 "new_filename.plx"'
4805 && $input_line =~ /^\# \s*
4807 (?:\s("?)([^"]+)\2)? \s*
4811 $is_static_block_comment = 1;
4812 $is_static_block_comment_without_leading_space = 1;
4815 # look for hanging side comment
4818 && $Last_line_had_side_comment # last line had side comment
4819 && $input_line =~ /^\s/ # there is some leading space
4820 && !$is_static_block_comment # do not make static comment hanging
4821 && $rOpts->{'hanging-side-comments'} # user is allowing
4822 # hanging side comments
4826 $has_side_comment = 1;
4831 # Handle a block (full-line) comment..
4832 if ($is_block_comment) {
4834 if ($is_static_block_comment_without_leading_space) {
4835 $CODE_type = 'SBCX';
4838 elsif ($is_static_block_comment) {
4842 elsif ($Last_line_had_side_comment
4843 && !$rOpts_maximum_consecutive_blank_lines
4844 && $rLL->[$Kfirst]->[_LEVEL_] > 0 )
4846 # Emergency fix to keep a block comment from becoming a hanging
4847 # side comment. This fix is for the case that blank lines
4848 # cannot be inserted. There is related code in sub
4849 # 'process_line_of_CODE'
4850 $CODE_type = 'SBCX';
4859 # End of comments. Handle a line of normal code:
4861 if ($rOpts_indent_only) {
4866 if ( !$rOpts_add_newlines ) {
4871 # Patch needed for MakeMaker. Do not break a statement
4872 # in which $VERSION may be calculated. See MakeMaker.pm;
4873 # this is based on the coding in it.
4874 # The first line of a file that matches this will be eval'd:
4875 # /([\$*])(([\w\:\']*)\bVERSION)\b.*\=/
4877 # *VERSION = \'1.01';
4878 # ( $VERSION ) = '$Revision: 1.74 $ ' =~ /\$Revision:\s+([^\s]+)/;
4879 # We will pass such a line straight through without breaking
4880 # it unless -npvl is used.
4882 # Patch for problem reported in RT #81866, where files
4883 # had been flattened into a single line and couldn't be
4884 # tidied without -npvl. There are two parts to this patch:
4885 # First, it is not done for a really long line (80 tokens for now).
4886 # Second, we will only allow up to one semicolon
4887 # before the VERSION. We need to allow at least one semicolon
4888 # for statements like this:
4889 # require Exporter; our $VERSION = $Exporter::VERSION;
4890 # where both statements must be on a single line for MakeMaker
4892 my $is_VERSION_statement = 0;
4893 if ( !$Saw_VERSION_in_this_file
4896 /^[^;]*;?[^;]*([\$*])(([\w\:\']*)\bVERSION)\b.*\=/ )
4898 $Saw_VERSION_in_this_file = 1;
4899 write_logfile_entry("passing VERSION line; -npvl deactivates\n");
4901 # This code type has lower priority than others
4907 $Last_line_had_side_comment = $has_side_comment;
4911 } ## end closure write_line
4913 #############################################
4914 # CODE SECTION 5: Pre-process the entire file
4915 #############################################
4917 sub finish_formatting {
4919 my ( $self, $severe_error ) = @_;
4921 # The file has been tokenized and is ready to be formatted.
4922 # All of the relevant data is stored in $self, ready to go.
4924 # Check the maximum level. If it is extremely large we will
4925 # give up and output the file verbatim.
4926 my $maximum_level = $self->[_maximum_level_];
4927 my $maximum_table_index = $#maximum_line_length_at_level;
4928 if ( !$severe_error && $maximum_level > $maximum_table_index ) {
4929 $severe_error ||= 1;
4931 The maximum indentation level, $maximum_level, exceeds the builtin limit of $maximum_table_index.
4932 Something may be wrong; formatting will be skipped.
4936 # output file verbatim if severe error or no formatting requested
4937 if ( $severe_error || $rOpts->{notidy} ) {
4938 $self->dump_verbatim();
4943 # Update the 'save_logfile' flag based to include any tokenization errors.
4944 # We can save time by skipping logfile calls if it is not going to be saved.
4945 my $logger_object = $self->[_logger_object_];
4946 if ($logger_object) {
4947 $self->[_save_logfile_] = $logger_object->get_save_logfile();
4950 # Make a pass through all tokens, adding or deleting any whitespace as
4951 # required. Also make any other changes, such as adding semicolons.
4952 # All token changes must be made here so that the token data structure
4953 # remains fixed for the rest of this iteration.
4954 $self->respace_tokens();
4956 $self->find_multiline_qw();
4958 $self->keep_old_line_breaks();
4960 # Implement any welding needed for the -wn or -cb options
4961 $self->weld_containers();
4963 # Locate small nested blocks which should not be broken
4964 $self->mark_short_nested_blocks();
4966 $self->adjust_indentation_levels();
4968 $self->set_excluded_lp_containers();
4970 # Finishes formatting and write the result to the line sink.
4971 # Eventually this call should just change the 'rlines' data according to the
4972 # new line breaks and then return so that we can do an internal iteration
4973 # before continuing with the next stages of formatting.
4974 $self->process_all_lines();
4976 # A final routine to tie up any loose ends
4983 my $rlines = $self->[_rlines_];
4984 foreach my $line ( @{$rlines} ) {
4985 my $input_line = $line->{_line_text};
4986 $self->write_unindented_line($input_line);
4993 my %is_nonlist_keyword;
4994 my %is_nonlist_type;
4998 # added 'U' to fix cases b1125 b1126 b1127
5000 @{wU}{@q} = (1) x scalar(@q);
5002 @q = qw(w i q Q G C Z);
5003 @{wiq}{@q} = (1) x scalar(@q);
5005 # Parens following these keywords will not be marked as lists. Note that
5006 # 'for' is not included and is handled separately, by including 'f' in the
5007 # hash %is_counted_type, since it may or may not be a c-style for loop.
5008 @q = qw( if elsif unless and or );
5009 @is_nonlist_keyword{@q} = (1) x scalar(@q);
5011 # Parens following these types will not be marked as lists
5013 @is_nonlist_type{@q} = (1) x scalar(@q);
5017 sub respace_tokens {
5020 return if $rOpts->{'indent-only'};
5022 # This routine is called once per file to do as much formatting as possible
5023 # before new line breaks are set.
5025 # This routine makes all necessary and possible changes to the tokenization
5026 # after the initial tokenization of the file. This is a tedious routine,
5027 # but basically it consists of inserting and deleting whitespace between
5028 # nonblank tokens according to the selected parameters. In a few cases
5029 # non-space characters are added, deleted or modified.
5031 # The goal of this routine is to create a new token array which only needs
5032 # the definition of new line breaks and padding to complete formatting. In
5033 # a few cases we have to cheat a little to achieve this goal. In
5034 # particular, we may not know if a semicolon will be needed, because it
5035 # depends on how the line breaks go. To handle this, we include the
5036 # semicolon as a 'phantom' which can be displayed as normal or as an empty
5039 # Method: The old tokens are copied one-by-one, with changes, from the old
5040 # linear storage array $rLL to a new array $rLL_new.
5042 my $rLL = $self->[_rLL_];
5043 my $Klimit_old = $self->[_Klimit_];
5044 my $rlines = $self->[_rlines_];
5045 my $length_function = $self->[_length_function_];
5046 my $is_encoded_data = $self->[_is_encoded_data_];
5048 my $rLL_new = []; # This is the new array
5050 my $Ktoken_vars; # the old K value of $rtoken_vars
5051 my ( $Kfirst_old, $Klast_old ); # Range of old line
5052 my $Klast_old_code; # K of last token if side comment
5053 my $Kmax = @{$rLL} - 1;
5058 # Set the whitespace flags, which indicate the token spacing preference.
5059 my $rwhitespace_flags = $self->set_whitespace_flags();
5061 # we will be setting token lengths as we go
5062 my $cumulative_length = 0;
5065 my %K_old_opening_by_seqno = (); # Note: old K index
5067 my $depth_next_max = 0;
5069 my $K_closing_container = $self->[_K_closing_container_];
5070 my $K_closing_ternary = $self->[_K_closing_ternary_];
5071 my $K_opening_container = $self->[_K_opening_container_];
5072 my $K_opening_ternary = $self->[_K_opening_ternary_];
5073 my $rK_phantom_semicolons = $self->[_rK_phantom_semicolons_];
5074 my $rchildren_of_seqno = $self->[_rchildren_of_seqno_];
5075 my $rhas_broken_code_block = $self->[_rhas_broken_code_block_];
5076 my $rhas_broken_list = $self->[_rhas_broken_list_];
5077 my $rhas_broken_list_with_lec = $self->[_rhas_broken_list_with_lec_];
5078 my $rhas_code_block = $self->[_rhas_code_block_];
5079 my $rhas_list = $self->[_rhas_list_];
5080 my $rhas_ternary = $self->[_rhas_ternary_];
5081 my $ris_assigned_structure = $self->[_ris_assigned_structure_];
5082 my $ris_broken_container = $self->[_ris_broken_container_];
5083 my $ris_excluded_lp_container = $self->[_ris_excluded_lp_container_];
5084 my $ris_list_by_seqno = $self->[_ris_list_by_seqno_];
5085 my $ris_permanently_broken = $self->[_ris_permanently_broken_];
5086 my $rlec_count_by_seqno = $self->[_rlec_count_by_seqno_];
5087 my $roverride_cab3 = $self->[_roverride_cab3_];
5088 my $rparent_of_seqno = $self->[_rparent_of_seqno_];
5089 my $rtype_count_by_seqno = $self->[_rtype_count_by_seqno_];
5091 my $last_nonblank_type = ';';
5092 my $last_nonblank_token = ';';
5093 my $last_nonblank_block_type = '';
5094 my $nonblank_token_count = 0;
5095 my $last_nonblank_token_lx = 0;
5097 my %K_first_here_doc_by_seqno;
5099 my $set_permanently_broken = sub {
5101 while ( defined($seqno) ) {
5102 $ris_permanently_broken->{$seqno} = 1;
5103 $seqno = $rparent_of_seqno->{$seqno};
5107 my $store_token = sub {
5110 # This will be the index of this item in the new array
5111 my $KK_new = @{$rLL_new};
5113 my $type = $item->[_TYPE_];
5114 my $is_blank = $type eq 'b';
5116 # Do not output consecutive blanks. This should not happen, but
5117 # is worth checking because later routines make this assumption.
5118 if ( $is_blank && $KK_new && $rLL_new->[-1]->[_TYPE_] eq 'b' ) {
5122 # check for a sequenced item (i.e., container or ?/:)
5123 my $type_sequence = $item->[_TYPE_SEQUENCE_];
5124 if ($type_sequence) {
5126 my $token = $item->[_TOKEN_];
5127 if ( $is_opening_token{$token} ) {
5129 $K_opening_container->{$type_sequence} = $KK_new;
5131 # Fix for case b1100: Count a line ending in ', [' as having
5132 # a line-ending comma. Otherwise, these commas can be hidden
5133 # with something like --opening-square-bracket-right
5134 if ( $last_nonblank_type eq ','
5135 && $Ktoken_vars == $Klast_old_code
5136 && $Ktoken_vars > $Kfirst_old )
5138 $rlec_count_by_seqno->{$type_sequence}++;
5141 if ( $last_nonblank_type eq '='
5142 || $last_nonblank_type eq '=>' )
5144 $ris_assigned_structure->{$type_sequence} =
5145 $last_nonblank_type;
5148 my $seqno_parent = $seqno_stack{ $depth_next - 1 };
5149 $seqno_parent = SEQ_ROOT unless defined($seqno_parent);
5150 push @{ $rchildren_of_seqno->{$seqno_parent} }, $type_sequence;
5151 $rparent_of_seqno->{$type_sequence} = $seqno_parent;
5152 $seqno_stack{$depth_next} = $type_sequence;
5153 $K_old_opening_by_seqno{$type_sequence} = $Ktoken_vars;
5156 if ( $depth_next > $depth_next_max ) {
5157 $depth_next_max = $depth_next;
5160 elsif ( $is_closing_token{$token} ) {
5162 $K_closing_container->{$type_sequence} = $KK_new;
5164 # Do not include terminal commas in counts
5165 if ( $last_nonblank_type eq ','
5166 || $last_nonblank_type eq '=>' )
5168 my $seqno = $seqno_stack{ $depth_next - 1 };
5170 $rtype_count_by_seqno->{$seqno}->{$last_nonblank_type}
5173 if ( $Ktoken_vars == $Kfirst_old
5174 && $last_nonblank_type eq ','
5175 && $rlec_count_by_seqno->{$seqno} )
5177 $rlec_count_by_seqno->{$seqno}--;
5182 # Update the stack...
5187 # For ternary, note parent but do not include as child
5188 my $seqno_parent = $seqno_stack{ $depth_next - 1 };
5189 $seqno_parent = SEQ_ROOT unless defined($seqno_parent);
5190 $rparent_of_seqno->{$type_sequence} = $seqno_parent;
5192 # These are not yet used but could be useful
5193 if ( $token eq '?' ) {
5194 $K_opening_ternary->{$type_sequence} = $KK_new;
5196 elsif ( $token eq ':' ) {
5197 $K_closing_ternary->{$type_sequence} = $KK_new;
5201 # We really shouldn't arrive here, just being cautious:
5202 # The only sequenced types output by the tokenizer are the
5203 # opening & closing containers and the ternary types. Each
5204 # of those was checked above. So we would only get here
5205 # if the tokenizer has been changed to mark some other
5206 # tokens with sequence numbers.
5207 my $type = $item->[_TYPE_];
5209 "Unexpected token type with sequence number: type='$type', seqno='$type_sequence'"
5215 # Find the length of this token. Later it may be adjusted if phantom
5216 # or ignoring side comment lengths.
5219 ? $length_function->( $item->[_TOKEN_] )
5220 : length( $item->[_TOKEN_] );
5223 my $is_comment = $type eq '#';
5226 # trim comments if necessary
5227 if ( $item->[_TOKEN_] =~ s/\s+$// ) {
5228 $token_length = $length_function->( $item->[_TOKEN_] );
5231 # Mark length of side comments as just 1 if sc lengths are ignored
5232 if ( $rOpts_ignore_side_comment_lengths
5233 && ( !$CODE_type || $CODE_type eq 'HSC' ) )
5237 my $seqno = $seqno_stack{ $depth_next - 1 };
5238 if ( defined($seqno)
5239 && !$ris_permanently_broken->{$seqno} )
5241 $set_permanently_broken->($seqno);
5246 $item->[_TOKEN_LENGTH_] = $token_length;
5248 # and update the cumulative length
5249 $cumulative_length += $token_length;
5251 # Save the length sum to just AFTER this token
5252 $item->[_CUMULATIVE_LENGTH_] = $cumulative_length;
5254 if ( !$is_blank && !$is_comment ) {
5255 $last_nonblank_type = $type;
5256 $last_nonblank_token = $item->[_TOKEN_];
5257 $last_nonblank_block_type = $item->[_BLOCK_TYPE_];
5258 $last_nonblank_token_lx = $item->[_LINE_INDEX_];
5259 $nonblank_token_count++;
5261 # count selected types
5262 if ( $is_counted_type{$type} ) {
5263 my $seqno = $seqno_stack{ $depth_next - 1 };
5264 if ( defined($seqno) ) {
5265 $rtype_count_by_seqno->{$seqno}->{$type}++;
5267 # Count line-ending commas for -bbx
5268 if ( $type eq ',' && $Ktoken_vars == $Klast_old_code ) {
5269 $rlec_count_by_seqno->{$seqno}++;
5272 # Remember index of first here doc target
5273 if ( $type eq 'h' && !$K_first_here_doc_by_seqno{$seqno} ) {
5274 $K_first_here_doc_by_seqno{$seqno} = $KK_new;
5280 # For reference, here is how to get the parent sequence number.
5281 # This is not used because it is slower than finding it on the fly
5282 # in sub parent_seqno_by_K:
5284 # my $seqno_parent =
5285 # $type_sequence && $is_opening_token{$token}
5286 # ? $seqno_stack{ $depth_next - 2 }
5287 # : $seqno_stack{ $depth_next - 1 };
5288 # my $KK = @{$rLL_new};
5289 # $rseqno_of_parent_by_K->{$KK} = $seqno_parent;
5291 # and finally, add this item to the new array
5292 push @{$rLL_new}, $item;
5295 my $store_token_and_space = sub {
5296 my ( $item, $want_space ) = @_;
5298 # store a token with preceding space if requested and needed
5300 # First store the space
5303 && $rLL_new->[-1]->[_TYPE_] ne 'b'
5304 && $rOpts_add_whitespace )
5306 my $rcopy = copy_token_as_type( $item, 'b', ' ' );
5307 $rcopy->[_LINE_INDEX_] =
5308 $rLL_new->[-1]->[_LINE_INDEX_];
5310 # Patch 23-Jan-2021 to fix -lp blinkers:
5311 # The level and ci_level of newly created spaces should be the same
5312 # as the previous token. Otherwise the coding for the -lp option,
5313 # in sub set_leading_whitespace, can create a blinking state in
5316 $rLL_new->[-1]->[_LEVEL_];
5317 $rcopy->[_CI_LEVEL_] =
5318 $rLL_new->[-1]->[_CI_LEVEL_];
5320 $store_token->($rcopy);
5324 $store_token->($item);
5332 if ( $Kn <= $Kmax && $rLL->[$Kn]->[_TYPE_] eq 'b' ) { $Kn += 1 }
5334 while ( $Kn <= $Kmax && $rLL->[$Kn]->[_TYPE_] eq 'q' ) {
5338 if ( $Kn <= $Kmax && $rLL->[$Kn]->[_TYPE_] eq 'b' ) { $Kn += 1 }
5344 my $add_phantom_semicolon = sub {
5348 my $Kp = $self->K_previous_nonblank( undef, $rLL_new );
5349 return unless ( defined($Kp) );
5351 # we are only adding semicolons for certain block types
5352 my $block_type = $rLL->[$KK]->[_BLOCK_TYPE_];
5354 unless ( $ok_to_add_semicolon_for_block_type{$block_type}
5355 || $block_type =~ /^(sub|package)/
5356 || $block_type =~ /^\w+\:$/ );
5358 my $type_sequence = $rLL->[$KK]->[_TYPE_SEQUENCE_];
5360 my $previous_nonblank_type = $rLL_new->[$Kp]->[_TYPE_];
5361 my $previous_nonblank_token = $rLL_new->[$Kp]->[_TOKEN_];
5363 # Do not add a semicolon if...
5367 # it would follow a comment (and be isolated)
5368 $previous_nonblank_type eq '#'
5370 # it follows a code block ( because they are not always wanted
5371 # there and may add clutter)
5372 || $rLL_new->[$Kp]->[_BLOCK_TYPE_]
5374 # it would follow a label
5375 || $previous_nonblank_type eq 'J'
5377 # it would be inside a 'format' statement (and cause syntax error)
5378 || ( $previous_nonblank_type eq 'k'
5379 && $previous_nonblank_token =~ /format/ )
5383 # Do not add a semicolon if it would impede a weld with an immediately
5384 # following closing token...like this
5386 # ^--No semicolon can go here
5388 # look at the previous token... note use of the _NEW rLL array here,
5389 # but sequence numbers are invariant.
5390 my $seqno_inner = $rLL_new->[$Kp]->[_TYPE_SEQUENCE_];
5392 # If it is also a CLOSING token we have to look closer...
5395 && $is_closing_token{$previous_nonblank_token}
5397 # we only need to look if there is just one inner container..
5398 && defined( $rchildren_of_seqno->{$type_sequence} )
5399 && @{ $rchildren_of_seqno->{$type_sequence} } == 1
5403 # Go back and see if the corresponding two OPENING tokens are also
5404 # together. Note that we are using the OLD K indexing here:
5405 my $K_outer_opening = $K_old_opening_by_seqno{$type_sequence};
5406 if ( defined($K_outer_opening) ) {
5407 my $K_nxt = $self->K_next_nonblank($K_outer_opening);
5408 if ( defined($K_nxt) ) {
5409 my $seqno_nxt = $rLL->[$K_nxt]->[_TYPE_SEQUENCE_];
5411 # Is the next token after the outer opening the same as
5412 # our inner closing (i.e. same sequence number)?
5413 # If so, do not insert a semicolon here.
5414 return if ( $seqno_nxt && $seqno_nxt == $seqno_inner );
5419 # We will insert an empty semicolon here as a placeholder. Later, if
5420 # it becomes the last token on a line, we will bring it to life. The
5421 # advantage of doing this is that (1) we just have to check line
5422 # endings, and (2) the phantom semicolon has zero width and therefore
5423 # won't cause needless breaks of one-line blocks.
5425 if ( $rLL_new->[$Ktop]->[_TYPE_] eq 'b'
5426 && $want_left_space{';'} == WS_NO )
5429 # convert the blank into a semicolon..
5430 # be careful: we are working on the new stack top
5431 # on a token which has been stored.
5432 my $rcopy = copy_token_as_type( $rLL_new->[$Ktop], 'b', ' ' );
5434 # Convert the existing blank to:
5435 # a phantom semicolon for one_line_block option = 0 or 1
5436 # a real semicolon for one_line_block option = 2
5439 if ( $rOpts_one_line_block_semicolons == 2 ) {
5444 $rLL_new->[$Ktop]->[_TOKEN_] = $tok;
5445 $rLL_new->[$Ktop]->[_TOKEN_LENGTH_] = $len_tok;
5446 $rLL_new->[$Ktop]->[_TYPE_] = ';';
5447 $rLL_new->[$Ktop]->[_SLEVEL_] =
5448 $rLL->[$KK]->[_SLEVEL_];
5450 # Save list of new K indexes of phantom semicolons.
5451 # This will be needed if we want to undo them for iterations in
5453 push @{$rK_phantom_semicolons}, @{$rLL_new} - 1;
5455 # Then store a new blank
5456 $store_token->($rcopy);
5460 # insert a new token
5461 my $rcopy = copy_token_as_type( $rLL_new->[$Kp], ';', '' );
5462 $rcopy->[_SLEVEL_] = $rLL->[$KK]->[_SLEVEL_];
5463 $store_token->($rcopy);
5464 push @{$rK_phantom_semicolons}, @{$rLL_new} - 1;
5470 # Check that a quote looks okay
5471 # This sub works but needs to by sync'd with the log file output
5472 # before it can be used.
5473 my ( $KK, $Kfirst, $line_number ) = @_;
5474 my $token = $rLL->[$KK]->[_TOKEN_];
5475 $self->note_embedded_tab($line_number) if ( $token =~ "\t" );
5477 my $Kp = $self->K_previous_nonblank( undef, $rLL_new );
5478 return unless ( defined($Kp) );
5479 my $previous_nonblank_type = $rLL_new->[$Kp]->[_TYPE_];
5480 my $previous_nonblank_token = $rLL_new->[$Kp]->[_TOKEN_];
5482 my $previous_nonblank_type_2 = 'b';
5483 my $previous_nonblank_token_2 = "";
5484 my $Kpp = $self->K_previous_nonblank( $Kp, $rLL_new );
5485 if ( defined($Kpp) ) {
5486 $previous_nonblank_type_2 = $rLL_new->[$Kpp]->[_TYPE_];
5487 $previous_nonblank_token_2 = $rLL_new->[$Kpp]->[_TOKEN_];
5490 my $next_nonblank_token = "";
5492 if ( $Kn <= $Kmax && $rLL->[$Kn]->[_TYPE_] eq 'b' ) { $Kn += 1 }
5493 if ( $Kn <= $Kmax ) {
5494 $next_nonblank_token = $rLL->[$Kn]->[_TOKEN_];
5497 my $token_0 = $rLL->[$Kfirst]->[_TOKEN_];
5498 my $type_0 = $rLL->[$Kfirst]->[_TYPE_];
5500 # make note of something like '$var = s/xxx/yyy/;'
5501 # in case it should have been '$var =~ s/xxx/yyy/;'
5503 $token =~ /^(s|tr|y|m|\/)/
5504 && $previous_nonblank_token =~ /^(=|==|!=)$/
5506 # preceded by simple scalar
5507 && $previous_nonblank_type_2 eq 'i'
5508 && $previous_nonblank_token_2 =~ /^\$/
5510 # followed by some kind of termination
5511 # (but give complaint if we can not see far enough ahead)
5512 && $next_nonblank_token =~ /^[; \)\}]$/
5514 # scalar is not declared
5515 && !( $type_0 eq 'k' && $token_0 =~ /^(my|our|local)$/ )
5518 my $guess = substr( $last_nonblank_token, 0, 1 ) . '~';
5520 "Note: be sure you want '$previous_nonblank_token' instead of '$guess' here\n"
5525 ############################################
5526 # Main loop to respace all lines of the file
5527 ############################################
5530 # Testing option to break qw. Do not use; it can make a mess.
5531 my $ALLOW_BREAK_MULTILINE_QW = 0;
5532 my $in_multiline_qw;
5533 foreach my $line_of_tokens ( @{$rlines} ) {
5535 my $input_line_number = $line_of_tokens->{_line_number};
5536 my $last_line_type = $line_type;
5537 $line_type = $line_of_tokens->{_line_type};
5538 next unless ( $line_type eq 'CODE' );
5539 my $last_CODE_type = $CODE_type;
5540 $CODE_type = $line_of_tokens->{_code_type};
5541 my $rK_range = $line_of_tokens->{_rK_range};
5542 my ( $Kfirst, $Klast ) = @{$rK_range};
5543 next unless defined($Kfirst);
5544 ( $Kfirst_old, $Klast_old ) = ( $Kfirst, $Klast );
5545 $Klast_old_code = $Klast_old;
5547 # Be sure an old K value is defined for sub $store_token
5548 $Ktoken_vars = $Kfirst;
5550 # Check for correct sequence of token indexes...
5551 # An error here means that sub write_line() did not correctly
5552 # package the tokenized lines as it received them. If we
5553 # get a fault here it has not output a continuous sequence
5554 # of K values. Or a line of CODE may have been mismarked as
5556 if ( defined($last_K_out) ) {
5557 if ( $Kfirst != $last_K_out + 1 ) {
5559 "Program Bug: last K out was $last_K_out but Kfirst=$Kfirst"
5565 # The first token should always have been given index 0 by sub
5567 if ( $Kfirst != 0 ) {
5568 Fault("Program Bug: first K is $Kfirst but should be 0");
5571 $last_K_out = $Klast;
5573 # Handle special lines of code
5574 if ( $CODE_type && $CODE_type ne 'NIN' && $CODE_type ne 'VER' ) {
5576 # CODE_types are as follows.
5578 # 'VB' = Verbatim - line goes out verbatim
5579 # 'FS' = Format Skipping - line goes out verbatim, no blanks
5580 # 'IO' = Indent Only - only indentation may be changed
5581 # 'NIN' = No Internal Newlines - line does not get broken
5582 # 'HSC'=Hanging Side Comment - fix this hanging side comment
5583 # 'BC'=Block Comment - an ordinary full line comment
5584 # 'SBC'=Static Block Comment - a block comment which does not get
5586 # 'SBCX'=Static Block Comment Without Leading Space
5587 # 'VER'=VERSION statement
5588 # '' or (undefined) - no restructions
5590 # For a hanging side comment we insert an empty quote before
5591 # the comment so that it becomes a normal side comment and
5592 # will be aligned by the vertical aligner
5593 if ( $CODE_type eq 'HSC' ) {
5595 # Safety Check: This must be a line with one token (a comment)
5596 my $rtoken_vars = $rLL->[$Kfirst];
5597 if ( $Kfirst == $Klast && $rtoken_vars->[_TYPE_] eq '#' ) {
5599 # Note that even if the flag 'noadd-whitespace' is set, we
5600 # will make an exception here and allow a blank to be
5601 # inserted to push the comment to the right. We can think
5602 # of this as an adjustment of indentation rather than
5603 # whitespace between tokens. This will also prevent the
5604 # hanging side comment from getting converted to a block
5605 # comment if whitespace gets deleted, as for example with
5606 # the -extrude and -mangle options.
5607 my $rcopy = copy_token_as_type( $rtoken_vars, 'q', '' );
5608 $store_token->($rcopy);
5609 $rcopy = copy_token_as_type( $rtoken_vars, 'b', ' ' );
5610 $store_token->($rcopy);
5611 $store_token->($rtoken_vars);
5616 # This line was mis-marked by sub scan_comment
5618 "Program bug. A hanging side comment has been mismarked"
5623 if ( $CODE_type eq 'BL' ) {
5624 my $seqno = $seqno_stack{ $depth_next - 1 };
5625 if ( defined($seqno)
5626 && !$ris_permanently_broken->{$seqno}
5627 && $rOpts_maximum_consecutive_blank_lines )
5629 $set_permanently_broken->($seqno);
5633 # Copy tokens unchanged
5634 foreach my $KK ( $Kfirst .. $Klast ) {
5636 $store_token->( $rLL->[$KK] );
5641 # Handle normal line..
5643 # Define index of last token before any side comment for comma counts
5644 my $type_end = $rLL->[$Klast_old_code]->[_TYPE_];
5645 if ( ( $type_end eq '#' || $type_end eq 'b' )
5646 && $Klast_old_code > $Kfirst_old )
5649 if ( $rLL->[$Klast_old_code]->[_TYPE_] eq 'b'
5650 && $Klast_old_code > $Kfirst_old )
5656 # Insert any essential whitespace between lines
5657 # if last line was normal CODE.
5658 # Patch for rt #125012: use K_previous_code rather than '_nonblank'
5659 # because comments may disappear.
5660 my $type_next = $rLL->[$Kfirst]->[_TYPE_];
5661 my $token_next = $rLL->[$Kfirst]->[_TOKEN_];
5662 my $Kp = $self->K_previous_code( undef, $rLL_new );
5663 if ( $last_line_type eq 'CODE'
5664 && $type_next ne 'b'
5667 my $token_p = $rLL_new->[$Kp]->[_TOKEN_];
5668 my $type_p = $rLL_new->[$Kp]->[_TYPE_];
5670 my ( $token_pp, $type_pp );
5671 my $Kpp = $self->K_previous_code( $Kp, $rLL_new );
5672 if ( defined($Kpp) ) {
5673 $token_pp = $rLL_new->[$Kpp]->[_TOKEN_];
5674 $type_pp = $rLL_new->[$Kpp]->[_TYPE_];
5683 is_essential_whitespace(
5684 $token_pp, $type_pp, $token_p,
5685 $type_p, $token_next, $type_next,
5690 # Copy this first token as blank, but use previous line number
5691 my $rcopy = copy_token_as_type( $rLL->[$Kfirst], 'b', ' ' );
5692 $rcopy->[_LINE_INDEX_] =
5693 $rLL_new->[-1]->[_LINE_INDEX_];
5695 # The level and ci_level of newly created spaces should be the
5696 # same as the previous token. Otherwise blinking states can
5697 # be created if the -lp mode is used. See similar coding in
5698 # sub 'store_token_and_space'. Fixes cases b1109 b1110.
5700 $rLL_new->[-1]->[_LEVEL_];
5701 $rcopy->[_CI_LEVEL_] =
5702 $rLL_new->[-1]->[_CI_LEVEL_];
5704 $store_token->($rcopy);
5708 ########################################################
5709 # Loop to copy all tokens on this line, with any changes
5710 ########################################################
5712 for ( my $KK = $Kfirst ; $KK <= $Klast ; $KK++ ) {
5714 $rtoken_vars = $rLL->[$KK];
5715 my $token = $rtoken_vars->[_TOKEN_];
5716 my $type = $rtoken_vars->[_TYPE_];
5717 my $last_type_sequence = $type_sequence;
5718 $type_sequence = $rtoken_vars->[_TYPE_SEQUENCE_];
5720 # Handle a blank space ...
5721 if ( $type eq 'b' ) {
5723 # Delete it if not wanted by whitespace rules
5724 # or we are deleting all whitespace
5725 # Note that whitespace flag is a flag indicating whether a
5726 # white space BEFORE the token is needed
5727 next if ( $KK >= $Klast ); # skip terminal blank
5728 my $Knext = $KK + 1;
5730 if ($rOpts_freeze_whitespace) {
5731 $store_token->($rtoken_vars);
5735 my $ws = $rwhitespace_flags->[$Knext];
5737 || $rOpts_delete_old_whitespace )
5740 my $Kp = $self->K_previous_nonblank($KK);
5741 next unless defined($Kp);
5742 my $token_p = $rLL->[$Kp]->[_TOKEN_];
5743 my $type_p = $rLL->[$Kp]->[_TYPE_];
5745 my ( $token_pp, $type_pp );
5747 my $Kpp = $self->K_previous_nonblank($Kp);
5748 if ( defined($Kpp) ) {
5749 $token_pp = $rLL->[$Kpp]->[_TOKEN_];
5750 $type_pp = $rLL->[$Kpp]->[_TYPE_];
5756 my $token_next = $rLL->[$Knext]->[_TOKEN_];
5757 my $type_next = $rLL->[$Knext]->[_TYPE_];
5759 my $do_not_delete = is_essential_whitespace(
5760 $token_pp, $type_pp, $token_p,
5761 $type_p, $token_next, $type_next,
5764 next unless ($do_not_delete);
5767 # make it just one character
5768 $rtoken_vars->[_TOKEN_] = ' ';
5769 $store_token->($rtoken_vars);
5773 # Handle a nonblank token...
5775 if ($type_sequence) {
5777 if ( $is_closing_token{$token} ) {
5779 # Insert a tentative missing semicolon if the next token is
5780 # a closing block brace
5785 # not preceded by a ';'
5786 && $last_nonblank_type ne ';'
5788 # and this is not a VERSION stmt (is all one line, we
5789 # are not inserting semicolons on one-line blocks)
5790 && $CODE_type ne 'VER'
5792 # and we are allowed to add semicolons
5793 && $rOpts->{'add-semicolons'}
5796 $add_phantom_semicolon->($KK);
5801 # Modify certain tokens here for whitespace
5802 # The following is not yet done, but could be:
5804 elsif ( $type =~ /^[wit]$/ ) {
5806 # Examples: <<snippets/space1.in>>
5807 # change '$ var' to '$var' etc
5808 # change '@ ' to '@'
5809 my ( $sigil, $word ) = split /\s+/, $token, 2;
5810 if ( length($sigil) == 1
5811 && $sigil =~ /^[\$\&\%\*\@]$/ )
5814 $token .= $word if ($word);
5815 $rtoken_vars->[_TOKEN_] = $token;
5818 # Split identifiers with leading arrows, inserting blanks if
5819 # necessary. It is easier and safer here than in the
5820 # tokenizer. For example '->new' becomes two tokens, '->' and
5821 # 'new' with a possible blank between.
5823 # Note: there is a related patch in sub set_whitespace_flags
5824 if ( substr( $token, 0, 1 ) eq '-'
5825 && $token =~ /^\-\>(.*)$/
5829 my $token_save = $1;
5830 my $type_save = $type;
5832 # Change '-> new' to '->new'
5833 $token_save =~ s/^\s+//g;
5835 # store a blank to left of arrow if necessary
5836 my $Kprev = $self->K_previous_nonblank($KK);
5837 if ( defined($Kprev)
5838 && $rLL->[$Kprev]->[_TYPE_] ne 'b'
5839 && $rOpts_add_whitespace
5840 && $want_left_space{'->'} == WS_YES )
5843 copy_token_as_type( $rtoken_vars, 'b', ' ' );
5844 $store_token->($rcopy);
5847 # then store the arrow
5848 my $rcopy = copy_token_as_type( $rtoken_vars, '->', '->' );
5849 $store_token->($rcopy);
5851 # store a blank after the arrow if requested
5852 # added for issue git #33
5853 if ( $want_right_space{'->'} == WS_YES ) {
5855 copy_token_as_type( $rtoken_vars, 'b', ' ' );
5856 $store_token->($rcopy);
5859 # then reset the current token to be the remainder,
5860 # and reset the whitespace flag according to the arrow
5861 $token = $rtoken_vars->[_TOKEN_] = $token_save;
5862 $type = $rtoken_vars->[_TYPE_] = $type_save;
5863 $store_token->($rtoken_vars);
5867 if ( $token =~ /$ANYSUB_PATTERN/ ) {
5869 # -spp = 0 : no space before opening prototype paren
5870 # -spp = 1 : stable (follow input spacing)
5871 # -spp = 2 : always space before opening prototype paren
5872 my $spp = $rOpts->{'space-prototype-paren'};
5873 if ( defined($spp) ) {
5874 if ( $spp == 0 ) { $token =~ s/\s+\(/\(/; }
5875 elsif ( $spp == 2 ) { $token =~ s/\(/ (/; }
5878 # one space max, and no tabs
5879 $token =~ s/\s+/ /g;
5880 $rtoken_vars->[_TOKEN_] = $token;
5883 # clean up spaces in package identifiers, like
5884 # "package Bob::Dog;"
5885 if ( $token =~ /^package\s/ ) {
5886 $token =~ s/\s+/ /g;
5887 $rtoken_vars->[_TOKEN_] = $token;
5890 # trim identifiers of trailing blanks which can occur
5891 # under some unusual circumstances, such as if the
5892 # identifier 'witch' has trailing blanks on input here:
5896 # () # prototype may be on new line ...
5898 if ( $type eq 'i' ) {
5899 $token =~ s/\s+$//g;
5900 $rtoken_vars->[_TOKEN_] = $token;
5905 elsif ( $type eq ';' ) {
5907 # Remove unnecessary semicolons, but not after bare
5908 # blocks, where it could be unsafe if the brace is
5911 $rOpts->{'delete-semicolons'}
5914 $last_nonblank_type eq '}'
5916 $is_block_without_semicolon{
5917 $last_nonblank_block_type}
5918 || $last_nonblank_block_type =~ /$SUB_PATTERN/
5919 || $last_nonblank_block_type =~ /^\w+:$/
5922 || $last_nonblank_type eq ';'
5927 # This looks like a deletable semicolon, but even if a
5928 # semicolon can be deleted it is necessarily best to do so.
5929 # We apply these additional rules for deletion:
5930 # - Always ok to delete a ';' at the end of a line
5931 # - Never delete a ';' before a '#' because it would
5932 # promote it to a block comment.
5933 # - If a semicolon is not at the end of line, then only
5934 # delete if it is followed by another semicolon or closing
5935 # token. This includes the comment rule. It may take
5936 # two passes to get to a final state, but it is a little
5937 # safer. For example, keep the first semicolon here:
5938 # eval { sub bubba { ok(0) }; ok(0) } || ok(1);
5939 # It is not required but adds some clarity.
5940 my $ok_to_delete = 1;
5941 if ( $KK < $Klast ) {
5942 my $Kn = $self->K_next_nonblank($KK);
5943 if ( defined($Kn) && $Kn <= $Klast ) {
5944 my $next_nonblank_token_type =
5945 $rLL->[$Kn]->[_TYPE_];
5946 $ok_to_delete = $next_nonblank_token_type eq ';'
5947 || $next_nonblank_token_type eq '}';
5951 # do not delete only nonblank token in a file
5953 my $Kn = $self->K_next_nonblank($KK);
5954 $ok_to_delete = defined($Kn) || $nonblank_token_count;
5957 if ($ok_to_delete) {
5958 $self->note_deleted_semicolon($input_line_number);
5962 write_logfile_entry("Extra ';'\n");
5967 # patch to add space to something like "x10"
5968 # This avoids having to split this token in the pre-tokenizer
5969 elsif ( $type eq 'n' ) {
5970 if ( $token =~ /^x\d+/ ) {
5972 $rtoken_vars->[_TOKEN_] = $token;
5976 # check for a qw quote
5977 elsif ( $type eq 'q' ) {
5979 # trim blanks from right of qw quotes
5980 # (To avoid trimming qw quotes use -ntqw; the tokenizer handles
5983 $rtoken_vars->[_TOKEN_] = $token;
5984 $self->note_embedded_tab($input_line_number)
5985 if ( $token =~ "\t" );
5987 if ($in_multiline_qw) {
5989 # If we are at the end of a multiline qw ..
5990 if ( $in_multiline_qw == $KK ) {
5992 # Split off the closing delimiter character
5993 # so that the formatter can put a line break there if necessary
5995 my $part2 = substr( $part1, -1, 1, "" );
5999 copy_token_as_type( $rtoken_vars, 'q', $part1 );
6000 $store_token->($rcopy);
6002 $rtoken_vars->[_TOKEN_] = $token;
6005 $in_multiline_qw = undef;
6007 # store without preceding blank
6008 $store_token->($rtoken_vars);
6012 # continuing a multiline qw
6013 $store_token->($rtoken_vars);
6020 # we are encountered new qw token...see if multiline
6021 if ($ALLOW_BREAK_MULTILINE_QW) {
6022 my $K_end = $K_end_q->($KK);
6023 if ( $K_end != $KK ) {
6025 # Starting multiline qw...
6026 # set flag equal to the ending K
6027 $in_multiline_qw = $K_end;
6029 # Split off the leading part so that the formatter can
6030 # put a line break there if necessary
6031 if ( $token =~ /^(qw\s*.)(.*)$/ ) {
6036 copy_token_as_type( $rtoken_vars, 'q',
6038 $store_token_and_space->(
6040 $rwhitespace_flags->[$KK] == WS_YES
6043 $rtoken_vars->[_TOKEN_] = $token;
6045 # Second part goes without intermediate blank
6046 $store_token->($rtoken_vars);
6054 # this is a new single token qw -
6055 # store with possible preceding blank
6056 $store_token_and_space->(
6057 $rtoken_vars, $rwhitespace_flags->[$KK] == WS_YES
6062 } ## end if ( $type eq 'q' )
6064 # change 'LABEL :' to 'LABEL:'
6065 elsif ( $type eq 'J' ) {
6067 $rtoken_vars->[_TOKEN_] = $token;
6070 # check a quote for problems
6071 elsif ( $type eq 'Q' ) {
6072 $check_Q->( $KK, $Kfirst, $input_line_number );
6075 # Store this token with possible previous blank
6076 $store_token_and_space->(
6077 $rtoken_vars, $rwhitespace_flags->[$KK] == WS_YES
6083 # Walk backwards through the tokens, making forward links to sequence items.
6084 if ( @{$rLL_new} ) {
6086 for ( my $KK = @{$rLL_new} - 1 ; $KK >= 0 ; $KK-- ) {
6087 $rLL_new->[$KK]->[_KNEXT_SEQ_ITEM_] = $KNEXT;
6088 if ( $rLL_new->[$KK]->[_TYPE_SEQUENCE_] ) { $KNEXT = $KK }
6090 $self->[_K_first_seq_item_] = $KNEXT;
6093 # Find and remember lists by sequence number
6094 foreach my $seqno ( keys %{$K_opening_container} ) {
6095 my $K_opening = $K_opening_container->{$seqno};
6096 next unless defined($K_opening);
6098 # code errors may leave undefined closing tokens
6099 my $K_closing = $K_closing_container->{$seqno};
6100 next unless defined($K_closing);
6102 my $lx_open = $rLL_new->[$K_opening]->[_LINE_INDEX_];
6103 my $lx_close = $rLL_new->[$K_closing]->[_LINE_INDEX_];
6104 my $line_diff = $lx_close - $lx_open;
6105 $ris_broken_container->{$seqno} = $line_diff;
6107 # See if this is a list
6109 my $rtype_count = $rtype_count_by_seqno->{$seqno};
6111 my $comma_count = $rtype_count->{','};
6112 my $fat_comma_count = $rtype_count->{'=>'};
6113 my $semicolon_count = $rtype_count->{';'} || $rtype_count->{'f'};
6115 # We will define a list to be a container with one or more commas
6116 # and no semicolons. Note that we have included the semicolons
6117 # in a 'for' container in the simicolon count to keep c-style for
6118 # statements from being formatted as lists.
6119 if ( ( $comma_count || $fat_comma_count ) && !$semicolon_count ) {
6122 # We need to do one more check for a perenthesized list:
6123 # At an opening paren following certain tokens, such as 'if',
6124 # we do not want to format the contents as a list.
6125 if ( $rLL_new->[$K_opening]->[_TOKEN_] eq '(' ) {
6126 my $Kp = $self->K_previous_code( $K_opening, $rLL_new );
6127 if ( defined($Kp) ) {
6128 my $type_p = $rLL_new->[$Kp]->[_TYPE_];
6129 if ( $type_p eq 'k' ) {
6130 my $token_p = $rLL_new->[$Kp]->[_TOKEN_];
6131 $is_list = 0 if ( $is_nonlist_keyword{$token_p} );
6134 $is_list = 0 if ( $is_nonlist_type{$type_p} );
6141 # Look for a block brace marked as uncertain. If the tokenizer thinks
6142 # its guess is uncertain for the type of a brace following an unknown
6143 # bareword then it adds a trailing space as a signal. We can fix the
6144 # type here now that we have had a better look at the contents of the
6145 # container. This fixes case b1085. To find the corresponding code in
6146 # Tokenizer.pm search for 'b1085' with an editor.
6147 my $block_type = $rLL_new->[$K_opening]->[_BLOCK_TYPE_];
6148 if ( $block_type && substr( $block_type, -1, 1 ) eq ' ' ) {
6150 # Always remove the trailing space
6151 $block_type =~ s/\s+$//;
6153 # Try to filter out parenless sub calls
6154 my ( $Knn1, $Knn2 );
6155 my ( $type_nn1, $type_nn2 ) = ( 'b', 'b' );
6156 $Knn1 = $self->K_next_nonblank( $K_opening, $rLL_new );
6157 $Knn2 = $self->K_next_nonblank( $Knn1, $rLL_new ) if defined($Knn1);
6158 $type_nn1 = $rLL_new->[$Knn1]->[_TYPE_] if ( defined($Knn1) );
6159 $type_nn2 = $rLL_new->[$Knn2]->[_TYPE_] if ( defined($Knn2) );
6161 # if ( $type_nn1 =~ /^[wU]$/ && $type_nn2 =~ /^[wiqQGCZ]$/ ) {
6162 if ( $wU{$type_nn1} && $wiq{$type_nn2} ) {
6166 # Convert to a hash brace if it looks like it holds a list
6171 $rLL_new->[$K_opening]->[_CI_LEVEL_] = 1;
6172 $rLL_new->[$K_closing]->[_CI_LEVEL_] = 1;
6175 $rLL_new->[$K_opening]->[_BLOCK_TYPE_] = $block_type;
6176 $rLL_new->[$K_closing]->[_BLOCK_TYPE_] = $block_type;
6179 # Handle a list container
6180 if ( $is_list && !$block_type ) {
6181 $ris_list_by_seqno->{$seqno} = $seqno;
6182 my $seqno_parent = $rparent_of_seqno->{$seqno};
6184 while ( defined($seqno_parent) && $seqno_parent ne SEQ_ROOT ) {
6187 # for $rhas_list we need to save the minimum depth
6188 if ( !$rhas_list->{$seqno_parent}
6189 || $rhas_list->{$seqno_parent} > $depth )
6191 $rhas_list->{$seqno_parent} = $depth;
6195 $rhas_broken_list->{$seqno_parent} = 1;
6197 # Patch1: We need to mark broken lists with non-terminal
6198 # line-ending commas for the -bbx=2 parameter. This insures
6199 # that the list will stay broken. Otherwise the flag
6200 # -bbx=2 can be unstable. This fixes case b789 and b938.
6202 # Patch2: Updated to also require either one fat comma or
6203 # one more line-ending comma. Fixes cases b1069 b1070
6206 $rlec_count_by_seqno->{$seqno}
6207 && ( $rlec_count_by_seqno->{$seqno} > 1
6208 || $rtype_count_by_seqno->{$seqno}->{'=>'} )
6211 $rhas_broken_list_with_lec->{$seqno_parent} = 1;
6214 $seqno_parent = $rparent_of_seqno->{$seqno_parent};
6218 # Handle code blocks ...
6219 # The -lp option needs to know if a container holds a code block
6220 elsif ( $block_type && $rOpts_line_up_parentheses ) {
6221 my $seqno_parent = $rparent_of_seqno->{$seqno};
6222 while ( defined($seqno_parent) && $seqno_parent ne SEQ_ROOT ) {
6223 $rhas_code_block->{$seqno_parent} = 1;
6224 $rhas_broken_code_block->{$seqno_parent} = $line_diff;
6225 $seqno_parent = $rparent_of_seqno->{$seqno_parent};
6230 # Find containers with ternaries, needed for -lp formatting.
6231 foreach my $seqno ( keys %{$K_opening_ternary} ) {
6232 my $seqno_parent = $rparent_of_seqno->{$seqno};
6233 while ( defined($seqno_parent) && $seqno_parent ne SEQ_ROOT ) {
6234 $rhas_ternary->{$seqno_parent} = 1;
6235 $seqno_parent = $rparent_of_seqno->{$seqno_parent};
6239 # Turn off -lp for containers with here-docs with text within a container,
6240 # since they have their own fixed indentation. Fixes case b1081.
6241 if ($rOpts_line_up_parentheses) {
6242 foreach my $seqno ( keys %K_first_here_doc_by_seqno ) {
6243 my $Kh = $K_first_here_doc_by_seqno{$seqno};
6244 my $Kc = $K_closing_container->{$seqno};
6245 my $line_Kh = $rLL_new->[$Kh]->[_LINE_INDEX_];
6246 my $line_Kc = $rLL_new->[$Kc]->[_LINE_INDEX_];
6247 next if ( $line_Kh == $line_Kc );
6248 $ris_excluded_lp_container->{$seqno} = 1;
6252 # Set a flag to turn off -cab=3 in complex structures. Otherwise,
6253 # instability can occur. When it is overridden the behavior of the closest
6254 # match, -cab=2, will be used instead. This fixes cases b1096 b1113.
6255 if ( $rOpts_comma_arrow_breakpoints == 3 ) {
6256 foreach my $seqno ( keys %{$K_opening_container} ) {
6258 my $rtype_count = $rtype_count_by_seqno->{$seqno};
6259 next unless ( $rtype_count && $rtype_count->{'=>'} );
6261 # override -cab=3 if this contains a sub-list
6262 if ( $rhas_list->{$seqno} ) {
6263 $roverride_cab3->{$seqno} = 1;
6266 # or if this is a sub-list of its parent container
6268 my $seqno_parent = $rparent_of_seqno->{$seqno};
6269 if ( defined($seqno_parent)
6270 && $ris_list_by_seqno->{$seqno_parent} )
6272 $roverride_cab3->{$seqno} = 1;
6278 # Reset memory to be the new array
6279 $self->[_rLL_] = $rLL_new;
6281 if ( @{$rLL_new} ) { $Klimit = @{$rLL_new} - 1 }
6282 $self->[_Klimit_] = $Klimit;
6284 # DEBUG OPTION: make sure the new array looks okay.
6285 # This is no longer needed but should be retained for future development.
6286 DEVEL_MODE && $self->check_token_array();
6288 # reset the token limits of each line
6289 $self->resync_lines_and_tokens();
6294 sub copy_token_as_type {
6296 # This provides a quick way to create a new token by
6297 # slightly modifying an existing token.
6298 my ( $rold_token, $type, $token ) = @_;
6299 if ( $type eq 'b' ) {
6300 $token = " " unless defined($token);
6302 elsif ( $type eq 'q' ) {
6303 $token = '' unless defined($token);
6305 elsif ( $type eq '->' ) {
6306 $token = '->' unless defined($token);
6308 elsif ( $type eq ';' ) {
6309 $token = ';' unless defined($token);
6313 # This sub assumes it will be called with just two types, 'b' or 'q'
6315 "Programming error: copy_token_as has type $type but should be 'b' or 'q'"
6319 my @rnew_token = @{$rold_token};
6320 $rnew_token[_TYPE_] = $type;
6321 $rnew_token[_TOKEN_] = $token;
6322 $rnew_token[_BLOCK_TYPE_] = '';
6323 $rnew_token[_TYPE_SEQUENCE_] = '';
6324 return \@rnew_token;
6327 sub Debug_dump_tokens {
6329 # a debug routine, not normally used
6330 my ( $self, $msg ) = @_;
6331 my $rLL = $self->[_rLL_];
6332 my $nvars = @{$rLL};
6333 print STDERR "$msg\n";
6334 print STDERR "ntokens=$nvars\n";
6335 print STDERR "K\t_TOKEN_\t_TYPE_\n";
6338 foreach my $item ( @{$rLL} ) {
6339 print STDERR "$K\t$item->[_TOKEN_]\t$item->[_TYPE_]\n";
6346 my ( $self, $KK, $rLL ) = @_;
6348 # return the index K of the next nonblank, non-comment token
6349 return unless ( defined($KK) && $KK >= 0 );
6351 # use the standard array unless given otherwise
6352 $rLL = $self->[_rLL_] unless ( defined($rLL) );
6355 while ( $Knnb < $Num ) {
6356 if ( !defined( $rLL->[$Knnb] ) ) {
6358 # We seem to have encountered a gap in our array.
6359 # This shouldn't happen because sub write_line() pushed
6360 # items into the $rLL array.
6361 Fault("Undefined entry for k=$Knnb");
6363 if ( $rLL->[$Knnb]->[_TYPE_] ne 'b'
6364 && $rLL->[$Knnb]->[_TYPE_] ne '#' )
6373 sub K_next_nonblank {
6374 my ( $self, $KK, $rLL ) = @_;
6376 # return the index K of the next nonblank token, or
6377 # return undef if none
6378 return unless ( defined($KK) && $KK >= 0 );
6380 # The third arg allows this routine to be used on any array. This is
6381 # useful in sub respace_tokens when we are copying tokens from an old $rLL
6382 # to a new $rLL array. But usually the third arg will not be given and we
6383 # will just use the $rLL array in $self.
6384 $rLL = $self->[_rLL_] unless ( defined($rLL) );
6387 return unless ( $Knnb < $Num );
6388 return $Knnb if ( $rLL->[$Knnb]->[_TYPE_] ne 'b' );
6389 return unless ( ++$Knnb < $Num );
6390 return $Knnb if ( $rLL->[$Knnb]->[_TYPE_] ne 'b' );
6392 # Backup loop. Very unlikely to get here; it means we have neighboring
6393 # blanks in the token stream.
6395 while ( $Knnb < $Num ) {
6397 # Safety check, this fault shouldn't happen: The $rLL array is the
6398 # main array of tokens, so all entries should be used. It is
6399 # initialized in sub write_line, and then re-initialized by sub
6400 # $store_token() within sub respace_tokens. Tokens are pushed on
6401 # so there shouldn't be any gaps.
6402 if ( !defined( $rLL->[$Knnb] ) ) {
6403 Fault("Undefined entry for k=$Knnb");
6405 if ( $rLL->[$Knnb]->[_TYPE_] ne 'b' ) { return $Knnb }
6411 sub K_previous_code {
6413 # return the index K of the previous nonblank, non-comment token
6414 # Call with $KK=undef to start search at the top of the array
6415 my ( $self, $KK, $rLL ) = @_;
6417 # use the standard array unless given otherwise
6418 $rLL = $self->[_rLL_] unless ( defined($rLL) );
6420 if ( !defined($KK) ) { $KK = $Num }
6421 elsif ( $KK > $Num ) {
6423 # This fault can be caused by a programming error in which a bad $KK is
6424 # given. The caller should make the first call with KK_new=undef to
6427 "Program Bug: K_previous_nonblank_new called with K=$KK which exceeds $Num"
6431 while ( $Kpnb >= 0 ) {
6432 if ( $rLL->[$Kpnb]->[_TYPE_] ne 'b'
6433 && $rLL->[$Kpnb]->[_TYPE_] ne '#' )
6442 sub K_previous_nonblank {
6444 # return index of previous nonblank token before item K;
6445 # Call with $KK=undef to start search at the top of the array
6446 my ( $self, $KK, $rLL ) = @_;
6448 # use the standard array unless given otherwise
6449 $rLL = $self->[_rLL_] unless ( defined($rLL) );
6451 if ( !defined($KK) ) { $KK = $Num }
6452 elsif ( $KK > $Num ) {
6454 # This fault can be caused by a programming error in which a bad $KK is
6455 # given. The caller should make the first call with KK_new=undef to
6458 "Program Bug: K_previous_nonblank_new called with K=$KK which exceeds $Num"
6462 return unless ( $Kpnb >= 0 );
6463 return $Kpnb if ( $rLL->[$Kpnb]->[_TYPE_] ne 'b' );
6464 return unless ( --$Kpnb >= 0 );
6465 return $Kpnb if ( $rLL->[$Kpnb]->[_TYPE_] ne 'b' );
6467 # Backup loop. We should not get here unless some routine
6468 # slipped repeated blanks into the token stream.
6469 return unless ( --$Kpnb >= 0 );
6470 while ( $Kpnb >= 0 ) {
6471 if ( $rLL->[$Kpnb]->[_TYPE_] ne 'b' ) { return $Kpnb }
6477 sub get_old_line_index {
6479 # return index of the original line that token K was on
6480 my ( $self, $K ) = @_;
6481 my $rLL = $self->[_rLL_];
6482 return 0 unless defined($K);
6483 return $rLL->[$K]->[_LINE_INDEX_];
6486 sub get_old_line_count {
6488 # return number of input lines separating two tokens
6489 my ( $self, $Kbeg, $Kend ) = @_;
6490 my $rLL = $self->[_rLL_];
6491 return 0 unless defined($Kbeg);
6492 return 0 unless defined($Kend);
6493 return $rLL->[$Kend]->[_LINE_INDEX_] - $rLL->[$Kbeg]->[_LINE_INDEX_] + 1;
6496 sub parent_seqno_by_K {
6498 # Return the sequence number of the parent container of token K, if any.
6500 my ( $self, $KK ) = @_;
6501 return unless defined($KK);
6503 # Note: This routine is relatively slow. I tried replacing it with a hash
6504 # which is easily created in sub respace_tokens. But the total time with a
6505 # hash was greater because this routine is called once per line whereas a
6506 # hash must be created token-by-token.
6508 my $rLL = $self->[_rLL_];
6511 # For example, consider the following with seqno=5 of the '[' and ']'
6512 # being called with index K of the first token of each line:
6517 # sub { 99 }, 'do {&{%s} for 1,2}', # 5
6518 # '(&{})(&{})', undef, # 5
6519 # [ 2, 2, 0 ], 0 # 5
6522 # NOTE: The ending parent will be SEQ_ROOT for a balanced file. For
6523 # unbalanced files, last sequence number will either be undefined or it may
6524 # be at a deeper level. In either case we will just return SEQ_ROOT to
6525 # have a defined value and allow formatting to proceed.
6526 my $parent_seqno = SEQ_ROOT;
6527 while ( defined($KNEXT) ) {
6529 $KNEXT = $rLL->[$KNEXT]->[_KNEXT_SEQ_ITEM_];
6530 my $rtoken_vars = $rLL->[$Kt];
6531 my $type = $rtoken_vars->[_TYPE_];
6532 my $type_sequence = $rtoken_vars->[_TYPE_SEQUENCE_];
6534 # if next container token is closing, it is the parent seqno
6535 if ( $is_closing_type{$type} ) {
6537 $parent_seqno = $type_sequence;
6540 $parent_seqno = $self->[_rparent_of_seqno_]->{$type_sequence};
6545 # if next container token is opening, we want its parent container
6546 elsif ( $is_opening_type{$type} ) {
6547 $parent_seqno = $self->[_rparent_of_seqno_]->{$type_sequence};
6551 # not a container - must be ternary - keep going
6554 $parent_seqno = SEQ_ROOT unless ( defined($parent_seqno) );
6555 return $parent_seqno;
6558 sub is_in_block_by_i {
6559 my ( $self, $i ) = @_;
6562 # token at i is contained in a BLOCK
6563 # or is at root level
6564 # or there is some kind of error (i.e. unbalanced file)
6565 # returns false otherwise
6566 my $seqno = $parent_seqno_to_go[$i];
6567 return 1 if ( !$seqno || $seqno eq SEQ_ROOT );
6568 my $Kopening = $self->[_K_opening_container_]->{$seqno};
6569 return 1 unless defined($Kopening);
6570 my $rLL = $self->[_rLL_];
6571 return 1 if $rLL->[$Kopening]->[_BLOCK_TYPE_];
6575 sub is_in_list_by_i {
6576 my ( $self, $i ) = @_;
6578 # returns true if token at i is contained in a LIST
6579 # returns false otherwise
6580 my $seqno = $parent_seqno_to_go[$i];
6581 return unless ( $seqno && $seqno ne SEQ_ROOT );
6582 if ( $self->[_ris_list_by_seqno_]->{$seqno} ) {
6590 # Return true if token K is in a list
6591 my ( $self, $KK ) = @_;
6593 my $parent_seqno = $self->parent_seqno_by_K($KK);
6594 return unless defined($parent_seqno);
6595 return $self->[_ris_list_by_seqno_]->{$parent_seqno};
6598 sub is_list_by_seqno {
6600 # Return true if the immediate contents of a container appears to be a
6602 my ( $self, $seqno ) = @_;
6603 return unless defined($seqno);
6604 return $self->[_ris_list_by_seqno_]->{$seqno};
6607 sub resync_lines_and_tokens {
6610 my $rLL = $self->[_rLL_];
6611 my $Klimit = $self->[_Klimit_];
6612 my $rlines = $self->[_rlines_];
6613 my @Krange_code_without_comments;
6614 my @Klast_valign_code;
6616 # Re-construct the arrays of tokens associated with the original input lines
6617 # since they have probably changed due to inserting and deleting blanks
6618 # and a few other tokens.
6622 # This is the next token and its line index:
6625 if ( defined($rLL) && @{$rLL} ) {
6626 $Kmax = @{$rLL} - 1;
6627 $inext = $rLL->[$Knext]->[_LINE_INDEX_];
6630 # Remember the most recently output token index
6634 foreach my $line_of_tokens ( @{$rlines} ) {
6636 my $line_type = $line_of_tokens->{_line_type};
6637 my $CODE_type = $line_of_tokens->{_code_type};
6638 if ( $line_type eq 'CODE' ) {
6642 if ( $Knext <= $Kmax ) {
6643 $inext = $rLL->[$Knext]->[_LINE_INDEX_];
6644 while ( $inext <= $iline ) {
6645 push @K_array, $Knext;
6647 if ( $Knext > $Kmax ) {
6651 $inext = $rLL->[$Knext]->[_LINE_INDEX_];
6655 # Delete any terminal blank token
6657 if ( $rLL->[ $K_array[-1] ]->[_TYPE_] eq 'b' ) {
6662 # Define the range of K indexes for the line:
6663 # $Kfirst = index of first token on line
6664 # $Klast_out = index of last token on line
6665 my ( $Kfirst, $Klast );
6667 $Kfirst = $K_array[0];
6668 $Klast = $K_array[-1];
6669 $Klast_out = $Klast;
6671 if ( defined($Kfirst) ) {
6673 # Save ranges of non-comment code. This will be used by
6674 # sub keep_old_line_breaks.
6675 if ( $rLL->[$Kfirst]->[_TYPE_] ne '#' ) {
6676 push @Krange_code_without_comments, [ $Kfirst, $Klast ];
6679 # Only save ending K indexes of code types which are blank
6680 # or 'VER'. These will be used for a convergence check.
6681 # See related code in sub 'send_lines_to_vertical_aligner'.
6683 || $CODE_type eq 'VER' )
6685 push @Klast_valign_code, $Klast;
6690 # It is only safe to trim the actual line text if the input
6691 # line had a terminal blank token. Otherwise, we may be
6693 if ( $line_of_tokens->{_ended_in_blank_token} ) {
6694 $line_of_tokens->{_line_text} =~ s/\s+$//;
6696 $line_of_tokens->{_rK_range} = [ $Kfirst, $Klast ];
6698 # Deleting semicolons can create new empty code lines
6699 # which should be marked as blank
6700 if ( !defined($Kfirst) ) {
6701 my $code_type = $line_of_tokens->{_code_type};
6702 if ( !$code_type ) {
6703 $line_of_tokens->{_code_type} = 'BL';
6709 # There shouldn't be any nodes beyond the last one. This routine is
6710 # relinking lines and tokens after the tokens have been respaced. A fault
6711 # here indicates some kind of bug has been introduced into the above loops.
6712 if ( defined($inext) ) {
6714 Fault("unexpected tokens at end of file when reconstructing lines");
6716 $self->[_rKrange_code_without_comments_] = \@Krange_code_without_comments;
6718 # Setup the convergence test in the FileWriter based on line-ending indexes
6719 my $file_writer_object = $self->[_file_writer_object_];
6720 $file_writer_object->setup_convergence_test( \@Klast_valign_code );
6722 # Mark essential old breakpoints if combination -iob -lp is used. These
6723 # two options do not work well together, but we can avoid turning -iob off
6724 # by ignoring -iob at certain essential line breaks.
6725 # Fixes cases b1021 b1023 b1034 b1048 b1049 b1050 b1056 b1058
6726 if ( $rOpts_ignore_old_breakpoints && $rOpts_line_up_parentheses ) {
6727 my %is_assignment_or_fat_comma = %is_assignment;
6728 $is_assignment_or_fat_comma{'=>'} = 1;
6729 my $ris_essential_old_breakpoint =
6730 $self->[_ris_essential_old_breakpoint_];
6732 my ( $Kfirst, $Klast );
6733 foreach my $line_of_tokens ( @{$rlines} ) {
6735 my $line_type = $line_of_tokens->{_line_type};
6736 if ( $line_type ne 'CODE' ) {
6737 ( $Kfirst, $Klast ) = ( undef, undef );
6740 my ( $Kfirst_prev, $Klast_prev ) = ( $Kfirst, $Klast );
6741 ( $Kfirst, $Klast ) = @{ $line_of_tokens->{_rK_range} };
6743 next unless defined($Klast_prev);
6744 next unless defined($Kfirst);
6745 my $type_last = $rLL->[$Klast_prev]->[_TOKEN_];
6746 my $type_first = $rLL->[$Kfirst]->[_TOKEN_];
6748 unless ( $is_assignment_or_fat_comma{$type_last}
6749 || $is_assignment_or_fat_comma{$type_first} );
6750 $ris_essential_old_breakpoint->{$Klast_prev} = 1;
6757 sub keep_old_line_breaks {
6759 # Called once per file to find and mark any old line breaks which
6760 # should be kept. We will be translating the input hashes into
6763 # A flag is set as follows:
6764 # = 1 make a hard break (flush the current batch)
6765 # best for something like leading commas (-kbb=',')
6766 # = 2 make a soft break (keep building current batch)
6767 # best for something like leading ->
6771 my $rLL = $self->[_rLL_];
6772 my $rKrange_code_without_comments =
6773 $self->[_rKrange_code_without_comments_];
6774 my $rbreak_before_Kfirst = $self->[_rbreak_before_Kfirst_];
6775 my $rbreak_after_Klast = $self->[_rbreak_after_Klast_];
6776 my $rwant_container_open = $self->[_rwant_container_open_];
6777 my $K_opening_container = $self->[_K_opening_container_];
6778 my $ris_broken_container = $self->[_ris_broken_container_];
6779 my $ris_list_by_seqno = $self->[_ris_list_by_seqno_];
6781 # This code moved here from sub scan_list to fix b1120
6782 if ( $rOpts->{'break-at-old-method-breakpoints'} ) {
6783 foreach my $item ( @{$rKrange_code_without_comments} ) {
6784 my ( $Kfirst, $Klast ) = @{$item};
6785 my $type = $rLL->[$Kfirst]->[_TYPE_];
6786 my $token = $rLL->[$Kfirst]->[_TOKEN_];
6788 # leading '->' use a value of 2 which causes a soft
6789 # break rather than a hard break
6790 if ( $type eq '->' ) {
6791 $rbreak_before_Kfirst->{$Kfirst} = 2;
6794 # leading ')->' use a special flag to insure that both
6795 # opening and closing parens get opened
6796 # Fix for b1120: only for parens, not braces
6797 elsif ( $token eq ')' ) {
6798 my $Kn = $self->K_next_nonblank($Kfirst);
6800 unless ( defined($Kn)
6802 && $rLL->[$Kn]->[_TYPE_] eq '->' );
6803 my $seqno = $rLL->[$Kfirst]->[_TYPE_SEQUENCE_];
6804 next unless ($seqno);
6806 # Patch to avoid blinkers: but do not do this unless the
6807 # container holds a list, or the opening and closing parens are
6808 # separated by more than one line.
6812 !$ris_list_by_seqno->{$seqno}
6813 && ( !$ris_broken_container->{$seqno}
6814 || $ris_broken_container->{$seqno} <= 1 )
6816 $rwant_container_open->{$seqno} = 1;
6821 return unless ( %keep_break_before_type || %keep_break_after_type );
6823 foreach my $item ( @{$rKrange_code_without_comments} ) {
6824 my ( $Kfirst, $Klast ) = @{$item};
6826 my $type_first = $rLL->[$Kfirst]->[_TYPE_];
6827 if ( $keep_break_before_type{$type_first} ) {
6828 $rbreak_before_Kfirst->{$Kfirst} = 1;
6831 my $type_last = $rLL->[$Klast]->[_TYPE_];
6832 if ( $keep_break_after_type{$type_last} ) {
6833 $rbreak_after_Klast->{$Klast} = 1;
6839 sub weld_containers {
6841 # Called once per file to do any welding operations requested by --weld*
6845 # This count is used to eliminate needless calls for weld checks elsewere
6846 $total_weld_count = 0;
6848 return if ( $rOpts->{'indent-only'} );
6849 return unless ($rOpts_add_newlines);
6851 # Important: sub 'weld_cuddled_blocks' must be called before
6852 # sub 'weld_nested_containers'. This is because the cuddled option needs to
6853 # use the original _LEVEL_ values of containers, but the weld nested
6854 # containers changes _LEVEL_ of welded containers.
6856 # Here is a good test case to be sure that both cuddling and welding
6857 # are working and not interfering with each other: <<snippets/ce_wn1.in>>
6861 # if ($BOLD_MATH) { (
6862 # $labels, $comment,
6863 # join( '', '<B>', &make_math( $mode, '', '', $_ ), '</B>' )
6865 # &process_math_in_latex( $mode, $math_style, $slevel, "\\mbox{$text}" ),
6869 $self->weld_cuddled_blocks() if ( %{$rcuddled_block_types} );
6871 if ( $rOpts->{'weld-nested-containers'} ) {
6873 $self->weld_nested_containers();
6875 $self->weld_nested_quotes();
6878 ##############################################################
6879 # All welding is done. Finish setting up weld data structures.
6880 ##############################################################
6882 my $rLL = $self->[_rLL_];
6883 my $rK_weld_left = $self->[_rK_weld_left_];
6884 my $rK_weld_right = $self->[_rK_weld_right_];
6885 my $rweld_len_right_at_K = $self->[_rweld_len_right_at_K_];
6888 my @keys = keys %{$rK_weld_right};
6889 $total_weld_count = @keys;
6891 # Note that this loop is processed in unsorted order for efficiency
6892 foreach my $Kstart (@keys) {
6893 my $Kend = $rK_weld_right->{$Kstart};
6895 # An error here would be due to an incorrect initialization introduced
6896 # in one of the above weld routines, like sub weld_nested.
6897 if ( $Kend <= $Kstart ) {
6898 Fault("Bad weld link: Kend=$Kend <= Kstart=$Kstart\n");
6901 $rweld_len_right_at_K->{$Kstart} =
6902 $rLL->[$Kend]->[_CUMULATIVE_LENGTH_] -
6903 $rLL->[$Kstart]->[_CUMULATIVE_LENGTH_];
6905 $rK_weld_left->{$Kend} = $Kstart; # fix in case of missing left link
6907 # Remember the leftmost index of welds which continue to the right
6908 if ( defined( $rK_weld_right->{$Kend} )
6909 && !defined( $rK_weld_left->{$Kstart} ) )
6911 push @K_multi_weld, $Kstart;
6915 # Update the end index and lengths of any long welds to extend to the far
6916 # end. This has to be processed in sorted order.
6917 # Left links added for b1173.
6919 foreach my $Kstart ( sort { $a <=> $b } @K_multi_weld ) {
6921 # skip any interior K which was originally missing a left link
6922 next if ( $Kstart <= $Kend );
6925 push @Klist, $Kstart;
6926 $Kend = $rK_weld_right->{$Kstart};
6927 $rK_weld_left->{$Kend} = $Kstart;
6928 my $Knext = $rK_weld_right->{$Kend};
6929 while ( defined($Knext) ) {
6932 $rK_weld_left->{$Kend} = $Kstart;
6933 $Knext = $rK_weld_right->{$Kend};
6935 pop @Klist; # values for last entry are already correct
6936 foreach my $KK (@Klist) {
6938 # Ending indexes must only be shifted to the right for long welds.
6939 # An error here would be due to a programming error introduced in
6940 # the code immediately above.
6941 my $Kend_old = $rK_weld_right->{$KK};
6942 if ( !defined($Kend_old) || $Kend < $Kend_old ) {
6944 "Bad weld link at K=$KK, old end is K=$Kend_old, new end is $Kend\n"
6948 $rK_weld_right->{$KK} = $Kend;
6949 $rweld_len_right_at_K->{$KK} =
6950 $rLL->[$Kend]->[_CUMULATIVE_LENGTH_] -
6951 $rLL->[$KK]->[_CUMULATIVE_LENGTH_];
6958 sub cumulative_length_before_K {
6959 my ( $self, $KK ) = @_;
6960 my $rLL = $self->[_rLL_];
6961 return ( $KK <= 0 ) ? 0 : $rLL->[ $KK - 1 ]->[_CUMULATIVE_LENGTH_];
6964 sub weld_cuddled_blocks {
6967 # Called once per file to handle cuddled formatting
6969 my $rK_weld_left = $self->[_rK_weld_left_];
6970 my $rK_weld_right = $self->[_rK_weld_right_];
6972 # This routine implements the -cb flag by finding the appropriate
6973 # closing and opening block braces and welding them together.
6974 return unless ( %{$rcuddled_block_types} );
6976 my $rLL = $self->[_rLL_];
6977 return unless ( defined($rLL) && @{$rLL} );
6978 my $rbreak_container = $self->[_rbreak_container_];
6980 my $K_opening_container = $self->[_K_opening_container_];
6981 my $K_closing_container = $self->[_K_closing_container_];
6983 my $length_to_opening_seqno = sub {
6985 my $KK = $K_opening_container->{$seqno};
6986 my $lentot = $KK <= 0 ? 0 : $rLL->[ $KK - 1 ]->[_CUMULATIVE_LENGTH_];
6989 my $length_to_closing_seqno = sub {
6991 my $KK = $K_closing_container->{$seqno};
6992 my $lentot = $KK <= 0 ? 0 : $rLL->[ $KK - 1 ]->[_CUMULATIVE_LENGTH_];
6996 my $is_broken_block = sub {
6998 # a block is broken if the input line numbers of the braces differ
6999 # we can only cuddle between broken blocks
7001 my $K_opening = $K_opening_container->{$seqno};
7002 return unless ( defined($K_opening) );
7003 my $K_closing = $K_closing_container->{$seqno};
7004 return unless ( defined($K_closing) );
7005 return $rbreak_container->{$seqno}
7006 || $rLL->[$K_closing]->[_LINE_INDEX_] !=
7007 $rLL->[$K_opening]->[_LINE_INDEX_];
7010 # A stack to remember open chains at all levels: This is a hash rather than
7011 # an array for safety because negative levels can occur in files with
7012 # errors. This allows us to keep processing with negative levels.
7013 # $in_chain{$level} = [$chain_type, $type_sequence];
7015 my $CBO = $rOpts->{'cuddled-break-option'};
7017 # loop over structure items to find cuddled pairs
7019 my $KNEXT = $self->[_K_first_seq_item_];
7020 while ( defined($KNEXT) ) {
7022 $KNEXT = $rLL->[$KNEXT]->[_KNEXT_SEQ_ITEM_];
7023 my $rtoken_vars = $rLL->[$KK];
7024 my $type_sequence = $rtoken_vars->[_TYPE_SEQUENCE_];
7025 if ( !$type_sequence ) {
7026 next if ( $KK == 0 ); # first token in file may not be container
7028 # A fault here implies that an error was made in the little loop at
7029 # the bottom of sub 'respace_tokens' which set the values of
7030 # _KNEXT_SEQ_ITEM_. Or an error has been introduced in the
7031 # loop control lines above.
7032 Fault("sequence = $type_sequence not defined at K=$KK");
7035 # NOTE: we must use the original levels here. They can get changed
7036 # by sub 'weld_nested_containers', so this routine must be called
7037 # before sub 'weld_nested_containers'.
7038 my $last_level = $level;
7039 $level = $rtoken_vars->[_LEVEL_];
7041 if ( $level < $last_level ) { $in_chain{$last_level} = undef }
7042 elsif ( $level > $last_level ) { $in_chain{$level} = undef }
7044 # We are only looking at code blocks
7045 my $token = $rtoken_vars->[_TOKEN_];
7046 my $type = $rtoken_vars->[_TYPE_];
7047 next unless ( $type eq $token );
7049 if ( $token eq '{' ) {
7051 my $block_type = $rtoken_vars->[_BLOCK_TYPE_];
7052 if ( !$block_type ) {
7054 # patch for unrecognized block types which may not be labeled
7055 my $Kp = $self->K_previous_nonblank($KK);
7056 while ( $Kp && $rLL->[$Kp]->[_TYPE_] eq '#' ) {
7057 $Kp = $self->K_previous_nonblank($Kp);
7060 $block_type = $rLL->[$Kp]->[_TOKEN_];
7063 if ( $in_chain{$level} ) {
7065 # we are in a chain and are at an opening block brace.
7066 # See if we are welding this opening brace with the previous
7067 # block brace. Get their identification numbers:
7068 my $closing_seqno = $in_chain{$level}->[1];
7069 my $opening_seqno = $type_sequence;
7071 # The preceding block must be on multiple lines so that its
7072 # closing brace will start a new line.
7073 if ( !$is_broken_block->($closing_seqno) ) {
7074 next unless ( $CBO == 2 );
7075 $rbreak_container->{$closing_seqno} = 1;
7078 # we will let the trailing block be either broken or intact
7079 ## && $is_broken_block->($opening_seqno);
7081 # We can weld the closing brace to its following word ..
7082 my $Ko = $K_closing_container->{$closing_seqno};
7084 if ( defined($Ko) ) {
7085 $Kon = $self->K_next_nonblank($Ko);
7088 # ..unless it is a comment
7089 if ( defined($Kon) && $rLL->[$Kon]->[_TYPE_] ne '#' ) {
7091 # OK to weld these two tokens...
7092 $rK_weld_right->{$Ko} = $Kon;
7093 $rK_weld_left->{$Kon} = $Ko;
7095 # Set flag that we want to break the next container
7096 # so that the cuddled line is balanced.
7097 $rbreak_container->{$opening_seqno} = 1
7104 # We are not in a chain. Start a new chain if we see the
7105 # starting block type.
7106 if ( $rcuddled_block_types->{$block_type} ) {
7107 $in_chain{$level} = [ $block_type, $type_sequence ];
7111 $in_chain{$level} = [ $block_type, $type_sequence ];
7115 elsif ( $token eq '}' ) {
7116 if ( $in_chain{$level} ) {
7118 # We are in a chain at a closing brace. See if this chain
7120 my $Knn = $self->K_next_code($KK);
7123 my $chain_type = $in_chain{$level}->[0];
7124 my $next_nonblank_token = $rLL->[$Knn]->[_TOKEN_];
7126 $rcuddled_block_types->{$chain_type}->{$next_nonblank_token}
7130 # Note that we do not weld yet because we must wait until
7131 # we we are sure that an opening brace for this follows.
7132 $in_chain{$level}->[1] = $type_sequence;
7134 else { $in_chain{$level} = undef }
7141 sub find_nested_pairs {
7144 # This routine is called once per file to do preliminary work needed for
7145 # the --weld-nested option. This information is also needed for adding
7148 my $rLL = $self->[_rLL_];
7149 return unless ( defined($rLL) && @{$rLL} );
7152 my $K_opening_container = $self->[_K_opening_container_];
7153 my $K_closing_container = $self->[_K_closing_container_];
7155 # We define an array of pairs of nested containers
7158 # Names of calling routines can either be marked as 'i' or 'w',
7159 # and they may invoke a sub call with an '->'. We will consider
7160 # any consecutive string of such types as a single unit when making
7161 # weld decisions. We also allow a leading !
7162 my $is_name_type = {
7170 # Loop over all closing container tokens
7171 foreach my $inner_seqno ( keys %{$K_closing_container} ) {
7172 my $K_inner_closing = $K_closing_container->{$inner_seqno};
7174 # See if it is immediately followed by another, outer closing token
7175 my $K_outer_closing = $K_inner_closing + 1;
7176 $K_outer_closing += 1
7177 if ( $K_outer_closing < $Num
7178 && $rLL->[$K_outer_closing]->[_TYPE_] eq 'b' );
7180 next unless ( $K_outer_closing < $Num );
7181 my $outer_seqno = $rLL->[$K_outer_closing]->[_TYPE_SEQUENCE_];
7182 next unless ($outer_seqno);
7183 my $token_outer_closing = $rLL->[$K_outer_closing]->[_TOKEN_];
7184 next unless ( $is_closing_token{$token_outer_closing} );
7186 # Now we have to check the opening tokens.
7187 my $K_outer_opening = $K_opening_container->{$outer_seqno};
7188 my $K_inner_opening = $K_opening_container->{$inner_seqno};
7189 next unless defined($K_outer_opening) && defined($K_inner_opening);
7191 # Verify that the inner opening token is the next container after the
7192 # outer opening token.
7193 my $K_io_check = $rLL->[$K_outer_opening]->[_KNEXT_SEQ_ITEM_];
7194 next unless defined($K_io_check);
7195 if ( $K_io_check != $K_inner_opening ) {
7197 # The inner opening container does not immediately follow the outer
7198 # opening container, but we may still allow a weld if they are
7199 # separated by a sub signature. For example, we may have something
7200 # like this, where $K_io_check may be at the first 'x' instead of
7201 # 'io'. So we need to hop over the signature and see if we arrive
7206 # $obj->then( sub ( $code ) {
7208 # return $c->render(text => '', status => $code);
7213 next if $rLL->[$K_inner_opening]->[_BLOCK_TYPE_] ne 'sub';
7214 next if $rLL->[$K_io_check]->[_TOKEN_] ne '(';
7215 my $seqno_signature = $rLL->[$K_io_check]->[_TYPE_SEQUENCE_];
7216 next unless defined($seqno_signature);
7217 my $K_signature_closing = $K_closing_container->{$seqno_signature};
7218 next unless defined($K_signature_closing);
7219 my $K_test = $rLL->[$K_signature_closing]->[_KNEXT_SEQ_ITEM_];
7221 unless ( defined($K_test) && $K_test == $K_inner_opening );
7223 # OK, we have arrived at 'io' in the above diagram. We should put
7224 # a limit on the length or complexity of the signature here. There
7225 # is no perfect way to do this, one way is to put a limit on token
7226 # count. For consistency with older versions, we should allow a
7227 # signature with a single variable to weld, but not with
7228 # multiple variables. A single variable as in 'sub ($code) {' can
7229 # have a $Kdiff of 2 to 4, depending on spacing.
7231 # But two variables like 'sub ($v1,$v2) {' can have a diff of 4 to
7232 # 7, depending on spacing. So to keep formatting consistent with
7233 # previous versions, we will also avoid welding if there is a comma
7236 my $Kdiff = $K_signature_closing - $K_io_check;
7237 next if ( $Kdiff > 4 );
7240 foreach my $KK ( $K_io_check + 1 .. $K_signature_closing - 1 ) {
7241 if ( $rLL->[$KK]->[_TYPE_] eq ',' ) { $saw_comma = 1; last }
7243 next if ($saw_comma);
7246 # Yes .. this is a possible nesting pair.
7247 # They can be separated by a small amount.
7248 my $K_diff = $K_inner_opening - $K_outer_opening;
7250 # Count nonblank characters separating them.
7251 if ( $K_diff < 0 ) { next } # Shouldn't happen
7252 my $Kn = $K_outer_opening;
7253 my $nonblank_count = 0;
7257 # Here is an example of a long identifier chain which counts as a
7258 # single nonblank here (this spans about 10 K indexes):
7259 # if ( !Boucherot::SetOfConnections->new->handler->execute(
7262 my $Kn_first = $K_outer_opening;
7263 my $Kn_last_nonblank;
7265 my $Kn = $K_outer_opening + 1 ;
7266 $Kn <= $K_inner_opening ;
7270 next if ( $rLL->[$Kn]->[_TYPE_] eq 'b' );
7271 if ( !$nonblank_count ) { $Kn_first = $Kn }
7272 if ( $Kn eq $K_inner_opening ) { $nonblank_count++; last; }
7273 $Kn_last_nonblank = $Kn;
7275 # skip chain of identifier tokens
7276 my $last_type = $type;
7277 my $last_is_name = $is_name;
7278 $type = $rLL->[$Kn]->[_TYPE_];
7279 $is_name = $is_name_type->{$type};
7280 next if ( $is_name && $last_is_name );
7283 last if ( $nonblank_count > 2 );
7286 # Patch for b1104: do not weld to a paren preceded by sort/map/grep
7287 # because the special line break rules may cause a blinking state
7288 if ( defined($Kn_last_nonblank)
7289 && $rLL->[$K_inner_opening]->[_TOKEN_] eq '('
7290 && $rLL->[$Kn_last_nonblank]->[_TYPE_] eq 'k' )
7292 my $token = $rLL->[$Kn_last_nonblank]->[_TOKEN_];
7294 # Turn off welding at sort/map/grep (
7295 if ( $is_sort_map_grep{$token} ) { $nonblank_count = 10 }
7300 # adjacent opening containers, like: do {{
7301 $nonblank_count == 1
7303 # short item following opening paren, like: fun( yyy (
7304 || ( $nonblank_count == 2
7305 && $rLL->[$K_outer_opening]->[_TOKEN_] eq '(' )
7307 # anonymous sub + prototype or sig: )->then( sub ($code) {
7308 # ... but it seems best not to stack two structural blocks, like
7310 # sub make_anon_with_my_sub { sub {
7311 # because it probably hides the structure a little too much.
7312 || ( $rLL->[$K_inner_opening]->[_BLOCK_TYPE_] eq 'sub'
7313 && $rLL->[$Kn_first]->[_TOKEN_] eq 'sub'
7314 && !$rLL->[$K_outer_opening]->[_BLOCK_TYPE_] )
7318 [ $inner_seqno, $outer_seqno, $K_inner_closing ];
7323 # The weld routine expects the pairs in order in the form
7324 # [$seqno_inner, $seqno_outer]
7325 # And they must be in the same order as the inner closing tokens
7326 # (otherwise, welds of three or more adjacent tokens will not work). The K
7327 # value of this inner closing token has temporarily been stored for
7331 # Drop the K index after sorting (it would cause trouble downstream)
7332 map { [ $_->[0], $_->[1] ] }
7334 # Sort on the K values
7335 sort { $a->[2] <=> $b->[2] } @nested_pairs;
7337 return \@nested_pairs;
7340 sub is_excluded_weld {
7342 # decide if this weld is excluded by user request
7343 my ( $self, $KK, $is_leading ) = @_;
7344 my $rLL = $self->[_rLL_];
7345 my $rtoken_vars = $rLL->[$KK];
7346 my $token = $rtoken_vars->[_TOKEN_];
7347 my $rflags = $weld_nested_exclusion_rules{$token};
7348 return 0 unless ( defined($rflags) );
7349 my $flag = $is_leading ? $rflags->[0] : $rflags->[1];
7350 return 0 unless ( defined($flag) );
7351 return 1 if $flag eq '*';
7353 my ( $is_f, $is_k, $is_w );
7354 my $Kp = $self->K_previous_nonblank($KK);
7355 if ( defined($Kp) ) {
7356 my $seqno = $rtoken_vars->[_TYPE_SEQUENCE_];
7357 my $type_p = $rLL->[$Kp]->[_TYPE_];
7360 $is_k = $type_p eq 'k';
7363 $is_f = $self->[_ris_function_call_paren_]->{$seqno};
7365 # either keyword or function call?
7366 $is_w = $is_k || $is_f;
7370 if ( $flag eq 'k' ) { $match = $is_k }
7371 elsif ( $flag eq 'K' ) { $match = !$is_k }
7372 elsif ( $flag eq 'f' ) { $match = $is_f }
7373 elsif ( $flag eq 'F' ) { $match = !$is_f }
7374 elsif ( $flag eq 'w' ) { $match = $is_w }
7375 elsif ( $flag eq 'W' ) { $match = !$is_w }
7379 # types needed for welding RULE 6
7380 my %type_ok_after_bareword;
7384 my @q = qw# => -> { ( [ #;
7385 @type_ok_after_bareword{@q} = (1) x scalar(@q);
7388 use constant DEBUG_WELD => 0;
7390 sub setup_new_weld_measurements {
7392 # Define quantities to check for excess line lengths when welded.
7393 # Called by sub 'weld_nested_containers' and sub 'weld_nested_quotes'
7395 my ( $self, $Kouter_opening, $Kinner_opening ) = @_;
7397 # Given indexes of outer and inner opening containers to be welded:
7398 # $Kouter_opening, $Kinner_opening
7400 # Returns these variables:
7401 # $new_weld_ok = true (new weld ok) or false (do not start new weld)
7402 # $starting_indent = starting indentation
7403 # $starting_lentot = starting cumulative length
7404 # $msg = diagnostic message for debugging
7406 my $rLL = $self->[_rLL_];
7407 my $rlines = $self->[_rlines_];
7411 my $starting_lentot;
7412 my $maximum_text_length;
7415 my $iline_oo = $rLL->[$Kouter_opening]->[_LINE_INDEX_];
7416 my $rK_range = $rlines->[$iline_oo]->{_rK_range};
7417 my ( $Kfirst, $Klast ) = @{$rK_range};
7419 # Define a reference index from which to start measuring
7421 my $Kprev = $self->K_previous_nonblank($Kfirst);
7422 if ( defined($Kprev) ) {
7424 # The -iob and -wn flags do not work well together. To avoid
7425 # blinking states we have to override -iob at certain key line
7427 $self->[_ris_essential_old_breakpoint_]->{$Kprev} = 1;
7429 # Back up and count length from a token like '=' or '=>' if -lp
7430 # is used (this fixes b520)
7431 # ...or if a break is wanted before there
7432 my $type_prev = $rLL->[$Kprev]->[_TYPE_];
7433 if ( $rOpts_line_up_parentheses
7434 || $want_break_before{$type_prev} )
7436 if ( substr( $type_prev, 0, 1 ) eq '=' ) {
7439 # Fix for b1144 and b1112: backup to the first nonblank
7440 # character before the =>, or to the start of its line.
7441 if ( $type_prev eq '=>' ) {
7442 my $iline_prev = $rLL->[$Kprev]->[_LINE_INDEX_];
7443 my $rK_range = $rlines->[$iline_prev]->{_rK_range};
7444 my ( $Kfirst, $Klast ) = @{$rK_range};
7445 for ( my $KK = $Kref - 1 ; $KK >= $Kfirst ; $KK-- ) {
7446 next if ( $rLL->[$KK]->[_TYPE_] eq 'b' );
7455 # Define the starting measurements we will need
7457 $Kref <= 0 ? 0 : $rLL->[ $Kref - 1 ]->[_CUMULATIVE_LENGTH_];
7458 $starting_level = $rLL->[$Kref]->[_LEVEL_];
7459 $starting_ci = $rLL->[$Kref]->[_CI_LEVEL_];
7461 $maximum_text_length = $maximum_text_length_at_level[$starting_level] -
7462 $starting_ci * $rOpts_continuation_indentation;
7464 # Now fix these if necessary to avoid known problems...
7466 # FIX1: Switch to using the outer opening token as the reference
7467 # point if a line break before it would make a longer line.
7468 # Fixes case b1055 and is also an alternate fix for b1065.
7469 my $starting_level_oo = $rLL->[$Kouter_opening]->[_LEVEL_];
7470 if ( $Kref < $Kouter_opening ) {
7471 my $starting_ci_oo = $rLL->[$Kouter_opening]->[_CI_LEVEL_];
7472 my $lentot_oo = $rLL->[ $Kouter_opening - 1 ]->[_CUMULATIVE_LENGTH_];
7473 my $maximum_text_length_oo =
7474 $maximum_text_length_at_level[$starting_level_oo] -
7475 $starting_ci_oo * $rOpts_continuation_indentation;
7477 # The excess length to any cumulative length K = lenK is either
7478 # $excess = $lenk - ($lentot + $maximum_text_length), or
7479 # $excess = $lenk - ($lentot_oo + $maximum_text_length_oo),
7480 # so the worst case (maximum excess) corresponds to the configuration
7481 # with minimum value of the sum: $lentot + $maximum_text_length
7482 if ( $lentot_oo + $maximum_text_length_oo <
7483 $starting_lentot + $maximum_text_length )
7485 $Kref = $Kouter_opening;
7486 $starting_level = $starting_level_oo;
7487 $starting_ci = $starting_ci_oo;
7488 $starting_lentot = $lentot_oo;
7489 $maximum_text_length = $maximum_text_length_oo;
7493 my $new_weld_ok = 1;
7495 # FIX2 for b1020: Avoid problem areas with the -wn -lp combination. The
7496 # combination -wn -lp -dws -naws does not work well and can cause blinkers.
7497 # It will probably only occur in stress testing. For this situation we
7498 # will only start a new weld if we start at a 'good' location.
7499 # - Added 'if' to fix case b1032.
7500 # - Require blank before certain previous characters to fix b1111.
7501 # - Add ';' to fix case b1139
7502 # - Convert from '$ok_to_weld' to '$new_weld_ok' to fix b1162.
7504 && $rOpts_line_up_parentheses
7505 && $rOpts_delete_old_whitespace
7506 && !$rOpts_add_whitespace
7507 && defined($Kprev) )
7509 my $type_first = $rLL->[$Kfirst]->[_TYPE_];
7510 my $token_first = $rLL->[$Kfirst]->[_TOKEN_];
7511 my $type_prev = $rLL->[$Kprev]->[_TYPE_];
7513 if ( $Kprev >= 0 ) { $type_pp = $rLL->[ $Kprev - 1 ]->[_TYPE_] }
7515 $type_prev =~ /^[\,\.\;]/
7516 || $type_prev =~ /^[=\{\[\(\L]/ && $type_pp eq 'b'
7517 || $type_first =~ /^[=\,\.\;\{\[\(\L]/
7518 || $type_first eq '||'
7519 || ( $type_first eq 'k' && $token_first eq 'if'
7520 || $token_first eq 'or' )
7524 "Skipping weld: poor break with -lp and ci at type_first='$type_first' type_prev='$type_prev'\n";
7529 return ( $new_weld_ok, $maximum_text_length, $starting_lentot, $msg );
7532 sub excess_line_length_for_Krange {
7533 my ( $self, $Kfirst, $Klast ) = @_;
7535 # returns $excess_length =
7536 # by how many characters a line composed of tokens $Kfirst .. $Klast will
7537 # exceed the allowed line length
7539 my $rLL = $self->[_rLL_];
7540 my $length_before_Kfirst =
7543 : $rLL->[ $Kfirst - 1 ]->[_CUMULATIVE_LENGTH_];
7545 # backup before a side comment if necessary
7547 if ( $rOpts_ignore_side_comment_lengths
7548 && $rLL->[$Klast]->[_TYPE_] eq '#' )
7550 my $Kprev = $self->K_previous_nonblank($Klast);
7551 if ( defined($Kprev) && $Kprev >= $Kfirst ) { $Kend = $Kprev }
7554 # get the length of the text
7555 my $length = $rLL->[$Kend]->[_CUMULATIVE_LENGTH_] - $length_before_Kfirst;
7557 # get the size of the text window
7558 my $level = $rLL->[$Kfirst]->[_LEVEL_];
7559 my $ci_level = $rLL->[$Kfirst]->[_CI_LEVEL_];
7560 my $max_text_length = $maximum_text_length_at_level[$level] -
7561 $ci_level * $rOpts_continuation_indentation;
7563 my $excess_length = $length - $max_text_length;
7567 "Kfirst=$Kfirst, Klast=$Klast, Kend=$Kend, level=$level, ci=$ci_level, max_text_length=$max_text_length, length=$length\n";
7568 return ($excess_length);
7571 sub weld_nested_containers {
7574 # Called once per file for option '--weld-nested-containers'
7576 my $rK_weld_left = $self->[_rK_weld_left_];
7577 my $rK_weld_right = $self->[_rK_weld_right_];
7579 # This routine implements the -wn flag by "welding together"
7580 # the nested closing and opening tokens which were previously
7581 # identified by sub 'find_nested_pairs'. "welding" simply
7582 # involves setting certain hash values which will be checked
7583 # later during formatting.
7585 my $rLL = $self->[_rLL_];
7586 my $rlines = $self->[_rlines_];
7587 my $K_opening_container = $self->[_K_opening_container_];
7588 my $K_closing_container = $self->[_K_closing_container_];
7590 # Find nested pairs of container tokens for any welding.
7591 my $rnested_pairs = $self->find_nested_pairs();
7593 # Return unless there are nested pairs to weld
7594 return unless defined($rnested_pairs) && @{$rnested_pairs};
7596 my $rOpts_break_at_old_method_breakpoints =
7597 $rOpts->{'break-at-old-method-breakpoints'};
7599 # This array will hold the sequence numbers of the tokens to be welded.
7602 # Variables needed for estimating line lengths
7603 my $maximum_text_length; # maximum spaces available for text
7604 my $starting_lentot; # cumulative text to start of current line
7606 my $iline_outer_opening = -1;
7607 my $weld_count_this_start = 0;
7610 1 + max( $rOpts_indent_columns, $rOpts_continuation_indentation );
7612 my $length_to_opening_seqno = sub {
7614 my $KK = $K_opening_container->{$seqno};
7615 my $lentot = defined($KK)
7616 && $KK > 0 ? $rLL->[ $KK - 1 ]->[_CUMULATIVE_LENGTH_] : 0;
7620 my $length_to_closing_seqno = sub {
7622 my $KK = $K_closing_container->{$seqno};
7623 my $lentot = defined($KK)
7624 && $KK > 0 ? $rLL->[ $KK - 1 ]->[_CUMULATIVE_LENGTH_] : 0;
7629 # _oo=outer opening, i.e. first of { {
7630 # _io=inner opening, i.e. second of { {
7631 # _oc=outer closing, i.e. second of } {
7632 # _ic=inner closing, i.e. first of } }
7636 # Main loop over nested pairs...
7637 # We are working from outermost to innermost pairs so that
7638 # level changes will be complete when we arrive at the inner pairs.
7639 while ( my $item = pop( @{$rnested_pairs} ) ) {
7640 my ( $inner_seqno, $outer_seqno ) = @{$item};
7642 my $Kouter_opening = $K_opening_container->{$outer_seqno};
7643 my $Kinner_opening = $K_opening_container->{$inner_seqno};
7644 my $Kouter_closing = $K_closing_container->{$outer_seqno};
7645 my $Kinner_closing = $K_closing_container->{$inner_seqno};
7647 # RULE: do not weld if inner container has <= 3 tokens unless the next
7648 # token is a heredoc (so we know there will be multiple lines)
7649 if ( $Kinner_closing - $Kinner_opening <= 4 ) {
7650 my $Knext_nonblank = $self->K_next_nonblank($Kinner_opening);
7651 next unless defined($Knext_nonblank);
7652 my $type = $rLL->[$Knext_nonblank]->[_TYPE_];
7653 next unless ( $type eq 'h' );
7656 my $outer_opening = $rLL->[$Kouter_opening];
7657 my $inner_opening = $rLL->[$Kinner_opening];
7658 my $outer_closing = $rLL->[$Kouter_closing];
7659 my $inner_closing = $rLL->[$Kinner_closing];
7661 # RULE: do not weld to a hash brace. The reason is that it has a very
7662 # strong bond strength to the next token, so a line break after it
7663 # may not work. Previously we allowed welding to something like @{
7664 # but that caused blinking states (cases b751, b779).
7665 if ( $inner_opening->[_TYPE_] eq 'L' ) {
7669 # RULE: do not weld to a square bracket which does not contain commas
7670 if ( $inner_opening->[_TYPE_] eq '[' ) {
7671 my $rtype_count = $self->[_rtype_count_by_seqno_]->{$inner_seqno};
7672 next unless ($rtype_count);
7673 my $comma_count = $rtype_count->{','};
7674 next unless ($comma_count);
7676 # Do not weld if there is text before a '[' such as here:
7677 # curr_opt ( @beg [2,5] )
7678 # It will not break into the desired sandwich structure.
7679 # This fixes case b109, 110.
7680 my $Kdiff = $Kinner_opening - $Kouter_opening;
7681 next if ( $Kdiff > 2 );
7684 && $rLL->[ $Kouter_opening + 1 ]->[_TYPE_] ne 'b' );
7688 # Set flag saying if this pair starts a new weld
7689 my $starting_new_weld = !( @welds && $outer_seqno == $welds[-1]->[0] );
7691 # Set flag saying if this pair is adjacent to the previous nesting pair
7692 # (even if previous pair was rejected as a weld)
7693 my $touch_previous_pair =
7694 defined($previous_pair) && $outer_seqno == $previous_pair->[0];
7695 $previous_pair = $item;
7697 my $do_not_weld_rule = 0;
7699 my $is_one_line_weld;
7701 my $iline_oo = $outer_opening->[_LINE_INDEX_];
7702 my $iline_io = $inner_opening->[_LINE_INDEX_];
7703 my $iline_ic = $inner_closing->[_LINE_INDEX_];
7704 my $iline_oc = $outer_closing->[_LINE_INDEX_];
7705 my $token_oo = $outer_opening->[_TOKEN_];
7707 my $is_multiline_weld =
7708 $iline_oo == $iline_io
7709 && $iline_ic == $iline_oc
7710 && $iline_io != $iline_ic;
7713 my $token_io = $rLL->[$Kinner_opening]->[_TOKEN_];
7714 my $len_oo = $rLL->[$Kouter_opening]->[_CUMULATIVE_LENGTH_];
7715 my $len_io = $rLL->[$Kinner_opening]->[_CUMULATIVE_LENGTH_];
7717 Pair seqo=$outer_seqno seqi=$inner_seqno lines: loo=$iline_oo lio=$iline_io lic=$iline_ic loc=$iline_oc
7718 Koo=$Kouter_opening Kio=$Kinner_opening Kic=$Kinner_closing Koc=$Kouter_closing lenoo=$len_oo lenio=$len_io
7719 tokens '$token_oo' .. '$token_io'
7723 # If this pair is not adjacent to the previous pair (skipped or not),
7724 # then measure lengths from the start of line of oo.
7726 !$touch_previous_pair
7728 # Also do this if restarting at a new line; fixes case b965, s001
7729 || ( !$weld_count_this_start && $iline_oo > $iline_outer_opening )
7733 # Remember the line we are using as a reference
7734 $iline_outer_opening = $iline_oo;
7735 $weld_count_this_start = 0;
7737 ( my $new_weld_ok, $maximum_text_length, $starting_lentot, my $msg )
7738 = $self->setup_new_weld_measurements( $Kouter_opening,
7743 && ( $iline_oo != $iline_io
7744 || $iline_ic != $iline_oc )
7747 if (DEBUG_WELD) { print $msg}
7751 my $rK_range = $rlines->[$iline_oo]->{_rK_range};
7752 my ( $Kfirst, $Klast ) = @{$rK_range};
7754 # An existing one-line weld is a line in which
7755 # (1) the containers are all on one line, and
7756 # (2) the line does not exceed the allowable length, and
7757 # This flag is used to avoid creating blinkers.
7758 # FIX1: Changed 'excess_length_to_K' to 'excess_length_of_line'
7759 # to get exact lengths and fix b604 b605.
7760 if ( $iline_oo == $iline_oc ) {
7762 # All the tokens are on one line, now check their length
7764 $self->excess_line_length_for_Krange( $Kfirst, $Klast );
7765 if ( $excess <= 0 ) {
7767 # All tokens are on one line and fit. This is a valid
7768 # existing one-line weld except for some edge cases
7771 # FIX2: Patch for b1114: add a tolerance of one level if
7772 # this line has an unbalanced start. This helps prevent
7773 # blinkers in unusual cases for lines near the length limit
7774 # by making it more likely that RULE 2 will prevent a weld.
7775 # FIX3: for b1131: only use level difference in -lp mode.
7776 # FIX4: for b1141, b1142: reduce the tolerance for longer
7778 if ( $rOpts_line_up_parentheses
7779 && $outer_opening->[_LEVEL_] -
7780 $rLL->[$Kfirst]->[_LEVEL_] )
7783 # We only need a tolerance if the leading text before
7784 # the first opening token is shorter than the
7785 # indentation length. For simplicity we just use the
7786 # length of the first token here. If necessary, we
7787 # could be more exact in the future and find the
7788 # total length up to the first opening token.
7789 # See cases b1114, b1141, b1142.
7791 $rOpts_indent_columns -
7792 $rLL->[$Kfirst]->[_TOKEN_LENGTH_] );
7794 if ( $excess + $tolx <= 0 ) {
7795 $is_one_line_weld = 1;
7799 $is_one_line_weld = 1;
7804 # DO-NOT-WELD RULE 1:
7805 # Do not weld something that looks like the start of a two-line
7806 # function call, like this: <<snippets/wn6.in>>
7807 # $trans->add_transformation(
7808 # PDL::Graphics::TriD::Scale->new( $sx, $sy, $sz ) );
7809 # We will look for a semicolon after the closing paren.
7811 # We want to weld something complex, like this though
7812 # my $compass = uc( opposite_direction( line_to_canvas_direction(
7813 # @{ $coords[0] }, @{ $coords[1] } ) ) );
7814 # Otherwise we will get a 'blinker'. For example, the following
7815 # would become a blinker without this rule:
7816 # $Self->_Add( $SortOrderDisplay{ $Field
7817 # ->GenerateFieldForSelectSQL() } );
7818 # But it is okay to weld a two-line statement if it looks like
7819 # it was already welded, meaning that the two opening containers are
7820 # on a different line that the two closing containers. This is
7821 # necessary to prevent blinking of something like this with
7822 # perltidy -wn -pbp (starting indentation two levels deep):
7824 # $top_label->set_text( gettext(
7825 # "Unable to create personal directory - check permissions.") );
7827 if ( $iline_oc == $iline_oo + 1
7828 && $iline_io == $iline_ic
7829 && $token_oo eq '(' )
7832 # Look for following semicolon...
7833 my $Knext_nonblank = $self->K_next_nonblank($Kouter_closing);
7834 my $next_nonblank_type =
7835 defined($Knext_nonblank)
7836 ? $rLL->[$Knext_nonblank]->[_TYPE_]
7838 if ( $next_nonblank_type eq ';' ) {
7840 # Then do not weld if no other containers between inner
7841 # opening and closing.
7842 my $Knext_seq_item = $inner_opening->[_KNEXT_SEQ_ITEM_];
7843 if ( $Knext_seq_item == $Kinner_closing ) {
7844 $do_not_weld_rule = 1;
7848 } ## end starting new weld sequence
7850 # DO-NOT-WELD RULE 2:
7851 # Do not weld an opening paren to an inner one line brace block
7852 # We will just use old line numbers for this test and require
7853 # iterations if necessary for convergence
7855 # For example, otherwise we could cause the opening paren
7856 # in the following example to separate from the caller name
7859 # $_[0]->code_handler
7860 # ( sub { $more .= $_[1] . ":" . $_[0] . "\n" } );
7862 # Here is another example where we do not want to weld:
7863 # $wrapped->add_around_modifier(
7864 # sub { push @tracelog => 'around 1'; $_[0]->(); } );
7866 # If the one line sub block gets broken due to length or by the
7867 # user, then we can weld. The result will then be:
7868 # $wrapped->add_around_modifier( sub {
7869 # push @tracelog => 'around 1';
7873 # Updated to fix cases b1082 b1102 b1106 b1115:
7874 # Also, do not weld to an intact inner block if the outer opening token
7875 # is on a different line. For example, this prevents oscillation
7876 # between these two states in case b1106:
7879 # ($_,[$self->$_(@_[1..$#_])])
7883 # $_, [ $self->$_( @_[ 1 .. $#_ ] ) ]
7886 # The effect of this change on typical code is very minimal. Sometimes
7887 # it may take a second iteration to converge, but this gives protection
7890 if ( !$do_not_weld_rule
7891 && !$is_one_line_weld
7892 && $iline_ic == $iline_io )
7894 $do_not_weld_rule = 2
7895 if ( $token_oo eq '(' || $iline_oo != $iline_io );
7898 # DO-NOT-WELD RULE 3:
7899 # Do not weld if this makes our line too long.
7900 # Use a tolerance which depends on if the old tokens were welded
7901 # (fixes cases b746 b748 b749 b750 b752 b753 b754 b755 b756 b758 b759)
7902 if ( !$do_not_weld_rule ) {
7904 # Measure to a little beyond the inner opening token if it is
7905 # followed by a bare word, which may have unusual line break rules.
7907 # NOTE: Originally this was OLD RULE 6: do not weld to a container
7908 # which is followed on the same line by an unknown bareword token.
7909 # This can cause blinkers (cases b626, b611). But OK to weld one
7910 # line welds to fix cases b1057 b1064. For generality, OLD RULE 6
7911 # has been merged into RULE 3 here to also fix cases b1078 b1091.
7913 my $K_for_length = $Kinner_opening;
7914 my $Knext_io = $self->K_next_nonblank($Kinner_opening);
7915 next unless ( defined($Knext_io) ); # shouldn't happen
7916 my $type_io_next = $rLL->[$Knext_io]->[_TYPE_];
7918 # Note: may need to eventually also include other types here,
7919 # such as 'Z' and 'Y': if ($type_io_next =~ /^[ZYw]$/) {
7920 if ( $type_io_next eq 'w' ) {
7921 my $Knext_io2 = $self->K_next_nonblank($Knext_io);
7922 next unless ( defined($Knext_io2) );
7923 my $type_io_next2 = $rLL->[$Knext_io2]->[_TYPE_];
7924 if ( !$type_ok_after_bareword{$type_io_next2} ) {
7925 $K_for_length = $Knext_io2;
7929 # Use a tolerance for welds over multiple lines to avoid blinkers.
7930 # We can use zero tolerance if it looks like we are working on an
7933 $is_one_line_weld || $is_multiline_weld
7937 # By how many characters does this exceed the text window?
7939 $self->cumulative_length_before_K($K_for_length) -
7940 $starting_lentot + 1 + $tol -
7941 $maximum_text_length;
7943 # Old patch: Use '>=0' instead of '> 0' here to fix cases b995 b998
7944 # b1000 b1001 b1007 b1008 b1009 b1010 b1011 b1012 b1016 b1017 b1018
7945 # Revised patch: New tolerance definition allows going back to '> 0'
7946 # here. This fixes case b1124. See also cases b1087 and b1087a.
7947 if ( $excess > 0 ) { $do_not_weld_rule = 3 }
7951 "RULE 3 test: excess length to K=$Kinner_opening is $excess > 0 with tol= $tol ?) \n";
7955 # DO-NOT-WELD RULE 4; implemented for git#10:
7956 # Do not weld an opening -ce brace if the next container is on a single
7957 # line, different from the opening brace. (This is very rare). For
7958 # example, given the following with -ce, we will avoid joining the {
7962 # [ $_, length($_) ]
7965 # because this would produce a terminal one-line block:
7967 # } else { [ $_, length($_) ] }
7969 # which may not be what is desired. But given this input:
7971 # } else { [ $_, length($_) ] }
7973 # then we will do the weld and retain the one-line block
7974 if ( !$do_not_weld_rule && $rOpts->{'cuddled-else'} ) {
7975 my $block_type = $rLL->[$Kouter_opening]->[_BLOCK_TYPE_];
7976 if ( $block_type && $rcuddled_block_types->{'*'}->{$block_type} ) {
7977 my $io_line = $inner_opening->[_LINE_INDEX_];
7978 my $ic_line = $inner_closing->[_LINE_INDEX_];
7979 my $oo_line = $outer_opening->[_LINE_INDEX_];
7980 if ( $oo_line < $io_line && $ic_line == $io_line ) {
7981 $do_not_weld_rule = 4;
7986 # DO-NOT-WELD RULE 5: do not include welds excluded by user
7989 && %weld_nested_exclusion_rules
7990 && ( $self->is_excluded_weld( $Kouter_opening, $starting_new_weld )
7991 || $self->is_excluded_weld( $Kinner_opening, 0 ) )
7994 $do_not_weld_rule = 5;
7997 # DO-NOT-WELD RULE 6: This has been merged into RULE 3 above.
7999 # DO-NOT-WELD RULE 7: Do not weld if this conflicts with -bom
8001 if ( !$do_not_weld_rule
8002 && $rOpts_break_at_old_method_breakpoints
8003 && $iline_io > $iline_oo )
8006 foreach my $iline ( $iline_oo + 1 .. $iline_io ) {
8007 my $rK_range = $rlines->[$iline]->{_rK_range};
8008 next unless defined($rK_range);
8009 my ( $Kfirst, $Klast ) = @{$rK_range};
8010 next unless defined($Kfirst);
8011 if ( $rLL->[$Kfirst]->[_TYPE_] eq '->' ) {
8012 $do_not_weld_rule = 7;
8018 if ($do_not_weld_rule) {
8020 # After neglecting a pair, we start measuring from start of point io
8021 my $starting_level = $inner_opening->[_LEVEL_];
8022 my $starting_ci_level = $inner_opening->[_CI_LEVEL_];
8024 $self->cumulative_length_before_K($Kinner_opening);
8025 $maximum_text_length =
8026 $maximum_text_length_at_level[$starting_level] -
8027 $starting_ci_level * $rOpts_continuation_indentation;
8030 $Msg .= "Not welding due to RULE $do_not_weld_rule\n";
8034 # Normally, a broken pair should not decrease indentation of
8035 # intermediate tokens:
8036 ## if ( $last_pair_broken ) { next }
8037 # However, for long strings of welded tokens, such as '{{{{{{...'
8038 # we will allow broken pairs to also remove indentation.
8039 # This will keep very long strings of opening and closing
8040 # braces from marching off to the right. We will do this if the
8041 # number of tokens in a weld before the broken weld is 4 or more.
8042 # This rule will mainly be needed for test scripts, since typical
8043 # welds have fewer than about 4 welded tokens.
8044 if ( !@welds || @{ $welds[-1] } < 4 ) { next }
8047 # otherwise start new weld ...
8048 elsif ($starting_new_weld) {
8049 $weld_count_this_start++;
8051 $Msg .= "Starting new weld\n";
8056 $rK_weld_right->{$Kouter_opening} = $Kinner_opening;
8057 $rK_weld_left->{$Kinner_opening} = $Kouter_opening;
8059 $rK_weld_right->{$Kinner_closing} = $Kouter_closing;
8060 $rK_weld_left->{$Kouter_closing} = $Kinner_closing;
8063 # ... or extend current weld
8065 $weld_count_this_start++;
8067 $Msg .= "Extending current weld\n";
8070 unshift @{ $welds[-1] }, $inner_seqno;
8071 $rK_weld_right->{$Kouter_opening} = $Kinner_opening;
8072 $rK_weld_left->{$Kinner_opening} = $Kouter_opening;
8074 $rK_weld_right->{$Kinner_closing} = $Kouter_closing;
8075 $rK_weld_left->{$Kouter_closing} = $Kinner_closing;
8078 # After welding, reduce the indentation level if all intermediate tokens
8079 my $dlevel = $outer_opening->[_LEVEL_] - $inner_opening->[_LEVEL_];
8080 if ( $dlevel != 0 ) {
8081 my $Kstart = $Kinner_opening;
8082 my $Kstop = $Kinner_closing;
8083 for ( my $KK = $Kstart ; $KK <= $Kstop ; $KK++ ) {
8084 $rLL->[$KK]->[_LEVEL_] += $dlevel;
8087 # Copy opening ci level to help break at = for -lp mode (case b1124)
8088 $rLL->[$Kinner_opening]->[_CI_LEVEL_] =
8089 $rLL->[$Kouter_opening]->[_CI_LEVEL_];
8091 # But do not copy the closing ci level ... it can give poor results
8092 ## $rLL->[$Kinner_closing]->[_CI_LEVEL_] =
8093 ## $rLL->[$Kouter_closing]->[_CI_LEVEL_];
8100 sub weld_nested_quotes {
8102 # Called once per file for option '--weld-nested-containers'. This
8103 # does welding on qw quotes.
8107 # See if quotes are excluded from welding
8108 my $rflags = $weld_nested_exclusion_rules{'q'};
8109 return if ( defined($rflags) && defined( $rflags->[1] ) );
8111 my $rK_weld_left = $self->[_rK_weld_left_];
8112 my $rK_weld_right = $self->[_rK_weld_right_];
8114 my $rLL = $self->[_rLL_];
8115 return unless ( defined($rLL) && @{$rLL} );
8118 my $K_opening_container = $self->[_K_opening_container_];
8119 my $K_closing_container = $self->[_K_closing_container_];
8120 my $rlines = $self->[_rlines_];
8122 my $starting_lentot;
8123 my $maximum_text_length;
8125 my $is_single_quote = sub {
8126 my ( $Kbeg, $Kend, $quote_type ) = @_;
8127 foreach my $K ( $Kbeg .. $Kend ) {
8128 my $test_type = $rLL->[$K]->[_TYPE_];
8129 next if ( $test_type eq 'b' );
8130 return if ( $test_type ne $quote_type );
8135 # Length tolerance - same as previously used for sub weld_nested
8137 1 + max( $rOpts_indent_columns, $rOpts_continuation_indentation );
8139 # look for single qw quotes nested in containers
8140 my $KNEXT = $self->[_K_first_seq_item_];
8141 while ( defined($KNEXT) ) {
8143 $KNEXT = $rLL->[$KNEXT]->[_KNEXT_SEQ_ITEM_];
8144 my $rtoken_vars = $rLL->[$KK];
8145 my $outer_seqno = $rtoken_vars->[_TYPE_SEQUENCE_];
8146 if ( !$outer_seqno ) {
8147 next if ( $KK == 0 ); # first token in file may not be container
8149 # A fault here implies that an error was made in the little loop at
8150 # the bottom of sub 'respace_tokens' which set the values of
8151 # _KNEXT_SEQ_ITEM_. Or an error has been introduced in the
8152 # loop control lines above.
8153 Fault("sequence = $outer_seqno not defined at K=$KK");
8156 my $token = $rtoken_vars->[_TOKEN_];
8157 if ( $is_opening_token{$token} ) {
8159 # see if the next token is a quote of some type
8162 if ( $Kn < $Num && $rLL->[$Kn]->[_TYPE_] eq 'b' );
8163 next unless ( $Kn < $Num );
8165 my $next_token = $rLL->[$Kn]->[_TOKEN_];
8166 my $next_type = $rLL->[$Kn]->[_TYPE_];
8168 unless ( ( $next_type eq 'q' || $next_type eq 'Q' )
8169 && $next_token =~ /^q/ );
8171 # The token before the closing container must also be a quote
8172 my $Kouter_closing = $K_closing_container->{$outer_seqno};
8173 my $Kinner_closing = $self->K_previous_nonblank($Kouter_closing);
8174 next unless $rLL->[$Kinner_closing]->[_TYPE_] eq $next_type;
8176 # This is an inner opening container
8177 my $Kinner_opening = $Kn;
8179 # Do not weld to single-line quotes. Nothing is gained, and it may
8181 next if ( $Kinner_closing == $Kinner_opening );
8183 # Only weld to quotes delimited with container tokens. This is
8184 # because welding to arbitrary quote delimiters can produce code
8185 # which is less readable than without welding.
8186 my $closing_delimiter =
8187 substr( $rLL->[$Kinner_closing]->[_TOKEN_], -1, 1 );
8189 unless ( $is_closing_token{$closing_delimiter}
8190 || $closing_delimiter eq '>' );
8192 # Now make sure that there is just a single quote in the container
8196 $Kinner_opening + 1,
8197 $Kinner_closing - 1,
8202 # OK: This is a candidate for welding
8206 my $Kouter_opening = $K_opening_container->{$outer_seqno};
8207 my $iline_oo = $rLL->[$Kouter_opening]->[_LINE_INDEX_];
8208 my $iline_io = $rLL->[$Kinner_opening]->[_LINE_INDEX_];
8209 my $iline_oc = $rLL->[$Kouter_closing]->[_LINE_INDEX_];
8210 my $iline_ic = $rLL->[$Kinner_closing]->[_LINE_INDEX_];
8212 ( $iline_oo == $iline_io && $iline_ic == $iline_oc );
8214 # If welded, the line must not exceed allowed line length
8215 ( my $ok_to_weld, $maximum_text_length, $starting_lentot, my $msg )
8216 = $self->setup_new_weld_measurements( $Kouter_opening,
8218 if ( !$ok_to_weld ) {
8219 if (DEBUG_WELD) { print $msg}
8224 $rLL->[$Kinner_opening]->[_CUMULATIVE_LENGTH_] - $starting_lentot;
8225 my $excess = $length + $multiline_tol - $maximum_text_length;
8227 my $excess_max = ( $is_old_weld ? $multiline_tol : 0 );
8228 if ( $excess >= $excess_max ) {
8233 if ( !$is_old_weld ) { $is_old_weld = "" }
8235 "excess=$excess>=$excess_max, multiline_tol=$multiline_tol, is_old_weld='$is_old_weld'\n";
8238 # Check weld exclusion rules for outer container
8239 if ( !$do_not_weld ) {
8240 my $is_leading = !defined( $rK_weld_left->{$Kouter_opening} );
8241 if ( $self->is_excluded_weld( $KK, $is_leading ) ) {
8244 "No qw weld due to weld exclusion rules for outer container\n";
8250 # Check the length of the last line (fixes case b1039)
8251 if ( !$do_not_weld ) {
8252 my $rK_range_ic = $rlines->[$iline_ic]->{_rK_range};
8253 my ( $Kfirst_ic, $Klast_ic ) = @{$rK_range_ic};
8255 $self->excess_line_length_for_Krange( $Kfirst_ic,
8258 # Allow extra space for additional welded closing container(s)
8259 # and a space and comma or semicolon.
8260 # NOTE: weld len has not been computed yet. Use 2 spaces
8261 # for now, correct for a single weld. This estimate could
8262 # be made more accurate if necessary.
8264 defined( $rK_weld_right->{$Kouter_closing} ) ? 2 : 0;
8265 if ( $excess_ic + $weld_len + 2 > 0 ) {
8268 "No qw weld due to excess ending line length=$excess_ic + $weld_len + 2 > 0\n";
8276 $Msg .= "Not Welding QW\n";
8284 $Msg .= "Welding QW\n";
8288 $rK_weld_right->{$Kouter_opening} = $Kinner_opening;
8289 $rK_weld_left->{$Kinner_opening} = $Kouter_opening;
8291 $rK_weld_right->{$Kinner_closing} = $Kouter_closing;
8292 $rK_weld_left->{$Kouter_closing} = $Kinner_closing;
8294 # Undo one indentation level if an extra level was added to this
8297 $self->[_rstarting_multiline_qw_seqno_by_K_]->{$Kinner_opening};
8299 && $self->[_rmultiline_qw_has_extra_level_]->{$qw_seqno} )
8301 foreach my $K ( $Kinner_opening + 1 .. $Kinner_closing - 1 ) {
8302 $rLL->[$K]->[_LEVEL_] -= 1;
8304 $rLL->[$Kinner_opening]->[_CI_LEVEL_] = 0;
8305 $rLL->[$Kinner_closing]->[_CI_LEVEL_] = 0;
8308 # undo CI for other welded quotes
8311 foreach my $K ( $Kinner_opening .. $Kinner_closing ) {
8312 $rLL->[$K]->[_CI_LEVEL_] = 0;
8316 # Change the level of a closing qw token to be that of the outer
8317 # containing token. This will allow -lp indentation to function
8318 # correctly in the vertical aligner.
8319 # Patch to fix c002: but not if it contains text
8320 if ( length( $rLL->[$Kinner_closing]->[_TOKEN_] ) == 1 ) {
8321 $rLL->[$Kinner_closing]->[_LEVEL_] =
8322 $rLL->[$Kouter_closing]->[_LEVEL_];
8329 sub is_welded_right_at_i {
8330 my ( $self, $i ) = @_;
8331 return unless ( $total_weld_count && $i >= 0 );
8333 # Back up at a blank. This routine is sometimes called at blanks.
8334 # TODO: this routine can eventually be eliminated by setting the weld flags
8335 # for all K indexes between the start and end of a weld, not just at
8337 if ( $i > 0 && $types_to_go[$i] eq 'b' ) { $i-- }
8338 return defined( $self->[_rK_weld_right_]->{ $K_to_go[$i] } );
8341 sub is_welded_at_seqno {
8343 my ( $self, $seqno ) = @_;
8345 # given a sequence number:
8346 # return true if it is welded either left or right
8347 # return false otherwise
8348 return unless ( $total_weld_count && defined($seqno) );
8349 my $KK_o = $self->[_K_opening_container_]->{$seqno};
8350 return unless defined($KK_o);
8351 return defined( $self->[_rK_weld_left_]->{$KK_o} )
8352 || defined( $self->[_rK_weld_right_]->{$KK_o} );
8355 sub mark_short_nested_blocks {
8357 # This routine looks at the entire file and marks any short nested blocks
8358 # which should not be broken. The results are stored in the hash
8359 # $rshort_nested->{$type_sequence}
8360 # which will be true if the container should remain intact.
8362 # For example, consider the following line:
8364 # sub cxt_two { sort { $a <=> $b } test_if_list() }
8366 # The 'sort' block is short and nested within an outer sub block.
8367 # Normally, the existence of the 'sort' block will force the sub block to
8368 # break open, but this is not always desirable. Here we will set a flag for
8369 # the sort block to prevent this. To give the user control, we will
8370 # follow the input file formatting. If either of the blocks is broken in
8371 # the input file then we will allow it to remain broken. Otherwise we will
8372 # set a flag to keep it together in later formatting steps.
8374 # The flag which is set here will be checked in two places:
8375 # 'sub process_line_of_CODE' and 'sub starting_one_line_block'
8378 return if $rOpts->{'indent-only'};
8380 my $rLL = $self->[_rLL_];
8381 return unless ( defined($rLL) && @{$rLL} );
8383 return unless ( $rOpts->{'one-line-block-nesting'} );
8385 my $K_opening_container = $self->[_K_opening_container_];
8386 my $K_closing_container = $self->[_K_closing_container_];
8387 my $rbreak_container = $self->[_rbreak_container_];
8388 my $rshort_nested = $self->[_rshort_nested_];
8389 my $rlines = $self->[_rlines_];
8391 # Variables needed for estimating line lengths
8392 my $maximum_text_length;
8393 my $starting_lentot;
8396 my $excess_length_to_K = sub {
8399 # Estimate the length from the line start to a given token
8400 my $length = $self->cumulative_length_before_K($K) - $starting_lentot;
8401 my $excess_length = $length + $length_tol - $maximum_text_length;
8402 return ($excess_length);
8405 my $is_broken_block = sub {
8407 # a block is broken if the input line numbers of the braces differ
8409 my $K_opening = $K_opening_container->{$seqno};
8410 return unless ( defined($K_opening) );
8411 my $K_closing = $K_closing_container->{$seqno};
8412 return unless ( defined($K_closing) );
8413 return $rbreak_container->{$seqno}
8414 || $rLL->[$K_closing]->[_LINE_INDEX_] !=
8415 $rLL->[$K_opening]->[_LINE_INDEX_];
8418 # loop over all containers
8419 my @open_block_stack;
8421 my $KNEXT = $self->[_K_first_seq_item_];
8422 while ( defined($KNEXT) ) {
8424 $KNEXT = $rLL->[$KNEXT]->[_KNEXT_SEQ_ITEM_];
8425 my $rtoken_vars = $rLL->[$KK];
8426 my $type_sequence = $rtoken_vars->[_TYPE_SEQUENCE_];
8427 if ( !$type_sequence ) {
8428 next if ( $KK == 0 ); # first token in file may not be container
8430 # A fault here implies that an error was made in the little loop at
8431 # the bottom of sub 'respace_tokens' which set the values of
8432 # _KNEXT_SEQ_ITEM_. Or an error has been introduced in the
8433 # loop control lines above.
8434 Fault("sequence = $type_sequence not defined at K=$KK");
8437 # Patch: do not mark short blocks with welds.
8438 # In some cases blinkers can form (case b690).
8439 if ( $total_weld_count && $self->is_welded_at_seqno($type_sequence) ) {
8443 # We are just looking at code blocks
8444 my $token = $rtoken_vars->[_TOKEN_];
8445 my $type = $rtoken_vars->[_TYPE_];
8446 next unless ( $type eq $token );
8447 my $block_type = $rtoken_vars->[_BLOCK_TYPE_];
8448 next unless ($block_type);
8450 # Keep a stack of all acceptable block braces seen.
8451 # Only consider blocks entirely on one line so dump the stack when line
8453 my $iline_last = $iline;
8454 $iline = $rLL->[$KK]->[_LINE_INDEX_];
8455 if ( $iline != $iline_last ) { @open_block_stack = () }
8457 if ( $token eq '}' ) {
8458 if (@open_block_stack) { pop @open_block_stack }
8460 next unless ( $token eq '{' );
8462 # block must be balanced (bad scripts may be unbalanced)
8463 my $K_opening = $K_opening_container->{$type_sequence};
8464 my $K_closing = $K_closing_container->{$type_sequence};
8465 next unless ( defined($K_opening) && defined($K_closing) );
8467 # require that this block be entirely on one line
8468 next if ( $is_broken_block->($type_sequence) );
8470 # See if this block fits on one line of allowed length (which may
8471 # be different from the input script)
8473 $KK <= 0 ? 0 : $rLL->[ $KK - 1 ]->[_CUMULATIVE_LENGTH_];
8474 my $level = $rLL->[$KK]->[_LEVEL_];
8475 my $ci_level = $rLL->[$KK]->[_CI_LEVEL_];
8476 $maximum_text_length =
8477 $maximum_text_length_at_level[$level] -
8478 $ci_level * $rOpts_continuation_indentation;
8480 # Dump the stack if block is too long and skip this block
8481 if ( $excess_length_to_K->($K_closing) > 0 ) {
8482 @open_block_stack = ();
8486 # OK, Block passes tests, remember it
8487 push @open_block_stack, $type_sequence;
8489 # We are only marking nested code blocks,
8490 # so check for a previous block on the stack
8491 next unless ( @open_block_stack > 1 );
8493 # Looks OK, mark this as a short nested block
8494 $rshort_nested->{$type_sequence} = 1;
8500 sub adjust_indentation_levels {
8504 # Called once per file to do special indentation adjustments.
8505 # These routines adjust levels either by changing _CI_LEVEL_ directly or
8506 # by setting modified levels in the array $self->[_radjusted_levels_].
8508 # Initialize the adjusted levels. These will be the levels actually used
8509 # for computing indentation.
8511 # NOTE: This routine is called after the weld routines, which may have
8512 # already adjusted _LEVEL_, so we are making adjustments on top of those
8513 # levels. It would be much nicer to have the weld routines also use this
8514 # adjustment, but that gets complicated when we combine -gnu -wn and have
8515 # some welded quotes.
8516 my $radjusted_levels = $self->[_radjusted_levels_];
8517 my $rLL = $self->[_rLL_];
8518 foreach my $KK ( 0 .. @{$rLL} - 1 ) {
8519 $radjusted_levels->[$KK] = $rLL->[$KK]->[_LEVEL_];
8522 # First set adjusted levels for any non-indenting braces.
8523 $self->non_indenting_braces();
8525 # Adjust breaks and indentation list containers
8526 $self->break_before_list_opening_containers();
8528 # Set adjusted levels for the whitespace cycle option.
8529 $self->whitespace_cycle_adjustment();
8531 # Adjust continuation indentation if -bli is set
8532 $self->bli_adjustment();
8534 $self->extended_ci()
8535 if ( $rOpts->{'extended-continuation-indentation'} );
8537 # Now clip any adjusted levels to be non-negative
8538 $self->clip_adjusted_levels();
8543 sub clip_adjusted_levels {
8545 # Replace any negative adjusted levels with zero.
8546 # Negative levels can occur in files with brace errors.
8548 my $radjusted_levels = $self->[_radjusted_levels_];
8549 return unless defined($radjusted_levels) && @{$radjusted_levels};
8550 foreach ( @{$radjusted_levels} ) { $_ = 0 if ( $_ < 0 ) }
8554 sub non_indenting_braces {
8556 # Called once per file to handle the --non-indenting-braces parameter.
8557 # Remove indentation within marked braces if requested
8559 return unless ( $rOpts->{'non-indenting-braces'} );
8561 my $rLL = $self->[_rLL_];
8562 return unless ( defined($rLL) && @{$rLL} );
8564 my $rspecial_side_comment_type = $self->[_rspecial_side_comment_type_];
8566 my $radjusted_levels = $self->[_radjusted_levels_];
8567 my $Kmax = @{$rLL} - 1;
8570 my $is_non_indenting_brace = sub {
8573 # looking for an opening block brace
8574 my $token = $rLL->[$KK]->[_TOKEN_];
8575 my $block_type = $rLL->[$KK]->[_BLOCK_TYPE_];
8576 return unless ( $token eq '{' && $block_type );
8578 # followed by a comment
8581 if ( $K_sc <= $Kmax && $rLL->[$K_sc]->[_TYPE_] eq 'b' );
8582 return unless ( $K_sc <= $Kmax );
8583 my $type_sc = $rLL->[$K_sc]->[_TYPE_];
8584 return unless ( $type_sc eq '#' );
8587 my $line_index = $rLL->[$KK]->[_LINE_INDEX_];
8588 my $line_index_sc = $rLL->[$K_sc]->[_LINE_INDEX_];
8589 return unless ( $line_index_sc == $line_index );
8591 # get the side comment text
8592 my $token_sc = $rLL->[$K_sc]->[_TOKEN_];
8594 # The pattern ends in \s but we have removed the newline, so
8595 # we added it back for the match. That way we require an exact
8596 # match to the special string and also allow additional text.
8598 my $is_nib = ( $token_sc =~ /$non_indenting_brace_pattern/ );
8599 if ($is_nib) { $rspecial_side_comment_type->{$K_sc} = 'NIB' }
8603 foreach my $KK ( 0 .. $Kmax ) {
8604 my $num = @seqno_stack;
8605 my $seqno = $rLL->[$KK]->[_TYPE_SEQUENCE_];
8607 my $token = $rLL->[$KK]->[_TOKEN_];
8608 if ( $token eq '{' && $is_non_indenting_brace->($KK) ) {
8609 push @seqno_stack, $seqno;
8611 if ( $token eq '}' && @seqno_stack && $seqno_stack[-1] == $seqno ) {
8617 $radjusted_levels->[$KK] -= $num;
8622 sub whitespace_cycle_adjustment {
8626 # Called once per file to implement the --whitespace-cycle option
8627 my $rLL = $self->[_rLL_];
8628 return unless ( defined($rLL) && @{$rLL} );
8629 my $radjusted_levels = $self->[_radjusted_levels_];
8631 my $rOpts_whitespace_cycle = $rOpts->{'whitespace-cycle'};
8632 if ( $rOpts_whitespace_cycle && $rOpts_whitespace_cycle > 0 ) {
8634 my $Kmax = @{$rLL} - 1;
8636 my $whitespace_last_level = -1;
8637 my @whitespace_level_stack = ();
8638 my $last_nonblank_type = 'b';
8639 my $last_nonblank_token = '';
8640 foreach my $KK ( 0 .. $Kmax ) {
8641 my $level_abs = $radjusted_levels->[$KK];
8642 my $level = $level_abs;
8643 if ( $level_abs < $whitespace_last_level ) {
8644 pop(@whitespace_level_stack);
8646 if ( !@whitespace_level_stack ) {
8647 push @whitespace_level_stack, $level_abs;
8649 elsif ( $level_abs > $whitespace_last_level ) {
8650 $level = $whitespace_level_stack[-1] +
8651 ( $level_abs - $whitespace_last_level );
8654 # 1 Try to break at a block brace
8656 $level > $rOpts_whitespace_cycle
8657 && $last_nonblank_type eq '{'
8658 && $last_nonblank_token eq '{'
8661 # 2 Then either a brace or bracket
8662 || ( $level > $rOpts_whitespace_cycle + 1
8663 && $last_nonblank_token =~ /^[\{\[]$/ )
8665 # 3 Then a paren too
8666 || $level > $rOpts_whitespace_cycle + 2
8671 push @whitespace_level_stack, $level;
8673 $level = $whitespace_level_stack[-1];
8674 $radjusted_levels->[$KK] = $level;
8676 $whitespace_last_level = $level_abs;
8677 my $type = $rLL->[$KK]->[_TYPE_];
8678 my $token = $rLL->[$KK]->[_TOKEN_];
8679 if ( $type ne 'b' ) {
8680 $last_nonblank_type = $type;
8681 $last_nonblank_token = $token;
8688 use constant DEBUG_BBX => 0;
8690 sub break_before_list_opening_containers {
8694 # This routine is called once per batch to implement parameters
8695 # --break-before-hash-brace=n and similar -bbx=n flags
8696 # and their associated indentation flags:
8697 # --break-before-hash-brace-and-indent and similar -bbxi=n
8699 # Nothing to do if none of the -bbx=n parameters has been set
8700 return unless %break_before_container_types;
8702 my $rLL = $self->[_rLL_];
8703 return unless ( defined($rLL) && @{$rLL} );
8705 # Loop over all opening container tokens
8706 my $K_opening_container = $self->[_K_opening_container_];
8707 my $K_closing_container = $self->[_K_closing_container_];
8708 my $ris_broken_container = $self->[_ris_broken_container_];
8709 my $ris_permanently_broken = $self->[_ris_permanently_broken_];
8710 my $rhas_list = $self->[_rhas_list_];
8711 my $rhas_broken_list = $self->[_rhas_broken_list_];
8712 my $rhas_broken_list_with_lec = $self->[_rhas_broken_list_with_lec_];
8713 my $radjusted_levels = $self->[_radjusted_levels_];
8714 my $rparent_of_seqno = $self->[_rparent_of_seqno_];
8715 my $rlines = $self->[_rlines_];
8716 my $rtype_count_by_seqno = $self->[_rtype_count_by_seqno_];
8717 my $rlec_count_by_seqno = $self->[_rlec_count_by_seqno_];
8718 my $rno_xci_by_seqno = $self->[_rno_xci_by_seqno_];
8719 my $rK_weld_right = $self->[_rK_weld_right_];
8722 max( 1, $rOpts_continuation_indentation, $rOpts_indent_columns );
8723 if ($rOpts_ignore_old_breakpoints) {
8724 $length_tol += $rOpts_maximum_line_length;
8727 my $rbreak_before_container_by_seqno = {};
8728 my $rwant_reduced_ci = {};
8729 foreach my $seqno ( keys %{$K_opening_container} ) {
8731 #################################################################
8732 # Part 1: Examine any -bbx=n flags
8733 #################################################################
8735 my $KK = $K_opening_container->{$seqno};
8736 next if ( $rLL->[$KK]->[_BLOCK_TYPE_] );
8738 # This must be a list or contain a list.
8739 # Note1: switched from 'has_broken_list' to 'has_list' to fix b1024.
8740 # Note2: 'has_list' holds the depth to the sub-list. We will require
8742 my $is_list = $self->is_list_by_seqno($seqno);
8743 my $has_list = $rhas_list->{$seqno};
8745 # Fix for b1173: if welded opening container, use flag of innermost
8746 # seqno. Otherwise, the restriction $has_list==1 prevents triple and
8747 # higher welds from following the -BBX parameters.
8748 if ($total_weld_count) {
8749 my $KK_test = $rK_weld_right->{$KK};
8750 if ( defined($KK_test) ) {
8751 my $seqno_inner = $rLL->[$KK_test]->[_TYPE_SEQUENCE_];
8752 $is_list ||= $self->is_list_by_seqno($seqno_inner);
8753 $has_list = $rhas_list->{$seqno_inner};
8757 next unless ( $is_list || $has_list && $has_list == 1 );
8759 my $has_broken_list = $rhas_broken_list->{$seqno};
8760 my $has_list_with_lec = $rhas_broken_list_with_lec->{$seqno};
8762 # Only for types of container tokens with a non-default break option
8763 my $token = $rLL->[$KK]->[_TOKEN_];
8764 my $break_option = $break_before_container_types{$token};
8765 next unless ($break_option);
8767 # Require previous nonblank to be '=' or '=>'
8768 my $Kprev = $KK - 1;
8769 next if ( $Kprev < 0 );
8770 my $prev_type = $rLL->[$Kprev]->[_TYPE_];
8771 if ( $prev_type eq 'b' ) {
8773 next if ( $Kprev < 0 );
8774 $prev_type = $rLL->[$Kprev]->[_TYPE_];
8776 next unless ( $is_equal_or_fat_comma{$prev_type} );
8778 my $ci = $rLL->[$KK]->[_CI_LEVEL_];
8782 "BBX: Looking at seqno=$seqno, token = $token with option=$break_option\n";
8784 # -bbx=1 = stable, try to follow input
8785 if ( $break_option == 1 ) {
8787 my $iline = $rLL->[$KK]->[_LINE_INDEX_];
8788 my $rK_range = $rlines->[$iline]->{_rK_range};
8789 my ( $Kfirst, $Klast ) = @{$rK_range};
8790 next unless ( $KK == $Kfirst );
8793 # -bbx=2 => apply this style only for a 'complex' list
8794 elsif ( $break_option == 2 ) {
8796 # break if this list contains a broken list with line-ending comma
8799 if ($has_list_with_lec) {
8801 DEBUG_BBX && do { $Msg = "has list with lec;" };
8804 if ( !$ok_to_break ) {
8806 # Turn off -xci if -bbx=2 and this container has a sublist but
8807 # not a broken sublist. This avoids creating blinkers. The
8808 # problem is that -xci can cause one-line lists to break open,
8809 # and thereby creating formatting instability.
8810 # This fixes cases b1033 b1036 b1037 b1038 b1042 b1043 b1044
8811 # b1045 b1046 b1047 b1051 b1052 b1061.
8812 if ($has_list) { $rno_xci_by_seqno->{$seqno} = 1 }
8814 my $parent = $rparent_of_seqno->{$seqno};
8815 if ( $self->is_list_by_seqno($parent) ) {
8816 DEBUG_BBX && do { $Msg = "parent is list" };
8821 # Patch to fix b1099 for -lp
8822 # ok in -lp mode if this is a list which contains a list
8823 if ( !$ok_to_break && $rOpts_line_up_parentheses ) {
8824 if ( $is_list && $has_list ) {
8826 DEBUG_BBX && do { $Msg = "is list or has list" };
8830 if ( !$ok_to_break ) {
8832 && print STDOUT "Not breaking at seqno=$seqno: $Msg\n";
8837 && print STDOUT "OK to break at seqno=$seqno: $Msg\n";
8839 # Patch: turn off -xci if -bbx=2 and -lp
8840 # This fixes cases b1090 b1095 b1101 b1116 b1118 b1121 b1122
8841 $rno_xci_by_seqno->{$seqno} = 1 if ($rOpts_line_up_parentheses);
8844 # -bbx=3 = always break
8845 elsif ( $break_option == 3 ) {
8850 # Shouldn't happen! Bad flag, but make behavior same as 3
8855 # Set a flag for actual implementation later in
8856 # sub insert_breaks_before_list_opening_containers
8857 $rbreak_before_container_by_seqno->{$seqno} = 1;
8859 && print STDOUT "BBX: ok to break at seqno=$seqno\n";
8861 # -bbxi=0: Nothing more to do if the ci value remains unchanged
8862 my $ci_flag = $container_indentation_options{$token};
8863 next unless ($ci_flag);
8865 # -bbxi=1: This option removes ci and is handled in
8866 # later sub set_adjusted_indentation
8867 if ( $ci_flag == 1 ) {
8868 $rwant_reduced_ci->{$seqno} = 1;
8874 #################################################################
8875 # Part 2: Perform tests before committing to changing ci and level
8876 #################################################################
8878 # Before changing the ci level of the opening container, we need
8879 # to be sure that the container will be broken in the later stages of
8880 # formatting. We have to do this because we are working early in the
8881 # formatting pipeline. A problem can occur if we change the ci or
8882 # level of the opening token but do not actually break the container
8883 # open as expected. In most cases it wouldn't make any difference if
8884 # we changed ci or not, but there are some edge cases where this
8885 # can cause blinking states, so we need to try to only change ci if
8886 # the container will really be broken.
8888 # Only consider containers already broken
8889 next if ( !$ris_broken_container->{$seqno} );
8891 # Always ok to change ci for permanently broken containers
8892 if ( $ris_permanently_broken->{$seqno} ) {
8896 # Always OK if this list contains a broken sub-container with
8897 # a non-terminal line-ending comma
8898 if ($has_list_with_lec) { goto OK }
8900 # From here on we are considering a single container...
8902 # A single container must have at least 1 line-ending comma:
8903 next unless ( $rlec_count_by_seqno->{$seqno} );
8905 # Since it has a line-ending comma, it will stay broken if the -boc
8907 if ($rOpts_break_at_old_comma_breakpoints) { goto OK }
8909 # OK if the container contains multiple fat commas
8910 # Better: multiple lines with fat commas
8911 if ( !$rOpts_ignore_old_breakpoints ) {
8912 my $rtype_count = $rtype_count_by_seqno->{$seqno};
8913 next unless ($rtype_count);
8914 my $fat_comma_count = $rtype_count->{'=>'};
8916 && print STDOUT "BBX: fat comma count=$fat_comma_count\n";
8917 if ( $fat_comma_count && $fat_comma_count >= 2 ) { goto OK }
8920 # The last check we can make is to see if this container could fit on a
8921 # single line. Use the least possble indentation in the estmate (ci=0),
8922 # so we are not subtracting $ci * $rOpts_continuation_indentation from
8923 # tablulated $maximum_text_length value.
8924 my $level = $rLL->[$KK]->[_LEVEL_];
8925 my $maximum_text_length = $maximum_text_length_at_level[$level];
8926 my $K_closing = $K_closing_container->{$seqno};
8927 my $length = $self->cumulative_length_before_K($K_closing) -
8928 $self->cumulative_length_before_K($KK);
8929 my $excess_length = $length - $maximum_text_length;
8932 "BBX: excess=$excess_length: maximum_text_length=$maximum_text_length, length=$length, ci=$ci\n";
8934 # OK if the net container definitely breaks on length
8935 if ( $excess_length > $length_tol ) {
8937 && print STDOUT "BBX: excess_length=$excess_length\n";
8944 #################################################################
8945 # Part 3: Looks OK: apply -bbx=n and any related -bbxi=n flag
8946 #################################################################
8950 DEBUG_BBX && print STDOUT "BBX: OK to break\n";
8958 # n=0 default indentation (usually one ci)
8959 # n=1 outdent one ci
8960 # n=2 indent one level (minus one ci)
8961 # n=3 indent one extra ci [This may be dropped]
8963 # NOTE: We are adjusting indentation of the opening container. The
8964 # closing container will normally follow the indentation of the opening
8965 # container automatically, so this is not currently done.
8969 if ( $ci_flag == 1 ) {
8973 # option 2: indent one level
8974 elsif ( $ci_flag == 2 ) {
8976 $radjusted_levels->[$KK] += 1;
8981 # Shouldn't happen - leave ci unchanged
8984 $rLL->[$KK]->[_CI_LEVEL_] = $ci if ( $ci >= 0 );
8987 $self->[_rbreak_before_container_by_seqno_] =
8988 $rbreak_before_container_by_seqno;
8989 $self->[_rwant_reduced_ci_] = $rwant_reduced_ci;
8993 use constant DEBUG_XCI => 0;
8997 # This routine implements the -xci (--extended-continuation-indentation)
8998 # flag. We add CI to interior tokens of a container which itself has CI but
8999 # only if a token does not already have CI.
9001 # To do this, we will locate opening tokens which themselves have
9002 # continuation indentation (CI). We track them with their sequence
9003 # numbers. These sequence numbers are called 'controlling sequence
9004 # numbers'. They apply continuation indentation to the tokens that they
9005 # contain. These inner tokens remember their controlling sequence numbers.
9006 # Later, when these inner tokens are output, they have to see if the output
9007 # lines with their controlling tokens were output with CI or not. If not,
9008 # then they must remove their CI too.
9010 # The controlling CI concept works hierarchically. But CI itself is not
9011 # hierarchical; it is either on or off. There are some rare instances where
9012 # it would be best to have hierarchical CI too, but not enough to be worth
9013 # the programming effort.
9015 # The operations to remove unwanted CI are done in sub 'undo_ci'.
9019 my $rLL = $self->[_rLL_];
9020 return unless ( defined($rLL) && @{$rLL} );
9022 my $ris_list_by_seqno = $self->[_ris_list_by_seqno_];
9023 my $ris_seqno_controlling_ci = $self->[_ris_seqno_controlling_ci_];
9024 my $rseqno_controlling_my_ci = $self->[_rseqno_controlling_my_ci_];
9025 my $rlines = $self->[_rlines_];
9026 my $rno_xci_by_seqno = $self->[_rno_xci_by_seqno_];
9027 my $ris_bli_container = $self->[_ris_bli_container_];
9029 my %available_space;
9031 # Loop over all opening container tokens
9032 my $K_opening_container = $self->[_K_opening_container_];
9033 my $K_closing_container = $self->[_K_closing_container_];
9034 my $ris_broken_container = $self->[_ris_broken_container_];
9038 my $KNEXT = $self->[_K_first_seq_item_];
9040 # The following variable can be used to allow a little extra space to
9041 # avoid blinkers. A value $len_tol = 20 fixed the following
9042 # fixes cases: b1025 b1026 b1027 b1028 b1029 b1030 but NOT b1031.
9043 # It turned out that the real problem was misparsing a list brace as
9044 # a code block in a 'use' statement when the line length was extremely
9045 # small. A value of 0 works now, but a slightly larger value can
9046 # be used to minimize the chance of a blinker.
9049 while ( defined($KNEXT) ) {
9051 # Fix all tokens up to the next sequence item if we are changing CI
9054 my $is_list = $ris_list_by_seqno->{$seqno_top};
9055 my $space = $available_space{$seqno_top};
9056 my $length = $rLL->[$KLAST]->[_CUMULATIVE_LENGTH_];
9058 for ( my $Kt = $KLAST + 1 ; $Kt < $KNEXT ; $Kt++ ) {
9060 # But do not include tokens which might exceed the line length
9061 # and are not in a list.
9062 # ... This fixes case b1031
9063 my $length_before = $length;
9064 $length = $rLL->[$Kt]->[_CUMULATIVE_LENGTH_];
9066 !$rLL->[$Kt]->[_CI_LEVEL_]
9068 || $length - $length_before < $space
9069 || $rLL->[$Kt]->[_TYPE_] eq '#' )
9072 $rLL->[$Kt]->[_CI_LEVEL_] = 1;
9073 $rseqno_controlling_my_ci->{$Kt} = $seqno_top;
9077 $ris_seqno_controlling_ci->{$seqno_top} += $count;
9082 $KNEXT = $rLL->[$KNEXT]->[_KNEXT_SEQ_ITEM_];
9084 my $seqno = $rLL->[$KK]->[_TYPE_SEQUENCE_];
9085 my $K_opening = $K_opening_container->{$seqno};
9087 # see if we have reached the end of the current controlling container
9088 if ( $seqno_top && $seqno == $seqno_top ) {
9089 $seqno_top = pop @seqno_stack;
9092 # Patch to fix some block types...
9093 # Certain block types arrive from the tokenizer without CI but should
9094 # have it for this option. These include anonymous subs and
9095 # do sort map grep eval
9096 my $block_type = $rLL->[$KK]->[_BLOCK_TYPE_];
9097 if ( $block_type && $is_block_with_ci{$block_type} ) {
9098 $rLL->[$KK]->[_CI_LEVEL_] = 1;
9100 $rseqno_controlling_my_ci->{$KK} = $seqno_top;
9101 $ris_seqno_controlling_ci->{$seqno_top}++;
9105 # If this does not have ci, update ci if necessary and continue looking
9106 if ( !$rLL->[$KK]->[_CI_LEVEL_] ) {
9108 $rLL->[$KK]->[_CI_LEVEL_] = 1;
9109 $rseqno_controlling_my_ci->{$KK} = $seqno_top;
9110 $ris_seqno_controlling_ci->{$seqno_top}++;
9115 # Skip if requested by -bbx to avoid blinkers
9116 if ( $rno_xci_by_seqno->{$seqno} ) {
9120 # Skip if this is a -bli container (this fixes case b1065) Note: case
9121 # b1065 is also fixed by the update for b1055, so this update is not
9122 # essential now. But there does not seem to be a good reason to add
9123 # xci and bli together, so the update is retained.
9124 if ( $ris_bli_container->{$seqno} ) {
9128 # We are looking for opening container tokens with ci
9129 next unless ( defined($K_opening) && $KK == $K_opening );
9131 # Make sure there is a corresponding closing container
9132 # (could be missing if the script has a brace error)
9133 my $K_closing = $K_closing_container->{$seqno};
9134 next unless defined($K_closing);
9136 # Require different input lines. This will filter out a large number
9137 # of small hash braces and array brackets. If we accidentally filter
9138 # out an important container, it will get fixed on the next pass.
9140 $rLL->[$K_opening]->[_LINE_INDEX_] ==
9141 $rLL->[$K_closing]->[_LINE_INDEX_]
9142 && ( $rLL->[$K_closing]->[_CUMULATIVE_LENGTH_] -
9143 $rLL->[$K_opening]->[_CUMULATIVE_LENGTH_] >
9144 $rOpts_maximum_line_length )
9148 && print "XCI: Skipping seqno=$seqno, require different lines\n";
9152 # Do not apply -xci if adding extra ci will put the container contents
9153 # beyond the line length limit (fixes cases b899 b935)
9154 my $level = $rLL->[$K_opening]->[_LEVEL_];
9155 my $ci_level = $rLL->[$K_opening]->[_CI_LEVEL_];
9156 my $maximum_text_length =
9157 $maximum_text_length_at_level[$level] -
9158 $ci_level * $rOpts_continuation_indentation;
9160 # remember how much space is available for patch b1031 above
9162 $maximum_text_length - $len_tol - $rOpts_continuation_indentation;
9165 DEBUG_XCI && print "XCI: Skipping seqno=$seqno, space=$space\n";
9168 DEBUG_XCI && print "XCI: OK seqno=$seqno, space=$space\n";
9170 $available_space{$seqno} = $space;
9172 # This becomes the next controlling container
9173 push @seqno_stack, $seqno_top if ($seqno_top);
9174 $seqno_top = $seqno;
9179 sub bli_adjustment {
9181 # Called once per file to implement the --brace-left-and-indent option.
9182 # If -bli is set, adds one continuation indentation for certain braces
9184 return unless ( $rOpts->{'brace-left-and-indent'} );
9185 my $rLL = $self->[_rLL_];
9186 return unless ( defined($rLL) && @{$rLL} );
9187 my $ris_bli_container = $self->[_ris_bli_container_];
9188 my $K_opening_container = $self->[_K_opening_container_];
9189 my $KNEXT = $self->[_K_first_seq_item_];
9191 while ( defined($KNEXT) ) {
9193 $KNEXT = $rLL->[$KNEXT]->[_KNEXT_SEQ_ITEM_];
9194 my $block_type = $rLL->[$KK]->[_BLOCK_TYPE_];
9195 if ( $block_type && $block_type =~ /$bli_pattern/ ) {
9196 my $seqno = $rLL->[$KK]->[_TYPE_SEQUENCE_];
9197 my $K_opening = $K_opening_container->{$seqno};
9198 if ( defined($K_opening) ) {
9199 if ( $KK eq $K_opening ) {
9200 $rLL->[$KK]->[_CI_LEVEL_]++;
9201 $ris_bli_container->{$seqno} = 1;
9204 $rLL->[$KK]->[_CI_LEVEL_] =
9205 $rLL->[$K_opening]->[_CI_LEVEL_];
9213 sub find_multiline_qw {
9217 # Multiline qw quotes are not sequenced items like containers { [ (
9218 # but behave in some respects in a similar way. So this routine finds them
9219 # and creates a separate sequence number system for later use.
9221 # This is straightforward because they always begin at the end of one line
9222 # and and at the beginning of a later line. This is true no matter how we
9223 # finally make our line breaks, so we can find them before deciding on new
9226 my $rstarting_multiline_qw_seqno_by_K = {};
9227 my $rending_multiline_qw_seqno_by_K = {};
9228 my $rKrange_multiline_qw_by_seqno = {};
9229 my $rmultiline_qw_has_extra_level = {};
9231 my $ris_excluded_lp_container = $self->[_ris_excluded_lp_container_];
9233 my $rlines = $self->[_rlines_];
9234 my $rLL = $self->[_rLL_];
9236 my $num_qw_seqno = 0;
9237 my $K_start_multiline_qw;
9239 foreach my $line_of_tokens ( @{$rlines} ) {
9241 my $line_type = $line_of_tokens->{_line_type};
9242 next unless ( $line_type eq 'CODE' );
9243 my $rK_range = $line_of_tokens->{_rK_range};
9244 my ( $Kfirst, $Klast ) = @{$rK_range};
9245 next unless ( defined($Kfirst) && defined($Klast) ); # skip blank line
9246 if ( defined($K_start_multiline_qw) ) {
9247 my $type = $rLL->[$Kfirst]->[_TYPE_];
9250 if ( $type ne 'q' ) {
9251 DEVEL_MODE && print STDERR <<EOM;
9252 STRANGE: started multiline qw at K=$K_start_multiline_qw but didn't see q qw at K=$Kfirst\n";
9254 $K_start_multiline_qw = undef;
9257 my $Kprev = $self->K_previous_nonblank($Kfirst);
9258 my $Knext = $self->K_next_nonblank($Kfirst);
9259 my $type_m = defined($Kprev) ? $rLL->[$Kprev]->[_TYPE_] : 'b';
9260 my $type_p = defined($Knext) ? $rLL->[$Knext]->[_TYPE_] : 'b';
9261 if ( $type_m eq 'q' && $type_p ne 'q' ) {
9262 $rending_multiline_qw_seqno_by_K->{$Kfirst} = $qw_seqno;
9263 $rKrange_multiline_qw_by_seqno->{$qw_seqno} =
9264 [ $K_start_multiline_qw, $Kfirst ];
9265 $K_start_multiline_qw = undef;
9269 if ( !defined($K_start_multiline_qw)
9270 && $rLL->[$Klast]->[_TYPE_] eq 'q' )
9272 my $Kprev = $self->K_previous_nonblank($Klast);
9273 my $Knext = $self->K_next_nonblank($Klast);
9274 my $type_m = defined($Kprev) ? $rLL->[$Kprev]->[_TYPE_] : 'b';
9275 my $type_p = defined($Knext) ? $rLL->[$Knext]->[_TYPE_] : 'b';
9276 if ( $type_m ne 'q' && $type_p eq 'q' ) {
9278 $qw_seqno = 'q' . $num_qw_seqno;
9279 $K_start_multiline_qw = $Klast;
9280 $rstarting_multiline_qw_seqno_by_K->{$Klast} = $qw_seqno;
9285 # Give multiline qw lists extra indentation instead of CI. This option
9286 # works well but is currently only activated when the -xci flag is set.
9287 # The reason is to avoid unexpected changes in formatting.
9288 if ( $rOpts->{'extended-continuation-indentation'} ) {
9289 while ( my ( $qw_seqno, $rKrange ) =
9290 each %{$rKrange_multiline_qw_by_seqno} )
9292 my ( $Kbeg, $Kend ) = @{$rKrange};
9294 # require isolated closing token
9295 my $token_end = $rLL->[$Kend]->[_TOKEN_];
9297 unless ( length($token_end) == 1
9298 && ( $is_closing_token{$token_end} || $token_end eq '>' ) );
9300 # require isolated opening token
9301 my $token_beg = $rLL->[$Kbeg]->[_TOKEN_];
9303 # allow space(s) after the qw
9304 if ( length($token_beg) > 3 && substr( $token_beg, 2, 1 ) eq ' ' ) {
9305 $token_beg =~ s/\s+//;
9308 next unless ( length($token_beg) == 3 );
9310 foreach my $KK ( $Kbeg + 1 .. $Kend - 1 ) {
9311 $rLL->[$KK]->[_LEVEL_]++;
9312 $rLL->[$KK]->[_CI_LEVEL_] = 0;
9315 # set flag for -wn option, which will remove the level
9316 $rmultiline_qw_has_extra_level->{$qw_seqno} = 1;
9320 # For the -lp option we need to mark all parent containers of
9322 if ($rOpts_line_up_parentheses) {
9324 while ( my ( $qw_seqno, $rKrange ) =
9325 each %{$rKrange_multiline_qw_by_seqno} )
9327 my ( $Kbeg, $Kend ) = @{$rKrange};
9328 my $parent_seqno = $self->parent_seqno_by_K($Kend);
9329 next unless ($parent_seqno);
9331 # If the parent container exactly surrounds this qw, then -lp
9332 # formatting seems to work so we will not mark it.
9333 my $is_tightly_contained;
9334 my $Kn = $self->K_next_nonblank($Kend);
9335 my $seqno_n = defined($Kn) ? $rLL->[$Kn]->[_TYPE_SEQUENCE_] : undef;
9336 if ( defined($seqno_n) && $seqno_n eq $parent_seqno ) {
9338 my $Kp = $self->K_previous_nonblank($Kbeg);
9340 defined($Kp) ? $rLL->[$Kp]->[_TYPE_SEQUENCE_] : undef;
9341 if ( defined($seqno_p) && $seqno_p eq $parent_seqno ) {
9342 $is_tightly_contained = 1;
9346 $ris_excluded_lp_container->{$parent_seqno} = 1
9347 unless ($is_tightly_contained);
9349 # continue up the tree marking parent containers
9351 $parent_seqno = $self->[_rparent_of_seqno_]->{$parent_seqno};
9353 unless ( defined($parent_seqno)
9354 && $parent_seqno ne SEQ_ROOT );
9355 $ris_excluded_lp_container->{$parent_seqno} = 1;
9360 $self->[_rstarting_multiline_qw_seqno_by_K_] =
9361 $rstarting_multiline_qw_seqno_by_K;
9362 $self->[_rending_multiline_qw_seqno_by_K_] =
9363 $rending_multiline_qw_seqno_by_K;
9364 $self->[_rKrange_multiline_qw_by_seqno_] = $rKrange_multiline_qw_by_seqno;
9365 $self->[_rmultiline_qw_has_extra_level_] = $rmultiline_qw_has_extra_level;
9370 sub is_excluded_lp {
9372 # decide if this container is excluded by user request
9373 # returns true if this token is excluded (i.e., may not use -lp)
9374 # returns false otherwise
9376 # note similarity with sub 'is_excluded_weld'
9377 my ( $self, $KK ) = @_;
9378 my $rLL = $self->[_rLL_];
9379 my $rtoken_vars = $rLL->[$KK];
9380 my $token = $rtoken_vars->[_TOKEN_];
9381 my $rflags = $line_up_parentheses_exclusion_rules{$token};
9382 return 0 unless ( defined($rflags) );
9383 my ( $flag1, $flag2 ) = @{$rflags};
9385 # There are two flags:
9386 # flag1 excludes based on the preceding nonblank word
9387 # flag2 excludes based on the contents of the container
9388 return 0 unless ( defined($flag1) );
9389 return 1 if $flag1 eq '*';
9391 # Find the previous token
9392 my ( $is_f, $is_k, $is_w );
9393 my $Kp = $self->K_previous_nonblank($KK);
9394 if ( defined($Kp) ) {
9395 my $type_p = $rLL->[$Kp]->[_TYPE_];
9396 my $seqno = $rtoken_vars->[_TYPE_SEQUENCE_];
9399 $is_k = $type_p eq 'k';
9402 $is_f = $self->[_ris_function_call_paren_]->{$seqno};
9404 # either keyword or function call?
9405 $is_w = $is_k || $is_f;
9408 # Check for exclusion based on flag1 and the previous token:
9410 if ( $flag1 eq 'k' ) { $match = $is_k }
9411 elsif ( $flag1 eq 'K' ) { $match = !$is_k }
9412 elsif ( $flag1 eq 'f' ) { $match = $is_f }
9413 elsif ( $flag1 eq 'F' ) { $match = !$is_f }
9414 elsif ( $flag1 eq 'w' ) { $match = $is_w }
9415 elsif ( $flag1 eq 'W' ) { $match = !$is_w }
9416 return $match if ($match);
9418 # Check for exclusion based on flag2 and the container contents
9419 # Current options to filter on contents:
9420 # 0 or blank: ignore container contents
9421 # 1 exclude non-lists or lists with sublists
9422 # 2 same as 1 but also exclude lists with code blocks
9425 # Containers with multiline-qw containers are automatically
9426 # excluded so do not need to be checked.
9429 my $seqno = $rtoken_vars->[_TYPE_SEQUENCE_];
9431 my $is_list = $self->[_ris_list_by_seqno_]->{$seqno};
9432 my $has_list = $self->[_rhas_list_]->{$seqno};
9433 my $has_code_block = $self->[_rhas_code_block_]->{$seqno};
9434 my $has_ternary = $self->[_rhas_ternary_]->{$seqno};
9437 || $flag2 eq '2' && ( $has_code_block || $has_ternary ) )
9445 sub set_excluded_lp_containers {
9448 return unless ($rOpts_line_up_parentheses);
9449 my $rLL = $self->[_rLL_];
9450 return unless ( defined($rLL) && @{$rLL} );
9452 my $K_opening_container = $self->[_K_opening_container_];
9453 my $ris_excluded_lp_container = $self->[_ris_excluded_lp_container_];
9455 foreach my $seqno ( keys %{$K_opening_container} ) {
9456 my $KK = $K_opening_container->{$seqno};
9457 next unless defined($KK);
9459 # code blocks are always excluded by the -lp coding so we can skip them
9460 next if ( $rLL->[$KK]->[_BLOCK_TYPE_] );
9462 # see if a user exclusion rule turns off -lp for this container
9463 if ( $self->is_excluded_lp($KK) ) {
9464 $ris_excluded_lp_container->{$seqno} = 1;
9470 ######################################
9471 # CODE SECTION 6: Process line-by-line
9472 ######################################
9474 sub process_all_lines {
9476 # Main loop over all lines of a file.
9477 # Lines are processed according to type.
9480 my $rlines = $self->[_rlines_];
9481 my $sink_object = $self->[_sink_object_];
9482 my $fh_tee = $self->[_fh_tee_];
9483 my $rOpts_keep_old_blank_lines = $rOpts->{'keep-old-blank-lines'};
9484 my $file_writer_object = $self->[_file_writer_object_];
9485 my $logger_object = $self->[_logger_object_];
9486 my $vertical_aligner_object = $self->[_vertical_aligner_object_];
9487 my $save_logfile = $self->[_save_logfile_];
9489 # Note for RT#118553, leave only one newline at the end of a file.
9490 # Example code to do this is in comments below:
9491 # my $Opt_trim_ending_blank_lines = 0;
9492 # if ($Opt_trim_ending_blank_lines) {
9493 # while ( my $line_of_tokens = pop @{$rlines} ) {
9494 # my $line_type = $line_of_tokens->{_line_type};
9495 # if ( $line_type eq 'CODE' ) {
9496 # my $CODE_type = $line_of_tokens->{_code_type};
9497 # next if ( $CODE_type eq 'BL' );
9499 # push @{$rlines}, $line_of_tokens;
9504 # But while this would be a trivial update, it would have very undesirable
9505 # side effects when perltidy is run from within an editor on a small snippet.
9506 # So this is best done with a separate filter, such
9507 # as 'delete_ending_blank_lines.pl' in the examples folder.
9509 # Flag to prevent blank lines when POD occurs in a format skipping sect.
9510 my $in_format_skipping_section;
9512 # set locations for blanks around long runs of keywords
9513 my $rwant_blank_line_after = $self->keyword_group_scan();
9516 my $i_last_POD_END = -10;
9518 foreach my $line_of_tokens ( @{$rlines} ) {
9521 # insert blank lines requested for keyword sequences
9523 && defined( $rwant_blank_line_after->{ $i - 1 } )
9524 && $rwant_blank_line_after->{ $i - 1 } == 1 )
9526 $self->want_blank_line();
9529 my $last_line_type = $line_type;
9530 $line_type = $line_of_tokens->{_line_type};
9531 my $input_line = $line_of_tokens->{_line_text};
9533 # _line_type codes are:
9534 # SYSTEM - system-specific code before hash-bang line
9535 # CODE - line of perl code (including comments)
9536 # POD_START - line starting pod, such as '=head'
9537 # POD - pod documentation text
9538 # POD_END - last line of pod section, '=cut'
9539 # HERE - text of here-document
9540 # HERE_END - last line of here-doc (target word)
9541 # FORMAT - format section
9542 # FORMAT_END - last line of format section, '.'
9543 # DATA_START - __DATA__ line
9544 # DATA - unidentified text following __DATA__
9545 # END_START - __END__ line
9546 # END - unidentified text following __END__
9547 # ERROR - we are in big trouble, probably not a perl script
9549 # put a blank line after an =cut which comes before __END__ and __DATA__
9550 # (required by podchecker)
9551 if ( $last_line_type eq 'POD_END' && !$self->[_saw_END_or_DATA_] ) {
9552 $i_last_POD_END = $i;
9553 $file_writer_object->reset_consecutive_blank_lines();
9554 if ( !$in_format_skipping_section && $input_line !~ /^\s*$/ ) {
9555 $self->want_blank_line();
9559 # handle line of code..
9560 if ( $line_type eq 'CODE' ) {
9562 my $CODE_type = $line_of_tokens->{_code_type};
9563 $in_format_skipping_section = $CODE_type eq 'FS';
9565 # Handle blank lines
9566 if ( $CODE_type eq 'BL' ) {
9568 # Keep this blank? Start with the flag -kbl=n, where
9569 # n=0 ignore all old blank lines
9570 # n=1 stable: keep old blanks, but limited by -mbl=n
9571 # n=2 keep all old blank lines, regardless of -mbl=n
9572 # If n=0 we delete all old blank lines and let blank line
9573 # rules generate any needed blank lines.
9574 my $kgb_keep = $rOpts_keep_old_blank_lines;
9576 # Then delete lines requested by the keyword-group logic if
9579 && defined( $rwant_blank_line_after->{$i} )
9580 && $rwant_blank_line_after->{$i} == 2 )
9585 # But always keep a blank line following an =cut
9586 if ( $i - $i_last_POD_END < 3 && !$kgb_keep ) {
9591 $self->flush($CODE_type);
9592 $file_writer_object->write_blank_code_line(
9593 $rOpts_keep_old_blank_lines == 2 );
9594 $self->[_last_line_leading_type_] = 'b';
9600 # Let logger see all non-blank lines of code. This is a slow operation
9601 # so we avoid it if it is not going to be saved.
9602 if ( $save_logfile && $logger_object ) {
9603 $logger_object->black_box( $line_of_tokens,
9604 $vertical_aligner_object->get_output_line_number );
9608 # Handle Format Skipping (FS) and Verbatim (VB) Lines
9609 if ( $CODE_type eq 'VB' || $CODE_type eq 'FS' ) {
9610 $self->write_unindented_line("$input_line");
9611 $file_writer_object->reset_consecutive_blank_lines();
9615 # Handle all other lines of code
9616 $self->process_line_of_CODE($line_of_tokens);
9619 # handle line of non-code..
9624 if ( substr( $line_type, 0, 3 ) eq 'POD' ) {
9626 # Pod docs should have a preceding blank line. But stay
9627 # out of __END__ and __DATA__ sections, because
9628 # the user may be using this section for any purpose whatsoever
9629 if ( $rOpts->{'delete-pod'} ) { $skip_line = 1; }
9630 if ( $rOpts->{'trim-pod'} ) { $input_line =~ s/\s+$// }
9632 && !$in_format_skipping_section
9633 && $line_type eq 'POD_START'
9634 && !$self->[_saw_END_or_DATA_] )
9636 $self->want_blank_line();
9640 # leave the blank counters in a predictable state
9641 # after __END__ or __DATA__
9642 elsif ( $line_type eq 'END_START' || $line_type eq 'DATA_START' ) {
9643 $file_writer_object->reset_consecutive_blank_lines();
9644 $self->[_saw_END_or_DATA_] = 1;
9647 # write unindented non-code line
9648 if ( !$skip_line ) {
9649 $self->write_unindented_line($input_line);
9655 } ## end sub process_all_lines
9657 sub keyword_group_scan {
9660 # Called once per file to process the --keyword-group-blanks-* parameters.
9662 # Manipulate blank lines around keyword groups (kgb* flags)
9663 # Scan all lines looking for runs of consecutive lines beginning with
9664 # selected keywords. Example keywords are 'my', 'our', 'local', ... but
9665 # they may be anything. We will set flags requesting that blanks be
9666 # inserted around and within them according to input parameters. Note
9667 # that we are scanning the lines as they came in in the input stream, so
9668 # they are not necessarily well formatted.
9670 # The output of this sub is a return hash ref whose keys are the indexes of
9671 # lines after which we desire a blank line. For line index i:
9672 # $rhash_of_desires->{$i} = 1 means we want a blank line AFTER line $i
9673 # $rhash_of_desires->{$i} = 2 means we want blank line $i removed
9674 my $rhash_of_desires = {};
9676 # Nothing to do if no blanks can be output. This test added to fix
9678 if ( !$rOpts_maximum_consecutive_blank_lines ) {
9679 return $rhash_of_desires;
9682 my $Opt_blanks_before = $rOpts->{'keyword-group-blanks-before'}; # '-kgbb'
9683 my $Opt_blanks_after = $rOpts->{'keyword-group-blanks-after'}; # '-kgba'
9684 my $Opt_blanks_inside = $rOpts->{'keyword-group-blanks-inside'}; # '-kgbi'
9685 my $Opt_blanks_delete = $rOpts->{'keyword-group-blanks-delete'}; # '-kgbd'
9686 my $Opt_size = $rOpts->{'keyword-group-blanks-size'}; # '-kgbs'
9688 # A range of sizes can be input with decimal notation like 'min.max' with
9689 # any number of dots between the two numbers. Examples:
9690 # string => min max matches
9693 # 1..3 1 3 1,2, or 3
9698 my ( $Opt_size_min, $Opt_size_max ) = split /\.+/, $Opt_size;
9699 if ( $Opt_size_min && $Opt_size_min !~ /^\d+$/
9700 || $Opt_size_max && $Opt_size_max !~ /^\d+$/ )
9703 Unexpected value for -kgbs: '$Opt_size'; expecting 'min' or 'min.max';
9704 ignoring all -kgb flags
9707 # Turn this option off so that this message does not keep repeating
9708 # during iterations and other files.
9709 $rOpts->{'keyword-group-blanks-size'} = "";
9710 return $rhash_of_desires;
9712 $Opt_size_min = 1 unless ($Opt_size_min);
9714 if ( $Opt_size_max && $Opt_size_max < $Opt_size_min ) {
9715 return $rhash_of_desires;
9718 # codes for $Opt_blanks_before and $Opt_blanks_after:
9719 # 0 = never (delete if exist)
9720 # 1 = stable (keep unchanged)
9721 # 2 = always (insert if missing)
9723 return $rhash_of_desires
9724 unless $Opt_size_min > 0
9725 && ( $Opt_blanks_before != 1
9726 || $Opt_blanks_after != 1
9727 || $Opt_blanks_inside
9728 || $Opt_blanks_delete );
9730 my $Opt_pattern = $keyword_group_list_pattern;
9731 my $Opt_comment_pattern = $keyword_group_list_comment_pattern;
9732 my $Opt_repeat_count =
9733 $rOpts->{'keyword-group-blanks-repeat-count'}; # '-kgbr'
9735 my $rlines = $self->[_rlines_];
9736 my $rLL = $self->[_rLL_];
9737 my $K_closing_container = $self->[_K_closing_container_];
9739 # variables for the current group and subgroups:
9740 my ( $ibeg, $iend, $count, $level_beg, $K_closing, @iblanks, @group,
9744 # ($ibeg, $iend) = starting and ending line indexes of this entire group
9745 # $count = total number of keywords seen in this entire group
9746 # $level_beg = indententation level of this group
9747 # @group = [ $i, $token, $count ] =list of all keywords & blanks
9748 # @subgroup = $j, index of group where token changes
9749 # @iblanks = line indexes of blank lines in input stream in this group
9750 # where i=starting line index
9751 # token (the keyword)
9752 # count = number of this token in this subgroup
9753 # j = index in group where token changes
9755 # These vars will contain values for the most recently seen line:
9756 my ( $line_type, $CODE_type, $K_first, $K_last );
9758 my $number_of_groups_seen = 0;
9760 ####################
9761 # helper subroutines
9762 ####################
9764 my $insert_blank_after = sub {
9766 $rhash_of_desires->{$i} = 1;
9768 if ( defined( $rhash_of_desires->{$ip} )
9769 && $rhash_of_desires->{$ip} == 2 )
9771 $rhash_of_desires->{$ip} = 0;
9776 my $split_into_sub_groups = sub {
9778 # place blanks around long sub-groups of keywords
9780 return unless ($Opt_blanks_inside);
9782 # loop over sub-groups, index k
9783 push @subgroup, scalar @group;
9785 my $kend = @subgroup - 1;
9786 for ( my $k = $kbeg ; $k <= $kend ; $k++ ) {
9788 # index j runs through all keywords found
9789 my $j_b = $subgroup[ $k - 1 ];
9790 my $j_e = $subgroup[$k] - 1;
9792 # index i is the actual line number of a keyword
9793 my ( $i_b, $tok_b, $count_b ) = @{ $group[$j_b] };
9794 my ( $i_e, $tok_e, $count_e ) = @{ $group[$j_e] };
9795 my $num = $count_e - $count_b + 1;
9797 # This subgroup runs from line $ib to line $ie-1, but may contain
9799 if ( $num >= $Opt_size_min ) {
9801 # if there are blank lines, we require that at least $num lines
9802 # be non-blank up to the boundary with the next subgroup.
9803 my $nog_b = my $nog_e = 1;
9804 if ( @iblanks && !$Opt_blanks_delete ) {
9805 my $j_bb = $j_b + $num - 1;
9806 my ( $i_bb, $tok_bb, $count_bb ) = @{ $group[$j_bb] };
9807 $nog_b = $count_bb - $count_b + 1 == $num;
9809 my $j_ee = $j_e - ( $num - 1 );
9810 my ( $i_ee, $tok_ee, $count_ee ) = @{ $group[$j_ee] };
9811 $nog_e = $count_e - $count_ee + 1 == $num;
9813 if ( $nog_b && $k > $kbeg ) {
9814 $insert_blank_after->( $i_b - 1 );
9816 if ( $nog_e && $k < $kend ) {
9817 my ( $i_ep, $tok_ep, $count_ep ) = @{ $group[ $j_e + 1 ] };
9818 $insert_blank_after->( $i_ep - 1 );
9824 my $delete_if_blank = sub {
9827 # delete line $i if it is blank
9828 return unless ( $i >= 0 && $i < @{$rlines} );
9829 my $line_type = $rlines->[$i]->{_line_type};
9830 return if ( $line_type ne 'CODE' );
9831 my $code_type = $rlines->[$i]->{_code_type};
9832 if ( $code_type eq 'BL' ) { $rhash_of_desires->{$i} = 2; }
9836 my $delete_inner_blank_lines = sub {
9838 # always remove unwanted trailing blank lines from our list
9839 return unless (@iblanks);
9840 while ( my $ibl = pop(@iblanks) ) {
9841 if ( $ibl < $iend ) { push @iblanks, $ibl; last }
9845 # now mark mark interior blank lines for deletion if requested
9846 return unless ($Opt_blanks_delete);
9848 while ( my $ibl = pop(@iblanks) ) { $rhash_of_desires->{$ibl} = 2 }
9852 my $end_group = sub {
9854 # end a group of keywords
9855 my ($bad_ending) = @_;
9856 if ( defined($ibeg) && $ibeg >= 0 ) {
9858 # then handle sufficiently large groups
9859 if ( $count >= $Opt_size_min ) {
9861 $number_of_groups_seen++;
9863 # do any blank deletions regardless of the count
9864 $delete_inner_blank_lines->();
9867 my $code_type = $rlines->[ $ibeg - 1 ]->{_code_type};
9869 # patch for hash bang line which is not currently marked as
9870 # a comment; mark it as a comment
9871 if ( $ibeg == 1 && !$code_type ) {
9872 my $line_text = $rlines->[ $ibeg - 1 ]->{_line_text};
9874 if ( $line_text && $line_text =~ /^#/ );
9877 # Do not insert a blank after a comment
9878 # (this could be subject to a flag in the future)
9879 if ( $code_type !~ /(BC|SBC|SBCX)/ ) {
9880 if ( $Opt_blanks_before == INSERT ) {
9881 $insert_blank_after->( $ibeg - 1 );
9884 elsif ( $Opt_blanks_before == DELETE ) {
9885 $delete_if_blank->( $ibeg - 1 );
9890 # We will only put blanks before code lines. We could loosen
9891 # this rule a little, but we have to be very careful because
9892 # for example we certainly don't want to drop a blank line
9893 # after a line like this:
9895 if ( $line_type eq 'CODE' && defined($K_first) ) {
9897 # - Do not put a blank before a line of different level
9898 # - Do not put a blank line if we ended the search badly
9899 # - Do not put a blank at the end of the file
9900 # - Do not put a blank line before a hanging side comment
9901 my $level = $rLL->[$K_first]->[_LEVEL_];
9902 my $ci_level = $rLL->[$K_first]->[_CI_LEVEL_];
9904 if ( $level == $level_beg
9907 && $iend < @{$rlines}
9908 && $CODE_type ne 'HSC' )
9910 if ( $Opt_blanks_after == INSERT ) {
9911 $insert_blank_after->($iend);
9913 elsif ( $Opt_blanks_after == DELETE ) {
9914 $delete_if_blank->( $iend + 1 );
9919 $split_into_sub_groups->();
9922 # reset for another group
9932 my $find_container_end = sub {
9934 # If the keyword lines ends with an open token, find the closing token
9935 # '$K_closing' so that we can easily skip past the contents of the
9937 return if ( $K_last <= $K_first );
9939 my $type_last = $rLL->[$KK]->[_TYPE_];
9940 my $tok_last = $rLL->[$KK]->[_TOKEN_];
9941 if ( $type_last eq '#' ) {
9942 $KK = $self->K_previous_nonblank($KK);
9943 $tok_last = $rLL->[$KK]->[_TOKEN_];
9945 if ( $KK > $K_first && $tok_last =~ /^[\(\{\[]$/ ) {
9947 my $type_sequence = $rLL->[$KK]->[_TYPE_SEQUENCE_];
9948 my $lev = $rLL->[$KK]->[_LEVEL_];
9949 if ( $lev == $level_beg ) {
9950 $K_closing = $K_closing_container->{$type_sequence};
9955 my $add_to_group = sub {
9956 my ( $i, $token, $level ) = @_;
9958 # End the previous group if we have reached the maximum
9960 if ( $Opt_size_max && @group >= $Opt_size_max ) {
9964 if ( @group == 0 ) {
9966 $level_beg = $level;
9974 if ( !@group || $token ne $group[-1]->[1] ) {
9975 push @subgroup, scalar(@group);
9977 push @group, [ $i, $token, $count ];
9979 # remember if this line ends in an open container
9980 $find_container_end->();
9985 ###################################
9986 # loop over all lines of the source
9987 ###################################
9990 foreach my $line_of_tokens ( @{$rlines} ) {
9994 if ( $Opt_repeat_count > 0
9995 && $number_of_groups_seen >= $Opt_repeat_count );
10000 $line_type = $line_of_tokens->{_line_type};
10002 # always end a group at non-CODE
10003 if ( $line_type ne 'CODE' ) { $end_group->(); next }
10005 $CODE_type = $line_of_tokens->{_code_type};
10007 # end any group at a format skipping line
10008 if ( $CODE_type && $CODE_type eq 'FS' ) {
10013 # continue in a verbatim (VB) type; it may be quoted text
10014 if ( $CODE_type eq 'VB' ) {
10015 if ( $ibeg >= 0 ) { $iend = $i; }
10019 # and continue in blank (BL) types
10020 if ( $CODE_type eq 'BL' ) {
10021 if ( $ibeg >= 0 ) {
10023 push @{iblanks}, $i;
10025 # propagate current subgroup token
10026 my $tok = $group[-1]->[1];
10027 push @group, [ $i, $tok, $count ];
10032 # examine the first token of this line
10033 my $rK_range = $line_of_tokens->{_rK_range};
10034 ( $K_first, $K_last ) = @{$rK_range};
10035 if ( !defined($K_first) ) {
10037 # Somewhat unexpected blank line..
10038 # $rK_range is normally defined for line type CODE, but this can
10039 # happen for example if the input line was a single semicolon which
10040 # is being deleted. In that case there was code in the input
10041 # file but it is not being retained. So we can silently return.
10042 return $rhash_of_desires;
10045 # This is not for keywords in lists ( keyword 'my' can occur in lists,
10047 next if ( $self->is_list_by_K($K_first) );
10049 my $level = $rLL->[$K_first]->[_LEVEL_];
10050 my $type = $rLL->[$K_first]->[_TYPE_];
10051 my $token = $rLL->[$K_first]->[_TOKEN_];
10052 my $ci_level = $rLL->[$K_first]->[_CI_LEVEL_];
10054 # see if this is a code type we seek (i.e. comment)
10056 && $Opt_comment_pattern
10057 && $CODE_type =~ /$Opt_comment_pattern/ )
10060 my $tok = $CODE_type;
10062 # Continuing a group
10063 if ( $ibeg >= 0 && $level == $level_beg ) {
10064 $add_to_group->( $i, $tok, $level );
10070 # first end old group if any; we might be starting new
10071 # keywords at different level
10072 if ( $ibeg > 0 ) { $end_group->(); }
10073 $add_to_group->( $i, $tok, $level );
10078 # See if it is a keyword we seek, but never start a group in a
10079 # continuation line; the code may be badly formatted.
10080 if ( $ci_level == 0
10082 && $token =~ /$Opt_pattern/ )
10085 # Continuing a keyword group
10086 if ( $ibeg >= 0 && $level == $level_beg ) {
10087 $add_to_group->( $i, $token, $level );
10090 # Start new keyword group
10093 # first end old group if any; we might be starting new
10094 # keywords at different level
10095 if ( $ibeg > 0 ) { $end_group->(); }
10096 $add_to_group->( $i, $token, $level );
10101 # This is not one of our keywords, but we are in a keyword group
10102 # so see if we should continue or quit
10103 elsif ( $ibeg >= 0 ) {
10105 # - bail out on a large level change; we may have walked into a
10106 # data structure or anoymous sub code.
10107 if ( $level > $level_beg + 1 || $level < $level_beg ) {
10112 # - keep going on a continuation line of the same level, since
10113 # it is probably a continuation of our previous keyword,
10114 # - and keep going past hanging side comments because we never
10115 # want to interrupt them.
10116 if ( ( ( $level == $level_beg ) && $ci_level > 0 )
10117 || $CODE_type eq 'HSC' )
10123 # - continue if if we are within in a container which started with
10124 # the line of the previous keyword.
10125 if ( defined($K_closing) && $K_first <= $K_closing ) {
10127 # continue if entire line is within container
10128 if ( $K_last <= $K_closing ) { $iend = $i; next }
10130 # continue at ); or }; or ];
10131 my $KK = $K_closing + 1;
10132 if ( $rLL->[$KK]->[_TYPE_] eq ';' ) {
10133 if ( $KK < $K_last ) {
10134 if ( $rLL->[ ++$KK ]->[_TYPE_] eq 'b' ) { ++$KK }
10135 if ( $KK > $K_last || $rLL->[$KK]->[_TYPE_] ne '#' ) {
10148 # - end the group if none of the above
10153 # not in a keyword group; continue
10157 # end of loop over all lines
10159 return $rhash_of_desires;
10161 } ## end sub keyword_group_scan
10163 #######################################
10164 # CODE SECTION 7: Process lines of code
10165 #######################################
10167 { ## begin closure process_line_of_CODE
10169 # The routines in this closure receive lines of code and combine them into
10170 # 'batches' and send them along. A 'batch' is the unit of code which can be
10171 # processed further as a unit. It has the property that it is the largest
10172 # amount of code into which which perltidy is free to place one or more
10173 # line breaks within it without violating any constraints.
10175 # When a new batch is formed it is sent to sub 'grind_batch_of_code'.
10177 # flags needed by the store routine
10178 my $line_of_tokens;
10179 my $no_internal_newlines;
10180 my $side_comment_follows;
10183 # range of K of tokens for the current line
10184 my ( $K_first, $K_last );
10186 my ( $rLL, $radjusted_levels );
10188 # past stored nonblank tokens
10190 $last_last_nonblank_token, $last_last_nonblank_type,
10191 $last_nonblank_token, $last_nonblank_type,
10192 $last_nonblank_block_type, $K_last_nonblank_code,
10193 $K_last_last_nonblank_code, $looking_for_else,
10194 $is_static_block_comment, $batch_CODE_type,
10195 $last_line_had_side_comment,
10198 # Called once at the start of a new file
10199 sub initialize_process_line_of_CODE {
10200 $last_nonblank_token = ';';
10201 $last_nonblank_type = ';';
10202 $last_last_nonblank_token = ';';
10203 $last_last_nonblank_type = ';';
10204 $last_nonblank_block_type = "";
10205 $K_last_nonblank_code = undef;
10206 $K_last_last_nonblank_code = undef;
10207 $looking_for_else = 0;
10208 $is_static_block_comment = 0;
10209 $batch_CODE_type = "";
10210 $last_line_had_side_comment = 0;
10214 # Batch variables: these describe the current batch of code being formed
10215 # and sent down the pipeline. They are initialized in the next
10217 my ( $rbrace_follower, $index_start_one_line_block,
10218 $semicolons_before_block_self_destruct,
10219 $starting_in_quote, $ending_in_quote, );
10221 # Called before the start of each new batch
10222 sub initialize_batch_variables {
10224 $max_index_to_go = UNDEFINED_INDEX;
10225 @summed_lengths_to_go = @nesting_depth_to_go = (0);
10227 # The initialization code for the remaining batch arrays is as follows
10228 # and can be activated for testing. But profiling shows that it is
10229 # time-consuming to re-initialize the batch arrays and is not necessary
10230 # because the maximum valid token, $max_index_to_go, is carefully
10231 # controlled. This means however that it is not possible to do any
10232 # type of filter or map operation directly on these arrays. And it is
10233 # not possible to use negative indexes. As a precaution against program
10234 # changes which might do this, sub pad_array_to_go adds some undefs at
10235 # the end of the current batch of data.
10237 # So 'long story short': this is a waste of time
10239 @block_type_to_go = ();
10240 @type_sequence_to_go = ();
10241 @bond_strength_to_go = ();
10242 @forced_breakpoint_to_go = ();
10243 @token_lengths_to_go = ();
10244 @levels_to_go = ();
10245 @mate_index_to_go = ();
10246 @ci_levels_to_go = ();
10247 @nobreak_to_go = ();
10248 @old_breakpoint_to_go = ();
10249 @tokens_to_go = ();
10252 @leading_spaces_to_go = ();
10253 @reduced_spaces_to_go = ();
10256 @parent_seqno_to_go = ();
10259 $rbrace_follower = undef;
10260 $ending_in_quote = 0;
10261 destroy_one_line_block();
10265 sub leading_spaces_to_go {
10267 # return the number of indentation spaces for a token in the output
10268 # stream; these were previously stored by 'set_leading_whitespace'.
10271 return 0 if ( $ii < 0 );
10272 my $indentation = $leading_spaces_to_go[$ii];
10273 return ref($indentation) ? $indentation->get_spaces() : $indentation;
10276 sub create_one_line_block {
10277 ( $index_start_one_line_block, $semicolons_before_block_self_destruct )
10282 sub destroy_one_line_block {
10283 $index_start_one_line_block = UNDEFINED_INDEX;
10284 $semicolons_before_block_self_destruct = 0;
10288 # Routine to place the current token into the output stream.
10289 # Called once per output token.
10291 use constant DEBUG_STORE => 0;
10293 sub store_token_to_go {
10295 my ( $self, $Ktoken_vars, $rtoken_vars ) = @_;
10297 # Add one token to the next batch.
10298 # $Ktoken_vars = the index K in the global token array
10299 # $rtoken_vars = $rLL->[$Ktoken_vars] = the corresponding token values
10300 # unless they are temporarily being overridden
10302 # NOTE: This routine needs to be coded efficiently because it is called
10303 # once per token. I have gotten it down from the second slowest to the
10304 # eighth slowest, but that still seems rather slow for what it does.
10306 # This closure variable has already been defined, for efficiency:
10307 # my $radjusted_levels = $self->[_radjusted_levels_];
10309 my $type = $rtoken_vars->[_TYPE_];
10311 # Check for emergency flush...
10312 # The K indexes in the batch must always be a continuous sequence of
10313 # the global token array. The batch process programming assumes this.
10314 # If storing this token would cause this relation to fail we must dump
10315 # the current batch before storing the new token. It is extremely rare
10316 # for this to happen. One known example is the following two-line
10317 # snippet when run with parameters
10318 # --noadd-newlines --space-terminal-semicolon:
10319 # if ( $_ =~ /PENCIL/ ) { $pencil_flag= 1 } ; ;
10321 if ( $max_index_to_go >= 0 ) {
10322 my $Klast = $K_to_go[$max_index_to_go];
10323 if ( $Ktoken_vars != $Klast + 1 ) {
10324 $self->flush_batch_of_CODE();
10327 # Do not output consecutive blank tokens ... this should not
10328 # happen, but it is worth checking. Later code can then make the
10329 # simplifying assumption that blank tokens are not consecutive.
10330 elsif ( $type eq 'b' && $types_to_go[$max_index_to_go] eq 'b' ) {
10335 # Do not start a batch with a blank token.
10336 # Fixes cases b149 b888 b984 b985 b986 b987
10338 if ( $type eq 'b' ) { return }
10341 ++$max_index_to_go;
10342 $batch_CODE_type = $CODE_type;
10343 $K_to_go[$max_index_to_go] = $Ktoken_vars;
10344 $types_to_go[$max_index_to_go] = $type;
10346 $old_breakpoint_to_go[$max_index_to_go] = 0;
10347 $forced_breakpoint_to_go[$max_index_to_go] = 0;
10348 $mate_index_to_go[$max_index_to_go] = -1;
10350 my $token = $tokens_to_go[$max_index_to_go] = $rtoken_vars->[_TOKEN_];
10351 my $ci_level = $ci_levels_to_go[$max_index_to_go] =
10352 $rtoken_vars->[_CI_LEVEL_];
10354 # Clip levels to zero if there are level errors in the file.
10355 # We had to wait until now for reasons explained in sub 'write_line'.
10356 my $level = $rtoken_vars->[_LEVEL_];
10357 if ( $level < 0 ) { $level = 0 }
10358 $levels_to_go[$max_index_to_go] = $level;
10360 $nesting_depth_to_go[$max_index_to_go] = $rtoken_vars->[_SLEVEL_];
10361 $block_type_to_go[$max_index_to_go] = $rtoken_vars->[_BLOCK_TYPE_];
10362 $type_sequence_to_go[$max_index_to_go] =
10363 $rtoken_vars->[_TYPE_SEQUENCE_];
10365 $nobreak_to_go[$max_index_to_go] =
10366 $side_comment_follows ? 2 : $no_internal_newlines;
10368 my $length = $rtoken_vars->[_TOKEN_LENGTH_];
10370 # Safety check that length is defined. Should not be needed now.
10371 # Former patch for indent-only, in which the entire set of tokens is
10372 # turned into type 'q'. Lengths may have not been defined because sub
10373 # 'respace_tokens' is bypassed. We do not need lengths in this case,
10374 # but we will use the character count to have a defined value. In the
10375 # future, it would be nicer to have 'respace_tokens' convert the lines
10376 # to quotes and get correct lengths.
10377 if ( !defined($length) ) { $length = length($token) }
10379 $token_lengths_to_go[$max_index_to_go] = $length;
10381 # We keep a running sum of token lengths from the start of this batch:
10382 # summed_lengths_to_go[$i] = total length to just before token $i
10383 # summed_lengths_to_go[$i+1] = total length to just after token $i
10384 $summed_lengths_to_go[ $max_index_to_go + 1 ] =
10385 $summed_lengths_to_go[$max_index_to_go] + $length;
10387 my $in_continued_quote =
10388 ( $Ktoken_vars == $K_first ) && $line_of_tokens->{_starting_in_quote};
10389 if ( $max_index_to_go == 0 ) {
10390 $starting_in_quote = $in_continued_quote;
10393 # Define the indentation that this token will have in two cases:
10394 # Without CI = reduced_spaces_to_go
10395 # With CI = leading_spaces_to_go
10396 if ($in_continued_quote) {
10397 $leading_spaces_to_go[$max_index_to_go] = 0;
10398 $reduced_spaces_to_go[$max_index_to_go] = 0;
10401 $reduced_spaces_to_go[$max_index_to_go] = my $reduced_spaces =
10402 $rOpts_indent_columns * $radjusted_levels->[$Ktoken_vars];
10403 $leading_spaces_to_go[$max_index_to_go] =
10404 $reduced_spaces + $rOpts_continuation_indentation * $ci_level;
10407 # Correct these values if -lp is used
10408 if ($rOpts_line_up_parentheses) {
10409 $self->set_leading_whitespace( $Ktoken_vars, $K_last_nonblank_code,
10410 $K_last_last_nonblank_code, $level, $ci_level,
10411 $in_continued_quote );
10414 DEBUG_STORE && do {
10415 my ( $a, $b, $c ) = caller();
10417 "STORE: from $a $c: storing token $token type $type lev=$level at $max_index_to_go\n";
10422 sub flush_batch_of_CODE {
10424 # Finish any batch packaging and call the process routine.
10425 # This must be the only call to grind_batch_of_CODE()
10428 return unless ( $max_index_to_go >= 0 );
10430 # Create an array to hold variables for this batch
10431 my $this_batch = [];
10432 $this_batch->[_starting_in_quote_] = $starting_in_quote;
10433 $this_batch->[_ending_in_quote_] = $ending_in_quote;
10434 $this_batch->[_max_index_to_go_] = $max_index_to_go;
10435 $this_batch->[_rK_to_go_] = \@K_to_go;
10436 $this_batch->[_batch_CODE_type_] = $batch_CODE_type;
10438 # The flag $is_static_block_comment applies to the line which just
10439 # arrived. So it only applies if we are outputting that line.
10440 $this_batch->[_is_static_block_comment_] =
10442 && $max_index_to_go == 0
10443 && $K_to_go[0] == $K_first ? $is_static_block_comment : 0;
10445 $self->[_this_batch_] = $this_batch;
10447 $last_line_had_side_comment =
10448 $max_index_to_go > 0 && $types_to_go[$max_index_to_go] eq '#';
10450 $self->grind_batch_of_CODE();
10452 # Done .. this batch is history
10453 $self->[_this_batch_] = [];
10455 initialize_batch_variables();
10456 initialize_forced_breakpoint_vars();
10457 initialize_gnu_batch_vars()
10458 if $rOpts_line_up_parentheses;
10465 # end the current batch, EXCEPT for a few special cases
10468 # Exception 1: Do not end line in a weld
10470 if ( $total_weld_count
10471 && $self->is_welded_right_at_i($max_index_to_go) );
10473 # Exception 2: just set a tentative breakpoint if we might be in a
10475 if ( $index_start_one_line_block != UNDEFINED_INDEX ) {
10476 $self->set_forced_breakpoint($max_index_to_go);
10480 $self->flush_batch_of_CODE();
10484 sub flush_vertical_aligner {
10486 my $vao = $self->[_vertical_aligner_object_];
10491 # flush is called to output any tokens in the pipeline, so that
10492 # an alternate source of lines can be written in the correct order
10494 my ( $self, $CODE_type ) = @_;
10496 # end the current batch with 1 exception
10498 destroy_one_line_block();
10500 # Exception: if we are flushing within the code stream only to insert
10501 # blank line(s), then we can keep the batch intact at a weld. This
10502 # improves formatting of -ce. See test 'ce1.ce'
10503 if ( $CODE_type && $CODE_type eq 'BL' ) { $self->end_batch() }
10505 # otherwise, we have to shut things down completely.
10506 else { $self->flush_batch_of_CODE() }
10508 $self->flush_vertical_aligner();
10512 sub process_line_of_CODE {
10514 my ( $self, $my_line_of_tokens ) = @_;
10516 # This routine is called once per INPUT line to process all of the
10517 # tokens on that line.
10519 # It outputs full-line comments and blank lines immediately.
10521 # The tokens are copied one-by-one from the global token array $rLL to
10522 # a set of '_to_go' arrays which collect batches of tokens for a
10523 # further processing via calls to 'sub store_token_to_go', until a well
10524 # defined 'structural' break point* or 'forced' breakpoint* is reached.
10525 # Then, the batch of collected '_to_go' tokens is passed along to 'sub
10526 # grind_batch_of_CODE' for further processing.
10528 # * 'structural' break points are basically line breaks corresponding
10529 # to code blocks. An example is a chain of if-elsif-else statements,
10530 # which should typically be broken at the opening and closing braces.
10532 # * 'forced' break points are breaks required by side comments or by
10533 # special user controls.
10535 # So this routine is just making an initial set of required line
10536 # breaks, basically regardless of the maximum requested line length.
10537 # The subsequent stage of formating make additional line breaks
10538 # appropriate for lists and logical structures, and to keep line
10539 # lengths below the requested maximum line length.
10541 $line_of_tokens = $my_line_of_tokens;
10542 $CODE_type = $line_of_tokens->{_code_type};
10543 my $input_line_number = $line_of_tokens->{_line_number};
10544 my $input_line = $line_of_tokens->{_line_text};
10546 # initialize closure variables
10547 my $rK_range = $line_of_tokens->{_rK_range};
10548 ( $K_first, $K_last ) = @{$rK_range};
10550 # remember original starting index in case it changes
10551 my $K_first_true = $K_first;
10553 $rLL = $self->[_rLL_];
10554 $radjusted_levels = $self->[_radjusted_levels_];
10556 my $file_writer_object = $self->[_file_writer_object_];
10557 my $rbreak_container = $self->[_rbreak_container_];
10558 my $rshort_nested = $self->[_rshort_nested_];
10559 my $sink_object = $self->[_sink_object_];
10560 my $fh_tee = $self->[_fh_tee_];
10561 my $ris_bli_container = $self->[_ris_bli_container_];
10562 my $rK_weld_left = $self->[_rK_weld_left_];
10564 if ( !defined($K_first) ) {
10566 # Empty line: This can happen if tokens are deleted, for example
10567 # with the -mangle parameter
10571 $no_internal_newlines = 0;
10572 if ( !$rOpts_add_newlines || $CODE_type eq 'NIN' ) {
10573 $no_internal_newlines = 2;
10576 $side_comment_follows = 0;
10578 ( $K_first == $K_last && $rLL->[$K_first]->[_TYPE_] eq '#' );
10579 my $is_static_block_comment_without_leading_space =
10580 $CODE_type eq 'SBCX';
10581 $is_static_block_comment =
10582 $CODE_type eq 'SBC' || $is_static_block_comment_without_leading_space;
10583 my $is_hanging_side_comment = $CODE_type eq 'HSC';
10584 my $is_VERSION_statement = $CODE_type eq 'VER';
10586 if ($is_VERSION_statement) {
10587 $self->[_saw_VERSION_in_this_file_] = 1;
10588 $no_internal_newlines = 2;
10591 # Add interline blank if any
10592 my $last_old_nonblank_type = "b";
10593 my $first_new_nonblank_token = "";
10594 if ( $max_index_to_go >= 0 ) {
10595 $last_old_nonblank_type = $types_to_go[$max_index_to_go];
10596 $first_new_nonblank_token = $rLL->[$K_first]->[_TOKEN_];
10598 && $types_to_go[$max_index_to_go] ne 'b'
10600 && $rLL->[ $K_first - 1 ]->[_TYPE_] eq 'b' )
10606 my $rtok_first = $rLL->[$K_first];
10608 my $in_quote = $line_of_tokens->{_ending_in_quote};
10609 $ending_in_quote = $in_quote;
10610 my $guessed_indentation_level =
10611 $line_of_tokens->{_guessed_indentation_level};
10613 ######################################
10614 # Handle a block (full-line) comment..
10615 ######################################
10618 if ( $rOpts->{'delete-block-comments'} ) {
10623 destroy_one_line_block();
10624 $self->end_batch();
10626 # output a blank line before block comments
10628 # unless we follow a blank or comment line
10629 $self->[_last_line_leading_type_] ne '#'
10630 && $self->[_last_line_leading_type_] ne 'b'
10633 && $rOpts->{'blanks-before-comments'}
10635 # if this is NOT an empty comment, unless it follows a side
10636 # comment and could become a hanging side comment.
10638 $rtok_first->[_TOKEN_] ne '#'
10639 || ( $last_line_had_side_comment
10640 && $rLL->[$K_first]->[_LEVEL_] > 0 )
10643 # not after a short line ending in an opening token
10644 # because we already have space above this comment.
10645 # Note that the first comment in this if block, after
10646 # the 'if (', does not get a blank line because of this.
10647 && !$self->[_last_output_short_opening_token_]
10649 # never before static block comments
10650 && !$is_static_block_comment
10653 $self->flush(); # switching to new output stream
10654 $file_writer_object->write_blank_code_line();
10655 $self->[_last_line_leading_type_] = 'b';
10659 $rOpts->{'indent-block-comments'}
10660 && ( !$rOpts->{'indent-spaced-block-comments'}
10661 || $input_line =~ /^\s+/ )
10662 && !$is_static_block_comment_without_leading_space
10665 my $Ktoken_vars = $K_first;
10666 my $rtoken_vars = $rLL->[$Ktoken_vars];
10667 $self->store_token_to_go( $Ktoken_vars, $rtoken_vars );
10668 $self->end_batch();
10672 # switching to new output stream
10675 # Note that last arg in call here is 'undef' for comments
10676 $file_writer_object->write_code_line(
10677 $rtok_first->[_TOKEN_] . "\n", undef );
10678 $self->[_last_line_leading_type_] = '#';
10683 # compare input/output indentation except for continuation lines
10684 # (because they have an unknown amount of initial blank space)
10685 # and lines which are quotes (because they may have been outdented)
10686 $self->compare_indentation_levels( $K_first, $guessed_indentation_level,
10687 $input_line_number )
10688 unless ( $is_hanging_side_comment
10689 || $rtok_first->[_CI_LEVEL_] > 0
10690 || $guessed_indentation_level == 0
10691 && $rtok_first->[_TYPE_] eq 'Q' );
10693 ##########################
10694 # Handle indentation-only
10695 ##########################
10697 # NOTE: In previous versions we sent all qw lines out immediately here.
10698 # No longer doing this: also write a line which is entirely a 'qw' list
10699 # to allow stacking of opening and closing tokens. Note that interior
10700 # qw lines will still go out at the end of this routine.
10701 if ( $CODE_type eq 'IO' ) {
10703 my $line = $input_line;
10705 # Fix for rt #125506 Unexpected string formating
10706 # in which leading space of a terminal quote was removed
10708 $line =~ s/^\s+// unless ( $line_of_tokens->{_starting_in_quote} );
10710 my $Ktoken_vars = $K_first;
10712 # We work with a copy of the token variables and change the
10713 # first token to be the entire line as a quote variable
10714 my $rtoken_vars = $rLL->[$Ktoken_vars];
10715 $rtoken_vars = copy_token_as_type( $rtoken_vars, 'q', $line );
10717 # Patch: length is not really important here
10718 $rtoken_vars->[_TOKEN_LENGTH_] = length($line);
10720 $self->store_token_to_go( $Ktoken_vars, $rtoken_vars );
10721 $self->end_batch();
10725 ############################
10726 # Handle all other lines ...
10727 ############################
10729 # If we just saw the end of an elsif block, write nag message
10730 # if we do not see another elseif or an else.
10731 if ($looking_for_else) {
10733 unless ( $rLL->[$K_first]->[_TOKEN_] =~ /^(elsif|else)$/ ) {
10734 write_logfile_entry("(No else block)\n");
10736 $looking_for_else = 0;
10739 # This is a good place to kill incomplete one-line blocks
10742 ( $semicolons_before_block_self_destruct == 0 )
10743 && ( $max_index_to_go >= 0 )
10744 && ( $last_old_nonblank_type eq ';' )
10745 && ( $first_new_nonblank_token ne '}' )
10748 # Patch for RT #98902. Honor request to break at old commas.
10749 || ( $rOpts_break_at_old_comma_breakpoints
10750 && $max_index_to_go >= 0
10751 && $last_old_nonblank_type eq ',' )
10754 $forced_breakpoint_to_go[$max_index_to_go] = 1
10755 if ($rOpts_break_at_old_comma_breakpoints);
10756 destroy_one_line_block();
10757 $self->end_batch();
10760 # Keep any requested breaks before this line. Note that we have to
10761 # use the original K_first because it may have been reduced above
10762 # to add a blank. The value of the flag is as follows:
10763 # 1 => hard break, flush the batch
10764 # 2 => soft break, set breakpoint and continue building the batch
10765 if ( $self->[_rbreak_before_Kfirst_]->{$K_first_true} ) {
10766 destroy_one_line_block();
10767 if ( $self->[_rbreak_before_Kfirst_]->{$K_first_true} == 2 ) {
10768 $self->set_forced_breakpoint($max_index_to_go);
10771 $self->end_batch();
10775 # loop to process the tokens one-by-one
10777 # We do not want a leading blank if the previous batch just got output
10778 if ( $max_index_to_go < 0 && $rLL->[$K_first]->[_TYPE_] eq 'b' ) {
10782 foreach my $Ktoken_vars ( $K_first .. $K_last ) {
10784 my $rtoken_vars = $rLL->[$Ktoken_vars];
10785 my $token = $rtoken_vars->[_TOKEN_];
10786 my $type = $rtoken_vars->[_TYPE_];
10787 my $block_type = $rtoken_vars->[_BLOCK_TYPE_];
10788 my $type_sequence = $rtoken_vars->[_TYPE_SEQUENCE_];
10790 # If we are continuing after seeing a right curly brace, flush
10791 # buffer unless we see what we are looking for, as in
10793 if ( $rbrace_follower && $type ne 'b' ) {
10795 unless ( $rbrace_follower->{$token} ) {
10796 $self->end_batch();
10798 $rbrace_follower = undef;
10801 # Get next nonblank on this line
10802 my $next_nonblank_token = '';
10803 my $next_nonblank_token_type = 'b';
10804 if ( $Ktoken_vars < $K_last ) {
10805 my $Knnb = $Ktoken_vars + 1;
10806 if ( $rLL->[$Knnb]->[_TYPE_] eq 'b'
10807 && $Knnb < $K_last )
10811 $next_nonblank_token = $rLL->[$Knnb]->[_TOKEN_];
10812 $next_nonblank_token_type = $rLL->[$Knnb]->[_TYPE_];
10815 # Do not allow breaks which would promote a side comment to a
10816 # block comment. In order to allow a break before an opening
10817 # or closing BLOCK, followed by a side comment, those sections
10818 # of code will handle this flag separately.
10819 $side_comment_follows = ( $next_nonblank_token_type eq '#' );
10820 my $is_opening_BLOCK =
10824 && !$rshort_nested->{$type_sequence}
10825 && $block_type ne 't' );
10826 my $is_closing_BLOCK =
10830 && !$rshort_nested->{$type_sequence}
10831 && $block_type ne 't' );
10833 if ( $side_comment_follows
10834 && !$is_opening_BLOCK
10835 && !$is_closing_BLOCK )
10837 $no_internal_newlines = 1;
10840 # We're only going to handle breaking for code BLOCKS at this
10841 # (top) level. Other indentation breaks will be handled by
10842 # sub scan_list, which is better suited to dealing with them.
10843 if ($is_opening_BLOCK) {
10845 # Tentatively output this token. This is required before
10846 # calling starting_one_line_block. We may have to unstore
10847 # it, though, if we have to break before it.
10848 $self->store_token_to_go( $Ktoken_vars, $rtoken_vars );
10850 # Look ahead to see if we might form a one-line block..
10852 $self->starting_one_line_block( $Ktoken_vars,
10853 $K_last_nonblank_code, $K_last );
10854 $self->clear_breakpoint_undo_stack();
10856 # to simplify the logic below, set a flag to indicate if
10857 # this opening brace is far from the keyword which introduces it
10858 my $keyword_on_same_line = 1;
10860 $max_index_to_go >= 0
10861 && $last_nonblank_type eq ')'
10862 && ( ( $rtoken_vars->[_SLEVEL_] < $nesting_depth_to_go[0] )
10866 $keyword_on_same_line = 0;
10869 # decide if user requested break before '{'
10872 # This test was added to minimize changes in -bl formatting
10873 # caused by other changes to fix cases b562 .. b983
10874 # Previously, the -bl flag was being applied almost randomly
10875 # to sort/map/grep/eval blocks, depending on if they were
10876 # flagged as possible one-line blocks. usually time they
10877 # were not given -bl formatting. The following flag was
10878 # added to minimize changes to existing formatting.
10879 $is_braces_left_exclude_block{$block_type}
10882 # use -bl flag if not a sub block of any type
10883 : $block_type !~ /$ANYSUB_PATTERN/
10884 ? $rOpts->{'opening-brace-on-new-line'}
10886 # use -sbl flag for a named sub block
10887 : $block_type !~ /$ASUB_PATTERN/
10888 ? $rOpts->{'opening-sub-brace-on-new-line'}
10890 # use -asbl flag for an anonymous sub block
10891 : $rOpts->{'opening-anonymous-sub-brace-on-new-line'};
10893 # Break if requested with -bli flag
10894 $want_break ||= $ris_bli_container->{$type_sequence};
10896 # Do not break if this token is welded to the left
10897 if ( $total_weld_count
10898 && defined( $rK_weld_left->{$Ktoken_vars} ) )
10903 # Break before an opening '{' ...
10909 # and we were unable to start looking for a block,
10910 && $index_start_one_line_block == UNDEFINED_INDEX
10912 # or if it will not be on same line as its keyword, so that
10913 # it will be outdented (eval.t, overload.t), and the user
10914 # has not insisted on keeping it on the right
10915 || ( !$keyword_on_same_line
10916 && !$rOpts->{'opening-brace-always-on-right'} )
10920 # but only if allowed
10921 unless ($no_internal_newlines) {
10923 # since we already stored this token, we must unstore it
10924 $self->unstore_token_to_go();
10926 # then output the line
10927 $self->end_batch();
10929 # and now store this token at the start of a new line
10930 $self->store_token_to_go( $Ktoken_vars, $rtoken_vars );
10934 # Now update for side comment
10935 if ($side_comment_follows) { $no_internal_newlines = 1 }
10937 # now output this line
10938 unless ($no_internal_newlines) {
10939 $self->end_batch();
10943 elsif ($is_closing_BLOCK) {
10945 # If there is a pending one-line block ..
10946 if ( $index_start_one_line_block != UNDEFINED_INDEX ) {
10948 # we have to terminate it if..
10951 # it is too long (final length may be different from
10952 # initial estimate). note: must allow 1 space for this
10954 $self->excess_line_length( $index_start_one_line_block,
10955 $max_index_to_go ) >= 0
10957 # or if it has too many semicolons
10958 || ( $semicolons_before_block_self_destruct == 0
10959 && $last_nonblank_type ne ';' )
10962 destroy_one_line_block();
10966 # put a break before this closing curly brace if appropriate
10967 unless ( $no_internal_newlines
10968 || $index_start_one_line_block != UNDEFINED_INDEX )
10971 # write out everything before this closing curly brace
10972 $self->end_batch();
10975 # Now update for side comment
10976 if ($side_comment_follows) { $no_internal_newlines = 1 }
10978 # store the closing curly brace
10979 $self->store_token_to_go( $Ktoken_vars, $rtoken_vars );
10981 # ok, we just stored a closing curly brace. Often, but
10982 # not always, we want to end the line immediately.
10983 # So now we have to check for special cases.
10985 # if this '}' successfully ends a one-line block..
10986 my $is_one_line_block = 0;
10987 my $keep_going = 0;
10988 if ( $index_start_one_line_block != UNDEFINED_INDEX ) {
10990 # Remember the type of token just before the
10991 # opening brace. It would be more general to use
10992 # a stack, but this will work for one-line blocks.
10993 $is_one_line_block =
10994 $types_to_go[$index_start_one_line_block];
10996 # we have to actually make it by removing tentative
10997 # breaks that were set within it
10998 $self->undo_forced_breakpoint_stack(0);
10999 $self->set_nobreaks( $index_start_one_line_block,
11000 $max_index_to_go - 1 );
11002 # then re-initialize for the next one-line block
11003 destroy_one_line_block();
11005 # then decide if we want to break after the '}' ..
11006 # We will keep going to allow certain brace followers as in:
11007 # do { $ifclosed = 1; last } unless $losing;
11009 # But make a line break if the curly ends a
11010 # significant block:
11013 $is_block_without_semicolon{$block_type}
11015 # Follow users break point for
11016 # one line block types U & G, such as a 'try' block
11017 || $is_one_line_block =~ /^[UG]$/
11018 && $Ktoken_vars == $K_last
11021 # if needless semicolon follows we handle it later
11022 && $next_nonblank_token ne ';'
11026 unless ($no_internal_newlines);
11030 # set string indicating what we need to look for brace follower
11032 if ( $block_type eq 'do' ) {
11033 $rbrace_follower = \%is_do_follower;
11034 if ( $self->tight_paren_follows( $K_to_go[0], $Ktoken_vars )
11037 $rbrace_follower = { ')' => 1 };
11040 elsif ( $block_type =~ /^(if|elsif|unless)$/ ) {
11041 $rbrace_follower = \%is_if_brace_follower;
11043 elsif ( $block_type eq 'else' ) {
11044 $rbrace_follower = \%is_else_brace_follower;
11047 # added eval for borris.t
11048 elsif ($is_sort_map_grep_eval{$block_type}
11049 || $is_one_line_block eq 'G' )
11051 $rbrace_follower = undef;
11056 elsif ( $block_type =~ /$ASUB_PATTERN/ ) {
11058 if ($is_one_line_block) {
11059 $rbrace_follower = \%is_anon_sub_1_brace_follower;
11062 $rbrace_follower = \%is_anon_sub_brace_follower;
11066 # None of the above: specify what can follow a closing
11067 # brace of a block which is not an
11068 # if/elsif/else/do/sort/map/grep/eval
11070 # 'Toolbar.pm', 'Menubar.pm', bless.t, '3rules.pl', 'break1.t
11072 $rbrace_follower = \%is_other_brace_follower;
11075 # See if an elsif block is followed by another elsif or else;
11077 if ( $block_type eq 'elsif' ) {
11079 if ( $next_nonblank_token_type eq 'b' ) { # end of line?
11080 $looking_for_else = 1; # ok, check on next line
11084 unless ( $next_nonblank_token =~ /^(elsif|else)$/ ) {
11085 write_logfile_entry("No else block :(\n");
11090 # keep going after certain block types (map,sort,grep,eval)
11091 # added eval for borris.t
11097 # if no more tokens, postpone decision until re-entring
11098 elsif ( ( $next_nonblank_token_type eq 'b' )
11099 && $rOpts_add_newlines )
11101 unless ($rbrace_follower) {
11103 unless ($no_internal_newlines);
11107 elsif ($rbrace_follower) {
11109 unless ( $rbrace_follower->{$next_nonblank_token} ) {
11111 unless ($no_internal_newlines);
11113 $rbrace_follower = undef;
11118 unless ($no_internal_newlines);
11121 } # end treatment of closing block token
11124 elsif ( $type eq ';' ) {
11126 my $break_before_semicolon = ( $Ktoken_vars == $K_first )
11127 && $rOpts_break_at_old_semicolon_breakpoints;
11129 # kill one-line blocks with too many semicolons
11130 $semicolons_before_block_self_destruct--;
11132 $break_before_semicolon
11133 || ( $semicolons_before_block_self_destruct < 0 )
11134 || ( $semicolons_before_block_self_destruct == 0
11135 && $next_nonblank_token_type !~ /^[b\}]$/ )
11138 destroy_one_line_block();
11139 $self->end_batch() if ($break_before_semicolon);
11142 $self->store_token_to_go( $Ktoken_vars, $rtoken_vars );
11146 $no_internal_newlines
11147 || ( $rOpts_keep_interior_semicolons
11148 && $Ktoken_vars < $K_last )
11149 || ( $next_nonblank_token eq '}' )
11154 # handle here_doc target string
11155 elsif ( $type eq 'h' ) {
11157 # no newlines after seeing here-target
11158 $no_internal_newlines = 2;
11159 ## destroy_one_line_block(); # deleted to fix case b529
11160 $self->store_token_to_go( $Ktoken_vars, $rtoken_vars );
11163 # handle all other token types
11166 $self->store_token_to_go( $Ktoken_vars, $rtoken_vars );
11169 # remember two previous nonblank OUTPUT tokens
11170 if ( $type ne '#' && $type ne 'b' ) {
11171 $last_last_nonblank_token = $last_nonblank_token;
11172 $last_last_nonblank_type = $last_nonblank_type;
11173 $last_nonblank_token = $token;
11174 $last_nonblank_type = $type;
11175 $last_nonblank_block_type = $block_type;
11176 $K_last_last_nonblank_code = $K_last_nonblank_code;
11177 $K_last_nonblank_code = $Ktoken_vars;
11180 } # end of loop over all tokens in this 'line_of_tokens'
11182 my $type = $rLL->[$K_last]->[_TYPE_];
11183 my $break_flag = $self->[_rbreak_after_Klast_]->{$K_last};
11185 # we have to flush ..
11188 # if there is a side comment...
11191 # if this line ends in a quote
11192 # NOTE: This is critically important for insuring that quoted lines
11193 # do not get processed by things like -sot and -sct
11196 # if this is a VERSION statement
11197 || $is_VERSION_statement
11199 # to keep a label at the end of a line
11202 # if we have a hard break request
11203 || $break_flag && $break_flag != 2
11205 # if we are instructed to keep all old line breaks
11206 || !$rOpts->{'delete-old-newlines'}
11208 # if this is a line of the form 'use overload'. A break here
11209 # in the input file is a good break because it will allow
11210 # the operators which follow to be formatted well. Without
11211 # this break the formatting with -ci=4 -xci is poor, for example.
11215 # print length $_[2], "\n";
11216 # my ( $x, $y ) = _order(@_);
11217 # Number::Roman->new( int $x + $y );
11220 # my ( $x, $y ) = _order(@_);
11221 # Number::Roman->new( int $x - $y );
11223 || ( $max_index_to_go == 2
11224 && $types_to_go[0] eq 'k'
11225 && $tokens_to_go[0] eq 'use'
11226 && $tokens_to_go[$max_index_to_go] eq 'overload' )
11229 destroy_one_line_block();
11230 $self->end_batch();
11233 # Check for a soft break request
11234 if ( $max_index_to_go >= 0 && $break_flag && $break_flag == 2 ) {
11235 $self->set_forced_breakpoint($max_index_to_go);
11238 # mark old line breakpoints in current output stream
11240 $max_index_to_go >= 0
11241 && ( !$rOpts_ignore_old_breakpoints
11242 || $self->[_ris_essential_old_breakpoint_]->{$K_last} )
11245 my $jobp = $max_index_to_go;
11246 if ( $types_to_go[$max_index_to_go] eq 'b' && $max_index_to_go > 0 )
11250 $old_breakpoint_to_go[$jobp] = 1;
11253 } ## end sub process_line_of_CODE
11254 } ## end closure process_line_of_CODE
11256 sub tight_paren_follows {
11258 my ( $self, $K_to_go_0, $K_ic ) = @_;
11260 # Input parameters:
11261 # $K_to_go_0 = first token index K of this output batch (=K_to_go[0])
11262 # $K_ic = index of the closing do brace (=K_to_go[$max_index_to_go])
11263 # Return parameter:
11264 # false if we want a break after the closing do brace
11265 # true if we do not want a break after the closing do brace
11267 # We are at the closing brace of a 'do' block. See if this brace is
11268 # followed by a closing paren, and if so, set a flag which indicates
11269 # that we do not want a line break between the '}' and ')'.
11271 # xxxxx ( ...... do { ... } ) {
11272 # ^-------looking at this brace, K_ic
11274 # Subscript notation:
11275 # _i = inner container (braces in this case)
11276 # _o = outer container (parens in this case)
11277 # _io = inner opening = '{'
11278 # _ic = inner closing = '}'
11279 # _oo = outer opening = '('
11280 # _oc = outer closing = ')'
11282 # |--K_oo |--K_oc = outer container
11283 # xxxxx ( ...... do { ...... } ) {
11284 # |--K_io |--K_ic = inner container
11286 # In general, the safe thing to do is return a 'false' value
11287 # if the statement appears to be complex. This will have
11288 # the downstream side-effect of opening up outer containers
11289 # to help make complex code readable. But for simpler
11290 # do blocks it can be preferable to keep the code compact
11291 # by returning a 'true' value.
11293 return unless defined($K_ic);
11294 my $rLL = $self->[_rLL_];
11296 # we should only be called at a closing block
11297 my $seqno_i = $rLL->[$K_ic]->[_TYPE_SEQUENCE_];
11298 return unless ($seqno_i); # shouldn't happen;
11300 # This only applies if the next nonblank is a ')'
11301 my $K_oc = $self->K_next_nonblank($K_ic);
11302 return unless defined($K_oc);
11303 my $token_next = $rLL->[$K_oc]->[_TOKEN_];
11304 return unless ( $token_next eq ')' );
11306 my $seqno_o = $rLL->[$K_oc]->[_TYPE_SEQUENCE_];
11307 my $K_io = $self->[_K_opening_container_]->{$seqno_i};
11308 my $K_oo = $self->[_K_opening_container_]->{$seqno_o};
11309 return unless ( defined($K_io) && defined($K_oo) );
11311 # RULE 1: Do not break before a closing signature paren
11312 # (regardless of complexity). This is a fix for issue git#22.
11313 # Looking for something like:
11314 # sub xxx ( ... do { ... } ) {
11315 # ^----- next block_type
11316 my $K_test = $self->K_next_nonblank($K_oc);
11317 if ( defined($K_test) ) {
11318 my $block_type = $rLL->[$K_test]->[_BLOCK_TYPE_];
11320 && $rLL->[$K_test]->[_TYPE_] eq '{'
11321 && $block_type =~ /$ANYSUB_PATTERN/ )
11327 # RULE 2: Break if the contents within braces appears to be 'complex'. We
11328 # base this decision on the number of tokens between braces.
11330 # xxxxx ( ... do { ... } ) {
11333 # Although very simple, it has the advantages of (1) being insensitive to
11334 # changes in lengths of identifier names, (2) easy to understand, implement
11335 # and test. A test case for this is 't/snippets/long_line.in'.
11337 # Example: $K_ic - $K_oo = 9 [Pass Rule 2]
11338 # if ( do { $2 !~ /&/ } ) { ... }
11340 # Example: $K_ic - $K_oo = 10 [Pass Rule 2]
11341 # for ( split /\s*={70,}\s*/, do { local $/; <DATA> }) { ... }
11343 # Example: $K_ic - $K_oo = 20 [Fail Rule 2]
11344 # test_zero_args( "do-returned list slice", do { ( 10, 11 )[ 2, 3 ]; });
11346 return if ( $K_ic - $K_io > 16 );
11348 # RULE 3: break if the code between the opening '(' and the '{' is 'complex'
11349 # As with the previous rule, we decide based on the token count
11351 # xxxxx ( ... do { ... } ) {
11354 # Example: $K_ic - $K_oo = 9 [Pass Rule 2]
11355 # $K_io - $K_oo = 4 [Pass Rule 3]
11356 # if ( do { $2 !~ /&/ } ) { ... }
11358 # Example: $K_ic - $K_oo = 10 [Pass rule 2]
11359 # $K_io - $K_oo = 9 [Pass rule 3]
11360 # for ( split /\s*={70,}\s*/, do { local $/; <DATA> }) { ... }
11362 return if ( $K_io - $K_oo > 9 );
11364 # RULE 4: Break if we have already broken this batch of output tokens
11365 return if ( $K_oo < $K_to_go_0 );
11367 # RULE 5: Break if input is not on one line
11368 # For example, we will set the flag for the following expression
11369 # written in one line:
11371 # This has: $K_ic - $K_oo = 10 [Pass rule 2]
11372 # $K_io - $K_oo = 8 [Pass rule 3]
11373 # $self->debug( 'Error: ' . do { local $/; <$err> } );
11375 # but we break after the brace if it is on multiple lines on input, since
11376 # the user may prefer it on multiple lines:
11380 # 'Error: ' . do { local $/; <$err> }
11383 if ( !$rOpts_ignore_old_breakpoints ) {
11384 my $iline_oo = $rLL->[$K_oo]->[_LINE_INDEX_];
11385 my $iline_oc = $rLL->[$K_oc]->[_LINE_INDEX_];
11386 return if ( $iline_oo != $iline_oc );
11389 # OK to keep the paren tight
11393 sub starting_one_line_block {
11395 # after seeing an opening curly brace, look for the closing brace and see
11396 # if the entire block will fit on a line. This routine is not always right
11397 # so a check is made later (at the closing brace) to make sure we really
11398 # have a one-line block. We have to do this preliminary check, though,
11399 # because otherwise we would always break at a semicolon within a one-line
11400 # block if the block contains multiple statements.
11402 my ( $self, $Kj, $K_last_nonblank, $K_last ) = @_;
11404 my $rbreak_container = $self->[_rbreak_container_];
11405 my $rshort_nested = $self->[_rshort_nested_];
11406 my $rLL = $self->[_rLL_];
11407 my $K_opening_container = $self->[_K_opening_container_];
11409 # kill any current block - we can only go 1 deep
11410 destroy_one_line_block();
11413 # 1=distance from start of block to opening brace exceeds line length
11418 # This routine should not have been called if there are no tokens in the
11419 # 'to_go' arrays of previously stored tokens. A previous call to
11420 # 'store_token_to_go' should have stored an opening brace. An error here
11421 # indicates that a programming change may have caused a flush operation to
11422 # clean out the previously stored tokens.
11423 if ( !defined($max_index_to_go) || $max_index_to_go < 0 ) {
11424 Fault("program bug: store_token_to_go called incorrectly\n");
11427 # Return if block should be broken
11428 my $type_sequence = $rLL->[$Kj]->[_TYPE_SEQUENCE_];
11429 if ( $rbreak_container->{$type_sequence} ) {
11433 my $ris_bli_container = $self->[_ris_bli_container_];
11434 my $is_bli = $ris_bli_container->{$type_sequence};
11436 my $block_type = $rLL->[$Kj]->[_BLOCK_TYPE_];
11437 my $index_max_forced_break = get_index_max_forced_break();
11439 my $previous_nonblank_token = '';
11440 my $i_last_nonblank = -1;
11441 if ( defined($K_last_nonblank) ) {
11442 $i_last_nonblank = $K_last_nonblank - $K_to_go[0];
11443 if ( $i_last_nonblank >= 0 ) {
11444 $previous_nonblank_token = $rLL->[$K_last_nonblank]->[_TOKEN_];
11448 # find the starting keyword for this block (such as 'if', 'else', ...)
11449 if ( $max_index_to_go == 0
11450 || $block_type =~ /^[\{\}\;\:]$/
11451 || $block_type =~ /^package/ )
11453 $i_start = $max_index_to_go;
11456 # the previous nonblank token should start these block types
11458 $i_last_nonblank >= 0
11459 && ( $previous_nonblank_token eq $block_type
11460 || $block_type =~ /$ANYSUB_PATTERN/
11461 || $block_type =~ /\(\)/ )
11464 $i_start = $i_last_nonblank;
11466 # For signatures and extended syntax ...
11467 # If this brace follows a parenthesized list, we should look back to
11468 # find the keyword before the opening paren because otherwise we might
11469 # form a one line block which stays intack, and cause the parenthesized
11470 # expression to break open. That looks bad.
11471 if ( $tokens_to_go[$i_start] eq ')' ) {
11473 # Find the opening paren
11474 my $K_start = $K_to_go[$i_start];
11475 return 0 unless defined($K_start);
11476 my $seqno = $type_sequence_to_go[$i_start];
11477 return 0 unless ($seqno);
11478 my $K_opening = $K_opening_container->{$seqno};
11479 return 0 unless defined($K_opening);
11480 my $i_opening = $i_start + ( $K_opening - $K_start );
11482 # give up if not on this line
11483 return 0 unless ( $i_opening >= 0 );
11484 $i_start = $i_opening; ##$index_max_forced_break + 1;
11486 # go back one token before the opening paren
11487 if ( $i_start > 0 ) { $i_start-- }
11488 if ( $types_to_go[$i_start] eq 'b' && $i_start > 0 ) { $i_start--; }
11489 my $lev = $levels_to_go[$i_start];
11490 if ( $lev > $rLL->[$Kj]->[_LEVEL_] ) { return 0 }
11494 elsif ( $previous_nonblank_token eq ')' ) {
11496 # For something like "if (xxx) {", the keyword "if" will be
11497 # just after the most recent break. This will be 0 unless
11498 # we have just killed a one-line block and are starting another.
11500 # Note: cannot use inext_index_to_go[] here because that array
11501 # is still being constructed.
11502 $i_start = $index_max_forced_break + 1;
11503 if ( $types_to_go[$i_start] eq 'b' ) {
11507 # Patch to avoid breaking short blocks defined with extended_syntax:
11508 # Strip off any trailing () which was added in the parser to mark
11509 # the opening keyword. For example, in the following
11510 # create( TypeFoo $e) {$bubba}
11511 # the blocktype would be marked as create()
11512 my $stripped_block_type = $block_type;
11513 $stripped_block_type =~ s/\(\)$//;
11515 unless ( $tokens_to_go[$i_start] eq $stripped_block_type ) {
11520 # patch for SWITCH/CASE to retain one-line case/when blocks
11521 elsif ( $block_type eq 'case' || $block_type eq 'when' ) {
11523 # Note: cannot use inext_index_to_go[] here because that array
11524 # is still being constructed.
11525 $i_start = $index_max_forced_break + 1;
11526 if ( $types_to_go[$i_start] eq 'b' ) {
11529 unless ( $tokens_to_go[$i_start] eq $block_type ) {
11538 my $pos = total_line_length( $i_start, $max_index_to_go ) - 1;
11540 my $maximum_line_length =
11541 $maximum_line_length_at_level[ $levels_to_go[$i_start] ];
11543 # see if block starting location is too great to even start
11544 if ( $pos > $maximum_line_length ) {
11548 # See if everything to the closing token will fit on one line
11549 # This is part of an update to fix cases b562 .. b983
11550 my $K_closing = $self->[_K_closing_container_]->{$type_sequence};
11551 return 0 unless ( defined($K_closing) );
11552 my $container_length = $rLL->[$K_closing]->[_CUMULATIVE_LENGTH_] -
11553 $rLL->[$Kj]->[_CUMULATIVE_LENGTH_];
11555 my $excess = $pos + 1 + $container_length - $maximum_line_length;
11557 # Add a small tolerance for welded tokens (case b901)
11558 if ( $total_weld_count && $self->is_welded_at_seqno($type_sequence) ) {
11562 if ( $excess > 0 ) {
11564 # line is too long... there is no chance of forming a one line block
11565 # if the excess is more than 1 char
11566 return 0 if ( $excess > 1 );
11568 # ... and give up if it is not a one-line block on input.
11569 # note: for a one-line block on input, it may be possible to keep
11570 # it as a one-line block (by removing a needless semicolon ).
11571 my $K_start = $K_to_go[$i_start];
11573 $rLL->[$K_closing]->[_LINE_INDEX_] - $rLL->[$K_start]->[_LINE_INDEX_];
11574 return 0 if ($ldiff);
11577 foreach my $Ki ( $Kj + 1 .. $K_last ) {
11579 # old whitespace could be arbitrarily large, so don't use it
11580 if ( $rLL->[$Ki]->[_TYPE_] eq 'b' ) { $pos += 1 }
11581 else { $pos += $rLL->[$Ki]->[_TOKEN_LENGTH_] }
11583 # ignore some small blocks
11584 my $type_sequence = $rLL->[$Ki]->[_TYPE_SEQUENCE_];
11585 my $nobreak = $rshort_nested->{$type_sequence};
11587 # Return false result if we exceed the maximum line length,
11588 if ( $pos > $maximum_line_length ) {
11592 # keep going for non-containers
11593 elsif ( !$type_sequence ) {
11597 # return if we encounter another opening brace before finding the
11599 elsif ($rLL->[$Ki]->[_TOKEN_] eq '{'
11600 && $rLL->[$Ki]->[_TYPE_] eq '{'
11601 && $rLL->[$Ki]->[_BLOCK_TYPE_]
11607 # if we find our closing brace..
11608 elsif ($rLL->[$Ki]->[_TOKEN_] eq '}'
11609 && $rLL->[$Ki]->[_TYPE_] eq '}'
11610 && $rLL->[$Ki]->[_BLOCK_TYPE_]
11614 # be sure any trailing comment also fits on the line
11615 my $Ki_nonblank = $Ki;
11616 if ( $Ki_nonblank < $K_last ) {
11618 if ( $rLL->[$Ki_nonblank]->[_TYPE_] eq 'b'
11619 && $Ki_nonblank < $K_last )
11625 # Patch for one-line sort/map/grep/eval blocks with side comments:
11626 # We will ignore the side comment length for sort/map/grep/eval
11627 # because this can lead to statements which change every time
11628 # perltidy is run. Here is an example from Denis Moskowitz which
11629 # oscillates between these two states without this patch:
11632 ## grep { $_->foo ne 'bar' } # asdfa asdf asdf asdf asdf asdf asdf asdf asdf asdf asdf
11636 ## $_->foo ne 'bar'
11637 ## } # asdfa asdf asdf asdf asdf asdf asdf asdf asdf asdf asdf
11641 # When the first line is input it gets broken apart by the main
11642 # line break logic in sub process_line_of_CODE.
11643 # When the second line is input it gets recombined by
11644 # process_line_of_CODE and passed to the output routines. The
11645 # output routines (set_continuation_breaks) do not break it apart
11646 # because the bond strengths are set to the highest possible value
11647 # for grep/map/eval/sort blocks, so the first version gets output.
11648 # It would be possible to fix this by changing bond strengths,
11649 # but they are high to prevent errors in older versions of perl.
11652 && $rLL->[$Ki_nonblank]->[_TYPE_] eq '#'
11653 && !$is_sort_map_grep{$block_type} )
11656 $pos += $rLL->[$Ki_nonblank]->[_TOKEN_LENGTH_];
11658 if ( $Ki_nonblank > $Ki + 1 ) {
11660 # source whitespace could be anything, assume
11661 # at least one space before the hash on output
11662 if ( $rLL->[ $Ki + 1 ]->[_TYPE_] eq 'b' ) {
11665 else { $pos += $rLL->[ $Ki + 1 ]->[_TOKEN_LENGTH_] }
11668 if ( $pos >= $maximum_line_length ) {
11673 # ok, it's a one-line block
11674 create_one_line_block( $i_start, 20 );
11678 # just keep going for other characters
11683 # We haven't hit the closing brace, but there is still space. So the
11684 # question here is, should we keep going to look at more lines in hopes of
11685 # forming a new one-line block, or should we stop right now. The problem
11686 # with continuing is that we will not be able to honor breaks before the
11687 # opening brace if we continue.
11689 # Typically we will want to keep trying to make one-line blocks for things
11690 # like sort/map/grep/eval. But it is not always a good idea to make as
11691 # many one-line blocks as possible, so other types are not done. The user
11692 # can always use -mangle.
11694 # If we want to keep going, we will create a new one-line block.
11695 # The blocks which we can keep going are in a hash, but we never want
11696 # to continue if we are at a '-bli' block.
11697 if ( $want_one_line_block{$block_type} && !$is_bli ) {
11698 create_one_line_block( $i_start, 1 );
11703 sub unstore_token_to_go {
11705 # remove most recent token from output stream
11707 if ( $max_index_to_go > 0 ) {
11708 $max_index_to_go--;
11711 $max_index_to_go = UNDEFINED_INDEX;
11716 sub compare_indentation_levels {
11718 # Check to see if output line tabbing agrees with input line
11719 # this can be very useful for debugging a script which has an extra
11720 # or missing brace.
11722 my ( $self, $K_first, $guessed_indentation_level, $line_number ) = @_;
11723 return unless ( defined($K_first) );
11725 my $rLL = $self->[_rLL_];
11727 my $structural_indentation_level = $rLL->[$K_first]->[_LEVEL_];
11728 my $radjusted_levels = $self->[_radjusted_levels_];
11729 if ( defined($radjusted_levels) && @{$radjusted_levels} == @{$rLL} ) {
11730 $structural_indentation_level = $radjusted_levels->[$K_first];
11733 my $is_closing_block = $rLL->[$K_first]->[_TYPE_] eq '}'
11734 && $rLL->[$K_first]->[_BLOCK_TYPE_];
11736 if ( $guessed_indentation_level ne $structural_indentation_level ) {
11737 $self->[_last_tabbing_disagreement_] = $line_number;
11739 if ($is_closing_block) {
11741 if ( !$self->[_in_brace_tabbing_disagreement_] ) {
11742 $self->[_in_brace_tabbing_disagreement_] = $line_number;
11744 if ( !$self->[_first_brace_tabbing_disagreement_] ) {
11745 $self->[_first_brace_tabbing_disagreement_] = $line_number;
11750 if ( !$self->[_in_tabbing_disagreement_] ) {
11751 $self->[_tabbing_disagreement_count_]++;
11753 if ( $self->[_tabbing_disagreement_count_] <= MAX_NAG_MESSAGES ) {
11754 write_logfile_entry(
11755 "Start indentation disagreement: input=$guessed_indentation_level; output=$structural_indentation_level\n"
11758 $self->[_in_tabbing_disagreement_] = $line_number;
11759 $self->[_first_tabbing_disagreement_] = $line_number
11760 unless ( $self->[_first_tabbing_disagreement_] );
11765 $self->[_in_brace_tabbing_disagreement_] = 0 if ($is_closing_block);
11767 my $in_tabbing_disagreement = $self->[_in_tabbing_disagreement_];
11768 if ($in_tabbing_disagreement) {
11770 if ( $self->[_tabbing_disagreement_count_] <= MAX_NAG_MESSAGES ) {
11771 write_logfile_entry(
11772 "End indentation disagreement from input line $in_tabbing_disagreement\n"
11775 if ( $self->[_tabbing_disagreement_count_] == MAX_NAG_MESSAGES )
11777 write_logfile_entry(
11778 "No further tabbing disagreements will be noted\n");
11781 $self->[_in_tabbing_disagreement_] = 0;
11788 ###################################################
11789 # CODE SECTION 8: Utilities for setting breakpoints
11790 ###################################################
11792 { ## begin closure set_forced_breakpoint
11794 my $forced_breakpoint_count;
11795 my $forced_breakpoint_undo_count;
11796 my @forced_breakpoint_undo_stack;
11797 my $index_max_forced_break;
11799 # Break before or after certain tokens based on user settings
11800 my %break_before_or_after_token;
11804 # Updated to use all operators. This fixes case b1054
11805 # Here is the previous simplified version:
11806 ## my @q = qw( . : ? and or xor && || );
11807 my @q = @all_operators;
11810 @break_before_or_after_token{@q} = (1) x scalar(@q);
11813 sub initialize_forced_breakpoint_vars {
11814 $forced_breakpoint_count = 0;
11815 $index_max_forced_break = UNDEFINED_INDEX;
11816 $forced_breakpoint_undo_count = 0;
11817 @forced_breakpoint_undo_stack = ();
11821 sub get_forced_breakpoint_count {
11822 return $forced_breakpoint_count;
11825 sub get_forced_breakpoint_undo_count {
11826 return $forced_breakpoint_undo_count;
11829 sub get_index_max_forced_break {
11830 return $index_max_forced_break;
11833 sub set_fake_breakpoint {
11835 # Just bump up the breakpoint count as a signal that there are breaks.
11836 # This is useful if we have breaks but may want to postpone deciding
11837 # where to make them.
11838 $forced_breakpoint_count++;
11842 use constant DEBUG_FORCE => 0;
11844 sub set_forced_breakpoint {
11845 my ( $self, $i ) = @_;
11847 return unless defined $i && $i >= 0;
11849 # Back up at a blank in case we need an = break.
11850 # This is a backup fix for cases like b932.
11851 if ( $i > 0 && $types_to_go[$i] eq 'b' ) { $i-- }
11853 # no breaks between welded tokens
11854 return if ( $total_weld_count && $self->is_welded_right_at_i($i) );
11856 my $token = $tokens_to_go[$i];
11857 my $type = $types_to_go[$i];
11859 # For certain tokens, use user settings to decide if we break before or
11861 if ( $break_before_or_after_token{$token}
11862 && ( $type eq $token || $type eq 'k' ) )
11864 if ( $want_break_before{$token} && $i >= 0 ) { $i-- }
11867 # breaks are forced before 'if' and 'unless'
11868 elsif ( $is_if_unless{$token} && $type eq 'k' ) { $i-- }
11870 if ( $i >= 0 && $i <= $max_index_to_go ) {
11871 my $i_nonblank = ( $types_to_go[$i] ne 'b' ) ? $i : $i - 1;
11873 DEBUG_FORCE && do {
11874 my ( $a, $b, $c ) = caller();
11876 "FORCE $forced_breakpoint_count from $a $c with i=$i_nonblank max=$max_index_to_go tok=$tokens_to_go[$i_nonblank] type=$types_to_go[$i_nonblank] nobr=$nobreak_to_go[$i_nonblank]\n";
11879 ######################################################################
11880 # NOTE: if we call set_closing_breakpoint below it will then call
11881 # this routing back. So there is the possibility of an infinite
11882 # loop if a programming error is made. As a precaution, I have
11883 # added a check on the forced_breakpoint flag, so that we won't
11884 # keep trying to set it. That will give additional protection
11886 ######################################################################
11888 if ( $i_nonblank >= 0
11889 && $nobreak_to_go[$i_nonblank] == 0
11890 && !$forced_breakpoint_to_go[$i_nonblank] )
11892 $forced_breakpoint_to_go[$i_nonblank] = 1;
11894 if ( $i_nonblank > $index_max_forced_break ) {
11895 $index_max_forced_break = $i_nonblank;
11897 $forced_breakpoint_count++;
11898 $forced_breakpoint_undo_stack[ $forced_breakpoint_undo_count++ ]
11901 # if we break at an opening container..break at the closing
11902 if ( $is_opening_sequence_token{ $tokens_to_go[$i_nonblank] } )
11904 $self->set_closing_breakpoint($i_nonblank);
11911 sub clear_breakpoint_undo_stack {
11913 $forced_breakpoint_undo_count = 0;
11917 use constant DEBUG_UNDOBP => 0;
11919 sub undo_forced_breakpoint_stack {
11921 my ( $self, $i_start ) = @_;
11923 # Given $i_start, a non-negative index the 'undo stack' of breakpoints,
11924 # remove all breakpoints from the top of the 'undo stack' down to and
11925 # including index $i_start.
11927 # The 'undo stack' is a stack of all breakpoints made for a batch of
11930 if ( $i_start < 0 ) {
11932 my ( $a, $b, $c ) = caller();
11934 # Bad call, can only be due to a recent programming change.
11935 # Better stop here.
11937 "Program Bug: undo_forced_breakpoint_stack from $a $c has bad i=$i_start "
11941 while ( $forced_breakpoint_undo_count > $i_start ) {
11943 $forced_breakpoint_undo_stack[ --$forced_breakpoint_undo_count ];
11944 if ( $i >= 0 && $i <= $max_index_to_go ) {
11945 $forced_breakpoint_to_go[$i] = 0;
11946 $forced_breakpoint_count--;
11948 DEBUG_UNDOBP && do {
11949 my ( $a, $b, $c ) = caller();
11951 "UNDOBP: undo forced_breakpoint i=$i $forced_breakpoint_undo_count from $a $c max=$max_index_to_go\n";
11955 # shouldn't happen, but not a critical error
11957 DEBUG_UNDOBP && do {
11958 my ( $a, $b, $c ) = caller();
11960 "Program Bug: undo_forced_breakpoint from $a $c has i=$i but max=$max_index_to_go";
11966 } ## end closure set_forced_breakpoint
11968 { ## begin closure set_closing_breakpoint
11970 my %postponed_breakpoint;
11972 sub initialize_postponed_breakpoint {
11973 %postponed_breakpoint = ();
11977 sub has_postponed_breakpoint {
11979 return $postponed_breakpoint{$seqno};
11982 sub set_closing_breakpoint {
11984 # set a breakpoint at a matching closing token
11985 my ( $self, $i_break ) = @_;
11987 if ( $mate_index_to_go[$i_break] >= 0 ) {
11989 # CAUTION: infinite recursion possible here:
11990 # set_closing_breakpoint calls set_forced_breakpoint, and
11991 # set_forced_breakpoint call set_closing_breakpoint
11992 # ( test files attrib.t, BasicLyx.pm.html).
11993 # Don't reduce the '2' in the statement below
11994 if ( $mate_index_to_go[$i_break] > $i_break + 2 ) {
11996 # break before } ] and ), but sub set_forced_breakpoint will decide
11997 # to break before or after a ? and :
11998 my $inc = ( $tokens_to_go[$i_break] eq '?' ) ? 0 : 1;
11999 $self->set_forced_breakpoint(
12000 $mate_index_to_go[$i_break] - $inc );
12004 my $type_sequence = $type_sequence_to_go[$i_break];
12005 if ($type_sequence) {
12006 my $closing_token = $matching_token{ $tokens_to_go[$i_break] };
12007 $postponed_breakpoint{$type_sequence} = 1;
12012 } ## end closure set_closing_breakpoint
12014 #########################################
12015 # CODE SECTION 9: Process batches of code
12016 #########################################
12018 { ## begin closure grind_batch_of_CODE
12020 # The routines in this closure begin the processing of a 'batch' of code.
12022 # A variable to keep track of consecutive nonblank lines so that we can
12023 # insert occasional blanks
12024 my @nonblank_lines_at_depth;
12026 # A variable to remember maximum size of previous batches; this is needed
12027 # by the logical padding routine
12028 my $peak_batch_size;
12031 sub initialize_grind_batch_of_CODE {
12032 @nonblank_lines_at_depth = ();
12033 $peak_batch_size = 0;
12038 # sub grind_batch_of_CODE receives sections of code which are the longest
12039 # possible lines without a break. In other words, it receives what is left
12040 # after applying all breaks forced by blank lines, block comments, side
12041 # comments, pod text, and structural braces. Its job is to break this code
12042 # down into smaller pieces, if necessary, which fit within the maximum
12043 # allowed line length. Then it sends the resulting lines of code on down
12044 # the pipeline to the VerticalAligner package, breaking the code into
12045 # continuation lines as necessary. The batch of tokens are in the "to_go"
12046 # arrays. The name 'grind' is slightly suggestive of a machine continually
12047 # breaking down long lines of code, but mainly it is unique and easy to
12048 # remember and find with an editor search.
12050 # The two routines 'process_line_of_CODE' and 'grind_batch_of_CODE' work
12051 # together in the following way:
12053 # - 'process_line_of_CODE' receives the original INPUT lines one-by-one and
12054 # combines them into the largest sequences of tokens which might form a new
12056 # - 'grind_batch_of_CODE' determines which tokens will form the OUTPUT
12059 # So sub 'process_line_of_CODE' builds up the longest possible continouus
12060 # sequences of tokens, regardless of line length, and then
12061 # grind_batch_of_CODE breaks these sequences back down into the new output
12064 # Sub 'grind_batch_of_CODE' ships its output lines to the vertical aligner.
12066 use constant DEBUG_GRIND => 0;
12068 sub grind_batch_of_CODE {
12071 my $file_writer_object = $self->[_file_writer_object_];
12073 my $this_batch = $self->[_this_batch_];
12076 my $starting_in_quote = $this_batch->[_starting_in_quote_];
12077 my $ending_in_quote = $this_batch->[_ending_in_quote_];
12078 my $is_static_block_comment = $this_batch->[_is_static_block_comment_];
12079 my $rK_to_go = $this_batch->[_rK_to_go_];
12080 my $ris_seqno_controlling_ci = $self->[_ris_seqno_controlling_ci_];
12082 my $rLL = $self->[_rLL_];
12084 # This routine is only called from sub flush_batch_of_code, so that
12085 # routine is a better spot for debugging.
12086 DEBUG_GRIND && do {
12087 my $token = my $type = "";
12088 if ( $max_index_to_go >= 0 ) {
12089 $token = $tokens_to_go[$max_index_to_go];
12090 $type = $types_to_go[$max_index_to_go];
12092 my $output_str = join "", @tokens_to_go[ 0 .. $max_index_to_go ];
12093 print STDERR <<EOM;
12094 grind got batch number $batch_count with $max_index_to_go tokens, last type '$type' tok='$token', text:
12099 # Safety check - shouldn't happen. The calling routine must not call
12100 # here unless there are tokens in the batch to be processed. This
12101 # fault can only be triggered by a recent programming change.
12102 if ( $max_index_to_go < 0 ) {
12104 "sub grind incorrectly called with max_index_to_go=$max_index_to_go"
12108 # Initialize some batch variables
12109 my $comma_count_in_batch = 0;
12110 my $ilast_nonblank = -1;
12112 my @ix_seqno_controlling_ci;
12113 for ( my $i = 0 ; $i <= $max_index_to_go ; $i++ ) {
12114 $bond_strength_to_go[$i] = 0;
12115 $iprev_to_go[$i] = $ilast_nonblank;
12116 $inext_to_go[$i] = $i + 1;
12118 my $type = $types_to_go[$i];
12119 if ( $type ne 'b' ) {
12120 if ( $ilast_nonblank >= 0 ) {
12121 $inext_to_go[$ilast_nonblank] = $i;
12123 # just in case there are two blanks in a row (shouldn't
12125 if ( ++$ilast_nonblank < $i ) {
12126 $inext_to_go[$ilast_nonblank] = $i;
12129 $ilast_nonblank = $i;
12131 # This is a good spot to efficiently collect information needed
12132 # for breaking lines...
12134 if ( $type eq ',' ) { $comma_count_in_batch++; }
12136 # gather info needed by sub set_continuation_breaks
12137 my $seqno = $type_sequence_to_go[$i];
12140 # remember indexes of any tokens controlling xci
12141 # in this batch. This list is needed by sub undo_ci.
12142 if ( $ris_seqno_controlling_ci->{$seqno} ) {
12143 push @ix_seqno_controlling_ci, $i;
12146 if ( $type eq '?' ) {
12147 push @colon_list, $type;
12149 elsif ( $type eq ':' ) {
12150 push @colon_list, $type;
12156 my $comma_arrow_count_contained =
12157 $self->match_opening_and_closing_tokens();
12159 # tell the -lp option we are outputting a batch so it can close
12160 # any unfinished items in its stack
12163 # If this line ends in a code block brace, set breaks at any
12164 # previous closing code block braces to breakup a chain of code
12165 # blocks on one line. This is very rare but can happen for
12166 # user-defined subs. For example we might be looking at this:
12167 # BOOL { $server_data{uptime} > 0; } NUM { $server_data{load}; } STR {
12168 my $saw_good_break = 0; # flag to force breaks even if short line
12171 # looking for opening or closing block brace
12172 $block_type_to_go[$max_index_to_go]
12174 # never any good breaks if just one token
12175 && $max_index_to_go > 0
12177 # but not one of these which are never duplicated on a line:
12178 # until|while|for|if|elsif|else
12179 && !$is_block_without_semicolon{ $block_type_to_go[$max_index_to_go]
12183 my $lev = $nesting_depth_to_go[$max_index_to_go];
12185 # Walk backwards from the end and
12186 # set break at any closing block braces at the same level.
12187 # But quit if we are not in a chain of blocks.
12188 for ( my $i = $max_index_to_go - 1 ; $i >= 0 ; $i-- ) {
12189 last if ( $levels_to_go[$i] < $lev ); # stop at a lower level
12190 next if ( $levels_to_go[$i] > $lev ); # skip past higher level
12192 if ( $block_type_to_go[$i] ) {
12193 if ( $tokens_to_go[$i] eq '}' ) {
12194 $self->set_forced_breakpoint($i);
12195 $saw_good_break = 1;
12199 # quit if we see anything besides words, function, blanks
12201 elsif ( $types_to_go[$i] !~ /^[\(\)Gwib]$/ ) { last }
12206 my $imax = $max_index_to_go;
12208 # trim any blank tokens
12209 if ( $types_to_go[$imin] eq 'b' ) { $imin++ }
12210 if ( $types_to_go[$imax] eq 'b' ) { $imax-- }
12212 # anything left to write?
12213 if ( $imin <= $imax ) {
12215 my $last_line_leading_type = $self->[_last_line_leading_type_];
12216 my $last_line_leading_level = $self->[_last_line_leading_level_];
12217 my $last_last_line_leading_level =
12218 $self->[_last_last_line_leading_level_];
12220 # add a blank line before certain key types but not after a comment
12221 if ( $last_line_leading_type ne '#' ) {
12222 my $want_blank = 0;
12223 my $leading_token = $tokens_to_go[$imin];
12224 my $leading_type = $types_to_go[$imin];
12226 # blank lines before subs except declarations and one-liners
12227 if ( $leading_type eq 'i' ) {
12228 if ( $leading_token =~ /$SUB_PATTERN/ ) {
12229 $want_blank = $rOpts->{'blank-lines-before-subs'}
12230 if ( terminal_type_i( $imin, $imax ) !~ /^[\;\}]$/ );
12233 # break before all package declarations
12234 elsif ( substr( $leading_token, 0, 8 ) eq 'package ' ) {
12235 $want_blank = $rOpts->{'blank-lines-before-packages'};
12239 # break before certain key blocks except one-liners
12240 if ( $leading_type eq 'k' ) {
12241 if ( $leading_token eq 'BEGIN' || $leading_token eq 'END' )
12243 $want_blank = $rOpts->{'blank-lines-before-subs'}
12244 if ( terminal_type_i( $imin, $imax ) ne '}' );
12247 # Break before certain block types if we haven't had a
12248 # break at this level for a while. This is the
12249 # difficult decision..
12250 elsif ($last_line_leading_type ne 'b'
12251 && $leading_token =~
12252 /^(unless|if|while|until|for|foreach)$/ )
12255 $nonblank_lines_at_depth[$last_line_leading_level];
12256 if ( !defined($lc) ) { $lc = 0 }
12258 # patch for RT #128216: no blank line inserted at a level
12260 if ( $levels_to_go[$imin] != $last_line_leading_level )
12266 $rOpts->{'blanks-before-blocks'}
12267 && $lc >= $rOpts->{'long-block-line-count'}
12268 && $self->consecutive_nonblank_lines() >=
12269 $rOpts->{'long-block-line-count'}
12270 && terminal_type_i( $imin, $imax ) ne '}';
12274 # Check for blank lines wanted before a closing brace
12275 if ( $leading_token eq '}' ) {
12276 if ( $rOpts->{'blank-lines-before-closing-block'}
12277 && $block_type_to_go[$imin]
12278 && $block_type_to_go[$imin] =~
12279 /$blank_lines_before_closing_block_pattern/ )
12282 $rOpts->{'blank-lines-before-closing-block'};
12283 if ( $nblanks > $want_blank ) {
12284 $want_blank = $nblanks;
12291 # future: send blank line down normal path to VerticalAligner
12292 $self->flush_vertical_aligner();
12293 $file_writer_object->require_blank_code_lines($want_blank);
12297 # update blank line variables and count number of consecutive
12298 # non-blank, non-comment lines at this level
12299 $last_last_line_leading_level = $last_line_leading_level;
12300 $last_line_leading_level = $levels_to_go[$imin];
12301 if ( $last_line_leading_level < 0 ) { $last_line_leading_level = 0 }
12302 $last_line_leading_type = $types_to_go[$imin];
12303 if ( $last_line_leading_level == $last_last_line_leading_level
12304 && $last_line_leading_type ne 'b'
12305 && $last_line_leading_type ne '#'
12306 && defined( $nonblank_lines_at_depth[$last_line_leading_level] )
12309 $nonblank_lines_at_depth[$last_line_leading_level]++;
12312 $nonblank_lines_at_depth[$last_line_leading_level] = 1;
12315 $self->[_last_line_leading_type_] = $last_line_leading_type;
12316 $self->[_last_line_leading_level_] = $last_line_leading_level;
12317 $self->[_last_last_line_leading_level_] =
12318 $last_last_line_leading_level;
12320 # Flag to remember if we called sub 'pad_array_to_go'.
12321 # Some routines (scan_list(), set_continuation_breaks() ) need some
12322 # extra tokens added at the end of the batch. Most batches do not
12323 # use these routines, so we will avoid calling 'pad_array_to_go'
12324 # unless it is needed.
12325 my $called_pad_array_to_go;
12327 # set all forced breakpoints for good list formatting
12328 my $is_long_line = $max_index_to_go > 0
12329 && $self->excess_line_length( $imin, $max_index_to_go ) > 0;
12331 my $old_line_count_in_batch =
12332 $max_index_to_go == 0
12334 : $self->get_old_line_count( $K_to_go[0],
12335 $K_to_go[$max_index_to_go] );
12339 || $old_line_count_in_batch > 1
12341 # must always call scan_list() with unbalanced batches because
12342 # it is maintaining some stacks
12343 || is_unbalanced_batch()
12345 # call scan_list if we might want to break at commas
12347 $comma_count_in_batch
12348 && ( $rOpts_maximum_fields_per_table > 0
12349 && $rOpts_maximum_fields_per_table <=
12350 $comma_count_in_batch
12351 || $rOpts_comma_arrow_breakpoints == 0 )
12354 # call scan_list if user may want to break open some one-line
12356 || ( $comma_arrow_count_contained
12357 && $rOpts_comma_arrow_breakpoints != 3 )
12360 # add a couple of extra terminal blank tokens
12361 $self->pad_array_to_go();
12362 $called_pad_array_to_go = 1;
12364 ## This caused problems in one version of perl for unknown reasons:
12365 ## $saw_good_break ||= scan_list();
12366 my $sgb = $self->scan_list($is_long_line);
12367 $saw_good_break ||= $sgb;
12370 # let $ri_first and $ri_last be references to lists of
12371 # first and last tokens of line fragments to output..
12372 my ( $ri_first, $ri_last );
12374 # write a single line if..
12377 # we aren't allowed to add any newlines
12378 !$rOpts_add_newlines
12383 # this line is 'short'
12386 # and we didn't see a good breakpoint
12387 && !$saw_good_break
12389 # and we don't already have an interior breakpoint
12390 && !get_forced_breakpoint_count()
12394 @{$ri_first} = ($imin);
12395 @{$ri_last} = ($imax);
12398 # otherwise use multiple lines
12401 # add a couple of extra terminal blank tokens if we haven't
12403 $self->pad_array_to_go() unless ($called_pad_array_to_go);
12405 ( $ri_first, $ri_last ) =
12406 $self->set_continuation_breaks( $saw_good_break,
12409 $self->break_all_chain_tokens( $ri_first, $ri_last );
12411 $self->break_equals( $ri_first, $ri_last );
12413 # now we do a correction step to clean this up a bit
12414 # (The only time we would not do this is for debugging)
12415 if ($rOpts_recombine) {
12416 ( $ri_first, $ri_last ) =
12417 $self->recombine_breakpoints( $ri_first, $ri_last );
12420 $self->insert_final_ternary_breaks( $ri_first, $ri_last )
12424 $self->insert_breaks_before_list_opening_containers( $ri_first,
12426 if ( %break_before_container_types && $max_index_to_go > 0 );
12428 # do corrector step if -lp option is used
12429 my $do_not_pad = 0;
12430 if ($rOpts_line_up_parentheses) {
12432 $self->correct_lp_indentation( $ri_first, $ri_last );
12435 # unmask any invisible line-ending semicolon. They were placed by
12436 # sub respace_tokens but we only now know if we actually need them.
12437 if ( !$tokens_to_go[$imax] && $types_to_go[$imax] eq ';' ) {
12441 if ( $want_left_space{';'} != WS_NO ) {
12445 $tokens_to_go[$i] = $tok;
12446 $token_lengths_to_go[$i] = $tok_len;
12447 my $KK = $K_to_go[$i];
12448 $rLL->[$KK]->[_TOKEN_] = $tok;
12449 $rLL->[$KK]->[_TOKEN_LENGTH_] = $tok_len;
12450 my $line_number = 1 + $self->get_old_line_index($KK);
12451 $self->note_added_semicolon($line_number);
12454 if ( $rOpts_one_line_block_semicolons == 0 ) {
12455 $self->delete_one_line_semicolons( $ri_first, $ri_last );
12458 # The line breaks for this batch of code have been finalized. Now we
12459 # can to package the results for further processing. We will switch
12460 # from the local '_to_go' buffer arrays (i-index) back to the global
12461 # token arrays (K-index) at this point.
12464 for ( my $n = 0 ; $n < @{$ri_first} ; $n++ ) {
12465 my $ibeg = $ri_first->[$n];
12466 my $Kbeg = $K_to_go[$ibeg];
12467 my $iend = $ri_last->[$n];
12468 my $Kend = $K_to_go[$iend];
12469 if ( $iend - $ibeg != $Kend - $Kbeg ) {
12470 $index_error = $n unless defined($index_error);
12473 [ $Kbeg, $Kend, $forced_breakpoint_to_go[$iend] ];
12476 # Check correctness of the mapping between the i and K token
12477 # indexes. (The K index is the global index, the i index is the
12478 # batch index). It is important to do this check because an error
12479 # would be disastrous. The reason that we should never see an
12480 # index error here is that sub 'store_token_to_go' has a check to
12481 # make sure that the indexes in batches remain continuous. Since
12482 # sub 'store_token_to_go' controls feeding tokens into batches,
12483 # no index discrepancies should occur unless a recent programming
12484 # change has introduced a bug.
12485 if ( defined($index_error) ) {
12487 # Temporary debug code - should never get here
12488 for ( my $n = 0 ; $n < @{$ri_first} ; $n++ ) {
12489 my $ibeg = $ri_first->[$n];
12490 my $Kbeg = $K_to_go[$ibeg];
12491 my $iend = $ri_last->[$n];
12492 my $Kend = $K_to_go[$iend];
12493 my $idiff = $iend - $ibeg;
12494 my $Kdiff = $Kend - $Kbeg;
12495 print STDERR <<EOM;
12496 line $n, irange $ibeg-$iend = $idiff, Krange $Kbeg-$Kend = $Kdiff;
12500 "Index error at line $index_error; i and K ranges differ");
12503 $this_batch->[_rlines_K_] = $rlines_K;
12504 $this_batch->[_ibeg0_] = $ri_first->[0];
12505 $this_batch->[_peak_batch_size_] = $peak_batch_size;
12506 $this_batch->[_do_not_pad_] = $do_not_pad;
12507 $this_batch->[_batch_count_] = $batch_count;
12508 $this_batch->[_rix_seqno_controlling_ci_] =
12509 \@ix_seqno_controlling_ci;
12511 $self->send_lines_to_vertical_aligner();
12513 # Insert any requested blank lines after an opening brace. We have
12514 # to skip back before any side comment to find the terminal token
12516 for ( $iterm = $imax ; $iterm >= $imin ; $iterm-- ) {
12517 next if $types_to_go[$iterm] eq '#';
12518 next if $types_to_go[$iterm] eq 'b';
12522 # write requested number of blank lines after an opening block brace
12523 if ( $iterm >= $imin && $types_to_go[$iterm] eq '{' ) {
12524 if ( $rOpts->{'blank-lines-after-opening-block'}
12525 && $block_type_to_go[$iterm]
12526 && $block_type_to_go[$iterm] =~
12527 /$blank_lines_after_opening_block_pattern/ )
12529 my $nblanks = $rOpts->{'blank-lines-after-opening-block'};
12530 $self->flush_vertical_aligner();
12531 $file_writer_object->require_blank_code_lines($nblanks);
12536 # Remember the largest batch size processed. This is needed by the
12537 # logical padding routine to avoid padding the first nonblank token
12538 if ( $max_index_to_go && $max_index_to_go > $peak_batch_size ) {
12539 $peak_batch_size = $max_index_to_go;
12544 } ## end closure grind_batch_of_CODE
12546 { ## begin closure match_opening_and_closing_tokens
12548 # closure to keep track of unbalanced containers.
12549 # arrays shared by the routines in this block:
12550 my %saved_opening_indentation;
12551 my @unmatched_opening_indexes_in_this_batch;
12552 my @unmatched_closing_indexes_in_this_batch;
12553 my %comma_arrow_count;
12555 sub initialize_saved_opening_indentation {
12556 %saved_opening_indentation = ();
12560 sub is_unbalanced_batch {
12561 return @unmatched_opening_indexes_in_this_batch +
12562 @unmatched_closing_indexes_in_this_batch;
12565 sub match_opening_and_closing_tokens {
12567 # Match up indexes of opening and closing braces, etc, in this batch.
12568 # This has to be done after all tokens are stored because unstoring
12569 # of tokens would otherwise cause trouble.
12572 my $rwant_container_open = $self->[_rwant_container_open_];
12573 my $rparent_of_seqno = $self->[_rparent_of_seqno_];
12575 @unmatched_opening_indexes_in_this_batch = ();
12576 @unmatched_closing_indexes_in_this_batch = ();
12577 %comma_arrow_count = ();
12578 my $comma_arrow_count_contained = 0;
12579 my $parent_seqno = $self->parent_seqno_by_K( $K_to_go[0] );
12581 foreach my $i ( 0 .. $max_index_to_go ) {
12582 $parent_seqno_to_go[$i] = $parent_seqno;
12584 my $seqno = $type_sequence_to_go[$i];
12586 my $token = $tokens_to_go[$i];
12587 if ( $is_opening_sequence_token{$token} ) {
12588 if ( $is_opening_token{$token} ) {
12589 $parent_seqno = $seqno;
12592 if ( $rwant_container_open->{$seqno} ) {
12593 $self->set_forced_breakpoint($i);
12596 push @unmatched_opening_indexes_in_this_batch, $i;
12598 elsif ( $is_closing_sequence_token{$token} ) {
12600 if ( $is_closing_token{$token} ) {
12601 $parent_seqno = $rparent_of_seqno->{$seqno};
12602 $parent_seqno = SEQ_ROOT unless defined($parent_seqno);
12603 $parent_seqno_to_go[$i] = $parent_seqno;
12606 if ( $rwant_container_open->{$seqno} ) {
12607 $self->set_forced_breakpoint( $i - 1 );
12610 my $i_mate = pop @unmatched_opening_indexes_in_this_batch;
12611 if ( defined($i_mate) && $i_mate >= 0 ) {
12612 if ( $type_sequence_to_go[$i_mate] ==
12613 $type_sequence_to_go[$i] )
12615 $mate_index_to_go[$i] = $i_mate;
12616 $mate_index_to_go[$i_mate] = $i;
12617 my $seqno = $type_sequence_to_go[$i];
12618 if ( $comma_arrow_count{$seqno} ) {
12619 $comma_arrow_count_contained +=
12620 $comma_arrow_count{$seqno};
12624 push @unmatched_opening_indexes_in_this_batch,
12626 push @unmatched_closing_indexes_in_this_batch, $i;
12630 push @unmatched_closing_indexes_in_this_batch, $i;
12634 elsif ( $tokens_to_go[$i] eq '=>' ) {
12635 if (@unmatched_opening_indexes_in_this_batch) {
12636 my $j = $unmatched_opening_indexes_in_this_batch[-1];
12637 my $seqno = $type_sequence_to_go[$j];
12638 $comma_arrow_count{$seqno}++;
12643 return $comma_arrow_count_contained;
12646 sub save_opening_indentation {
12648 # This should be called after each batch of tokens is output. It
12649 # saves indentations of lines of all unmatched opening tokens.
12650 # These will be used by sub get_opening_indentation.
12652 my ( $self, $ri_first, $ri_last, $rindentation_list ) = @_;
12654 # QW INDENTATION PATCH 1:
12655 # Also save indentation for multiline qw quotes
12657 my $seqno_qw_opening;
12658 if ( $types_to_go[$max_index_to_go] eq 'q' ) {
12659 my $KK = $K_to_go[$max_index_to_go];
12660 $seqno_qw_opening =
12661 $self->[_rstarting_multiline_qw_seqno_by_K_]->{$KK};
12662 if ($seqno_qw_opening) {
12663 push @i_qw, $max_index_to_go;
12667 # we need to save indentations of any unmatched opening tokens
12668 # in this batch because we may need them in a subsequent batch.
12669 foreach ( @unmatched_opening_indexes_in_this_batch, @i_qw ) {
12671 my $seqno = $type_sequence_to_go[$_];
12674 if ( $seqno_qw_opening && $_ == $max_index_to_go ) {
12675 $seqno = $seqno_qw_opening;
12680 $seqno = 'UNKNOWN';
12684 $saved_opening_indentation{$seqno} = [
12685 lookup_opening_indentation(
12686 $_, $ri_first, $ri_last, $rindentation_list
12693 sub get_saved_opening_indentation {
12695 my ( $indent, $offset, $is_leading, $exists ) = ( 0, 0, 0, 0 );
12698 if ( $saved_opening_indentation{$seqno} ) {
12699 ( $indent, $offset, $is_leading ) =
12700 @{ $saved_opening_indentation{$seqno} };
12705 # some kind of serious error it doesn't exist
12706 # (example is badfile.t)
12708 return ( $indent, $offset, $is_leading, $exists );
12710 } ## end closure match_opening_and_closing_tokens
12712 sub lookup_opening_indentation {
12714 # get the indentation of the line in the current output batch
12715 # which output a selected opening token
12718 # $i_opening - index of an opening token in the current output batch
12719 # whose line indentation we need
12720 # $ri_first - reference to list of the first index $i for each output
12721 # line in this batch
12722 # $ri_last - reference to list of the last index $i for each output line
12724 # $rindentation_list - reference to a list containing the indentation
12725 # used for each line. (NOTE: the first slot in
12726 # this list is the last returned line number, and this is
12727 # followed by the list of indentations).
12730 # -the indentation of the line which contained token $i_opening
12731 # -and its offset (number of columns) from the start of the line
12733 my ( $i_opening, $ri_start, $ri_last, $rindentation_list ) = @_;
12735 if ( !@{$ri_last} ) {
12737 # An error here implies a bug introduced by a recent program change.
12738 # Every batch of code has lines.
12739 Fault("Error in opening_indentation: no lines");
12743 my $nline = $rindentation_list->[0]; # line number of previous lookup
12745 # reset line location if necessary
12746 $nline = 0 if ( $i_opening < $ri_start->[$nline] );
12748 # find the correct line
12749 unless ( $i_opening > $ri_last->[-1] ) {
12750 while ( $i_opening > $ri_last->[$nline] ) { $nline++; }
12753 # Error - token index is out of bounds - shouldn't happen
12754 # A program bug has been introduced in one of the calling routines.
12755 # We better stop here.
12757 my $i_last_line = $ri_last->[-1];
12759 Program bug in call to lookup_opening_indentation - index out of range
12760 called with index i_opening=$i_opening > $i_last_line = max index of last line
12761 This batch has max index = $max_index_to_go,
12763 report_definite_bug(); # old coding, will not get here
12764 $nline = $#{$ri_last};
12767 $rindentation_list->[0] =
12768 $nline; # save line number to start looking next call
12769 my $ibeg = $ri_start->[$nline];
12770 my $offset = token_sequence_length( $ibeg, $i_opening ) - 1;
12771 my $is_leading = ( $ibeg == $i_opening );
12772 return ( $rindentation_list->[ $nline + 1 ], $offset, $is_leading );
12775 { ## begin closure terminal_type_i
12777 my %is_sort_map_grep_eval_do;
12780 my @q = qw(sort map grep eval do);
12781 @is_sort_map_grep_eval_do{@q} = (1) x scalar(@q);
12784 sub terminal_type_i {
12786 # returns type of last token on this line (terminal token), as follows:
12787 # returns # for a full-line comment
12788 # returns ' ' for a blank line
12789 # otherwise returns final token type
12791 my ( $ibeg, $iend ) = @_;
12793 # Start at the end and work backwards
12795 my $type_i = $types_to_go[$i];
12797 # Check for side comment
12798 if ( $type_i eq '#' ) {
12800 if ( $i < $ibeg ) {
12801 return wantarray ? ( $type_i, $ibeg ) : $type_i;
12803 $type_i = $types_to_go[$i];
12806 # Skip past a blank
12807 if ( $type_i eq 'b' ) {
12809 if ( $i < $ibeg ) {
12810 return wantarray ? ( $type_i, $ibeg ) : $type_i;
12812 $type_i = $types_to_go[$i];
12815 # Found it..make sure it is a BLOCK termination,
12816 # but hide a terminal } after sort/grep/map because it is not
12817 # necessarily the end of the line. (terminal.t)
12818 my $block_type = $block_type_to_go[$i];
12822 || ( $is_sort_map_grep_eval_do{$block_type} ) )
12827 return wantarray ? ( $type_i, $i ) : $type_i;
12830 } ## end closure terminal_type_i
12832 sub pad_array_to_go {
12834 # To simplify coding in scan_list and set_bond_strengths, it helps to
12835 # create some extra blank tokens at the end of the arrays. We also add
12836 # some undef's to help guard against using invalid data.
12838 $K_to_go[ $max_index_to_go + 1 ] = undef;
12839 $tokens_to_go[ $max_index_to_go + 1 ] = '';
12840 $tokens_to_go[ $max_index_to_go + 2 ] = '';
12841 $tokens_to_go[ $max_index_to_go + 3 ] = undef;
12842 $types_to_go[ $max_index_to_go + 1 ] = 'b';
12843 $types_to_go[ $max_index_to_go + 2 ] = 'b';
12844 $types_to_go[ $max_index_to_go + 3 ] = undef;
12845 $nesting_depth_to_go[ $max_index_to_go + 2 ] = undef;
12846 $nesting_depth_to_go[ $max_index_to_go + 1 ] =
12847 $nesting_depth_to_go[$max_index_to_go];
12850 if ( $is_closing_type{ $types_to_go[$max_index_to_go] } ) {
12851 if ( $nesting_depth_to_go[$max_index_to_go] <= 0 ) {
12853 # Nesting depths are equivalent to the _SLEVEL_ variable which is
12854 # clipped to be >=0 in sub write_line, so it should not be possible
12855 # to get here unless the code has a bracing error which leaves a
12856 # closing brace with zero nesting depth.
12857 unless ( get_saw_brace_error() ) {
12859 "Program bug in pad_array_to_go: hit nesting error which should have been caught\n"
12861 report_definite_bug();
12865 $nesting_depth_to_go[ $max_index_to_go + 1 ] -= 1;
12870 elsif ( $is_opening_type{ $types_to_go[$max_index_to_go] } ) {
12871 $nesting_depth_to_go[ $max_index_to_go + 1 ] += 1;
12876 sub break_all_chain_tokens {
12878 # scan the current breakpoints looking for breaks at certain "chain
12879 # operators" (. : && || + etc) which often occur repeatedly in a long
12880 # statement. If we see a break at any one, break at all similar tokens
12881 # within the same container.
12883 my ( $self, $ri_left, $ri_right ) = @_;
12885 my %saw_chain_type;
12886 my %left_chain_type;
12887 my %right_chain_type;
12888 my %interior_chain_type;
12889 my $nmax = @{$ri_right} - 1;
12891 # scan the left and right end tokens of all lines
12893 for my $n ( 0 .. $nmax ) {
12894 my $il = $ri_left->[$n];
12895 my $ir = $ri_right->[$n];
12896 my $typel = $types_to_go[$il];
12897 my $typer = $types_to_go[$ir];
12898 $typel = '+' if ( $typel eq '-' ); # treat + and - the same
12899 $typer = '+' if ( $typer eq '-' );
12900 $typel = '*' if ( $typel eq '/' ); # treat * and / the same
12901 $typer = '*' if ( $typer eq '/' );
12902 my $tokenl = $tokens_to_go[$il];
12903 my $tokenr = $tokens_to_go[$ir];
12905 if ( $is_chain_operator{$tokenl} && $want_break_before{$typel} ) {
12906 next if ( $typel eq '?' );
12907 push @{ $left_chain_type{$typel} }, $il;
12908 $saw_chain_type{$typel} = 1;
12911 if ( $is_chain_operator{$tokenr} && !$want_break_before{$typer} ) {
12912 next if ( $typer eq '?' );
12913 push @{ $right_chain_type{$typer} }, $ir;
12914 $saw_chain_type{$typer} = 1;
12918 return unless $count;
12920 # now look for any interior tokens of the same types
12922 for my $n ( 0 .. $nmax ) {
12923 my $il = $ri_left->[$n];
12924 my $ir = $ri_right->[$n];
12925 foreach my $i ( $il + 1 .. $ir - 1 ) {
12926 my $type = $types_to_go[$i];
12927 $type = '+' if ( $type eq '-' );
12928 $type = '*' if ( $type eq '/' );
12929 if ( $saw_chain_type{$type} ) {
12930 push @{ $interior_chain_type{$type} }, $i;
12935 return unless $count;
12937 # now make a list of all new break points
12940 # loop over all chain types
12941 foreach my $type ( keys %saw_chain_type ) {
12943 # quit if just ONE continuation line with leading . For example--
12944 # print LATEXFILE '\framebox{\parbox[c][' . $h . '][t]{' . $w . '}{'
12946 last if ( $nmax == 1 && $type =~ /^[\.\+]$/ );
12948 # loop over all interior chain tokens
12949 foreach my $itest ( @{ $interior_chain_type{$type} } ) {
12951 # loop over all left end tokens of same type
12952 if ( $left_chain_type{$type} ) {
12953 next if $nobreak_to_go[ $itest - 1 ];
12954 foreach my $i ( @{ $left_chain_type{$type} } ) {
12955 next unless $self->in_same_container_i( $i, $itest );
12956 push @insert_list, $itest - 1;
12958 # Break at matching ? if this : is at a different level.
12959 # For example, the ? before $THRf_DEAD in the following
12960 # should get a break if its : gets a break.
12963 # ( $_ & 1 ) ? ( $_ & 4 ) ? $THRf_DEAD : $THRf_ZOMBIE
12964 # : ( $_ & 4 ) ? $THRf_R_DETACHED
12965 # : $THRf_R_JOINABLE;
12967 && $levels_to_go[$i] != $levels_to_go[$itest] )
12969 my $i_question = $mate_index_to_go[$itest];
12970 if ( $i_question > 0 ) {
12971 push @insert_list, $i_question - 1;
12978 # loop over all right end tokens of same type
12979 if ( $right_chain_type{$type} ) {
12980 next if $nobreak_to_go[$itest];
12981 foreach my $i ( @{ $right_chain_type{$type} } ) {
12982 next unless $self->in_same_container_i( $i, $itest );
12983 push @insert_list, $itest;
12985 # break at matching ? if this : is at a different level
12987 && $levels_to_go[$i] != $levels_to_go[$itest] )
12989 my $i_question = $mate_index_to_go[$itest];
12990 if ( $i_question >= 0 ) {
12991 push @insert_list, $i_question;
13000 # insert any new break points
13001 if (@insert_list) {
13002 $self->insert_additional_breaks( \@insert_list, $ri_left, $ri_right );
13007 sub insert_additional_breaks {
13009 # this routine will add line breaks at requested locations after
13010 # sub set_continuation_breaks has made preliminary breaks.
13012 my ( $self, $ri_break_list, $ri_first, $ri_last ) = @_;
13015 my $line_number = 0;
13016 foreach my $i_break_left ( sort { $a <=> $b } @{$ri_break_list} ) {
13018 next if ( $nobreak_to_go[$i_break_left] );
13020 $i_f = $ri_first->[$line_number];
13021 $i_l = $ri_last->[$line_number];
13022 while ( $i_break_left >= $i_l ) {
13025 # shouldn't happen unless caller passes bad indexes
13026 if ( $line_number >= @{$ri_last} ) {
13028 "Non-fatal program bug: couldn't set break at $i_break_left\n"
13030 report_definite_bug();
13033 $i_f = $ri_first->[$line_number];
13034 $i_l = $ri_last->[$line_number];
13037 # Do not leave a blank at the end of a line; back up if necessary
13038 if ( $types_to_go[$i_break_left] eq 'b' ) { $i_break_left-- }
13040 my $i_break_right = $inext_to_go[$i_break_left];
13041 if ( $i_break_left >= $i_f
13042 && $i_break_left < $i_l
13043 && $i_break_right > $i_f
13044 && $i_break_right <= $i_l )
13046 splice( @{$ri_first}, $line_number, 1, ( $i_f, $i_break_right ) );
13047 splice( @{$ri_last}, $line_number, 1, ( $i_break_left, $i_l ) );
13053 sub in_same_container_i {
13055 # check to see if tokens at i1 and i2 are in the
13056 # same container, and not separated by a comma, ? or :
13057 # This is an interface between the _to_go arrays to the rLL array
13058 my ( $self, $i1, $i2 ) = @_;
13061 return if ( $parent_seqno_to_go[$i1] ne $parent_seqno_to_go[$i2] );
13064 return $self->in_same_container_K( $K_to_go[$i1], $K_to_go[$i2] );
13067 { ## begin closure in_same_container_K
13068 my $ris_break_token;
13069 my $ris_comma_token;
13073 # all cases break on seeing commas at same level
13076 @{$ris_comma_token}{@q} = (1) x scalar(@q);
13078 # Non-ternary text also breaks on seeing any of qw(? : || or )
13079 # Example: we would not want to break at any of these .'s
13080 # : "<A HREF=\"#item_" . htmlify( 0, $s2 ) . "\">$str</A>"
13081 push @q, qw( or || ? : );
13082 @{$ris_break_token}{@q} = (1) x scalar(@q);
13085 sub in_same_container_K {
13087 # Check to see if tokens at K1 and K2 are in the same container,
13088 # and not separated by certain characters: => , ? : || or
13089 # This version uses the newer $rLL data structure.
13091 my ( $self, $K1, $K2 ) = @_;
13092 if ( $K2 < $K1 ) { ( $K1, $K2 ) = ( $K2, $K1 ) }
13093 my $rLL = $self->[_rLL_];
13094 my $depth_1 = $rLL->[$K1]->[_SLEVEL_];
13095 return if ( $depth_1 < 0 );
13096 return unless ( $rLL->[$K2]->[_SLEVEL_] == $depth_1 );
13098 # Select character set to scan for
13099 my $type_1 = $rLL->[$K1]->[_TYPE_];
13100 my $rbreak = ( $type_1 ne ':' ) ? $ris_break_token : $ris_comma_token;
13102 # Fast preliminary loop to verify that tokens are in the same container
13105 $KK = $rLL->[$KK]->[_KNEXT_SEQ_ITEM_];
13106 last if !defined($KK);
13107 last if ( $KK >= $K2 );
13108 my $depth_K = $rLL->[$KK]->[_SLEVEL_];
13109 return if ( $depth_K < $depth_1 );
13110 next if ( $depth_K > $depth_1 );
13111 if ( $type_1 ne ':' ) {
13112 my $tok_K = $rLL->[$KK]->[_TOKEN_];
13113 return if ( $tok_K eq '?' || $tok_K eq ':' );
13117 # Slow loop checking for certain characters
13119 ###########################################################
13120 # This is potentially a slow routine and not critical.
13121 # For safety just give up for large differences.
13122 # See test file 'infinite_loop.txt'
13123 ###########################################################
13124 return if ( $K2 - $K1 > 200 );
13126 foreach my $K ( $K1 + 1 .. $K2 - 1 ) {
13128 my $depth_K = $rLL->[$K]->[_SLEVEL_];
13129 next if ( $depth_K > $depth_1 );
13130 return if ( $depth_K < $depth_1 ); # redundant, checked above
13131 my $tok = $rLL->[$K]->[_TOKEN_];
13132 return if ( $rbreak->{$tok} );
13136 } ## end closure in_same_container_K
13140 # Look for assignment operators that could use a breakpoint.
13141 # For example, in the following snippet
13143 # $HOME = $ENV{HOME}
13146 # || die "no home directory for user $<";
13148 # we could break at the = to get this, which is a little nicer:
13153 # || die "no home directory for user $<";
13155 # The logic here follows the logic in set_logical_padding, which
13156 # will add the padding in the second line to improve alignment.
13158 my ( $self, $ri_left, $ri_right ) = @_;
13159 my $nmax = @{$ri_right} - 1;
13160 return unless ( $nmax >= 2 );
13162 # scan the left ends of first two lines
13165 for my $n ( 1 .. 2 ) {
13166 my $il = $ri_left->[$n];
13167 my $typel = $types_to_go[$il];
13168 my $tokenl = $tokens_to_go[$il];
13170 my $has_leading_op = ( $tokenl =~ /^\w/ )
13171 ? $is_chain_operator{$tokenl} # + - * / : ? && ||
13172 : $is_chain_operator{$typel}; # and, or
13173 return unless ($has_leading_op);
13176 unless ( $tokenl eq $tokbeg
13177 && $nesting_depth_to_go[$il] eq $depth_beg );
13180 $depth_beg = $nesting_depth_to_go[$il];
13183 # now look for any interior tokens of the same types
13184 my $il = $ri_left->[0];
13185 my $ir = $ri_right->[0];
13187 # now make a list of all new break points
13189 for ( my $i = $ir - 1 ; $i > $il ; $i-- ) {
13190 my $type = $types_to_go[$i];
13191 if ( $is_assignment{$type}
13192 && $nesting_depth_to_go[$i] eq $depth_beg )
13194 if ( $want_break_before{$type} ) {
13195 push @insert_list, $i - 1;
13198 push @insert_list, $i;
13203 # Break after a 'return' followed by a chain of operators
13204 # return ( $^O !~ /win32|dos/i )
13205 # && ( $^O ne 'VMS' )
13206 # && ( $^O ne 'OS2' )
13207 # && ( $^O ne 'MacOS' );
13210 # ( $^O !~ /win32|dos/i )
13211 # && ( $^O ne 'VMS' )
13212 # && ( $^O ne 'OS2' )
13213 # && ( $^O ne 'MacOS' );
13215 if ( $types_to_go[$i] eq 'k'
13216 && $tokens_to_go[$i] eq 'return'
13218 && $nesting_depth_to_go[$i] eq $depth_beg )
13220 push @insert_list, $i;
13223 return unless (@insert_list);
13225 # One final check...
13226 # scan second and third lines and be sure there are no assignments
13227 # we want to avoid breaking at an = to make something like this:
13229 # $html_icons{"$type-$state"}
13230 # or $icon = $html_icons{$type}
13231 # or $icon = $html_icons{$state} )
13232 for my $n ( 1 .. 2 ) {
13233 my $il = $ri_left->[$n];
13234 my $ir = $ri_right->[$n];
13235 foreach my $i ( $il + 1 .. $ir ) {
13236 my $type = $types_to_go[$i];
13238 if ( $is_assignment{$type}
13239 && $nesting_depth_to_go[$i] eq $depth_beg );
13243 # ok, insert any new break point
13244 if (@insert_list) {
13245 $self->insert_additional_breaks( \@insert_list, $ri_left, $ri_right );
13250 { ## begin closure recombine_breakpoints
13252 # This routine is called once per batch to see if it would be better
13253 # to combine some of the lines into which the batch has been broken.
13265 @is_amp_amp{@q} = (1) x scalar(@q);
13268 @is_ternary{@q} = (1) x scalar(@q);
13270 @q = qw( + - * / );
13271 @is_math_op{@q} = (1) x scalar(@q);
13274 @is_plus_minus{@q} = (1) x scalar(@q);
13277 @is_mult_div{@q} = (1) x scalar(@q);
13280 sub Debug_dump_breakpoints {
13282 # Debug routine to dump current breakpoints...not normally called
13283 # We are given indexes to the current lines:
13284 # $ri_beg = ref to array of BEGinning indexes of each line
13285 # $ri_end = ref to array of ENDing indexes of each line
13286 my ( $self, $ri_beg, $ri_end, $msg ) = @_;
13287 print STDERR "----Dumping breakpoints from: $msg----\n";
13288 for my $n ( 0 .. @{$ri_end} - 1 ) {
13289 my $ibeg = $ri_beg->[$n];
13290 my $iend = $ri_end->[$n];
13292 foreach my $i ( $ibeg .. $iend ) {
13293 $text .= $tokens_to_go[$i];
13295 print STDERR "$n ($ibeg:$iend) $text\n";
13297 print STDERR "----\n";
13301 sub delete_one_line_semicolons {
13303 my ( $self, $ri_beg, $ri_end ) = @_;
13304 my $rLL = $self->[_rLL_];
13305 my $K_opening_container = $self->[_K_opening_container_];
13307 # Walk down the lines of this batch and delete any semicolons
13308 # terminating one-line blocks;
13309 my $nmax = @{$ri_end} - 1;
13311 foreach my $n ( 0 .. $nmax ) {
13312 my $i_beg = $ri_beg->[$n];
13313 my $i_e = $ri_end->[$n];
13314 my $K_beg = $K_to_go[$i_beg];
13315 my $K_e = $K_to_go[$i_e];
13317 my $type_end = $rLL->[$K_end]->[_TYPE_];
13318 if ( $type_end eq '#' ) {
13319 $K_end = $self->K_previous_nonblank($K_end);
13320 if ( defined($K_end) ) { $type_end = $rLL->[$K_end]->[_TYPE_]; }
13323 # we are looking for a line ending in closing brace
13325 unless ( $type_end eq '}' && $rLL->[$K_end]->[_TOKEN_] eq '}' );
13327 # ...and preceded by a semicolon on the same line
13328 my $K_semicolon = $self->K_previous_nonblank($K_end);
13329 next unless defined($K_semicolon);
13330 my $i_semicolon = $i_beg + ( $K_semicolon - $K_beg );
13331 next if ( $i_semicolon <= $i_beg );
13332 next unless ( $rLL->[$K_semicolon]->[_TYPE_] eq ';' );
13334 # Safety check - shouldn't happen - not critical
13335 # This is not worth throwing a Fault, except in DEVEL_MODE
13336 if ( $types_to_go[$i_semicolon] ne ';' ) {
13338 && Fault("unexpected type looking for semicolon");
13342 # ... with the corresponding opening brace on the same line
13343 my $type_sequence = $rLL->[$K_end]->[_TYPE_SEQUENCE_];
13344 my $K_opening = $K_opening_container->{$type_sequence};
13345 next unless ( defined($K_opening) );
13346 my $i_opening = $i_beg + ( $K_opening - $K_beg );
13347 next if ( $i_opening < $i_beg );
13349 # ... and only one semicolon between these braces
13350 my $semicolon_count = 0;
13351 foreach my $K ( $K_opening + 1 .. $K_semicolon - 1 ) {
13352 if ( $rLL->[$K]->[_TYPE_] eq ';' ) {
13353 $semicolon_count++;
13357 next if ($semicolon_count);
13359 # ...ok, then make the semicolon invisible
13360 $tokens_to_go[$i_semicolon] = "";
13361 $token_lengths_to_go[$i_semicolon] = 0;
13362 $rLL->[$K_semicolon]->[_TOKEN_] = "";
13363 $rLL->[$K_semicolon]->[_TOKEN_LENGTH_] = 0;
13368 use constant DEBUG_RECOMBINE => 0;
13370 sub recombine_breakpoints {
13372 # sub set_continuation_breaks is very liberal in setting line breaks
13373 # for long lines, always setting breaks at good breakpoints, even
13374 # when that creates small lines. Sometimes small line fragments
13375 # are produced which would look better if they were combined.
13376 # That's the task of this routine.
13378 # We are given indexes to the current lines:
13379 # $ri_beg = ref to array of BEGinning indexes of each line
13380 # $ri_end = ref to array of ENDing indexes of each line
13381 my ( $self, $ri_beg, $ri_end ) = @_;
13383 my $rK_weld_right = $self->[_rK_weld_right_];
13384 my $rK_weld_left = $self->[_rK_weld_left_];
13386 # Make a list of all good joining tokens between the lines
13389 my $nmax = @{$ri_end} - 1;
13390 for my $n ( 1 .. $nmax ) {
13391 my $ibeg_1 = $ri_beg->[ $n - 1 ];
13392 my $iend_1 = $ri_end->[ $n - 1 ];
13393 my $iend_2 = $ri_end->[$n];
13394 my $ibeg_2 = $ri_beg->[$n];
13396 my ( $itok, $itokp, $itokm );
13398 foreach my $itest ( $iend_1, $ibeg_2 ) {
13399 my $type = $types_to_go[$itest];
13400 if ( $is_math_op{$type}
13401 || $is_amp_amp{$type}
13402 || $is_assignment{$type}
13408 $joint[$n] = [$itok];
13411 my $more_to_do = 1;
13413 # We keep looping over all of the lines of this batch
13414 # until there are no more possible recombinations
13415 my $nmax_last = @{$ri_end};
13417 while ($more_to_do) {
13420 my $nmax = @{$ri_end} - 1;
13422 # Safety check for infinite loop
13423 unless ( $nmax < $nmax_last ) {
13425 # Shouldn't happen because splice below decreases nmax on each
13426 # iteration. An error can only be due to a recent programming
13428 Fault("Program bug-infinite loop in recombine breakpoints\n");
13430 $nmax_last = $nmax;
13432 my $skip_Section_3;
13433 my $leading_amp_count = 0;
13434 my $this_line_is_semicolon_terminated;
13436 # loop over all remaining lines in this batch
13437 for my $iter ( 1 .. $nmax ) {
13439 # alternating sweep direction gives symmetric results
13440 # for recombining lines which exceed the line length
13441 # such as eval {{{{.... }}}}
13443 if ($reverse) { $n = 1 + $nmax - $iter; }
13444 else { $n = $iter }
13446 #----------------------------------------------------------
13447 # If we join the current pair of lines,
13448 # line $n-1 will become the left part of the joined line
13449 # line $n will become the right part of the joined line
13451 # Here are Indexes of the endpoint tokens of the two lines:
13453 # -----line $n-1--- | -----line $n-----
13454 # $ibeg_1 $iend_1 | $ibeg_2 $iend_2
13457 # We want to decide if we should remove the line break
13458 # between the tokens at $iend_1 and $ibeg_2
13460 # We will apply a number of ad-hoc tests to see if joining
13461 # here will look ok. The code will just issue a 'next'
13462 # command if the join doesn't look good. If we get through
13463 # the gauntlet of tests, the lines will be recombined.
13464 #----------------------------------------------------------
13466 # beginning and ending tokens of the lines we are working on
13467 my $ibeg_1 = $ri_beg->[ $n - 1 ];
13468 my $iend_1 = $ri_end->[ $n - 1 ];
13469 my $iend_2 = $ri_end->[$n];
13470 my $ibeg_2 = $ri_beg->[$n];
13471 my $ibeg_nmax = $ri_beg->[$nmax];
13473 # combined line cannot be too long
13474 my $excess = $self->excess_line_length( $ibeg_1, $iend_2, 1 );
13475 next if ( $excess > 0 );
13477 my $type_iend_1 = $types_to_go[$iend_1];
13478 my $type_iend_2 = $types_to_go[$iend_2];
13479 my $type_ibeg_1 = $types_to_go[$ibeg_1];
13480 my $type_ibeg_2 = $types_to_go[$ibeg_2];
13482 # terminal token of line 2 if any side comment is ignored:
13483 my $iend_2t = $iend_2;
13484 my $type_iend_2t = $type_iend_2;
13486 # some beginning indexes of other lines, which may not exist
13487 my $ibeg_0 = $n > 1 ? $ri_beg->[ $n - 2 ] : -1;
13488 my $ibeg_3 = $n < $nmax ? $ri_beg->[ $n + 1 ] : -1;
13489 my $ibeg_4 = $n + 2 <= $nmax ? $ri_beg->[ $n + 2 ] : -1;
13493 #my $depth_increase=( $nesting_depth_to_go[$ibeg_2] -
13494 # $nesting_depth_to_go[$ibeg_1] );
13496 DEBUG_RECOMBINE && do {
13498 "RECOMBINE: n=$n imid=$iend_1 if=$ibeg_1 type=$type_ibeg_1 =$tokens_to_go[$ibeg_1] next_type=$type_ibeg_2 next_tok=$tokens_to_go[$ibeg_2]\n";
13501 # If line $n is the last line, we set some flags and
13502 # do any special checks for it
13503 if ( $n == $nmax ) {
13505 # a terminal '{' should stay where it is
13506 # unless preceded by a fat comma
13507 next if ( $type_ibeg_2 eq '{' && $type_iend_1 ne '=>' );
13509 if ( $type_iend_2 eq '#'
13510 && $iend_2 - $ibeg_2 >= 2
13511 && $types_to_go[ $iend_2 - 1 ] eq 'b' )
13513 $iend_2t = $iend_2 - 2;
13514 $type_iend_2t = $types_to_go[$iend_2t];
13517 $this_line_is_semicolon_terminated = $type_iend_2t eq ';';
13520 #----------------------------------------------------------
13521 # Recombine Section 0:
13522 # Examine the special token joining this line pair, if any.
13523 # Put as many tests in this section to avoid duplicate code and
13524 # to make formatting independent of whether breaks are to the
13525 # left or right of an operator.
13526 #----------------------------------------------------------
13528 my ($itok) = @{ $joint[$n] };
13531 my $type = $types_to_go[$itok];
13533 if ( $type eq ':' ) {
13535 # do not join at a colon unless it disobeys the break
13537 if ( $itok eq $iend_1 ) {
13538 next unless $want_break_before{$type};
13541 $leading_amp_count++;
13542 next if $want_break_before{$type};
13546 # handle math operators + - * /
13547 elsif ( $is_math_op{$type} ) {
13549 # Combine these lines if this line is a single
13550 # number, or if it is a short term with same
13551 # operator as the previous line. For example, in
13552 # the following code we will combine all of the
13553 # short terms $A, $B, $C, $D, $E, $F, together
13554 # instead of leaving them one per line:
13556 # $A * $B * $C * $D * $E * $F *
13557 # ( 2. * $eps * $sigma * $area ) *
13558 # ( 1. / $tcold**3 - 1. / $thot**3 );
13560 # This can be important in math-intensive code.
13564 my $itokp = min( $inext_to_go[$itok], $iend_2 );
13565 my $itokpp = min( $inext_to_go[$itokp], $iend_2 );
13566 my $itokm = max( $iprev_to_go[$itok], $ibeg_1 );
13567 my $itokmm = max( $iprev_to_go[$itokm], $ibeg_1 );
13569 # check for a number on the right
13570 if ( $types_to_go[$itokp] eq 'n' ) {
13572 # ok if nothing else on right
13573 if ( $itokp == $iend_2 ) {
13578 # look one more token to right..
13579 # okay if math operator or some termination
13581 ( ( $itokpp == $iend_2 )
13582 && $is_math_op{ $types_to_go[$itokpp] } )
13583 || $types_to_go[$itokpp] =~ /^[#,;]$/;
13587 # check for a number on the left
13588 if ( !$good_combo && $types_to_go[$itokm] eq 'n' ) {
13590 # okay if nothing else to left
13591 if ( $itokm == $ibeg_1 ) {
13595 # otherwise look one more token to left
13598 # okay if math operator, comma, or assignment
13599 $good_combo = ( $itokmm == $ibeg_1 )
13600 && ( $is_math_op{ $types_to_go[$itokmm] }
13601 || $types_to_go[$itokmm] =~ /^[,]$/
13602 || $is_assignment{ $types_to_go[$itokmm] }
13607 # look for a single short token either side of the
13609 if ( !$good_combo ) {
13611 # Slight adjustment factor to make results
13612 # independent of break before or after operator in
13613 # long summed lists. (An operator and a space make
13615 my $two = ( $itok eq $iend_1 ) ? 2 : 0;
13619 # numbers or id's on both sides of this joint
13620 $types_to_go[$itokp] =~ /^[in]$/
13621 && $types_to_go[$itokm] =~ /^[in]$/
13623 # one of the two lines must be short:
13626 # no more than 2 nonblank tokens right of
13631 && token_sequence_length( $itokp, $iend_2 )
13633 $rOpts_short_concatenation_item_length
13636 # no more than 2 nonblank tokens left of
13641 && token_sequence_length( $ibeg_1, $itokm )
13643 $rOpts_short_concatenation_item_length
13648 # keep pure terms; don't mix +- with */
13650 $is_plus_minus{$type}
13651 && ( $is_mult_div{ $types_to_go[$itokmm] }
13652 || $is_mult_div{ $types_to_go[$itokpp] } )
13655 $is_mult_div{$type}
13656 && ( $is_plus_minus{ $types_to_go[$itokmm] }
13657 || $is_plus_minus{ $types_to_go[$itokpp] } )
13663 # it is also good to combine if we can reduce to 2 lines
13664 if ( !$good_combo ) {
13666 # index on other line where same token would be in a
13669 ( $itok == $iend_1 ) ? $iend_2 : $ibeg_1;
13674 && $types_to_go[$iother] ne $type;
13677 next unless ($good_combo);
13681 elsif ( $is_amp_amp{$type} ) {
13685 elsif ( $is_assignment{$type} ) {
13687 } ## end assignment
13690 #----------------------------------------------------------
13691 # Recombine Section 1:
13692 # Join welded nested containers immediately
13693 #----------------------------------------------------------
13697 && ( $type_sequence_to_go[$iend_1]
13698 && defined( $rK_weld_right->{ $K_to_go[$iend_1] } )
13699 || $type_sequence_to_go[$ibeg_2]
13700 && defined( $rK_weld_left->{ $K_to_go[$ibeg_2] } ) )
13709 #----------------------------------------------------------
13710 # Recombine Section 2:
13711 # Examine token at $iend_1 (right end of first line of pair)
13712 #----------------------------------------------------------
13714 # an isolated '}' may join with a ';' terminated segment
13715 if ( $type_iend_1 eq '}' ) {
13717 # Check for cases where combining a semicolon terminated
13718 # statement with a previous isolated closing paren will
13719 # allow the combined line to be outdented. This is
13720 # generally a good move. For example, we can join up
13721 # the last two lines here:
13723 # $dev, $ino, $mode, $nlink, $uid, $gid, $rdev,
13724 # $size, $atime, $mtime, $ctime, $blksize, $blocks
13730 # $dev, $ino, $mode, $nlink, $uid, $gid, $rdev,
13731 # $size, $atime, $mtime, $ctime, $blksize, $blocks
13734 # which makes the parens line up.
13736 # Another example, from Joe Matarazzo, probably looks best
13737 # with the 'or' clause appended to the trailing paren:
13738 # $self->some_method(
13741 # ) or die "Some_method didn't work";
13743 # But we do not want to do this for something like the -lp
13744 # option where the paren is not outdentable because the
13745 # trailing clause will be far to the right.
13747 # The logic here is synchronized with the logic in sub
13748 # sub set_adjusted_indentation, which actually does
13751 $skip_Section_3 ||= $this_line_is_semicolon_terminated
13753 # only one token on last line
13754 && $ibeg_1 == $iend_1
13756 # must be structural paren
13757 && $tokens_to_go[$iend_1] eq ')'
13759 # style must allow outdenting,
13760 && !$closing_token_indentation{')'}
13762 # only leading '&&', '||', and ':' if no others seen
13763 # (but note: our count made below could be wrong
13764 # due to intervening comments)
13765 && ( $leading_amp_count == 0
13766 || $type_ibeg_2 !~ /^(:|\&\&|\|\|)$/ )
13768 # but leading colons probably line up with a
13769 # previous colon or question (count could be wrong).
13770 && $type_ibeg_2 ne ':'
13772 # only one step in depth allowed. this line must not
13773 # begin with a ')' itself.
13774 && ( $nesting_depth_to_go[$iend_1] ==
13775 $nesting_depth_to_go[$iend_2] + 1 );
13777 # YVES patch 2 of 2:
13778 # Allow cuddled eval chains, like this:
13785 # This patch works together with a patch in
13786 # setting adjusted indentation (where the closing eval
13787 # brace is outdented if possible).
13788 # The problem is that an 'eval' block has continuation
13789 # indentation and it looks better to undo it in some
13790 # cases. If we do not use this patch we would get:
13798 # The alternative, for uncuddled style, is to create
13799 # a patch in set_adjusted_indentation which undoes
13800 # the indentation of a leading line like 'or do {'.
13801 # This doesn't work well with -icb through
13803 $block_type_to_go[$iend_1] eq 'eval'
13804 && !$rOpts->{'line-up-parentheses'}
13805 && !$rOpts->{'indent-closing-brace'}
13806 && $tokens_to_go[$iend_2] eq '{'
13808 ( $type_ibeg_2 =~ /^(\&\&|\|\|)$/ )
13809 || ( $type_ibeg_2 eq 'k'
13810 && $is_and_or{ $tokens_to_go[$ibeg_2] } )
13811 || $is_if_unless{ $tokens_to_go[$ibeg_2] }
13815 $skip_Section_3 ||= 1;
13822 # handle '.' and '?' specially below
13823 || ( $type_ibeg_2 =~ /^[\.\?]$/ )
13827 elsif ( $type_iend_1 eq '{' ) {
13830 # honor breaks at opening brace
13831 # Added to prevent recombining something like this:
13832 # } || eval { package main;
13833 next if $forced_breakpoint_to_go[$iend_1];
13836 # do not recombine lines with ending &&, ||,
13837 elsif ( $is_amp_amp{$type_iend_1} ) {
13838 next unless $want_break_before{$type_iend_1};
13841 # Identify and recombine a broken ?/: chain
13842 elsif ( $type_iend_1 eq '?' ) {
13844 # Do not recombine different levels
13846 if ( $levels_to_go[$ibeg_1] ne $levels_to_go[$ibeg_2] );
13848 # do not recombine unless next line ends in :
13849 next unless $type_iend_2 eq ':';
13852 # for lines ending in a comma...
13853 elsif ( $type_iend_1 eq ',' ) {
13855 # Do not recombine at comma which is following the
13857 # TODO: might be best to make a special flag
13858 next if ( $old_breakpoint_to_go[$iend_1] );
13860 # An isolated '},' may join with an identifier + ';'
13861 # This is useful for the class of a 'bless' statement
13863 if ( $type_ibeg_1 eq '}'
13864 && $type_ibeg_2 eq 'i' )
13867 unless ( ( $ibeg_1 == ( $iend_1 - 1 ) )
13868 && ( $iend_2 == ( $ibeg_2 + 1 ) )
13869 && $this_line_is_semicolon_terminated );
13871 # override breakpoint
13872 $forced_breakpoint_to_go[$iend_1] = 0;
13878 # do not recombine after a comma unless this will leave
13880 next unless ( $n + 1 >= $nmax );
13882 # do not recombine if there is a change in indentation depth
13885 $levels_to_go[$iend_1] != $levels_to_go[$iend_2] );
13887 # do not recombine a "complex expression" after a
13888 # comma. "complex" means no parens.
13890 foreach my $ii ( $ibeg_2 .. $iend_2 ) {
13891 if ( $tokens_to_go[$ii] eq '(' ) {
13896 next if $saw_paren;
13901 elsif ( $type_iend_1 eq '(' ) {
13903 # No longer doing this
13906 elsif ( $type_iend_1 eq ')' ) {
13908 # No longer doing this
13911 # keep a terminal for-semicolon
13912 elsif ( $type_iend_1 eq 'f' ) {
13916 # if '=' at end of line ...
13917 elsif ( $is_assignment{$type_iend_1} ) {
13919 # keep break after = if it was in input stream
13920 # this helps prevent 'blinkers'
13921 next if $old_breakpoint_to_go[$iend_1]
13923 # don't strand an isolated '='
13924 && $iend_1 != $ibeg_1;
13926 my $is_short_quote =
13927 ( $type_ibeg_2 eq 'Q'
13928 && $ibeg_2 == $iend_2
13929 && token_sequence_length( $ibeg_2, $ibeg_2 ) <
13930 $rOpts_short_concatenation_item_length );
13932 ( $type_ibeg_1 eq '?'
13933 && ( $ibeg_3 >= 0 && $types_to_go[$ibeg_3] eq ':' ) );
13935 # always join an isolated '=', a short quote, or if this
13936 # will put ?/: at start of adjacent lines
13937 if ( $ibeg_1 != $iend_1
13938 && !$is_short_quote
13945 # unless we can reduce this to two lines
13948 # or three lines, the last with a leading semicolon
13949 || ( $nmax == $n + 2
13950 && $types_to_go[$ibeg_nmax] eq ';' )
13952 # or the next line ends with a here doc
13953 || $type_iend_2 eq 'h'
13955 # or the next line ends in an open paren or brace
13956 # and the break hasn't been forced [dima.t]
13957 || ( !$forced_breakpoint_to_go[$iend_1]
13958 && $type_iend_2 eq '{' )
13961 # do not recombine if the two lines might align well
13962 # this is a very approximate test for this
13965 # RT#127633 - the leading tokens are not operators
13966 ( $type_ibeg_2 ne $tokens_to_go[$ibeg_2] )
13968 # or they are different
13970 && $type_ibeg_2 ne $types_to_go[$ibeg_3] )
13976 # Recombine if we can make two lines
13979 # -lp users often prefer this:
13980 # my $title = function($env, $env, $sysarea,
13981 # "bubba Borrower Entry");
13982 # so we will recombine if -lp is used we have
13984 && ( !$rOpts_line_up_parentheses
13985 || $type_iend_2 ne ',' )
13989 # otherwise, scan the rhs line up to last token for
13990 # complexity. Note that we are not counting the last
13991 # token in case it is an opening paren.
13993 my $depth = $nesting_depth_to_go[$ibeg_2];
13994 foreach my $i ( $ibeg_2 + 1 .. $iend_2 - 1 ) {
13995 if ( $nesting_depth_to_go[$i] != $depth ) {
13997 last if ( $tv > 1 );
13999 $depth = $nesting_depth_to_go[$i];
14002 # ok to recombine if no level changes before last token
14005 # otherwise, do not recombine if more than two
14007 next if ( $tv > 1 );
14009 # check total complexity of the two adjacent lines
14010 # that will occur if we do this join
14013 ? $ri_end->[ $n + 1 ]
14015 foreach my $i ( $iend_2 .. $istop ) {
14016 if ( $nesting_depth_to_go[$i] != $depth ) {
14018 last if ( $tv > 2 );
14020 $depth = $nesting_depth_to_go[$i];
14023 # do not recombine if total is more than 2 level changes
14024 next if ( $tv > 2 );
14029 unless ( $tokens_to_go[$ibeg_2] =~ /^[\{\(\[]$/ ) {
14030 $forced_breakpoint_to_go[$iend_1] = 0;
14035 elsif ( $type_iend_1 eq 'k' ) {
14037 # make major control keywords stand out
14042 #/^(last|next|redo|return)$/
14043 $is_last_next_redo_return{ $tokens_to_go[$iend_1] }
14045 # but only if followed by multiple lines
14049 if ( $is_and_or{ $tokens_to_go[$iend_1] } ) {
14051 unless $want_break_before{ $tokens_to_go[$iend_1] };
14055 #----------------------------------------------------------
14056 # Recombine Section 3:
14057 # Examine token at $ibeg_2 (left end of second line of pair)
14058 #----------------------------------------------------------
14060 # join lines identified above as capable of
14061 # causing an outdented line with leading closing paren
14062 # Note that we are skipping the rest of this section
14063 # and the rest of the loop to do the join
14064 if ($skip_Section_3) {
14065 $forced_breakpoint_to_go[$iend_1] = 0;
14070 # handle lines with leading &&, ||
14071 elsif ( $is_amp_amp{$type_ibeg_2} ) {
14073 $leading_amp_count++;
14075 # ok to recombine if it follows a ? or :
14076 # and is followed by an open paren..
14078 ( $is_ternary{$type_ibeg_1}
14079 && $tokens_to_go[$iend_2] eq '(' )
14081 # or is followed by a ? or : at same depth
14083 # We are looking for something like this. We can
14084 # recombine the && line with the line above to make the
14085 # structure more clear:
14087 # exists $G->{Attr}->{V}
14088 # && exists $G->{Attr}->{V}->{$u}
14089 # ? %{ $G->{Attr}->{V}->{$u} }
14092 # We should probably leave something like this alone:
14094 # exists $G->{Attr}->{E}
14095 # && exists $G->{Attr}->{E}->{$u}
14096 # && exists $G->{Attr}->{E}->{$u}->{$v}
14097 # ? %{ $G->{Attr}->{E}->{$u}->{$v} }
14099 # so that we either have all of the &&'s (or ||'s)
14100 # on one line, as in the first example, or break at
14101 # each one as in the second example. However, it
14102 # sometimes makes things worse to check for this because
14103 # it prevents multiple recombinations. So this is not done.
14105 && $is_ternary{ $types_to_go[$ibeg_3] }
14106 && $nesting_depth_to_go[$ibeg_3] ==
14107 $nesting_depth_to_go[$ibeg_2] );
14109 next if !$ok && $want_break_before{$type_ibeg_2};
14110 $forced_breakpoint_to_go[$iend_1] = 0;
14112 # tweak the bond strength to give this joint priority
14117 # Identify and recombine a broken ?/: chain
14118 elsif ( $type_ibeg_2 eq '?' ) {
14120 # Do not recombine different levels
14121 my $lev = $levels_to_go[$ibeg_2];
14122 next if ( $lev ne $levels_to_go[$ibeg_1] );
14124 # Do not recombine a '?' if either next line or
14125 # previous line does not start with a ':'. The reasons
14126 # are that (1) no alignment of the ? will be possible
14127 # and (2) the expression is somewhat complex, so the
14128 # '?' is harder to see in the interior of the line.
14129 my $follows_colon = $ibeg_1 >= 0 && $type_ibeg_1 eq ':';
14130 my $precedes_colon =
14131 $ibeg_3 >= 0 && $types_to_go[$ibeg_3] eq ':';
14132 next unless ( $follows_colon || $precedes_colon );
14134 # we will always combining a ? line following a : line
14135 if ( !$follows_colon ) {
14137 # ...otherwise recombine only if it looks like a chain.
14138 # we will just look at a few nearby lines to see if
14139 # this looks like a chain.
14140 my $local_count = 0;
14141 foreach my $ii ( $ibeg_0, $ibeg_1, $ibeg_3, $ibeg_4 ) {
14144 && $types_to_go[$ii] eq ':'
14145 && $levels_to_go[$ii] == $lev;
14147 next unless ( $local_count > 1 );
14149 $forced_breakpoint_to_go[$iend_1] = 0;
14152 # do not recombine lines with leading '.'
14153 elsif ( $type_ibeg_2 eq '.' ) {
14154 my $i_next_nonblank = min( $inext_to_go[$ibeg_2], $iend_2 );
14158 # ... unless there is just one and we can reduce
14159 # this to two lines if we do. For example, this
14163 # '($dummy, $pat) = &get_next_tex_cmd;' . '$args .= $pat;'
14165 # looks better than this:
14166 # $bodyA .= '($dummy, $pat) = &get_next_tex_cmd;'
14167 # . '$args .= $pat;'
14172 && $type_ibeg_1 ne $type_ibeg_2
14175 # ... or this would strand a short quote , like this
14176 # . "some long quote"
14179 || ( $types_to_go[$i_next_nonblank] eq 'Q'
14180 && $i_next_nonblank >= $iend_2 - 1
14181 && $token_lengths_to_go[$i_next_nonblank] <
14182 $rOpts_short_concatenation_item_length )
14186 # handle leading keyword..
14187 elsif ( $type_ibeg_2 eq 'k' ) {
14189 # handle leading "or"
14190 if ( $tokens_to_go[$ibeg_2] eq 'or' ) {
14193 $this_line_is_semicolon_terminated
14195 $type_ibeg_1 eq '}'
14198 # following 'if' or 'unless' or 'or'
14199 $type_ibeg_1 eq 'k'
14200 && $is_if_unless{ $tokens_to_go[$ibeg_1] }
14202 # important: only combine a very simple or
14203 # statement because the step below may have
14204 # combined a trailing 'and' with this or,
14205 # and we do not want to then combine
14206 # everything together
14207 && ( $iend_2 - $ibeg_2 <= 7 )
14213 $forced_breakpoint_to_go[$iend_1] = 0
14214 unless $old_breakpoint_to_go[$iend_1];
14217 # handle leading 'and' and 'xor'
14218 elsif ($tokens_to_go[$ibeg_2] eq 'and'
14219 || $tokens_to_go[$ibeg_2] eq 'xor' )
14222 # Decide if we will combine a single terminal 'and'
14223 # after an 'if' or 'unless'.
14225 # This looks best with the 'and' on the same
14226 # line as the 'if':
14229 # if $seconds and $nu < 2;
14231 # But this looks better as shown:
14234 # if !$this->{Parents}{$_}
14235 # or $this->{Parents}{$_} eq $_;
14239 $this_line_is_semicolon_terminated
14242 # following 'if' or 'unless' or 'or'
14243 $type_ibeg_1 eq 'k'
14244 && ( $is_if_unless{ $tokens_to_go[$ibeg_1] }
14245 || $tokens_to_go[$ibeg_1] eq 'or' )
14250 # handle leading "if" and "unless"
14251 elsif ( $is_if_unless{ $tokens_to_go[$ibeg_2] } ) {
14253 # Combine something like:
14255 # if ( $lang !~ /${l}$/i );
14257 # next if ( $lang !~ /${l}$/i );
14260 $this_line_is_semicolon_terminated
14262 # previous line begins with 'and' or 'or'
14263 && $type_ibeg_1 eq 'k'
14264 && $is_and_or{ $tokens_to_go[$ibeg_1] }
14269 # handle all other leading keywords
14272 # keywords look best at start of lines,
14273 # but combine things like "1 while"
14274 unless ( $is_assignment{$type_iend_1} ) {
14276 if ( ( $type_iend_1 ne 'k' )
14277 && ( $tokens_to_go[$ibeg_2] ne 'while' ) );
14282 # similar treatment of && and || as above for 'and' and 'or':
14283 # NOTE: This block of code is currently bypassed because
14284 # of a previous block but is retained for possible future use.
14285 elsif ( $is_amp_amp{$type_ibeg_2} ) {
14287 # maybe looking at something like:
14288 # unless $TEXTONLY || $item =~ m%</?(hr>|p>|a|img)%i;
14292 $this_line_is_semicolon_terminated
14294 # previous line begins with an 'if' or 'unless' keyword
14295 && $type_ibeg_1 eq 'k'
14296 && $is_if_unless{ $tokens_to_go[$ibeg_1] }
14301 # handle line with leading = or similar
14302 elsif ( $is_assignment{$type_ibeg_2} ) {
14303 next unless ( $n == 1 || $n == $nmax );
14304 next if $old_breakpoint_to_go[$iend_1];
14308 # unless we can reduce this to two lines
14311 # or three lines, the last with a leading semicolon
14312 || ( $nmax == 3 && $types_to_go[$ibeg_nmax] eq ';' )
14314 # or the next line ends with a here doc
14315 || $type_iend_2 eq 'h'
14317 # or this is a short line ending in ;
14318 || ( $n == $nmax && $this_line_is_semicolon_terminated )
14320 $forced_breakpoint_to_go[$iend_1] = 0;
14323 #----------------------------------------------------------
14324 # Recombine Section 4:
14325 # Combine the lines if we arrive here and it is possible
14326 #----------------------------------------------------------
14328 # honor hard breakpoints
14329 next if ( $forced_breakpoint_to_go[$iend_1] > 0 );
14331 my $bs = $bond_strength_to_go[$iend_1] + $bs_tweak;
14333 # Require a few extra spaces before recombining lines if we are
14334 # at an old breakpoint unless this is a simple list or terminal
14335 # line. The goal is to avoid oscillating between two
14336 # quasi-stable end states. For example this snippet caused
14340 ## TText => "[" . ( join ',', map { "\"$_\"" } split "\n", $_ ) . "]"
14344 if ( $old_breakpoint_to_go[$iend_1]
14345 && !$this_line_is_semicolon_terminated
14348 && $type_iend_2 ne ',' );
14350 # do not recombine if we would skip in indentation levels
14351 if ( $n < $nmax ) {
14352 my $if_next = $ri_beg->[ $n + 1 ];
14355 $levels_to_go[$ibeg_1] < $levels_to_go[$ibeg_2]
14356 && $levels_to_go[$ibeg_2] < $levels_to_go[$if_next]
14358 # but an isolated 'if (' is undesirable
14361 && $iend_1 - $ibeg_1 <= 2
14362 && $type_ibeg_1 eq 'k'
14363 && $tokens_to_go[$ibeg_1] eq 'if'
14364 && $tokens_to_go[$iend_1] ne '('
14370 next if ( $bs >= NO_BREAK - 1 );
14372 # remember the pair with the greatest bond strength
14379 if ( $bs > $bs_best ) {
14386 # recombine the pair with the greatest bond strength
14388 splice @{$ri_beg}, $n_best, 1;
14389 splice @{$ri_end}, $n_best - 1, 1;
14390 splice @joint, $n_best, 1;
14392 # keep going if we are still making progress
14396 return ( $ri_beg, $ri_end );
14398 } ## end closure recombine_breakpoints
14400 sub insert_final_ternary_breaks {
14402 my ( $self, $ri_left, $ri_right ) = @_;
14404 # Called once per batch to look for and do any final line breaks for
14405 # long ternary chains
14407 my $nmax = @{$ri_right} - 1;
14409 # scan the left and right end tokens of all lines
14411 my $i_first_colon = -1;
14412 for my $n ( 0 .. $nmax ) {
14413 my $il = $ri_left->[$n];
14414 my $ir = $ri_right->[$n];
14415 my $typel = $types_to_go[$il];
14416 my $typer = $types_to_go[$ir];
14417 return if ( $typel eq '?' );
14418 return if ( $typer eq '?' );
14419 if ( $typel eq ':' ) { $i_first_colon = $il; last; }
14420 elsif ( $typer eq ':' ) { $i_first_colon = $ir; last; }
14423 # For long ternary chains,
14424 # if the first : we see has its ? is in the interior
14425 # of a preceding line, then see if there are any good
14426 # breakpoints before the ?.
14427 if ( $i_first_colon > 0 ) {
14428 my $i_question = $mate_index_to_go[$i_first_colon];
14429 if ( $i_question > 0 ) {
14431 for ( my $ii = $i_question - 1 ; $ii >= 0 ; $ii -= 1 ) {
14432 my $token = $tokens_to_go[$ii];
14433 my $type = $types_to_go[$ii];
14435 # For now, a good break is either a comma or,
14436 # in a long chain, a 'return'.
14437 # Patch for RT #126633: added the $nmax>1 check to avoid
14438 # breaking after a return for a simple ternary. For longer
14439 # chains the break after return allows vertical alignment, so
14440 # it is still done. So perltidy -wba='?' will not break
14441 # immediately after the return in the following statement:
14443 # return 0 ? 'aaaaaaaaaaaaaaaaaaaaa' :
14444 # 'bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb';
14449 || $type eq 'k' && ( $nmax > 1 && $token eq 'return' )
14451 && $self->in_same_container_i( $ii, $i_question )
14454 push @insert_list, $ii;
14459 # insert any new break points
14460 if (@insert_list) {
14461 $self->insert_additional_breaks( \@insert_list, $ri_left,
14469 sub insert_breaks_before_list_opening_containers {
14471 my ( $self, $ri_left, $ri_right ) = @_;
14473 # This routine is called once per batch to implement the parameters
14474 # --break-before-hash-brace, etc.
14476 # Nothing to do if none of these parameters has been set
14477 return unless %break_before_container_types;
14479 my $nmax = @{$ri_right} - 1;
14480 return unless ( $nmax >= 0 );
14482 my $rLL = $self->[_rLL_];
14484 my $rbreak_before_container_by_seqno =
14485 $self->[_rbreak_before_container_by_seqno_];
14486 my $rK_weld_left = $self->[_rK_weld_left_];
14488 # scan the ends of all lines
14490 for my $n ( 0 .. $nmax ) {
14491 my $il = $ri_left->[$n];
14492 my $ir = $ri_right->[$n];
14493 next unless ( $ir > $il );
14494 my $Kl = $K_to_go[$il];
14495 my $Kr = $K_to_go[$ir];
14497 my $type_end = $rLL->[$Kr]->[_TYPE_];
14499 # Backup before any side comment
14500 if ( $type_end eq '#' ) {
14501 $Kend = $self->K_previous_nonblank($Kr);
14502 next unless defined($Kend);
14503 $type_end = $rLL->[$Kend]->[_TYPE_];
14506 # Backup to the start of any weld; fix for b1173.
14507 if ($total_weld_count) {
14508 my $Kend_test = $rK_weld_left->{$Kend};
14509 if ( defined($Kend_test) && $Kend_test > $Kl ) {
14510 $Kend = $Kend_test;
14511 $Kend_test = $rK_weld_left->{$Kend};
14514 # Do not break if we did not back up to the start of a weld
14515 # (shouldn't happen)
14516 next if ( defined($Kend_test) );
14519 my $token = $rLL->[$Kend]->[_TOKEN_];
14520 next unless ( $is_opening_token{$token} );
14521 next unless ( $Kl < $Kend - 1 );
14523 my $seqno = $rLL->[$Kend]->[_TYPE_SEQUENCE_];
14524 next unless ( defined($seqno) );
14526 # Use the flag which was previously set
14527 next unless ( $rbreak_before_container_by_seqno->{$seqno} );
14529 # Install a break before this opening token.
14530 my $Kbreak = $self->K_previous_nonblank($Kend);
14531 my $ibreak = $Kbreak - $Kl + $il;
14532 next if ( $ibreak < $il );
14533 next if ( $nobreak_to_go[$ibreak] );
14534 push @insert_list, $ibreak;
14537 # insert any new break points
14538 if (@insert_list) {
14539 $self->insert_additional_breaks( \@insert_list, $ri_left, $ri_right );
14544 sub note_added_semicolon {
14545 my ( $self, $line_number ) = @_;
14546 $self->[_last_added_semicolon_at_] = $line_number;
14547 if ( $self->[_added_semicolon_count_] == 0 ) {
14548 $self->[_first_added_semicolon_at_] = $line_number;
14550 $self->[_added_semicolon_count_]++;
14551 write_logfile_entry("Added ';' here\n");
14555 sub note_deleted_semicolon {
14556 my ( $self, $line_number ) = @_;
14557 $self->[_last_deleted_semicolon_at_] = $line_number;
14558 if ( $self->[_deleted_semicolon_count_] == 0 ) {
14559 $self->[_first_deleted_semicolon_at_] = $line_number;
14561 $self->[_deleted_semicolon_count_]++;
14562 write_logfile_entry("Deleted unnecessary ';' at line $line_number\n");
14566 sub note_embedded_tab {
14567 my ( $self, $line_number ) = @_;
14568 $self->[_embedded_tab_count_]++;
14569 $self->[_last_embedded_tab_at_] = $line_number;
14570 if ( !$self->[_first_embedded_tab_at_] ) {
14571 $self->[_first_embedded_tab_at_] = $line_number;
14574 if ( $self->[_embedded_tab_count_] <= MAX_NAG_MESSAGES ) {
14575 write_logfile_entry("Embedded tabs in quote or pattern\n");
14580 sub correct_lp_indentation {
14582 # When the -lp option is used, we need to make a last pass through
14583 # each line to correct the indentation positions in case they differ
14584 # from the predictions. This is necessary because perltidy uses a
14585 # predictor/corrector method for aligning with opening parens. The
14586 # predictor is usually good, but sometimes stumbles. The corrector
14587 # tries to patch things up once the actual opening paren locations
14589 my ( $self, $ri_first, $ri_last ) = @_;
14590 my $do_not_pad = 0;
14592 # Note on flag '$do_not_pad':
14593 # We want to avoid a situation like this, where the aligner inserts
14594 # whitespace before the '=' to align it with a previous '=', because
14595 # otherwise the parens might become mis-aligned in a situation like
14596 # this, where the '=' has become aligned with the previous line,
14597 # pushing the opening '(' forward beyond where we want it.
14599 # $mkFloor::currentRoom = '';
14600 # $mkFloor::c_entry = $c->Entry(
14602 # -relief => 'sunken',
14606 # We leave it to the aligner to decide how to do this.
14608 # first remove continuation indentation if appropriate
14609 my $max_line = @{$ri_first} - 1;
14611 # looking at each line of this batch..
14612 my ( $ibeg, $iend );
14613 foreach my $line ( 0 .. $max_line ) {
14614 $ibeg = $ri_first->[$line];
14615 $iend = $ri_last->[$line];
14617 # looking at each token in this output line..
14618 foreach my $i ( $ibeg .. $iend ) {
14620 # How many space characters to place before this token
14621 # for special alignment. Actual padding is done in the
14624 # looking for next unvisited indentation item
14625 my $indentation = $leading_spaces_to_go[$i];
14626 if ( !$indentation->get_marked() ) {
14627 $indentation->set_marked(1);
14629 # looking for indentation item for which we are aligning
14630 # with parens, braces, and brackets
14631 next unless ( $indentation->get_align_paren() );
14633 # skip closed container on this line
14634 if ( $i > $ibeg ) {
14635 my $im = max( $ibeg, $iprev_to_go[$i] );
14636 if ( $type_sequence_to_go[$im]
14637 && $mate_index_to_go[$im] <= $iend )
14643 if ( $line == 1 && $i == $ibeg ) {
14647 # Ok, let's see what the error is and try to fix it
14649 my $predicted_pos = $indentation->get_spaces();
14650 if ( $i > $ibeg ) {
14652 # token is mid-line - use length to previous token
14653 $actual_pos = total_line_length( $ibeg, $i - 1 );
14655 # for mid-line token, we must check to see if all
14656 # additional lines have continuation indentation,
14657 # and remove it if so. Otherwise, we do not get
14659 my $closing_index = $indentation->get_closed();
14660 if ( $closing_index > $iend ) {
14661 my $ibeg_next = $ri_first->[ $line + 1 ];
14662 if ( $ci_levels_to_go[$ibeg_next] > 0 ) {
14663 $self->undo_lp_ci( $line, $i, $closing_index,
14664 $ri_first, $ri_last );
14668 elsif ( $line > 0 ) {
14670 # handle case where token starts a new line;
14671 # use length of previous line
14672 my $ibegm = $ri_first->[ $line - 1 ];
14673 my $iendm = $ri_last->[ $line - 1 ];
14674 $actual_pos = total_line_length( $ibegm, $iendm );
14678 if ( $types_to_go[ $iendm + 1 ] eq 'b' );
14682 # token is first character of first line of batch
14683 $actual_pos = $predicted_pos;
14686 my $move_right = $actual_pos - $predicted_pos;
14688 # done if no error to correct (gnu2.t)
14689 if ( $move_right == 0 ) {
14690 $indentation->set_recoverable_spaces($move_right);
14694 # if we have not seen closure for this indentation in
14695 # this batch, we can only pass on a request to the
14697 my $closing_index = $indentation->get_closed();
14699 if ( $closing_index < 0 ) {
14700 $indentation->set_recoverable_spaces($move_right);
14704 # If necessary, look ahead to see if there is really any
14705 # leading whitespace dependent on this whitespace, and
14706 # also find the longest line using this whitespace.
14707 # Since it is always safe to move left if there are no
14708 # dependents, we only need to do this if we may have
14709 # dependent nodes or need to move right.
14711 my $right_margin = 0;
14712 my $have_child = $indentation->get_have_child();
14714 my %saw_indentation;
14715 my $line_count = 1;
14716 $saw_indentation{$indentation} = $indentation;
14718 if ( $have_child || $move_right > 0 ) {
14720 my $max_length = 0;
14721 if ( $i == $ibeg ) {
14722 $max_length = total_line_length( $ibeg, $iend );
14725 # look ahead at the rest of the lines of this batch..
14726 foreach my $line_t ( $line + 1 .. $max_line ) {
14727 my $ibeg_t = $ri_first->[$line_t];
14728 my $iend_t = $ri_last->[$line_t];
14729 last if ( $closing_index <= $ibeg_t );
14731 # remember all different indentation objects
14732 my $indentation_t = $leading_spaces_to_go[$ibeg_t];
14733 $saw_indentation{$indentation_t} = $indentation_t;
14736 # remember longest line in the group
14737 my $length_t = total_line_length( $ibeg_t, $iend_t );
14738 if ( $length_t > $max_length ) {
14739 $max_length = $length_t;
14743 $maximum_line_length_at_level[ $levels_to_go[$ibeg] ] -
14745 if ( $right_margin < 0 ) { $right_margin = 0 }
14748 my $first_line_comma_count =
14749 grep { $_ eq ',' } @types_to_go[ $ibeg .. $iend ];
14750 my $comma_count = $indentation->get_comma_count();
14751 my $arrow_count = $indentation->get_arrow_count();
14753 # This is a simple approximate test for vertical alignment:
14754 # if we broke just after an opening paren, brace, bracket,
14755 # and there are 2 or more commas in the first line,
14756 # and there are no '=>'s,
14757 # then we are probably vertically aligned. We could set
14758 # an exact flag in sub scan_list, but this is good
14760 my $indentation_count = keys %saw_indentation;
14761 my $is_vertically_aligned =
14763 && $first_line_comma_count > 1
14764 && $indentation_count == 1
14765 && ( $arrow_count == 0 || $arrow_count == $line_count ) );
14767 # Make the move if possible ..
14770 # we can always move left
14773 # but we should only move right if we are sure it will
14774 # not spoil vertical alignment
14775 || ( $comma_count == 0 )
14776 || ( $comma_count > 0 && !$is_vertically_aligned )
14780 ( $move_right <= $right_margin )
14784 foreach ( keys %saw_indentation ) {
14785 $saw_indentation{$_}
14786 ->permanently_decrease_available_spaces( -$move );
14790 # Otherwise, record what we want and the vertical aligner
14791 # will try to recover it.
14793 $indentation->set_recoverable_spaces($move_right);
14798 return $do_not_pad;
14803 # If there is a single, long parameter within parens, like this:
14805 # $self->command( "/msg "
14806 # . $infoline->chan
14807 # . " You said $1, but did you know that it's square was "
14808 # . $1 * $1 . " ?" );
14810 # we can remove the continuation indentation of the 2nd and higher lines
14811 # to achieve this effect, which is more pleasing:
14813 # $self->command("/msg "
14814 # . $infoline->chan
14815 # . " You said $1, but did you know that it's square was "
14816 # . $1 * $1 . " ?");
14818 my ( $self, $line_open, $i_start, $closing_index, $ri_first, $ri_last ) =
14820 my $max_line = @{$ri_first} - 1;
14822 # must be multiple lines
14823 return unless $max_line > $line_open;
14825 my $lev_start = $levels_to_go[$i_start];
14826 my $ci_start_plus = 1 + $ci_levels_to_go[$i_start];
14828 # see if all additional lines in this container have continuation
14831 my $line_1 = 1 + $line_open;
14832 for ( $n = $line_1 ; $n <= $max_line ; ++$n ) {
14833 my $ibeg = $ri_first->[$n];
14834 my $iend = $ri_last->[$n];
14835 if ( $ibeg eq $closing_index ) { $n--; last }
14836 return if ( $lev_start != $levels_to_go[$ibeg] );
14837 return if ( $ci_start_plus != $ci_levels_to_go[$ibeg] );
14838 last if ( $closing_index <= $iend );
14841 # we can reduce the indentation of all continuation lines
14842 my $continuation_line_count = $n - $line_open;
14843 @ci_levels_to_go[ @{$ri_first}[ $line_1 .. $n ] ] =
14844 (0) x ($continuation_line_count);
14845 @leading_spaces_to_go[ @{$ri_first}[ $line_1 .. $n ] ] =
14846 @reduced_spaces_to_go[ @{$ri_first}[ $line_1 .. $n ] ];
14850 ###############################################
14851 # CODE SECTION 10: Code to break long statments
14852 ###############################################
14854 sub set_continuation_breaks {
14856 # Called once per batch to set breaks in long lines.
14858 # Define an array of indexes for inserting newline characters to
14859 # keep the line lengths below the maximum desired length. There is
14860 # an implied break after the last token, so it need not be included.
14863 # This routine is part of series of routines which adjust line
14864 # lengths. It is only called if a statement is longer than the
14865 # maximum line length, or if a preliminary scanning located
14866 # desirable break points. Sub scan_list has already looked at
14867 # these tokens and set breakpoints (in array
14868 # $forced_breakpoint_to_go[$i]) where it wants breaks (for example
14869 # after commas, after opening parens, and before closing parens).
14870 # This routine will honor these breakpoints and also add additional
14871 # breakpoints as necessary to keep the line length below the maximum
14872 # requested. It bases its decision on where the 'bond strength' is
14875 # Output: returns references to the arrays:
14878 # which contain the indexes $i of the first and last tokens on each
14881 # In addition, the array:
14882 # $forced_breakpoint_to_go[$i]
14883 # may be updated to be =1 for any index $i after which there must be
14884 # a break. This signals later routines not to undo the breakpoint.
14886 my ( $self, $saw_good_break, $rcolon_list ) = @_;
14888 # @{$rcolon_list} is a list of all the ? and : tokens in the batch, in
14891 use constant DEBUG_BREAKPOINTS => 0;
14893 my @i_first = (); # the first index to output
14894 my @i_last = (); # the last index to output
14895 my @i_colon_breaks = (); # needed to decide if we have to break at ?'s
14896 if ( $types_to_go[0] eq ':' ) { push @i_colon_breaks, 0 }
14898 $self->set_bond_strengths();
14901 my $imax = $max_index_to_go;
14902 if ( $types_to_go[$imin] eq 'b' ) { $imin++ }
14903 if ( $types_to_go[$imax] eq 'b' ) { $imax-- }
14904 my $i_begin = $imin; # index for starting next iteration
14906 my $leading_spaces = leading_spaces_to_go($imin);
14907 my $line_count = 0;
14908 my $last_break_strength = NO_BREAK;
14909 my $i_last_break = -1;
14910 my $max_bias = 0.001;
14911 my $tiny_bias = 0.0001;
14912 my $leading_alignment_token = "";
14913 my $leading_alignment_type = "";
14915 # see if any ?/:'s are in order
14916 my $colons_in_order = 1;
14918 foreach ( @{$rcolon_list} ) {
14919 if ( $_ eq $last_tok ) { $colons_in_order = 0; last }
14923 # This is a sufficient but not necessary condition for colon chain
14924 my $is_colon_chain = ( $colons_in_order && @{$rcolon_list} > 2 );
14928 #-------------------------------------------------------
14929 # BEGINNING of main loop to set continuation breakpoints
14930 # Keep iterating until we reach the end
14931 #-------------------------------------------------------
14932 while ( $i_begin <= $imax ) {
14933 my $lowest_strength = NO_BREAK;
14934 my $starting_sum = $summed_lengths_to_go[$i_begin];
14937 my $lowest_next_token = '';
14938 my $lowest_next_type = 'b';
14939 my $i_lowest_next_nonblank = -1;
14940 my $maximum_line_length =
14941 $maximum_line_length_at_level[ $levels_to_go[$i_begin] ];
14943 #-------------------------------------------------------
14944 # BEGINNING of inner loop to find the best next breakpoint
14945 #-------------------------------------------------------
14946 my $strength = NO_BREAK;
14947 for ( $i_test = $i_begin ; $i_test <= $imax ; $i_test++ ) {
14948 my $type = $types_to_go[$i_test];
14949 my $token = $tokens_to_go[$i_test];
14950 my $next_type = $types_to_go[ $i_test + 1 ];
14951 my $next_token = $tokens_to_go[ $i_test + 1 ];
14952 my $i_next_nonblank = $inext_to_go[$i_test];
14953 my $next_nonblank_type = $types_to_go[$i_next_nonblank];
14954 my $next_nonblank_token = $tokens_to_go[$i_next_nonblank];
14955 my $next_nonblank_block_type = $block_type_to_go[$i_next_nonblank];
14957 # adjustments to the previous bond strength may have been made, and
14958 # we must keep the bond strength of a token and its following blank
14960 my $last_strength = $strength;
14961 $strength = $bond_strength_to_go[$i_test];
14962 if ( $type eq 'b' ) { $strength = $last_strength }
14964 # reduce strength a bit to break ties at an old comma breakpoint ...
14967 $old_breakpoint_to_go[$i_test]
14969 # Patch: limited to just commas to avoid blinking states
14972 # which is a 'good' breakpoint, meaning ...
14973 # we don't want to break before it
14974 && !$want_break_before{$type}
14976 # and either we want to break before the next token
14977 # or the next token is not short (i.e. not a '*', '/' etc.)
14978 && $i_next_nonblank <= $imax
14979 && ( $want_break_before{$next_nonblank_type}
14980 || $token_lengths_to_go[$i_next_nonblank] > 2
14981 || $next_nonblank_type eq ','
14982 || $is_opening_type{$next_nonblank_type} )
14985 $strength -= $tiny_bias;
14986 DEBUG_BREAKPOINTS && do { $Msg .= " :-bias at i=$i_test" };
14989 # otherwise increase strength a bit if this token would be at the
14990 # maximum line length. This is necessary to avoid blinking
14991 # in the above example when the -iob flag is added.
14995 $summed_lengths_to_go[ $i_test + 1 ] -
14997 if ( $len >= $maximum_line_length ) {
14998 $strength += $tiny_bias;
14999 DEBUG_BREAKPOINTS && do { $Msg .= " :+bias at i=$i_test" };
15003 my $must_break = 0;
15005 # Force an immediate break at certain operators
15006 # with lower level than the start of the line,
15007 # unless we've already seen a better break.
15009 ##############################################
15010 # Note on an issue with a preceding ?
15011 ##############################################
15012 # We don't include a ? in the above list, but there may
15013 # be a break at a previous ? if the line is long.
15014 # Because of this we do not want to force a break if
15015 # there is a previous ? on this line. For now the best way
15016 # to do this is to not break if we have seen a lower strength
15017 # point, which is probably a ?.
15019 # Example of unwanted breaks we are avoiding at a '.' following a ?
15020 # from pod2html using perltidy -gnu:
15022 # ? "\n<A NAME=\""
15024 # . "\">\n$text</A>\n"
15025 # : "\n$type$pod2.html\#" . $value . "\">$text<\/A>\n";
15027 ( $strength <= $lowest_strength )
15028 && ( $nesting_depth_to_go[$i_begin] >
15029 $nesting_depth_to_go[$i_next_nonblank] )
15031 $next_nonblank_type =~ /^(\.|\&\&|\|\|)$/
15032 || ( $next_nonblank_type eq 'k'
15033 && $next_nonblank_token =~ /^(and|or)$/ )
15037 $self->set_forced_breakpoint($i_next_nonblank);
15039 && do { $Msg .= " :Forced break at i=$i_next_nonblank" };
15044 # Try to put a break where requested by scan_list
15045 $forced_breakpoint_to_go[$i_test]
15047 # break between ) { in a continued line so that the '{' can
15049 # See similar logic in scan_list which catches instances
15050 # where a line is just something like ') {'. We have to
15051 # be careful because the corresponding block keyword might
15052 # not be on the first line, such as 'for' here:
15056 # for $x ( 1, 2 ) { local $_ = "b"; s/(.*)/+$1/ }
15062 && ( $token eq ')' )
15063 && ( $next_nonblank_type eq '{' )
15064 && ($next_nonblank_block_type)
15065 && ( $next_nonblank_block_type ne $tokens_to_go[$i_begin] )
15067 # RT #104427: Dont break before opening sub brace because
15068 # sub block breaks handled at higher level, unless
15069 # it looks like the preceding list is long and broken
15071 $next_nonblank_block_type =~ /$ANYSUB_PATTERN/
15072 && ( $nesting_depth_to_go[$i_begin] ==
15073 $nesting_depth_to_go[$i_next_nonblank] )
15076 && !$rOpts->{'opening-brace-always-on-right'}
15079 # There is an implied forced break at a terminal opening brace
15080 || ( ( $type eq '{' ) && ( $i_test == $imax ) )
15084 # Forced breakpoints must sometimes be overridden, for example
15085 # because of a side comment causing a NO_BREAK. It is easier
15086 # to catch this here than when they are set.
15087 if ( $strength < NO_BREAK - 1 ) {
15088 $strength = $lowest_strength - $tiny_bias;
15091 && do { $Msg .= " :set must_break at i=$i_next_nonblank" };
15095 # quit if a break here would put a good terminal token on
15096 # the next line and we already have a possible break
15099 && ( $next_nonblank_type eq ';' || $next_nonblank_type eq ',' )
15103 $summed_lengths_to_go[ $i_next_nonblank + 1 ] -
15105 ) > $maximum_line_length
15109 if ( $i_lowest >= 0 ) {
15110 DEBUG_BREAKPOINTS && do {
15111 $Msg .= " :quit at good terminal='$next_nonblank_type'";
15117 # Avoid a break which would strand a single punctuation
15118 # token. For example, we do not want to strand a leading
15119 # '.' which is followed by a long quoted string.
15120 # But note that we do want to do this with -extrude (l=1)
15121 # so please test any changes to this code on -extrude.
15124 && ( $i_test == $i_begin )
15125 && ( $i_test < $imax )
15126 && ( $token eq $type )
15130 $summed_lengths_to_go[ $i_test + 1 ] -
15132 ) < $maximum_line_length
15136 $i_test = min( $imax, $inext_to_go[$i_test] );
15137 DEBUG_BREAKPOINTS && do {
15138 $Msg .= " :redo at i=$i_test";
15143 if ( ( $strength <= $lowest_strength ) && ( $strength < NO_BREAK ) )
15146 # break at previous best break if it would have produced
15147 # a leading alignment of certain common tokens, and it
15148 # is different from the latest candidate break
15149 if ($leading_alignment_type) {
15150 DEBUG_BREAKPOINTS && do {
15152 " :last at leading_alignment='$leading_alignment_type'";
15157 # Force at least one breakpoint if old code had good
15158 # break It is only called if a breakpoint is required or
15159 # desired. This will probably need some adjustments
15160 # over time. A goal is to try to be sure that, if a new
15161 # side comment is introduced into formatted text, then
15162 # the same breakpoints will occur. scbreak.t
15164 $i_test == $imax # we are at the end
15165 && !get_forced_breakpoint_count()
15166 && $saw_good_break # old line had good break
15167 && $type =~ /^[#;\{]$/ # and this line ends in
15168 # ';' or side comment
15169 && $i_last_break < 0 # and we haven't made a break
15170 && $i_lowest >= 0 # and we saw a possible break
15171 && $i_lowest < $imax - 1 # (but not just before this ;)
15172 && $strength - $lowest_strength < 0.5 * WEAK # and it's good
15176 DEBUG_BREAKPOINTS && do {
15177 $Msg .= " :last at good old break\n";
15182 # Do not skip past an important break point in a short final
15183 # segment. For example, without this check we would miss the
15184 # break at the final / in the following code:
15187 # ( $tau * $mass_pellet * $q_0 *
15188 # ( 1. - exp( -$t_stop / $tau ) ) -
15189 # 4. * $pi * $factor * $k_ice *
15190 # ( $t_melt - $t_ice ) *
15193 # ( $rho_ice * $Qs * $pi * $r_pellet**2 );
15197 && $i_lowest >= 0 # and we saw a possible break
15198 && $i_lowest < $i_test
15199 && $i_test > $imax - 2
15200 && $nesting_depth_to_go[$i_begin] >
15201 $nesting_depth_to_go[$i_lowest]
15202 && $lowest_strength < $last_break_strength - .5 * WEAK
15205 # Make this break for math operators for now
15206 my $ir = $inext_to_go[$i_lowest];
15207 my $il = $iprev_to_go[$ir];
15208 if ( $types_to_go[$il] =~ /^[\/\*\+\-\%]$/
15209 || $types_to_go[$ir] =~ /^[\/\*\+\-\%]$/ )
15211 DEBUG_BREAKPOINTS && do {
15212 $Msg .= " :last-noskip_short";
15218 # Update the minimum bond strength location
15219 $lowest_strength = $strength;
15220 $i_lowest = $i_test;
15221 $lowest_next_token = $next_nonblank_token;
15222 $lowest_next_type = $next_nonblank_type;
15223 $i_lowest_next_nonblank = $i_next_nonblank;
15225 DEBUG_BREAKPOINTS && do {
15226 $Msg .= " :last-must_break";
15231 # set flags to remember if a break here will produce a
15232 # leading alignment of certain common tokens
15233 if ( $line_count > 0
15235 && ( $lowest_strength - $last_break_strength <= $max_bias )
15238 my $i_last_end = $iprev_to_go[$i_begin];
15239 my $tok_beg = $tokens_to_go[$i_begin];
15240 my $type_beg = $types_to_go[$i_begin];
15243 # check for leading alignment of certain tokens
15245 $tok_beg eq $next_nonblank_token
15246 && $is_chain_operator{$tok_beg}
15247 && ( $type_beg eq 'k'
15248 || $type_beg eq $tok_beg )
15249 && $nesting_depth_to_go[$i_begin] >=
15250 $nesting_depth_to_go[$i_next_nonblank]
15253 || ( $tokens_to_go[$i_last_end] eq $token
15254 && $is_chain_operator{$token}
15255 && ( $type eq 'k' || $type eq $token )
15256 && $nesting_depth_to_go[$i_last_end] >=
15257 $nesting_depth_to_go[$i_test] )
15260 $leading_alignment_token = $next_nonblank_token;
15261 $leading_alignment_type = $next_nonblank_type;
15266 my $too_long = ( $i_test >= $imax );
15267 if ( !$too_long ) {
15270 $summed_lengths_to_go[ $i_test + 2 ] -
15272 $too_long = $next_length > $maximum_line_length;
15274 # To prevent blinkers we will avoid leaving a token exactly at
15275 # the line length limit unless it is the last token or one of
15276 # several "good" types.
15278 # The following code was a blinker with -pbp before this
15280 ## $last_nonblank_token eq '('
15281 ## && $is_indirect_object_taker{ $paren_type
15282 ## [$paren_depth] }
15283 # The issue causing the problem is that if the
15284 # term [$paren_depth] gets broken across a line then
15285 # the whitespace routine doesn't see both opening and closing
15286 # brackets and will format like '[ $paren_depth ]'. This
15287 # leads to an oscillation in length depending if we break
15288 # before the closing bracket or not.
15290 && $i_test + 1 < $imax
15291 && $next_nonblank_type ne ','
15292 && !$is_closing_type{$next_nonblank_type} )
15294 $too_long = $next_length >= $maximum_line_length;
15295 DEBUG_BREAKPOINTS && do {
15296 $Msg .= " :too_long=$too_long" if ($too_long);
15301 DEBUG_BREAKPOINTS && do {
15303 my $rtok = $next_nonblank_token ? $next_nonblank_token : "";
15304 my $i_testp2 = $i_test + 2;
15305 if ( $i_testp2 > $max_index_to_go + 1 ) {
15306 $i_testp2 = $max_index_to_go + 1;
15308 if ( length($ltok) > 6 ) { $ltok = substr( $ltok, 0, 8 ) }
15309 if ( length($rtok) > 6 ) { $rtok = substr( $rtok, 0, 8 ) }
15311 "BREAK: i=$i_test imax=$imax $types_to_go[$i_test] $next_nonblank_type sp=($leading_spaces) lnext= $summed_lengths_to_go[$i_testp2] 2long=$too_long str=$strength $ltok $rtok\n";
15314 # allow one extra terminal token after exceeding line length
15315 # if it would strand this token.
15316 if ( $rOpts_fuzzy_line_length
15318 && $i_lowest == $i_test
15319 && $token_lengths_to_go[$i_test] > 1
15320 && ( $next_nonblank_type eq ';' || $next_nonblank_type eq ',' )
15324 DEBUG_BREAKPOINTS && do {
15325 $Msg .= " :do_not_strand next='$next_nonblank_type'";
15329 # we are done if...
15332 # ... no more space and we have a break
15333 $too_long && $i_lowest >= 0
15335 # ... or no more tokens
15336 || $i_test == $imax
15339 DEBUG_BREAKPOINTS && do {
15341 " :Done-too_long=$too_long or i_lowest=$i_lowest or $i_test==imax";
15347 #-------------------------------------------------------
15348 # END of inner loop to find the best next breakpoint
15349 # Now decide exactly where to put the breakpoint
15350 #-------------------------------------------------------
15352 # it's always ok to break at imax if no other break was found
15353 if ( $i_lowest < 0 ) { $i_lowest = $imax }
15355 # semi-final index calculation
15356 my $i_next_nonblank = $inext_to_go[$i_lowest];
15357 my $next_nonblank_type = $types_to_go[$i_next_nonblank];
15358 my $next_nonblank_token = $tokens_to_go[$i_next_nonblank];
15360 #-------------------------------------------------------
15361 # ?/: rule 1 : if a break here will separate a '?' on this
15362 # line from its closing ':', then break at the '?' instead.
15363 #-------------------------------------------------------
15364 foreach my $i ( $i_begin + 1 .. $i_lowest - 1 ) {
15365 next unless ( $tokens_to_go[$i] eq '?' );
15367 # do not break if probable sequence of ?/: statements
15368 next if ($is_colon_chain);
15370 # do not break if statement is broken by side comment
15372 if ( $tokens_to_go[$max_index_to_go] eq '#'
15373 && terminal_type_i( 0, $max_index_to_go ) !~ /^[\;\}]$/ );
15375 # no break needed if matching : is also on the line
15377 if ( $mate_index_to_go[$i] >= 0
15378 && $mate_index_to_go[$i] <= $i_next_nonblank );
15381 if ( $want_break_before{'?'} ) { $i_lowest-- }
15385 #-------------------------------------------------------
15386 # END of inner loop to find the best next breakpoint:
15387 # Break the line after the token with index i=$i_lowest
15388 #-------------------------------------------------------
15390 # final index calculation
15391 $i_next_nonblank = $inext_to_go[$i_lowest];
15392 $next_nonblank_type = $types_to_go[$i_next_nonblank];
15393 $next_nonblank_token = $tokens_to_go[$i_next_nonblank];
15397 "BREAK: best is i = $i_lowest strength = $lowest_strength;\nReason>> $Msg\n";
15400 #-------------------------------------------------------
15401 # ?/: rule 2 : if we break at a '?', then break at its ':'
15403 # Note: this rule is also in sub scan_list to handle a break
15404 # at the start and end of a line (in case breaks are dictated
15405 # by side comments).
15406 #-------------------------------------------------------
15407 if ( $next_nonblank_type eq '?' ) {
15408 $self->set_closing_breakpoint($i_next_nonblank);
15410 elsif ( $types_to_go[$i_lowest] eq '?' ) {
15411 $self->set_closing_breakpoint($i_lowest);
15414 #-------------------------------------------------------
15415 # ?/: rule 3 : if we break at a ':' then we save
15416 # its location for further work below. We may need to go
15417 # back and break at its '?'.
15418 #-------------------------------------------------------
15419 if ( $next_nonblank_type eq ':' ) {
15420 push @i_colon_breaks, $i_next_nonblank;
15422 elsif ( $types_to_go[$i_lowest] eq ':' ) {
15423 push @i_colon_breaks, $i_lowest;
15426 # here we should set breaks for all '?'/':' pairs which are
15427 # separated by this line
15431 # save this line segment, after trimming blanks at the ends
15433 ( $types_to_go[$i_begin] eq 'b' ) ? $i_begin + 1 : $i_begin );
15435 ( $types_to_go[$i_lowest] eq 'b' ) ? $i_lowest - 1 : $i_lowest );
15437 # set a forced breakpoint at a container opening, if necessary, to
15438 # signal a break at a closing container. Excepting '(' for now.
15441 $tokens_to_go[$i_lowest] eq '{'
15442 || $tokens_to_go[$i_lowest] eq '['
15444 && !$forced_breakpoint_to_go[$i_lowest]
15447 $self->set_closing_breakpoint($i_lowest);
15450 # get ready to go again
15451 $i_begin = $i_lowest + 1;
15452 $last_break_strength = $lowest_strength;
15453 $i_last_break = $i_lowest;
15454 $leading_alignment_token = "";
15455 $leading_alignment_type = "";
15456 $lowest_next_token = '';
15457 $lowest_next_type = 'b';
15459 if ( ( $i_begin <= $imax ) && ( $types_to_go[$i_begin] eq 'b' ) ) {
15463 # update indentation size
15464 if ( $i_begin <= $imax ) {
15465 $leading_spaces = leading_spaces_to_go($i_begin);
15468 "updating leading spaces to be $leading_spaces at i=$i_begin\n";
15472 #-------------------------------------------------------
15473 # END of main loop to set continuation breakpoints
15474 # Now go back and make any necessary corrections
15475 #-------------------------------------------------------
15477 #-------------------------------------------------------
15478 # ?/: rule 4 -- if we broke at a ':', then break at
15479 # corresponding '?' unless this is a chain of ?: expressions
15480 #-------------------------------------------------------
15481 if (@i_colon_breaks) {
15483 # using a simple method for deciding if we are in a ?/: chain --
15484 # this is a chain if it has multiple ?/: pairs all in order;
15486 # Note that if line starts in a ':' we count that above as a break
15487 my $is_chain = ( $colons_in_order && @i_colon_breaks > 1 );
15489 unless ($is_chain) {
15490 my @insert_list = ();
15491 foreach (@i_colon_breaks) {
15492 my $i_question = $mate_index_to_go[$_];
15493 if ( $i_question >= 0 ) {
15494 if ( $want_break_before{'?'} ) {
15495 $i_question = $iprev_to_go[$i_question];
15498 if ( $i_question >= 0 ) {
15499 push @insert_list, $i_question;
15502 $self->insert_additional_breaks( \@insert_list, \@i_first,
15507 return ( \@i_first, \@i_last );
15510 ###########################################
15511 # CODE SECTION 11: Code to break long lists
15512 ###########################################
15514 { ## begin closure scan_list
15516 # These routines and variables are involved in finding good
15517 # places to break long lists.
15520 $block_type, $current_depth,
15522 $i_last_nonblank_token, $last_colon_sequence_number,
15523 $last_nonblank_token, $last_nonblank_type,
15524 $last_nonblank_block_type, $last_old_breakpoint_count,
15525 $minimum_depth, $next_nonblank_block_type,
15526 $next_nonblank_token, $next_nonblank_type,
15527 $old_breakpoint_count, $starting_breakpoint_count,
15528 $starting_depth, $token,
15529 $type, $type_sequence,
15533 @breakpoint_stack, @breakpoint_undo_stack,
15534 @comma_index, @container_type,
15535 @identifier_count_stack, @index_before_arrow,
15536 @interrupted_list, @item_count_stack,
15537 @last_comma_index, @last_dot_index,
15538 @last_nonblank_type, @old_breakpoint_count_stack,
15539 @opening_structure_index_stack, @rfor_semicolon_list,
15540 @has_old_logical_breakpoints, @rand_or_list,
15541 @i_equals, @override_cab3,
15542 @type_sequence_stack,
15545 # these arrays must retain values between calls
15546 my ( @has_broken_sublist, @dont_align, @want_comma_break );
15549 my $length_tol_boost;
15551 sub initialize_scan_list {
15553 @has_broken_sublist = ();
15554 @want_comma_break = ();
15556 ####################################################
15557 # Set tolerances to prevent formatting instabilities
15558 ####################################################
15560 # Define tolerances to use when checking if closed
15561 # containers will fit on one line. This is necessary to avoid
15562 # formatting instability. The basic tolerance is based on the
15565 # - Always allow for at least one extra space after a closing token so
15566 # that we do not strand a comma or semicolon. (oneline.t).
15568 # - Use an increased line length tolerance when -ci > -i to avoid
15569 # blinking states (case b923 and others).
15571 1 + max( 0, $rOpts_continuation_indentation - $rOpts_indent_columns );
15573 # In addition, it may be necessary to use a few extra tolerance spaces
15574 # when -lp is used and/or when -xci is used. The history of this
15575 # so far is as follows:
15577 # FIX1: At least 3 characters were been found to be required for -lp
15578 # to fixes cases b1059 b1063 b1117.
15580 # FIX2: Further testing showed that we need a total of 3 extra spaces
15581 # when -lp is set for non-lists, and at least 2 spaces when -lp and
15583 # Fixes cases b1063 b1103 b1134 b1135 b1136 b1138 b1140 b1143 b1144
15584 # b1145 b1146 b1147 b1148 b1151 b1152 b1153 b1154 b1156 b1157 b1164
15587 # FIX3: To fix cases b1169 b1170 b1171, an update was made in sub
15588 # 'find_token_starting_list' to go back before an initial blank space.
15589 # This fixed these three cases, and allowed the tolerances to be
15590 # reduced to continue to fix all other known cases of instability.
15591 # This gives the current tolerance formulation (note that
15592 # variable $length_tol_boost is always 0 now):
15594 $length_tol_boost = 0;
15595 if ($rOpts_line_up_parentheses) {
15597 if ( $rOpts->{'extended-continuation-indentation'} ) {
15599 $length_tol_boost = 0; # was 1 for FIX2, 0 for FIX3
15602 $length_tol_boost = 0; # was 3 for FIX2, 0 for FIX3
15606 # The -xci option alone also needs a slightly larger tol for non-lists
15607 elsif ( $rOpts->{'extended-continuation-indentation'} ) {
15608 $length_tol_boost = 0; # was 1 for FIX2, 0 for FIX3
15613 # routine to define essential variables when we go 'up' to
15615 sub check_for_new_minimum_depth {
15617 if ( $depth < $minimum_depth ) {
15619 $minimum_depth = $depth;
15621 # these arrays need not retain values between calls
15622 $breakpoint_stack[$depth] = $starting_breakpoint_count;
15623 $container_type[$depth] = "";
15624 $identifier_count_stack[$depth] = 0;
15625 $index_before_arrow[$depth] = -1;
15626 $interrupted_list[$depth] = 1;
15627 $item_count_stack[$depth] = 0;
15628 $last_nonblank_type[$depth] = "";
15629 $opening_structure_index_stack[$depth] = -1;
15631 $breakpoint_undo_stack[$depth] = undef;
15632 $comma_index[$depth] = undef;
15633 $last_comma_index[$depth] = undef;
15634 $last_dot_index[$depth] = undef;
15635 $old_breakpoint_count_stack[$depth] = undef;
15636 $has_old_logical_breakpoints[$depth] = 0;
15637 $rand_or_list[$depth] = [];
15638 $rfor_semicolon_list[$depth] = [];
15639 $i_equals[$depth] = -1;
15641 # these arrays must retain values between calls
15642 if ( !defined( $has_broken_sublist[$depth] ) ) {
15643 $dont_align[$depth] = 0;
15644 $has_broken_sublist[$depth] = 0;
15645 $want_comma_break[$depth] = 0;
15651 # routine to decide which commas to break at within a container;
15653 # $bp_count = number of comma breakpoints set
15654 # $do_not_break_apart = a flag indicating if container need not
15656 sub set_comma_breakpoints {
15658 my ( $self, $dd ) = @_;
15660 my $do_not_break_apart = 0;
15663 if ( $item_count_stack[$dd] ) {
15665 # handle commas not in containers...
15666 if ( $dont_align[$dd] ) {
15667 $self->do_uncontained_comma_breaks($dd);
15670 # handle commas within containers...
15672 my $fbc = get_forced_breakpoint_count();
15674 # always open comma lists not preceded by keywords,
15675 # barewords, identifiers (that is, anything that doesn't
15676 # look like a function call)
15677 my $must_break_open = $last_nonblank_type[$dd] !~ /^[kwiU]$/;
15679 $self->set_comma_breakpoints_do(
15682 i_opening_paren => $opening_structure_index_stack[$dd],
15683 i_closing_paren => $i,
15684 item_count => $item_count_stack[$dd],
15685 identifier_count => $identifier_count_stack[$dd],
15686 rcomma_index => $comma_index[$dd],
15687 next_nonblank_type => $next_nonblank_type,
15688 list_type => $container_type[$dd],
15689 interrupted => $interrupted_list[$dd],
15690 rdo_not_break_apart => \$do_not_break_apart,
15691 must_break_open => $must_break_open,
15692 has_broken_sublist => $has_broken_sublist[$dd],
15695 $bp_count = get_forced_breakpoint_count() - $fbc;
15696 $do_not_break_apart = 0 if $must_break_open;
15699 return ( $bp_count, $do_not_break_apart );
15702 # These types are excluded at breakpoints to prevent blinking
15703 my %is_uncontained_comma_break_excluded_type;
15706 my @q = qw< L { ( [ ? : + - >;
15707 @is_uncontained_comma_break_excluded_type{@q} = (1) x scalar(@q);
15710 sub do_uncontained_comma_breaks {
15712 # Handle commas not in containers...
15713 # This is a catch-all routine for commas that we
15714 # don't know what to do with because the don't fall
15715 # within containers. We will bias the bond strength
15716 # to break at commas which ended lines in the input
15717 # file. This usually works better than just trying
15718 # to put as many items on a line as possible. A
15719 # downside is that if the input file is garbage it
15720 # won't work very well. However, the user can always
15721 # prevent following the old breakpoints with the
15723 my ( $self, $dd ) = @_;
15725 my $old_comma_break_count = 0;
15726 foreach my $ii ( @{ $comma_index[$dd] } ) {
15727 if ( $old_breakpoint_to_go[$ii] ) {
15728 $old_comma_break_count++;
15729 $bond_strength_to_go[$ii] = $bias;
15731 # reduce bias magnitude to force breaks in order
15736 # Also put a break before the first comma if
15737 # (1) there was a break there in the input, and
15738 # (2) there was exactly one old break before the first comma break
15739 # (3) OLD: there are multiple old comma breaks
15740 # (3) NEW: there are one or more old comma breaks (see return example)
15741 # (4) the first comma is at the starting level ...
15742 # ... fixes cases b064 b065 b068 b210 b747
15744 # For example, we will follow the user and break after
15745 # 'print' in this snippet:
15747 # "conformability (Not the same dimension)\n",
15748 # "\t", $have, " is ", text_unit($hu), "\n",
15749 # "\t", $want, " is ", text_unit($wu), "\n",
15752 # Another example, just one comma, where we will break after
15755 # $x * cos($a) - $y * sin($a),
15756 # $x * sin($a) + $y * cos($a);
15758 # Breaking a print statement:
15760 # ( $? & 127 ) ? " (SIG#" . ( $? & 127 ) . ")" : "",
15761 # ( $? & 128 ) ? " -- core dumped" : "", "\n";
15763 # But we will not force a break after the opening paren here
15764 # (causes a blinker):
15765 # $heap->{stream}->set_output_filter(
15766 # poe::filter::reference->new('myotherfreezer') ),
15769 my $i_first_comma = $comma_index[$dd]->[0];
15770 my $level_comma = $levels_to_go[$i_first_comma];
15771 if ( $old_breakpoint_to_go[$i_first_comma]
15772 && $level_comma == $levels_to_go[0] )
15776 for ( my $ii = $i_first_comma - 1 ; $ii >= 0 ; $ii -= 1 ) {
15777 if ( $old_breakpoint_to_go[$ii] ) {
15779 last if ( $obp_count > 1 );
15781 if ( $levels_to_go[$ii] == $level_comma );
15785 # Changed rule from multiple old commas to just one here:
15786 if ( $ibreak >= 0 && $obp_count == 1 && $old_comma_break_count > 0 )
15788 my $ibreakm = $ibreak;
15789 $ibreakm-- if ( $types_to_go[$ibreakm] eq 'b' );
15790 if ( $ibreakm >= 0 ) {
15792 # In order to avoid blinkers we have to be fairly
15795 # Rule 1: Do not to break before an opening token
15796 # Rule 2: avoid breaking at ternary operators
15797 # (see b931, which is similar to the above print example)
15798 # Rule 3: Do not break at chain operators to fix case b1119
15799 # - The previous test was '$typem !~ /^[\(\{\[L\?\:]$/'
15801 # Be sure to test any changes to these rules against runs
15802 # with -l=0 such as the 'bbvt' test (perltidyrc_colin)
15805 my $typem = $types_to_go[$ibreakm];
15806 if ( !$is_uncontained_comma_break_excluded_type{$typem} ) {
15807 $self->set_forced_breakpoint($ibreak);
15815 my %is_logical_container;
15819 my @q = qw# if elsif unless while and or err not && | || ? : ! #;
15820 @is_logical_container{@q} = (1) x scalar(@q);
15822 # This filter will allow most tokens to skip past a section of code
15823 %quick_filter = %is_assignment;
15824 @q = qw# => . ; < > ~ #;
15826 @quick_filter{@q} = (1) x scalar(@q);
15829 sub set_for_semicolon_breakpoints {
15830 my ( $self, $dd ) = @_;
15831 foreach ( @{ $rfor_semicolon_list[$dd] } ) {
15832 $self->set_forced_breakpoint($_);
15837 sub set_logical_breakpoints {
15838 my ( $self, $dd ) = @_;
15840 $item_count_stack[$dd] == 0
15841 && $is_logical_container{ $container_type[$dd] }
15843 || $has_old_logical_breakpoints[$dd]
15847 # Look for breaks in this order:
15850 foreach my $i ( 0 .. 3 ) {
15851 if ( $rand_or_list[$dd][$i] ) {
15852 foreach ( @{ $rand_or_list[$dd][$i] } ) {
15853 $self->set_forced_breakpoint($_);
15856 # break at any 'if' and 'unless' too
15857 foreach ( @{ $rand_or_list[$dd][4] } ) {
15858 $self->set_forced_breakpoint($_);
15860 $rand_or_list[$dd] = [];
15868 sub is_unbreakable_container {
15870 # never break a container of one of these types
15871 # because bad things can happen (map1.t)
15873 return $is_sort_map_grep{ $container_type[$dd] };
15878 my ( $self, $is_long_line ) = @_;
15880 # This routine is responsible for setting line breaks for all lists,
15881 # so that hierarchical structure can be displayed and so that list
15882 # items can be vertically aligned. The output of this routine is
15883 # stored in the array @forced_breakpoint_to_go, which is used to set
15884 # final breakpoints.
15886 # It is called once per batch if the batch is a list.
15887 my $rLL = $self->[_rLL_];
15888 my $ris_list_by_seqno = $self->[_ris_list_by_seqno_];
15889 my $ris_broken_container = $self->[_ris_broken_container_];
15890 my $rbreak_before_container_by_seqno =
15891 $self->[_rbreak_before_container_by_seqno_];
15893 $starting_depth = $nesting_depth_to_go[0];
15896 $current_depth = $starting_depth;
15898 $last_colon_sequence_number = -1;
15899 $last_nonblank_token = ';';
15900 $last_nonblank_type = ';';
15901 $last_nonblank_block_type = ' ';
15902 $last_old_breakpoint_count = 0;
15903 $minimum_depth = $current_depth + 1; # forces update in check below
15904 $old_breakpoint_count = 0;
15905 $starting_breakpoint_count = get_forced_breakpoint_count();
15908 $type_sequence = '';
15910 my $total_depth_variation = 0;
15911 my $i_old_assignment_break;
15912 my $depth_last = $starting_depth;
15914 check_for_new_minimum_depth($current_depth);
15916 my $want_previous_breakpoint = -1;
15918 my $saw_good_breakpoint;
15919 my $i_line_end = -1;
15920 my $i_line_start = -1;
15922 # loop over all tokens in this batch
15923 while ( ++$i <= $max_index_to_go ) {
15924 if ( $type ne 'b' ) {
15925 $i_last_nonblank_token = $i - 1;
15926 $last_nonblank_type = $type;
15927 $last_nonblank_token = $token;
15928 $last_nonblank_block_type = $block_type;
15929 } ## end if ( $type ne 'b' )
15930 $type = $types_to_go[$i];
15931 $block_type = $block_type_to_go[$i];
15932 $token = $tokens_to_go[$i];
15933 $type_sequence = $type_sequence_to_go[$i];
15934 my $next_type = $types_to_go[ $i + 1 ];
15935 my $next_token = $tokens_to_go[ $i + 1 ];
15936 my $i_next_nonblank = ( ( $next_type eq 'b' ) ? $i + 2 : $i + 1 );
15937 $next_nonblank_type = $types_to_go[$i_next_nonblank];
15938 $next_nonblank_token = $tokens_to_go[$i_next_nonblank];
15939 $next_nonblank_block_type = $block_type_to_go[$i_next_nonblank];
15941 # set break if flag was set
15942 if ( $want_previous_breakpoint >= 0 ) {
15943 $self->set_forced_breakpoint($want_previous_breakpoint);
15944 $want_previous_breakpoint = -1;
15947 $last_old_breakpoint_count = $old_breakpoint_count;
15949 # Fixed for case b1097 to not consider old breaks at highly
15950 # stressed locations, such as types 'L' and 'R'. It might be
15951 # useful to generalize this concept in the future by looking at
15952 # actual bond strengths.
15953 if ( $old_breakpoint_to_go[$i]
15955 && $next_nonblank_type ne 'R' )
15958 $i_line_start = $i_next_nonblank;
15960 $old_breakpoint_count++;
15962 # Break before certain keywords if user broke there and
15963 # this is a 'safe' break point. The idea is to retain
15964 # any preferred breaks for sequential list operations,
15965 # like a schwartzian transform.
15966 if ($rOpts_break_at_old_keyword_breakpoints) {
15968 $next_nonblank_type eq 'k'
15969 && $is_keyword_returning_list{$next_nonblank_token}
15970 && ( $type =~ /^[=\)\]\}Riw]$/
15972 && $is_keyword_returning_list{$token} )
15976 # we actually have to set this break next time through
15977 # the loop because if we are at a closing token (such
15978 # as '}') which forms a one-line block, this break might
15981 # And do not do this at an equals if the user wants
15982 # breaks before an equals (blinker cases b434 b903)
15983 unless ( $type eq '=' && $want_break_before{$type} ) {
15984 $want_previous_breakpoint = $i;
15986 } ## end if ( $next_nonblank_type...)
15987 } ## end if ($rOpts_break_at_old_keyword_breakpoints)
15989 # Break before attributes if user broke there
15990 if ($rOpts_break_at_old_attribute_breakpoints) {
15991 if ( $next_nonblank_type eq 'A' ) {
15992 $want_previous_breakpoint = $i;
15996 # remember an = break as possible good break point
15997 if ( $is_assignment{$type} ) {
15998 $i_old_assignment_break = $i;
16000 elsif ( $is_assignment{$next_nonblank_type} ) {
16001 $i_old_assignment_break = $i_next_nonblank;
16003 } ## end if ( $old_breakpoint_to_go...)
16005 next if ( $type eq 'b' );
16006 $depth = $nesting_depth_to_go[ $i + 1 ];
16008 $total_depth_variation += abs( $depth - $depth_last );
16009 $depth_last = $depth;
16011 # safety check - be sure we always break after a comment
16012 # Shouldn't happen .. an error here probably means that the
16013 # nobreak flag did not get turned off correctly during
16015 if ( $type eq '#' ) {
16016 if ( $i != $max_index_to_go ) {
16018 "Non-fatal program bug: backup logic required to break after a comment\n"
16020 report_definite_bug();
16021 $nobreak_to_go[$i] = 0;
16022 $self->set_forced_breakpoint($i);
16023 } ## end if ( $i != $max_index_to_go)
16024 } ## end if ( $type eq '#' )
16026 # Force breakpoints at certain tokens in long lines.
16027 # Note that such breakpoints will be undone later if these tokens
16028 # are fully contained within parens on a line.
16031 # break before a keyword within a line
16035 # if one of these keywords:
16036 # /^(if|unless|while|until|for)$/
16037 && $is_if_unless_while_until_for{$token}
16039 # but do not break at something like '1 while'
16040 && ( $last_nonblank_type ne 'n' || $i > 2 )
16042 # and let keywords follow a closing 'do' brace
16043 && $last_nonblank_block_type ne 'do'
16048 # or container is broken (by side-comment, etc)
16049 || ( $next_nonblank_token eq '('
16050 && $mate_index_to_go[$i_next_nonblank] < $i )
16054 $self->set_forced_breakpoint( $i - 1 );
16055 } ## end if ( $type eq 'k' && $i...)
16057 # remember locations of '||' and '&&' for possible breaks if we
16058 # decide this is a long logical expression.
16059 if ( $type eq '||' ) {
16060 push @{ $rand_or_list[$depth][2] }, $i;
16061 ++$has_old_logical_breakpoints[$depth]
16062 if ( ( $i == $i_line_start || $i == $i_line_end )
16063 && $rOpts_break_at_old_logical_breakpoints );
16064 } ## end elsif ( $type eq '||' )
16065 elsif ( $type eq '&&' ) {
16066 push @{ $rand_or_list[$depth][3] }, $i;
16067 ++$has_old_logical_breakpoints[$depth]
16068 if ( ( $i == $i_line_start || $i == $i_line_end )
16069 && $rOpts_break_at_old_logical_breakpoints );
16070 } ## end elsif ( $type eq '&&' )
16071 elsif ( $type eq 'f' ) {
16072 push @{ $rfor_semicolon_list[$depth] }, $i;
16074 elsif ( $type eq 'k' ) {
16075 if ( $token eq 'and' ) {
16076 push @{ $rand_or_list[$depth][1] }, $i;
16077 ++$has_old_logical_breakpoints[$depth]
16078 if ( ( $i == $i_line_start || $i == $i_line_end )
16079 && $rOpts_break_at_old_logical_breakpoints );
16080 } ## end if ( $token eq 'and' )
16082 # break immediately at 'or's which are probably not in a logical
16083 # block -- but we will break in logical breaks below so that
16084 # they do not add to the forced_breakpoint_count
16085 elsif ( $token eq 'or' ) {
16086 push @{ $rand_or_list[$depth][0] }, $i;
16087 ++$has_old_logical_breakpoints[$depth]
16088 if ( ( $i == $i_line_start || $i == $i_line_end )
16089 && $rOpts_break_at_old_logical_breakpoints );
16090 if ( $is_logical_container{ $container_type[$depth] } ) {
16093 if ($is_long_line) { $self->set_forced_breakpoint($i) }
16094 elsif ( ( $i == $i_line_start || $i == $i_line_end )
16095 && $rOpts_break_at_old_logical_breakpoints )
16097 $saw_good_breakpoint = 1;
16099 } ## end else [ if ( $is_logical_container...)]
16100 } ## end elsif ( $token eq 'or' )
16101 elsif ( $token eq 'if' || $token eq 'unless' ) {
16102 push @{ $rand_or_list[$depth][4] }, $i;
16103 if ( ( $i == $i_line_start || $i == $i_line_end )
16104 && $rOpts_break_at_old_logical_breakpoints )
16106 $self->set_forced_breakpoint($i);
16108 } ## end elsif ( $token eq 'if' ||...)
16109 } ## end elsif ( $type eq 'k' )
16110 elsif ( $is_assignment{$type} ) {
16111 $i_equals[$depth] = $i;
16114 if ($type_sequence) {
16116 # handle any postponed closing breakpoints
16117 if ( $is_closing_sequence_token{$token} ) {
16118 if ( $type eq ':' ) {
16119 $last_colon_sequence_number = $type_sequence;
16121 # retain break at a ':' line break
16122 if ( ( $i == $i_line_start || $i == $i_line_end )
16123 && $rOpts_break_at_old_ternary_breakpoints )
16126 $self->set_forced_breakpoint($i);
16128 # break at previous '='
16129 if ( $i_equals[$depth] > 0 ) {
16130 $self->set_forced_breakpoint(
16131 $i_equals[$depth] );
16132 $i_equals[$depth] = -1;
16134 } ## end if ( ( $i == $i_line_start...))
16135 } ## end if ( $type eq ':' )
16136 if ( has_postponed_breakpoint($type_sequence) ) {
16137 my $inc = ( $type eq ':' ) ? 0 : 1;
16138 $self->set_forced_breakpoint( $i - $inc );
16140 } ## end if ( $is_closing_sequence_token{$token} )
16142 # set breaks at ?/: if they will get separated (and are
16143 # not a ?/: chain), or if the '?' is at the end of the
16145 elsif ( $token eq '?' ) {
16146 my $i_colon = $mate_index_to_go[$i];
16148 $i_colon <= 0 # the ':' is not in this batch
16149 || $i == 0 # this '?' is the first token of the line
16151 $max_index_to_go # or this '?' is the last token
16155 # don't break at a '?' if preceded by ':' on
16156 # this line of previous ?/: pair on this line.
16157 # This is an attempt to preserve a chain of ?/:
16158 # expressions (elsif2.t). And don't break if
16159 # this has a side comment.
16160 $self->set_forced_breakpoint($i)
16162 $type_sequence == (
16163 $last_colon_sequence_number +
16164 TYPE_SEQUENCE_INCREMENT
16166 || $tokens_to_go[$max_index_to_go] eq '#'
16168 $self->set_closing_breakpoint($i);
16169 } ## end if ( $i_colon <= 0 ||...)
16170 } ## end elsif ( $token eq '?' )
16171 } ## end if ($type_sequence)
16173 #print "LISTX sees: i=$i type=$type tok=$token block=$block_type depth=$depth\n";
16175 #------------------------------------------------------------
16176 # Handle Increasing Depth..
16178 # prepare for a new list when depth increases
16179 # token $i is a '(','{', or '['
16180 #------------------------------------------------------------
16181 if ( $depth > $current_depth ) {
16183 $type_sequence_stack[$depth] = $type_sequence;
16184 $override_cab3[$depth] =
16185 $rOpts_comma_arrow_breakpoints == 3
16187 && $self->[_roverride_cab3_]->{$type_sequence};
16188 $breakpoint_stack[$depth] = get_forced_breakpoint_count();
16189 $breakpoint_undo_stack[$depth] =
16190 get_forced_breakpoint_undo_count();
16191 $has_broken_sublist[$depth] = 0;
16192 $identifier_count_stack[$depth] = 0;
16193 $index_before_arrow[$depth] = -1;
16194 $interrupted_list[$depth] = 0;
16195 $item_count_stack[$depth] = 0;
16196 $last_comma_index[$depth] = undef;
16197 $last_dot_index[$depth] = undef;
16198 $last_nonblank_type[$depth] = $last_nonblank_type;
16199 $old_breakpoint_count_stack[$depth] = $old_breakpoint_count;
16200 $opening_structure_index_stack[$depth] = $i;
16201 $rand_or_list[$depth] = [];
16202 $rfor_semicolon_list[$depth] = [];
16203 $i_equals[$depth] = -1;
16204 $want_comma_break[$depth] = 0;
16205 $container_type[$depth] =
16208 $is_container_label_type{$last_nonblank_type}
16209 ? $last_nonblank_token
16211 $has_old_logical_breakpoints[$depth] = 0;
16213 # if line ends here then signal closing token to break
16214 if ( $next_nonblank_type eq 'b' || $next_nonblank_type eq '#' )
16216 $self->set_closing_breakpoint($i);
16219 # Not all lists of values should be vertically aligned..
16220 $dont_align[$depth] =
16222 # code BLOCKS are handled at a higher level
16223 ( $block_type ne "" )
16225 # certain paren lists
16226 || ( $type eq '(' ) && (
16228 # it does not usually look good to align a list of
16229 # identifiers in a parameter list, as in:
16230 # my($var1, $var2, ...)
16231 # (This test should probably be refined, for now I'm just
16232 # testing for any keyword)
16233 ( $last_nonblank_type eq 'k' )
16235 # a trailing '(' usually indicates a non-list
16236 || ( $next_nonblank_type eq '(' )
16239 # patch to outdent opening brace of long if/for/..
16240 # statements (like this one). See similar coding in
16241 # set_continuation breaks. We have also catch it here for
16242 # short line fragments which otherwise will not go through
16243 # set_continuation_breaks.
16247 # if we have the ')' but not its '(' in this batch..
16248 && ( $last_nonblank_token eq ')' )
16249 && $mate_index_to_go[$i_last_nonblank_token] < 0
16251 # and user wants brace to left
16252 && !$rOpts->{'opening-brace-always-on-right'}
16254 && ( $type eq '{' ) # should be true
16255 && ( $token eq '{' ) # should be true
16258 $self->set_forced_breakpoint( $i - 1 );
16259 } ## end if ( $block_type && ( ...))
16260 } ## end if ( $depth > $current_depth)
16262 #------------------------------------------------------------
16263 # Handle Decreasing Depth..
16265 # finish off any old list when depth decreases
16266 # token $i is a ')','}', or ']'
16267 #------------------------------------------------------------
16268 elsif ( $depth < $current_depth ) {
16270 check_for_new_minimum_depth($depth);
16272 # force all outer logical containers to break after we see on
16274 $has_old_logical_breakpoints[$depth] ||=
16275 $has_old_logical_breakpoints[$current_depth];
16277 # Patch to break between ') {' if the paren list is broken.
16278 # There is similar logic in set_continuation_breaks for
16279 # non-broken lists.
16281 && $next_nonblank_block_type
16282 && $interrupted_list[$current_depth]
16283 && $next_nonblank_type eq '{'
16284 && !$rOpts->{'opening-brace-always-on-right'} )
16286 $self->set_forced_breakpoint($i);
16287 } ## end if ( $token eq ')' && ...
16289 #print "LISTY sees: i=$i type=$type tok=$token block=$block_type depth=$depth next=$next_nonblank_type next_block=$next_nonblank_block_type inter=$interrupted_list[$current_depth]\n";
16291 # set breaks at commas if necessary
16292 my ( $bp_count, $do_not_break_apart ) =
16293 $self->set_comma_breakpoints($current_depth);
16295 my $i_opening = $opening_structure_index_stack[$current_depth];
16296 my $saw_opening_structure = ( $i_opening >= 0 );
16298 # this term is long if we had to break at interior commas..
16299 my $is_long_term = $bp_count > 0;
16301 # If this is a short container with one or more comma arrows,
16302 # then we will mark it as a long term to open it if requested.
16303 # $rOpts_comma_arrow_breakpoints =
16304 # 0 - open only if comma precedes closing brace
16305 # 1 - stable: except for one line blocks
16306 # 2 - try to form 1 line blocks
16308 # 4 - always open up if vt=0
16309 # 5 - stable: even for one line blocks if vt=0
16311 # PATCH: Modify the -cab flag if we are not processing a list:
16312 # We only want the -cab flag to apply to list containers, so
16313 # for non-lists we use the default and stable -cab=5 value.
16314 # Fixes case b939a.
16315 my $cab_flag = $rOpts_comma_arrow_breakpoints;
16316 if ( $type_sequence && !$ris_list_by_seqno->{$type_sequence} ) {
16320 if ( !$is_long_term
16321 && $saw_opening_structure
16322 && $is_opening_token{ $tokens_to_go[$i_opening] }
16323 && $index_before_arrow[ $depth + 1 ] > 0
16324 && !$opening_vertical_tightness{ $tokens_to_go[$i_opening] }
16329 || $cab_flag == 0 && $last_nonblank_token eq ','
16330 || $cab_flag == 5 && $old_breakpoint_to_go[$i_opening];
16331 } ## end if ( !$is_long_term &&...)
16333 # mark term as long if the length between opening and closing
16334 # parens exceeds allowed line length
16335 if ( !$is_long_term && $saw_opening_structure ) {
16337 my $i_opening_minus =
16338 $self->find_token_starting_list($i_opening);
16341 $self->excess_line_length( $i_opening_minus, $i );
16345 && !$ris_list_by_seqno->{$type_sequence}
16346 ? $length_tol + $length_tol_boost
16349 # Patch to avoid blinking with -bbxi=2 and -cab=2
16350 # in which variations in -ci cause unstable formatting
16351 # in edge cases. We just always add one ci level so that
16352 # the formatting is independent of the -BBX results.
16353 # Fixes cases b1137 b1149 b1150 b1155 b1158 b1159 b1160
16354 # b1161 b1166 b1167 b1168
16355 if ( !$ci_levels_to_go[$i_opening]
16356 && $rbreak_before_container_by_seqno->{$type_sequence} )
16358 $tol += $rOpts->{'continuation-indentation'};
16361 $is_long_term = $excess + $tol > 0;
16363 } ## end if ( !$is_long_term &&...)
16365 # We've set breaks after all comma-arrows. Now we have to
16366 # undo them if this can be a one-line block
16367 # (the only breakpoints set will be due to comma-arrows)
16371 # user doesn't require breaking after all comma-arrows
16372 ( $cab_flag != 0 ) && ( $cab_flag != 4 )
16374 # and if the opening structure is in this batch
16375 && $saw_opening_structure
16377 # and either on the same old line
16379 $old_breakpoint_count_stack[$current_depth] ==
16380 $last_old_breakpoint_count
16382 # or user wants to form long blocks with arrows
16385 # if -cab=3 is overridden then use -cab=2 behavior
16386 || $cab_flag == 3 && $override_cab3[$current_depth]
16389 # and we made breakpoints between the opening and closing
16390 && ( $breakpoint_undo_stack[$current_depth] <
16391 get_forced_breakpoint_undo_count() )
16393 # and this block is short enough to fit on one line
16394 # Note: use < because need 1 more space for possible comma
16399 $self->undo_forced_breakpoint_stack(
16400 $breakpoint_undo_stack[$current_depth] );
16401 } ## end if ( ( $rOpts_comma_arrow_breakpoints...))
16403 # now see if we have any comma breakpoints left
16404 my $has_comma_breakpoints =
16405 ( $breakpoint_stack[$current_depth] !=
16406 get_forced_breakpoint_count() );
16408 # update broken-sublist flag of the outer container
16409 $has_broken_sublist[$depth] =
16410 $has_broken_sublist[$depth]
16411 || $has_broken_sublist[$current_depth]
16413 || $has_comma_breakpoints;
16415 # Having come to the closing ')', '}', or ']', now we have to decide if we
16416 # should 'open up' the structure by placing breaks at the opening and
16417 # closing containers. This is a tricky decision. Here are some of the
16418 # basic considerations:
16420 # -If this is a BLOCK container, then any breakpoints will have already
16421 # been set (and according to user preferences), so we need do nothing here.
16423 # -If we have a comma-separated list for which we can align the list items,
16424 # then we need to do so because otherwise the vertical aligner cannot
16425 # currently do the alignment.
16427 # -If this container does itself contain a container which has been broken
16428 # open, then it should be broken open to properly show the structure.
16430 # -If there is nothing to align, and no other reason to break apart,
16431 # then do not do it.
16433 # We will not break open the parens of a long but 'simple' logical expression.
16436 # This is an example of a simple logical expression and its formatting:
16438 # if ( $bigwasteofspace1 && $bigwasteofspace2
16439 # || $bigwasteofspace3 && $bigwasteofspace4 )
16441 # Most people would prefer this than the 'spacey' version:
16444 # $bigwasteofspace1 && $bigwasteofspace2
16445 # || $bigwasteofspace3 && $bigwasteofspace4
16448 # To illustrate the rules for breaking logical expressions, consider:
16452 # and ( exists $ids_excl_uc{$id_uc}
16453 # or grep $id_uc =~ /$_/, @ids_excl_uc ))
16455 # This is on the verge of being difficult to read. The current default is to
16456 # open it up like this:
16461 # and ( exists $ids_excl_uc{$id_uc}
16462 # or grep $id_uc =~ /$_/, @ids_excl_uc )
16465 # This is a compromise which tries to avoid being too dense and to spacey.
16466 # A more spaced version would be:
16472 # exists $ids_excl_uc{$id_uc}
16473 # or grep $id_uc =~ /$_/, @ids_excl_uc
16477 # Some people might prefer the spacey version -- an option could be added. The
16478 # innermost expression contains a long block '( exists $ids_... ')'.
16480 # Here is how the logic goes: We will force a break at the 'or' that the
16481 # innermost expression contains, but we will not break apart its opening and
16482 # closing containers because (1) it contains no multi-line sub-containers itself,
16483 # and (2) there is no alignment to be gained by breaking it open like this
16486 # exists $ids_excl_uc{$id_uc}
16487 # or grep $id_uc =~ /$_/, @ids_excl_uc
16490 # (although this looks perfectly ok and might be good for long expressions). The
16491 # outer 'if' container, though, contains a broken sub-container, so it will be
16492 # broken open to avoid too much density. Also, since it contains no 'or's, there
16493 # will be a forced break at its 'and'.
16495 # set some flags telling something about this container..
16496 my $is_simple_logical_expression = 0;
16497 if ( $item_count_stack[$current_depth] == 0
16498 && $saw_opening_structure
16499 && $tokens_to_go[$i_opening] eq '('
16500 && $is_logical_container{ $container_type[$current_depth] }
16504 # This seems to be a simple logical expression with
16505 # no existing breakpoints. Set a flag to prevent
16507 if ( !$has_comma_breakpoints ) {
16508 $is_simple_logical_expression = 1;
16511 # This seems to be a simple logical expression with
16512 # breakpoints (broken sublists, for example). Break
16513 # at all 'or's and '||'s.
16515 $self->set_logical_breakpoints($current_depth);
16517 } ## end if ( $item_count_stack...)
16520 && @{ $rfor_semicolon_list[$current_depth] } )
16522 $self->set_for_semicolon_breakpoints($current_depth);
16524 # open up a long 'for' or 'foreach' container to allow
16525 # leading term alignment unless -lp is used.
16526 $has_comma_breakpoints = 1
16527 unless $rOpts_line_up_parentheses;
16528 } ## end if ( $is_long_term && ...)
16532 # breaks for code BLOCKS are handled at a higher level
16535 # we do not need to break at the top level of an 'if'
16537 && !$is_simple_logical_expression
16539 ## modification to keep ': (' containers vertically tight;
16540 ## but probably better to let user set -vt=1 to avoid
16541 ## inconsistency with other paren types
16542 ## && ($container_type[$current_depth] ne ':')
16544 # otherwise, we require one of these reasons for breaking:
16547 # - this term has forced line breaks
16548 $has_comma_breakpoints
16550 # - the opening container is separated from this batch
16551 # for some reason (comment, blank line, code block)
16552 # - this is a non-paren container spanning multiple lines
16553 || !$saw_opening_structure
16555 # - this is a long block contained in another breakable
16557 || $is_long_term && !$self->is_in_block_by_i($i_opening)
16562 # For -lp option, we must put a breakpoint before
16563 # the token which has been identified as starting
16564 # this indentation level. This is necessary for
16565 # proper alignment.
16566 if ( $rOpts_line_up_parentheses && $saw_opening_structure )
16568 my $item = $leading_spaces_to_go[ $i_opening + 1 ];
16569 if ( $i_opening + 1 < $max_index_to_go
16570 && $types_to_go[ $i_opening + 1 ] eq 'b' )
16572 $item = $leading_spaces_to_go[ $i_opening + 2 ];
16574 if ( defined($item) ) {
16576 my $K_start_2 = $item->get_starting_index_K();
16577 if ( defined($K_start_2) ) {
16578 $i_start_2 = $K_start_2 - $K_to_go[0];
16581 defined($i_start_2)
16583 # we are breaking after an opening brace, paren,
16584 # so don't break before it too
16585 && $i_start_2 ne $i_opening
16587 && $i_start_2 <= $max_index_to_go
16591 # Only break for breakpoints at the same
16592 # indentation level as the opening paren
16593 my $test1 = $nesting_depth_to_go[$i_opening];
16594 my $test2 = $nesting_depth_to_go[$i_start_2];
16595 if ( $test2 == $test1 ) {
16597 # Back up at a blank (fixes case b932)
16598 my $ibr = $i_start_2 - 1;
16600 && $types_to_go[$ibr] eq 'b' )
16605 $self->set_forced_breakpoint($ibr);
16608 } ## end if ( defined($i_start_2...))
16609 } ## end if ( defined($item) )
16610 } ## end if ( $rOpts_line_up_parentheses...)
16612 # break after opening structure.
16613 # note: break before closing structure will be automatic
16614 if ( $minimum_depth <= $current_depth ) {
16616 $self->set_forced_breakpoint($i_opening)
16617 unless ( $do_not_break_apart
16618 || is_unbreakable_container($current_depth) );
16620 # break at ',' of lower depth level before opening token
16621 if ( $last_comma_index[$depth] ) {
16622 $self->set_forced_breakpoint(
16623 $last_comma_index[$depth] );
16626 # break at '.' of lower depth level before opening token
16627 if ( $last_dot_index[$depth] ) {
16628 $self->set_forced_breakpoint(
16629 $last_dot_index[$depth] );
16632 # break before opening structure if preceded by another
16633 # closing structure and a comma. This is normally
16634 # done by the previous closing brace, but not
16635 # if it was a one-line block.
16636 if ( $i_opening > 2 ) {
16638 ( $types_to_go[ $i_opening - 1 ] eq 'b' )
16643 $types_to_go[$i_prev] eq ','
16644 && ( $types_to_go[ $i_prev - 1 ] eq ')'
16645 || $types_to_go[ $i_prev - 1 ] eq '}' )
16648 $self->set_forced_breakpoint($i_prev);
16651 # also break before something like ':(' or '?('
16654 $types_to_go[$i_prev] =~ /^([k\:\?]|&&|\|\|)$/ )
16656 my $token_prev = $tokens_to_go[$i_prev];
16657 if ( $want_break_before{$token_prev} ) {
16658 $self->set_forced_breakpoint($i_prev);
16660 } ## end elsif ( $types_to_go[$i_prev...])
16661 } ## end if ( $i_opening > 2 )
16662 } ## end if ( $minimum_depth <=...)
16664 # break after comma following closing structure
16665 if ( $next_type eq ',' ) {
16666 $self->set_forced_breakpoint( $i + 1 );
16669 # break before an '=' following closing structure
16671 $is_assignment{$next_nonblank_type}
16672 && ( $breakpoint_stack[$current_depth] !=
16673 get_forced_breakpoint_count() )
16676 $self->set_forced_breakpoint($i);
16677 } ## end if ( $is_assignment{$next_nonblank_type...})
16679 # break at any comma before the opening structure Added
16680 # for -lp, but seems to be good in general. It isn't
16681 # obvious how far back to look; the '5' below seems to
16682 # work well and will catch the comma in something like
16683 # push @list, myfunc( $param, $param, ..
16685 my $icomma = $last_comma_index[$depth];
16686 if ( defined($icomma) && ( $i_opening - $icomma ) < 5 ) {
16687 unless ( $forced_breakpoint_to_go[$icomma] ) {
16688 $self->set_forced_breakpoint($icomma);
16691 } # end logic to open up a container
16693 # Break open a logical container open if it was already open
16694 elsif ($is_simple_logical_expression
16695 && $has_old_logical_breakpoints[$current_depth] )
16697 $self->set_logical_breakpoints($current_depth);
16700 # Handle long container which does not get opened up
16701 elsif ($is_long_term) {
16703 # must set fake breakpoint to alert outer containers that
16705 set_fake_breakpoint();
16706 } ## end elsif ($is_long_term)
16708 } ## end elsif ( $depth < $current_depth)
16710 #------------------------------------------------------------
16711 # Handle this token
16712 #------------------------------------------------------------
16714 $current_depth = $depth;
16716 # most token types can skip the rest of this loop
16717 next unless ( $quick_filter{$type} );
16719 # handle comma-arrow
16720 if ( $type eq '=>' ) {
16721 next if ( $last_nonblank_type eq '=>' );
16722 next if $rOpts_break_at_old_comma_breakpoints;
16724 if ( $rOpts_comma_arrow_breakpoints == 3
16725 && !$override_cab3[$depth] );
16726 $want_comma_break[$depth] = 1;
16727 $index_before_arrow[$depth] = $i_last_nonblank_token;
16729 } ## end if ( $type eq '=>' )
16731 elsif ( $type eq '.' ) {
16732 $last_dot_index[$depth] = $i;
16735 # Turn off alignment if we are sure that this is not a list
16736 # environment. To be safe, we will do this if we see certain
16737 # non-list tokens, such as ';', and also the environment is
16738 # not a list. Note that '=' could be in any of the = operators
16739 # (lextest.t). We can't just use the reported environment
16740 # because it can be incorrect in some cases.
16741 elsif ( ( $type =~ /^[\;\<\>\~]$/ || $is_assignment{$type} )
16742 && !$self->is_in_list_by_i($i) )
16744 $dont_align[$depth] = 1;
16745 $want_comma_break[$depth] = 0;
16746 $index_before_arrow[$depth] = -1;
16747 } ## end elsif ( ( $type =~ /^[\;\<\>\~]$/...))
16749 # now just handle any commas
16750 next unless ( $type eq ',' );
16752 $last_dot_index[$depth] = undef;
16753 $last_comma_index[$depth] = $i;
16755 # break here if this comma follows a '=>'
16756 # but not if there is a side comment after the comma
16757 if ( $want_comma_break[$depth] ) {
16759 if ( $next_nonblank_type =~ /^[\)\}\]R]$/ ) {
16760 if ($rOpts_comma_arrow_breakpoints) {
16761 $want_comma_break[$depth] = 0;
16766 $self->set_forced_breakpoint($i)
16767 unless ( $next_nonblank_type eq '#' );
16769 # break before the previous token if it looks safe
16770 # Example of something that we will not try to break before:
16771 # DBI::SQL_SMALLINT() => $ado_consts->{adSmallInt},
16772 # Also we don't want to break at a binary operator (like +):
16776 # $y - $R, -fill => 'black',
16778 my $ibreak = $index_before_arrow[$depth] - 1;
16780 && $tokens_to_go[ $ibreak + 1 ] !~ /^[\)\}\]]$/ )
16782 if ( $tokens_to_go[$ibreak] eq '-' ) { $ibreak-- }
16783 if ( $types_to_go[$ibreak] eq 'b' ) { $ibreak-- }
16784 if ( $types_to_go[$ibreak] =~ /^[,wiZCUG\(\{\[]$/ ) {
16786 # don't break pointer calls, such as the following:
16787 # File::Spec->curdir => 1,
16788 # (This is tokenized as adjacent 'w' tokens)
16789 ##if ( $tokens_to_go[ $ibreak + 1 ] !~ /^->/ ) {
16791 # And don't break before a comma, as in the following:
16792 # ( LONGER_THAN,=> 1,
16793 # EIGHTY_CHARACTERS,=> 2,
16794 # CAUSES_FORMATTING,=> 3,
16797 # This example is for -tso but should be general rule
16798 if ( $tokens_to_go[ $ibreak + 1 ] ne '->'
16799 && $tokens_to_go[ $ibreak + 1 ] ne ',' )
16801 $self->set_forced_breakpoint($ibreak);
16803 } ## end if ( $types_to_go[$ibreak...])
16804 } ## end if ( $ibreak > 0 && $tokens_to_go...)
16806 $want_comma_break[$depth] = 0;
16807 $index_before_arrow[$depth] = -1;
16809 # handle list which mixes '=>'s and ','s:
16810 # treat any list items so far as an interrupted list
16811 $interrupted_list[$depth] = 1;
16813 } ## end if ( $want_comma_break...)
16815 # break after all commas above starting depth
16816 if ( $depth < $starting_depth && !$dont_align[$depth] ) {
16817 $self->set_forced_breakpoint($i)
16818 unless ( $next_nonblank_type eq '#' );
16822 # add this comma to the list..
16823 my $item_count = $item_count_stack[$depth];
16824 if ( $item_count == 0 ) {
16826 # but do not form a list with no opening structure
16829 # open INFILE_COPY, ">$input_file_copy"
16830 # or die ("very long message");
16831 if ( ( $opening_structure_index_stack[$depth] < 0 )
16832 && $self->is_in_block_by_i($i) )
16834 $dont_align[$depth] = 1;
16836 } ## end if ( $item_count == 0 )
16838 $comma_index[$depth][$item_count] = $i;
16839 ++$item_count_stack[$depth];
16840 if ( $last_nonblank_type =~ /^[iR\]]$/ ) {
16841 $identifier_count_stack[$depth]++;
16843 } ## end while ( ++$i <= $max_index_to_go)
16845 #-------------------------------------------
16846 # end of loop over all tokens in this batch
16847 #-------------------------------------------
16849 # set breaks for any unfinished lists ..
16850 for ( my $dd = $current_depth ; $dd >= $minimum_depth ; $dd-- ) {
16852 $interrupted_list[$dd] = 1;
16853 $has_broken_sublist[$dd] = 1 if ( $dd < $current_depth );
16854 $self->set_comma_breakpoints($dd);
16855 $self->set_logical_breakpoints($dd)
16856 if ( $has_old_logical_breakpoints[$dd] );
16857 $self->set_for_semicolon_breakpoints($dd);
16859 # break open container...
16860 my $i_opening = $opening_structure_index_stack[$dd];
16861 $self->set_forced_breakpoint($i_opening)
16863 is_unbreakable_container($dd)
16865 # Avoid a break which would place an isolated ' or "
16868 && $i_opening >= $max_index_to_go - 2
16869 && ( $token eq "'" || $token eq '"' ) )
16871 } ## end for ( my $dd = $current_depth...)
16873 # Return a flag indicating if the input file had some good breakpoints.
16874 # This flag will be used to force a break in a line shorter than the
16875 # allowed line length.
16876 if ( $has_old_logical_breakpoints[$current_depth] ) {
16877 $saw_good_breakpoint = 1;
16880 # A complex line with one break at an = has a good breakpoint.
16881 # This is not complex ($total_depth_variation=0):
16885 # This is complex ($total_depth_variation=6):
16887 # (is_boundp("a", 'self-insert') && is_boundp("b", 'self-insert'));
16888 elsif ($i_old_assignment_break
16889 && $total_depth_variation > 4
16890 && $old_breakpoint_count == 1 )
16892 $saw_good_breakpoint = 1;
16893 } ## end elsif ( $i_old_assignment_break...)
16895 return $saw_good_breakpoint;
16896 } ## end sub scan_list
16897 } ## end closure scan_list
16903 # Added 'w' to fix b1172
16904 my @q = qw(k w i Z);
16905 @is_kwiZ{@q} = (1) x scalar(@q);
16908 sub find_token_starting_list {
16910 # When testing to see if a block will fit on one line, some
16911 # previous token(s) may also need to be on the line; particularly
16912 # if this is a sub call. So we will look back at least one
16914 my ( $self, $i_opening_paren ) = @_;
16916 # This will be the return index
16917 my $i_opening_minus = $i_opening_paren;
16919 goto RETURN if ( $i_opening_minus <= 0 );
16921 my $im1 = $i_opening_paren - 1;
16922 my ( $iprev_nb, $type_prev_nb ) = ( $im1, $types_to_go[$im1] );
16923 if ( $type_prev_nb eq 'b' && $iprev_nb > 0 ) {
16925 $type_prev_nb = $types_to_go[$iprev_nb];
16928 if ( $type_prev_nb eq ',' ) {
16930 # a previous comma is a good break point
16931 # $i_opening_minus = $i_opening_paren;
16933 elsif ( $tokens_to_go[$i_opening_paren] eq '(' ) {
16934 $i_opening_minus = $im1;
16936 # Walk back to improve length estimate...
16937 # FIX for cases b1169 b1170 b1171: start walking back
16938 # at the previous nonblank. This makes the result insensitive
16939 # to the flag --space-function-paren, and similar.
16940 # previous loop: for ( my $j = $im1 ; $j >= 0 ; $j-- ) {
16941 for ( my $j = $iprev_nb ; $j >= 0 ; $j-- ) {
16942 last if ( $types_to_go[$j] =~ /^[\(\[\{L\}\]\)Rb,]$/ );
16943 $i_opening_minus = $j;
16945 if ( $types_to_go[$i_opening_minus] eq 'b' ) { $i_opening_minus++ }
16948 # Handle non-parens
16949 elsif ( $is_kwiZ{$type_prev_nb} ) { $i_opening_minus = $iprev_nb }
16953 return $i_opening_minus;
16956 { ## begin closure set_comma_breakpoints_do
16958 my %is_keyword_with_special_leading_term;
16962 # These keywords have prototypes which allow a special leading item
16963 # followed by a list
16965 qw(formline grep kill map printf sprintf push chmod join pack unshift);
16966 @is_keyword_with_special_leading_term{@q} = (1) x scalar(@q);
16969 use constant DEBUG_SPARSE => 0;
16971 sub set_comma_breakpoints_do {
16973 # Given a list with some commas, set breakpoints at some of the
16974 # commas, if necessary, to make it easy to read.
16976 my ( $self, $rinput_hash ) = @_;
16978 my $depth = $rinput_hash->{depth};
16979 my $i_opening_paren = $rinput_hash->{i_opening_paren};
16980 my $i_closing_paren = $rinput_hash->{i_closing_paren};
16981 my $item_count = $rinput_hash->{item_count};
16982 my $identifier_count = $rinput_hash->{identifier_count};
16983 my $rcomma_index = $rinput_hash->{rcomma_index};
16984 my $next_nonblank_type = $rinput_hash->{next_nonblank_type};
16985 my $list_type = $rinput_hash->{list_type};
16986 my $interrupted = $rinput_hash->{interrupted};
16987 my $rdo_not_break_apart = $rinput_hash->{rdo_not_break_apart};
16988 my $must_break_open = $rinput_hash->{must_break_open};
16989 my $has_broken_sublist = $rinput_hash->{has_broken_sublist};
16991 # nothing to do if no commas seen
16992 return if ( $item_count < 1 );
16994 my $i_first_comma = $rcomma_index->[0];
16995 my $i_true_last_comma = $rcomma_index->[ $item_count - 1 ];
16996 my $i_last_comma = $i_true_last_comma;
16997 if ( $i_last_comma >= $max_index_to_go ) {
16998 $i_last_comma = $rcomma_index->[ --$item_count - 1 ];
16999 return if ( $item_count < 1 );
17002 #---------------------------------------------------------------
17003 # find lengths of all items in the list to calculate page layout
17004 #---------------------------------------------------------------
17005 my $comma_count = $item_count;
17011 my @max_length = ( 0, 0 );
17012 my $first_term_length;
17013 my $i = $i_opening_paren;
17016 foreach my $j ( 0 .. $comma_count - 1 ) {
17017 $is_odd = 1 - $is_odd;
17018 $i_prev_plus = $i + 1;
17019 $i = $rcomma_index->[$j];
17022 ( $i == 0 || $types_to_go[ $i - 1 ] eq 'b' ) ? $i - 2 : $i - 1;
17024 ( $types_to_go[$i_prev_plus] eq 'b' )
17027 push @i_term_begin, $i_term_begin;
17028 push @i_term_end, $i_term_end;
17029 push @i_term_comma, $i;
17031 # note: currently adding 2 to all lengths (for comma and space)
17033 2 + token_sequence_length( $i_term_begin, $i_term_end );
17034 push @item_lengths, $length;
17037 $first_term_length = $length;
17041 if ( $length > $max_length[$is_odd] ) {
17042 $max_length[$is_odd] = $length;
17047 # now we have to make a distinction between the comma count and item
17048 # count, because the item count will be one greater than the comma
17049 # count if the last item is not terminated with a comma
17051 ( $types_to_go[ $i_last_comma + 1 ] eq 'b' )
17052 ? $i_last_comma + 1
17055 ( $types_to_go[ $i_closing_paren - 1 ] eq 'b' )
17056 ? $i_closing_paren - 2
17057 : $i_closing_paren - 1;
17058 my $i_effective_last_comma = $i_last_comma;
17060 my $last_item_length = token_sequence_length( $i_b + 1, $i_e );
17062 if ( $last_item_length > 0 ) {
17064 # add 2 to length because other lengths include a comma and a blank
17065 $last_item_length += 2;
17066 push @item_lengths, $last_item_length;
17067 push @i_term_begin, $i_b + 1;
17068 push @i_term_end, $i_e;
17069 push @i_term_comma, undef;
17071 my $i_odd = $item_count % 2;
17073 if ( $last_item_length > $max_length[$i_odd] ) {
17074 $max_length[$i_odd] = $last_item_length;
17078 $i_effective_last_comma = $i_e + 1;
17080 if ( $types_to_go[ $i_b + 1 ] =~ /^[iR\]]$/ ) {
17081 $identifier_count++;
17085 #---------------------------------------------------------------
17086 # End of length calculations
17087 #---------------------------------------------------------------
17089 #---------------------------------------------------------------
17090 # Compound List Rule 1:
17091 # Break at (almost) every comma for a list containing a broken
17092 # sublist. This has higher priority than the Interrupted List
17094 #---------------------------------------------------------------
17095 if ($has_broken_sublist) {
17097 # Break at every comma except for a comma between two
17098 # simple, small terms. This prevents long vertical
17099 # columns of, say, just 0's.
17100 my $small_length = 10; # 2 + actual maximum length wanted
17102 # We'll insert a break in long runs of small terms to
17103 # allow alignment in uniform tables.
17104 my $skipped_count = 0;
17105 my $columns = table_columns_available($i_first_comma);
17106 my $fields = int( $columns / $small_length );
17107 if ( $rOpts_maximum_fields_per_table
17108 && $fields > $rOpts_maximum_fields_per_table )
17110 $fields = $rOpts_maximum_fields_per_table;
17112 my $max_skipped_count = $fields - 1;
17114 my $is_simple_last_term = 0;
17115 my $is_simple_next_term = 0;
17116 foreach my $j ( 0 .. $item_count ) {
17117 $is_simple_last_term = $is_simple_next_term;
17118 $is_simple_next_term = 0;
17119 if ( $j < $item_count
17120 && $i_term_end[$j] == $i_term_begin[$j]
17121 && $item_lengths[$j] <= $small_length )
17123 $is_simple_next_term = 1;
17126 if ( $is_simple_last_term
17127 && $is_simple_next_term
17128 && $skipped_count < $max_skipped_count )
17133 $skipped_count = 0;
17134 my $i = $i_term_comma[ $j - 1 ];
17135 last unless defined $i;
17136 $self->set_forced_breakpoint($i);
17140 # always break at the last comma if this list is
17141 # interrupted; we wouldn't want to leave a terminal '{', for
17143 if ($interrupted) {
17144 $self->set_forced_breakpoint($i_true_last_comma);
17149 #my ( $a, $b, $c ) = caller();
17150 #print "LISTX: in set_list $a $c interrupt=$interrupted count=$item_count
17151 #i_first = $i_first_comma i_last=$i_last_comma max=$max_index_to_go\n";
17152 #print "depth=$depth has_broken=$has_broken_sublist[$depth] is_multi=$is_multiline opening_paren=($i_opening_paren) \n";
17154 #---------------------------------------------------------------
17155 # Interrupted List Rule:
17156 # A list is forced to use old breakpoints if it was interrupted
17157 # by side comments or blank lines, or requested by user.
17158 #---------------------------------------------------------------
17159 if ( $rOpts_break_at_old_comma_breakpoints
17161 || $i_opening_paren < 0 )
17163 $self->copy_old_breakpoints( $i_first_comma, $i_true_last_comma );
17167 #---------------------------------------------------------------
17168 # Looks like a list of items. We have to look at it and size it up.
17169 #---------------------------------------------------------------
17171 my $opening_token = $tokens_to_go[$i_opening_paren];
17172 my $opening_is_in_block = $self->is_in_block_by_i($i_opening_paren);
17174 #-------------------------------------------------------------------
17175 # Return if this will fit on one line
17176 #-------------------------------------------------------------------
17178 my $i_opening_minus = $self->find_token_starting_list($i_opening_paren);
17180 unless $self->excess_line_length( $i_opening_minus, $i_closing_paren )
17183 #-------------------------------------------------------------------
17184 # Now we know that this block spans multiple lines; we have to set
17185 # at least one breakpoint -- real or fake -- as a signal to break
17186 # open any outer containers.
17187 #-------------------------------------------------------------------
17188 set_fake_breakpoint();
17190 # be sure we do not extend beyond the current list length
17191 if ( $i_effective_last_comma >= $max_index_to_go ) {
17192 $i_effective_last_comma = $max_index_to_go - 1;
17195 # Set a flag indicating if we need to break open to keep -lp
17196 # items aligned. This is necessary if any of the list terms
17197 # exceeds the available space after the '('.
17198 my $need_lp_break_open = $must_break_open;
17199 if ( $rOpts_line_up_parentheses && !$must_break_open ) {
17200 my $columns_if_unbroken =
17201 $maximum_line_length_at_level[ $levels_to_go[$i_opening_minus] ]
17202 - total_line_length( $i_opening_minus, $i_opening_paren );
17203 $need_lp_break_open =
17204 ( $max_length[0] > $columns_if_unbroken )
17205 || ( $max_length[1] > $columns_if_unbroken )
17206 || ( $first_term_length > $columns_if_unbroken );
17209 # Specify if the list must have an even number of fields or not.
17210 # It is generally safest to assume an even number, because the
17211 # list items might be a hash list. But if we can be sure that
17212 # it is not a hash, then we can allow an odd number for more
17214 my $odd_or_even = 2; # 1 = odd field count ok, 2 = want even count
17216 if ( $identifier_count >= $item_count - 1
17217 || $is_assignment{$next_nonblank_type}
17218 || ( $list_type && $list_type ne '=>' && $list_type !~ /^[\:\?]$/ )
17224 # do we have a long first term which should be
17225 # left on a line by itself?
17226 my $use_separate_first_term = (
17227 $odd_or_even == 1 # only if we can use 1 field/line
17228 && $item_count > 3 # need several items
17229 && $first_term_length >
17230 2 * $max_length[0] - 2 # need long first term
17231 && $first_term_length >
17232 2 * $max_length[1] - 2 # need long first term
17235 # or do we know from the type of list that the first term should
17237 if ( !$use_separate_first_term ) {
17238 if ( $is_keyword_with_special_leading_term{$list_type} ) {
17239 $use_separate_first_term = 1;
17241 # should the container be broken open?
17242 if ( $item_count < 3 ) {
17243 if ( $i_first_comma - $i_opening_paren < 4 ) {
17244 ${$rdo_not_break_apart} = 1;
17247 elsif ($first_term_length < 20
17248 && $i_first_comma - $i_opening_paren < 4 )
17250 my $columns = table_columns_available($i_first_comma);
17251 if ( $first_term_length < $columns ) {
17252 ${$rdo_not_break_apart} = 1;
17259 if ($use_separate_first_term) {
17261 # ..set a break and update starting values
17262 $use_separate_first_term = 1;
17263 $self->set_forced_breakpoint($i_first_comma);
17264 $i_opening_paren = $i_first_comma;
17265 $i_first_comma = $rcomma_index->[1];
17267 return if $comma_count == 1;
17268 shift @item_lengths;
17269 shift @i_term_begin;
17271 shift @i_term_comma;
17274 # if not, update the metrics to include the first term
17276 if ( $first_term_length > $max_length[0] ) {
17277 $max_length[0] = $first_term_length;
17281 # Field width parameters
17282 my $pair_width = ( $max_length[0] + $max_length[1] );
17284 ( $max_length[0] > $max_length[1] ) ? $max_length[0] : $max_length[1];
17286 # Number of free columns across the page width for laying out tables
17287 my $columns = table_columns_available($i_first_comma);
17289 # Estimated maximum number of fields which fit this space
17290 # This will be our first guess
17291 my $number_of_fields_max =
17292 maximum_number_of_fields( $columns, $odd_or_even, $max_width,
17294 my $number_of_fields = $number_of_fields_max;
17296 # Find the best-looking number of fields
17297 # and make this our second guess if possible
17298 my ( $number_of_fields_best, $ri_ragged_break_list,
17299 $new_identifier_count )
17300 = $self->study_list_complexity( \@i_term_begin, \@i_term_end,
17301 \@item_lengths, $max_width );
17303 if ( $number_of_fields_best != 0
17304 && $number_of_fields_best < $number_of_fields_max )
17306 $number_of_fields = $number_of_fields_best;
17309 # ----------------------------------------------------------------------
17310 # If we are crowded and the -lp option is being used, try to
17311 # undo some indentation
17312 # ----------------------------------------------------------------------
17314 $rOpts_line_up_parentheses
17316 $number_of_fields == 0
17317 || ( $number_of_fields == 1
17318 && $number_of_fields != $number_of_fields_best )
17322 my $available_spaces =
17323 $self->get_available_spaces_to_go($i_first_comma);
17324 if ( $available_spaces > 0 ) {
17326 my $spaces_wanted = $max_width - $columns; # for 1 field
17328 if ( $number_of_fields_best == 0 ) {
17329 $number_of_fields_best =
17330 get_maximum_fields_wanted( \@item_lengths );
17333 if ( $number_of_fields_best != 1 ) {
17334 my $spaces_wanted_2 =
17335 1 + $pair_width - $columns; # for 2 fields
17336 if ( $available_spaces > $spaces_wanted_2 ) {
17337 $spaces_wanted = $spaces_wanted_2;
17341 if ( $spaces_wanted > 0 ) {
17342 my $deleted_spaces =
17343 $self->reduce_lp_indentation( $i_first_comma,
17347 if ( $deleted_spaces > 0 ) {
17348 $columns = table_columns_available($i_first_comma);
17349 $number_of_fields_max =
17350 maximum_number_of_fields( $columns, $odd_or_even,
17351 $max_width, $pair_width );
17352 $number_of_fields = $number_of_fields_max;
17354 if ( $number_of_fields_best == 1
17355 && $number_of_fields >= 1 )
17357 $number_of_fields = $number_of_fields_best;
17364 # try for one column if two won't work
17365 if ( $number_of_fields <= 0 ) {
17366 $number_of_fields = int( $columns / $max_width );
17369 # The user can place an upper bound on the number of fields,
17370 # which can be useful for doing maintenance on tables
17371 if ( $rOpts_maximum_fields_per_table
17372 && $number_of_fields > $rOpts_maximum_fields_per_table )
17374 $number_of_fields = $rOpts_maximum_fields_per_table;
17377 # How many columns (characters) and lines would this container take
17378 # if no additional whitespace were added?
17379 my $packed_columns = token_sequence_length( $i_opening_paren + 1,
17380 $i_effective_last_comma + 1 );
17381 if ( $columns <= 0 ) { $columns = 1 } # avoid divide by zero
17382 my $packed_lines = 1 + int( $packed_columns / $columns );
17384 # are we an item contained in an outer list?
17385 my $in_hierarchical_list = $next_nonblank_type =~ /^[\}\,]$/;
17387 if ( $number_of_fields <= 0 ) {
17389 # #---------------------------------------------------------------
17390 # # We're in trouble. We can't find a single field width that works.
17391 # # There is no simple answer here; we may have a single long list
17393 # #---------------------------------------------------------------
17395 # In many cases, it may be best to not force a break if there is just one
17396 # comma, because the standard continuation break logic will do a better
17399 # In the common case that all but one of the terms can fit
17400 # on a single line, it may look better not to break open the
17401 # containing parens. Consider, for example
17405 # sort { $color_value{$::a} <=> $color_value{$::b}; }
17408 # which will look like this with the container broken:
17412 # sort { $color_value{$::a} <=> $color_value{$::b}; } keys %colors
17415 # Here is an example of this rule for a long last term:
17417 # log_message( 0, 256, 128,
17418 # "Number of routes in adj-RIB-in to be considered: $peercount" );
17420 # And here is an example with a long first term:
17423 # "%2d wallclock secs (%$f usr %$f sys + %$f cusr %$f csys = %$f CPU)",
17424 # $r, $pu, $ps, $cu, $cs, $tt
17426 # if $style eq 'all';
17428 my $i_last_comma = $rcomma_index->[ $comma_count - 1 ];
17429 my $long_last_term =
17430 $self->excess_line_length( 0, $i_last_comma ) <= 0;
17431 my $long_first_term =
17432 $self->excess_line_length( $i_first_comma + 1, $max_index_to_go )
17435 # break at every comma ...
17438 # if requested by user or is best looking
17439 $number_of_fields_best == 1
17441 # or if this is a sublist of a larger list
17442 || $in_hierarchical_list
17444 # or if multiple commas and we don't have a long first or last
17446 || ( $comma_count > 1
17447 && !( $long_last_term || $long_first_term ) )
17450 foreach ( 0 .. $comma_count - 1 ) {
17451 $self->set_forced_breakpoint( $rcomma_index->[$_] );
17454 elsif ($long_last_term) {
17456 $self->set_forced_breakpoint($i_last_comma);
17457 ${$rdo_not_break_apart} = 1 unless $must_break_open;
17459 elsif ($long_first_term) {
17461 $self->set_forced_breakpoint($i_first_comma);
17465 # let breaks be defined by default bond strength logic
17470 # --------------------------------------------------------
17471 # We have a tentative field count that seems to work.
17472 # How many lines will this require?
17473 # --------------------------------------------------------
17474 my $formatted_lines = $item_count / ($number_of_fields);
17475 if ( $formatted_lines != int $formatted_lines ) {
17476 $formatted_lines = 1 + int $formatted_lines;
17479 # So far we've been trying to fill out to the right margin. But
17480 # compact tables are easier to read, so let's see if we can use fewer
17481 # fields without increasing the number of lines.
17482 $number_of_fields =
17483 compactify_table( $item_count, $number_of_fields, $formatted_lines,
17486 # How many spaces across the page will we fill?
17487 my $columns_per_line =
17488 ( int $number_of_fields / 2 ) * $pair_width +
17489 ( $number_of_fields % 2 ) * $max_width;
17491 my $formatted_columns;
17493 if ( $number_of_fields > 1 ) {
17494 $formatted_columns =
17495 ( $pair_width * ( int( $item_count / 2 ) ) +
17496 ( $item_count % 2 ) * $max_width );
17499 $formatted_columns = $max_width * $item_count;
17501 if ( $formatted_columns < $packed_columns ) {
17502 $formatted_columns = $packed_columns;
17505 my $unused_columns = $formatted_columns - $packed_columns;
17507 # set some empirical parameters to help decide if we should try to
17508 # align; high sparsity does not look good, especially with few lines
17509 my $sparsity = ($unused_columns) / ($formatted_columns);
17510 my $max_allowed_sparsity =
17511 ( $item_count < 3 ) ? 0.1
17512 : ( $packed_lines == 1 ) ? 0.15
17513 : ( $packed_lines == 2 ) ? 0.4
17516 # Begin check for shortcut methods, which avoid treating a list
17517 # as a table for relatively small parenthesized lists. These
17518 # are usually easier to read if not formatted as tables.
17520 $packed_lines <= 2 # probably can fit in 2 lines
17521 && $item_count < 9 # doesn't have too many items
17522 && $opening_is_in_block # not a sub-container
17523 && $opening_token eq '(' # is paren list
17527 # Shortcut method 1: for -lp and just one comma:
17528 # This is a no-brainer, just break at the comma.
17530 $rOpts_line_up_parentheses # -lp
17531 && $item_count == 2 # two items, one comma
17532 && !$must_break_open
17535 my $i_break = $rcomma_index->[0];
17536 $self->set_forced_breakpoint($i_break);
17537 ${$rdo_not_break_apart} = 1;
17542 # method 2 is for most small ragged lists which might look
17543 # best if not displayed as a table.
17545 ( $number_of_fields == 2 && $item_count == 3 )
17547 $new_identifier_count > 0 # isn't all quotes
17548 && $sparsity > 0.15
17549 ) # would be fairly spaced gaps if aligned
17553 my $break_count = $self->set_ragged_breakpoints( \@i_term_comma,
17554 $ri_ragged_break_list );
17555 ++$break_count if ($use_separate_first_term);
17557 # NOTE: we should really use the true break count here,
17558 # which can be greater if there are large terms and
17559 # little space, but usually this will work well enough.
17560 unless ($must_break_open) {
17562 if ( $break_count <= 1 ) {
17563 ${$rdo_not_break_apart} = 1;
17565 elsif ( $rOpts_line_up_parentheses && !$need_lp_break_open )
17567 ${$rdo_not_break_apart} = 1;
17573 } # end shortcut methods
17576 DEBUG_SPARSE && do {
17578 "SPARSE:cols=$columns commas=$comma_count items:$item_count ids=$identifier_count pairwidth=$pair_width fields=$number_of_fields lines packed: $packed_lines packed_cols=$packed_columns fmtd:$formatted_lines cols /line:$columns_per_line unused:$unused_columns fmtd:$formatted_columns sparsity=$sparsity allow=$max_allowed_sparsity\n";
17582 #---------------------------------------------------------------
17583 # Compound List Rule 2:
17584 # If this list is too long for one line, and it is an item of a
17585 # larger list, then we must format it, regardless of sparsity
17586 # (ian.t). One reason that we have to do this is to trigger
17587 # Compound List Rule 1, above, which causes breaks at all commas of
17588 # all outer lists. In this way, the structure will be properly
17590 #---------------------------------------------------------------
17592 # Decide if this list is too long for one line unless broken
17593 my $total_columns = table_columns_available($i_opening_paren);
17594 my $too_long = $packed_columns > $total_columns;
17596 # For a paren list, include the length of the token just before the
17597 # '(' because this is likely a sub call, and we would have to
17598 # include the sub name on the same line as the list. This is still
17599 # imprecise, but not too bad. (steve.t)
17600 if ( !$too_long && $i_opening_paren > 0 && $opening_token eq '(' ) {
17602 $too_long = $self->excess_line_length( $i_opening_minus,
17603 $i_effective_last_comma + 1 ) > 0;
17606 # FIXME: For an item after a '=>', try to include the length of the
17607 # thing before the '=>'. This is crude and should be improved by
17608 # actually looking back token by token.
17609 if ( !$too_long && $i_opening_paren > 0 && $list_type eq '=>' ) {
17610 my $i_opening_minus = $i_opening_paren - 4;
17611 if ( $i_opening_minus >= 0 ) {
17612 $too_long = $self->excess_line_length( $i_opening_minus,
17613 $i_effective_last_comma + 1 ) > 0;
17617 # Always break lists contained in '[' and '{' if too long for 1 line,
17618 # and always break lists which are too long and part of a more complex
17620 my $must_break_open_container = $must_break_open
17622 && ( $in_hierarchical_list || $opening_token ne '(' ) );
17624 #print "LISTX: next=$next_nonblank_type avail cols=$columns packed=$packed_columns must format = $must_break_open_container too-long=$too_long opening=$opening_token list_type=$list_type formatted_lines=$formatted_lines packed=$packed_lines max_sparsity= $max_allowed_sparsity sparsity=$sparsity \n";
17626 #---------------------------------------------------------------
17627 # The main decision:
17628 # Now decide if we will align the data into aligned columns. Do not
17629 # attempt to align columns if this is a tiny table or it would be
17630 # too spaced. It seems that the more packed lines we have, the
17631 # sparser the list that can be allowed and still look ok.
17632 #---------------------------------------------------------------
17634 if ( ( $formatted_lines < 3 && $packed_lines < $formatted_lines )
17635 || ( $formatted_lines < 2 )
17636 || ( $unused_columns > $max_allowed_sparsity * $formatted_columns )
17640 #---------------------------------------------------------------
17641 # too sparse: would look ugly if aligned in a table;
17642 #---------------------------------------------------------------
17644 # use old breakpoints if this is a 'big' list
17645 if ( $packed_lines > 2 && $item_count > 10 ) {
17646 write_logfile_entry("List sparse: using old breakpoints\n");
17647 $self->copy_old_breakpoints( $i_first_comma, $i_last_comma );
17650 # let the continuation logic handle it if 2 lines
17653 my $break_count = $self->set_ragged_breakpoints( \@i_term_comma,
17654 $ri_ragged_break_list );
17655 ++$break_count if ($use_separate_first_term);
17657 unless ($must_break_open_container) {
17658 if ( $break_count <= 1 ) {
17659 ${$rdo_not_break_apart} = 1;
17661 elsif ( $rOpts_line_up_parentheses && !$need_lp_break_open )
17663 ${$rdo_not_break_apart} = 1;
17670 #---------------------------------------------------------------
17671 # go ahead and format as a table
17672 #---------------------------------------------------------------
17673 write_logfile_entry(
17674 "List: auto formatting with $number_of_fields fields/row\n");
17676 my $j_first_break =
17677 $use_separate_first_term ? $number_of_fields : $number_of_fields - 1;
17680 my $j = $j_first_break ;
17681 $j < $comma_count ;
17682 $j += $number_of_fields
17685 my $i = $rcomma_index->[$j];
17686 $self->set_forced_breakpoint($i);
17690 } ## end closure set_comma_breakpoints_do
17692 sub study_list_complexity {
17694 # Look for complex tables which should be formatted with one term per line.
17695 # Returns the following:
17697 # \@i_ragged_break_list = list of good breakpoints to avoid lines
17698 # which are hard to read
17699 # $number_of_fields_best = suggested number of fields based on
17700 # complexity; = 0 if any number may be used.
17702 my ( $self, $ri_term_begin, $ri_term_end, $ritem_lengths, $max_width ) = @_;
17703 my $item_count = @{$ri_term_begin};
17704 my $complex_item_count = 0;
17705 my $number_of_fields_best = $rOpts_maximum_fields_per_table;
17706 my $i_max = @{$ritem_lengths} - 1;
17707 ##my @item_complexity;
17709 my $i_last_last_break = -3;
17710 my $i_last_break = -2;
17711 my @i_ragged_break_list;
17713 my $definitely_complex = 30;
17714 my $definitely_simple = 12;
17715 my $quote_count = 0;
17717 for my $i ( 0 .. $i_max ) {
17718 my $ib = $ri_term_begin->[$i];
17719 my $ie = $ri_term_end->[$i];
17721 # define complexity: start with the actual term length
17722 my $weighted_length = ( $ritem_lengths->[$i] - 2 );
17724 ##TBD: join types here and check for variations
17725 ##my $str=join "", @tokens_to_go[$ib..$ie];
17728 if ( $types_to_go[$ib] =~ /^[qQ]$/ ) {
17732 elsif ( $types_to_go[$ib] =~ /^[w\-]$/ ) {
17736 if ( $ib eq $ie ) {
17737 if ( $is_quote && $tokens_to_go[$ib] =~ /\s/ ) {
17738 $complex_item_count++;
17739 $weighted_length *= 2;
17745 if ( grep { $_ eq 'b' } @types_to_go[ $ib .. $ie ] ) {
17746 $complex_item_count++;
17747 $weighted_length *= 2;
17749 if ( grep { $_ eq '..' } @types_to_go[ $ib .. $ie ] ) {
17750 $weighted_length += 4;
17754 # add weight for extra tokens.
17755 $weighted_length += 2 * ( $ie - $ib );
17757 ## my $BUB = join '', @tokens_to_go[$ib..$ie];
17758 ## print "# COMPLEXITY:$weighted_length $BUB\n";
17760 ##push @item_complexity, $weighted_length;
17762 # now mark a ragged break after this item it if it is 'long and
17764 if ( $weighted_length >= $definitely_complex ) {
17766 # if we broke after the previous term
17767 # then break before it too
17768 if ( $i_last_break == $i - 1
17770 && $i_last_last_break != $i - 2 )
17773 ## FIXME: don't strand a small term
17774 pop @i_ragged_break_list;
17775 push @i_ragged_break_list, $i - 2;
17776 push @i_ragged_break_list, $i - 1;
17779 push @i_ragged_break_list, $i;
17780 $i_last_last_break = $i_last_break;
17781 $i_last_break = $i;
17784 # don't break before a small last term -- it will
17785 # not look good on a line by itself.
17786 elsif ($i == $i_max
17787 && $i_last_break == $i - 1
17788 && $weighted_length <= $definitely_simple )
17790 pop @i_ragged_break_list;
17794 my $identifier_count = $i_max + 1 - $quote_count;
17796 # Need more tuning here..
17797 if ( $max_width > 12
17798 && $complex_item_count > $item_count / 2
17799 && $number_of_fields_best != 2 )
17801 $number_of_fields_best = 1;
17804 return ( $number_of_fields_best, \@i_ragged_break_list, $identifier_count );
17807 sub get_maximum_fields_wanted {
17809 # Not all tables look good with more than one field of items.
17810 # This routine looks at a table and decides if it should be
17811 # formatted with just one field or not.
17812 # This coding is still under development.
17813 my ($ritem_lengths) = @_;
17815 my $number_of_fields_best = 0;
17817 # For just a few items, we tentatively assume just 1 field.
17818 my $item_count = @{$ritem_lengths};
17819 if ( $item_count <= 5 ) {
17820 $number_of_fields_best = 1;
17823 # For larger tables, look at it both ways and see what looks best
17827 my @max_length = ( 0, 0 );
17828 my @last_length_2 = ( undef, undef );
17829 my @first_length_2 = ( undef, undef );
17830 my $last_length = undef;
17831 my $total_variation_1 = 0;
17832 my $total_variation_2 = 0;
17833 my @total_variation_2 = ( 0, 0 );
17835 foreach my $j ( 0 .. $item_count - 1 ) {
17837 $is_odd = 1 - $is_odd;
17838 my $length = $ritem_lengths->[$j];
17839 if ( $length > $max_length[$is_odd] ) {
17840 $max_length[$is_odd] = $length;
17843 if ( defined($last_length) ) {
17844 my $dl = abs( $length - $last_length );
17845 $total_variation_1 += $dl;
17847 $last_length = $length;
17849 my $ll = $last_length_2[$is_odd];
17850 if ( defined($ll) ) {
17851 my $dl = abs( $length - $ll );
17852 $total_variation_2[$is_odd] += $dl;
17855 $first_length_2[$is_odd] = $length;
17857 $last_length_2[$is_odd] = $length;
17859 $total_variation_2 = $total_variation_2[0] + $total_variation_2[1];
17861 my $factor = ( $item_count > 10 ) ? 1 : ( $item_count > 5 ) ? 0.75 : 0;
17862 unless ( $total_variation_2 < $factor * $total_variation_1 ) {
17863 $number_of_fields_best = 1;
17866 return ($number_of_fields_best);
17869 sub table_columns_available {
17870 my $i_first_comma = shift;
17872 $maximum_line_length_at_level[ $levels_to_go[$i_first_comma] ] -
17873 leading_spaces_to_go($i_first_comma);
17875 # Patch: the vertical formatter does not line up lines whose lengths
17876 # exactly equal the available line length because of allowances
17877 # that must be made for side comments. Therefore, the number of
17878 # available columns is reduced by 1 character.
17883 sub maximum_number_of_fields {
17885 # how many fields will fit in the available space?
17886 my ( $columns, $odd_or_even, $max_width, $pair_width ) = @_;
17887 my $max_pairs = int( $columns / $pair_width );
17888 my $number_of_fields = $max_pairs * 2;
17889 if ( $odd_or_even == 1
17890 && $max_pairs * $pair_width + $max_width <= $columns )
17892 $number_of_fields++;
17894 return $number_of_fields;
17897 sub compactify_table {
17899 # given a table with a certain number of fields and a certain number
17900 # of lines, see if reducing the number of fields will make it look
17902 my ( $item_count, $number_of_fields, $formatted_lines, $odd_or_even ) = @_;
17903 if ( $number_of_fields >= $odd_or_even * 2 && $formatted_lines > 0 ) {
17907 $min_fields = $number_of_fields ;
17908 $min_fields >= $odd_or_even
17909 && $min_fields * $formatted_lines >= $item_count ;
17910 $min_fields -= $odd_or_even
17913 $number_of_fields = $min_fields;
17916 return $number_of_fields;
17919 sub set_ragged_breakpoints {
17921 # Set breakpoints in a list that cannot be formatted nicely as a
17923 my ( $self, $ri_term_comma, $ri_ragged_break_list ) = @_;
17925 my $break_count = 0;
17926 foreach ( @{$ri_ragged_break_list} ) {
17927 my $j = $ri_term_comma->[$_];
17929 $self->set_forced_breakpoint($j);
17933 return $break_count;
17936 sub copy_old_breakpoints {
17937 my ( $self, $i_first_comma, $i_last_comma ) = @_;
17938 for my $i ( $i_first_comma .. $i_last_comma ) {
17939 if ( $old_breakpoint_to_go[$i] ) {
17940 $self->set_forced_breakpoint($i);
17947 my ( $self, $i, $j ) = @_;
17948 if ( $i >= 0 && $i <= $j && $j <= $max_index_to_go ) {
17951 my ( $a, $b, $c ) = caller();
17952 my $forced_breakpoint_count = get_forced_breakpoint_count();
17954 "NOBREAK: forced_breakpoint $forced_breakpoint_count from $a $c with i=$i max=$max_index_to_go type=$types_to_go[$i]\n";
17957 @nobreak_to_go[ $i .. $j ] = (1) x ( $j - $i + 1 );
17960 # shouldn't happen; non-critical error
17963 my ( $a, $b, $c ) = caller();
17965 "NOBREAK ERROR: from $a $c with i=$i j=$j max=$max_index_to_go\n";
17971 ###############################################
17972 # CODE SECTION 12: Code for setting indentation
17973 ###############################################
17975 sub token_sequence_length {
17977 # return length of tokens ($ibeg .. $iend) including $ibeg & $iend
17978 # returns 0 if $ibeg > $iend (shouldn't happen)
17979 my ( $ibeg, $iend ) = @_;
17980 return 0 if ( !defined($iend) || $iend < 0 || $ibeg > $iend );
17981 return $summed_lengths_to_go[ $iend + 1 ] if ( $ibeg < 0 );
17982 return $summed_lengths_to_go[ $iend + 1 ] - $summed_lengths_to_go[$ibeg];
17985 sub total_line_length {
17987 # return length of a line of tokens ($ibeg .. $iend)
17988 my ( $ibeg, $iend ) = @_;
17991 #return leading_spaces_to_go($ibeg) + token_sequence_length( $ibeg, $iend );
17993 # this is basically sub 'leading_spaces_to_go':
17994 my $indentation = $leading_spaces_to_go[$ibeg];
17995 if ( ref($indentation) ) { $indentation = $indentation->get_spaces() }
17997 return $indentation + $summed_lengths_to_go[ $iend + 1 ] -
17998 $summed_lengths_to_go[$ibeg];
18001 sub excess_line_length {
18003 # return number of characters by which a line of tokens ($ibeg..$iend)
18004 # exceeds the allowable line length.
18006 # NOTE: Profiling shows that this is a critical routine for efficiency.
18007 # Therefore I have eliminated additional calls to subs from it.
18008 my ( $self, $ibeg, $iend, $ignore_right_weld ) = @_;
18010 # Original expression for line length
18011 ##$length = leading_spaces_to_go($ibeg) + token_sequence_length( $ibeg, $iend );
18013 # This is basically sub 'leading_spaces_to_go':
18014 my $indentation = $leading_spaces_to_go[$ibeg];
18015 if ( ref($indentation) ) { $indentation = $indentation->get_spaces() }
18019 $summed_lengths_to_go[ $iend + 1 ] -
18020 $summed_lengths_to_go[$ibeg];
18022 # Include right weld lengths unless requested not to.
18023 if ( $total_weld_count
18024 && !$ignore_right_weld
18025 && $type_sequence_to_go[$iend] )
18027 my $wr = $self->[_rweld_len_right_at_K_]->{ $K_to_go[$iend] };
18028 $length += $wr if defined($wr);
18031 # return the excess
18032 return $length - $maximum_line_length_at_level[ $levels_to_go[$ibeg] ];
18037 # return the number of leading spaces associated with an indentation
18038 # variable $indentation is either a constant number of spaces or an object
18039 # with a get_spaces method.
18040 my $indentation = shift;
18041 return ref($indentation) ? $indentation->get_spaces() : $indentation;
18044 sub get_recoverable_spaces {
18046 # return the number of spaces (+ means shift right, - means shift left)
18047 # that we would like to shift a group of lines with the same indentation
18048 # to get them to line up with their opening parens
18049 my $indentation = shift;
18050 return ref($indentation) ? $indentation->get_recoverable_spaces() : 0;
18053 sub get_available_spaces_to_go {
18055 my ( $self, $ii ) = @_;
18056 my $item = $leading_spaces_to_go[$ii];
18058 # return the number of available leading spaces associated with an
18059 # indentation variable. $indentation is either a constant number of
18060 # spaces or an object with a get_available_spaces method.
18061 return ref($item) ? $item->get_available_spaces() : 0;
18064 { ## begin closure set_leading_whitespace (for -lp indentation)
18066 # These routines are called batch-by-batch to handle the -lp indentation
18067 # option. The coding is rather complex, but is only for -lp.
18069 my $gnu_position_predictor;
18070 my $gnu_sequence_number;
18071 my $line_start_index_to_go;
18072 my $max_gnu_item_index;
18073 my $max_gnu_stack_index;
18074 my %gnu_arrow_count;
18075 my %gnu_comma_count;
18076 my %last_gnu_equals;
18080 sub initialize_gnu_vars {
18082 # initialize gnu variables for a new file;
18083 # must be called once at the start of a new file.
18085 # initialize the leading whitespace stack to negative levels
18086 # so that we can never run off the end of the stack
18087 $gnu_position_predictor =
18088 0; # where the current token is predicted to be
18089 $max_gnu_stack_index = 0;
18090 $max_gnu_item_index = -1;
18091 $gnu_stack[0] = new_lp_indentation_item( 0, -1, -1, 0, 0 );
18092 @gnu_item_list = ();
18096 sub initialize_gnu_batch_vars {
18098 # initialize gnu variables for a new batch;
18099 # must be called before each new batch
18100 $gnu_sequence_number++; # increment output batch counter
18101 %last_gnu_equals = ();
18102 %gnu_comma_count = ();
18103 %gnu_arrow_count = ();
18104 $line_start_index_to_go = 0;
18105 $max_gnu_item_index = UNDEFINED_INDEX;
18109 sub new_lp_indentation_item {
18111 # this is an interface to the IndentationItem class
18112 my ( $spaces, $level, $ci_level, $available_spaces, $align_paren ) = @_;
18114 # A negative level implies not to store the item in the item_list
18116 if ( $level >= 0 ) { $index = ++$max_gnu_item_index; }
18118 my $starting_index_K = 0;
18119 if ( defined($line_start_index_to_go)
18120 && $line_start_index_to_go >= 0
18121 && $line_start_index_to_go <= $max_index_to_go )
18123 $starting_index_K = $K_to_go[$line_start_index_to_go];
18126 my $item = Perl::Tidy::IndentationItem->new(
18129 ci_level => $ci_level,
18130 available_spaces => $available_spaces,
18132 gnu_sequence_number => $gnu_sequence_number,
18133 align_paren => $align_paren,
18134 stack_depth => $max_gnu_stack_index,
18135 starting_index_K => $starting_index_K,
18138 if ( $level >= 0 ) {
18139 $gnu_item_list[$max_gnu_item_index] = $item;
18145 sub set_leading_whitespace {
18147 # This routine defines leading whitespace for the case of -lp formatting
18148 # given: the level and continuation_level of a token,
18149 # define: space count of leading string which would apply if it
18150 # were the first token of a new line.
18152 my ( $self, $Kj, $K_last_nonblank, $K_last_last_nonblank,
18153 $level_abs, $ci_level, $in_continued_quote )
18156 return unless ($rOpts_line_up_parentheses);
18157 return unless ( defined($max_index_to_go) && $max_index_to_go >= 0 );
18159 my $rbreak_container = $self->[_rbreak_container_];
18160 my $rshort_nested = $self->[_rshort_nested_];
18161 my $ris_excluded_lp_container = $self->[_ris_excluded_lp_container_];
18162 my $rLL = $self->[_rLL_];
18163 my $rbreak_before_container_by_seqno =
18164 $self->[_rbreak_before_container_by_seqno_];
18166 # find needed previous nonblank tokens
18167 my $last_nonblank_token = '';
18168 my $last_nonblank_type = '';
18169 my $last_nonblank_block_type = '';
18171 # and previous nonblank tokens, just in this batch:
18172 my $last_nonblank_token_in_batch = '';
18173 my $last_nonblank_type_in_batch = '';
18174 my $last_last_nonblank_type_in_batch = '';
18176 if ( defined($K_last_nonblank) ) {
18177 $last_nonblank_token = $rLL->[$K_last_nonblank]->[_TOKEN_];
18178 $last_nonblank_type = $rLL->[$K_last_nonblank]->[_TYPE_];
18179 $last_nonblank_block_type =
18180 $rLL->[$K_last_nonblank]->[_BLOCK_TYPE_];
18182 if ( $K_last_nonblank >= $K_to_go[0] ) {
18183 $last_nonblank_token_in_batch = $last_nonblank_token;
18184 $last_nonblank_type_in_batch = $last_nonblank_type;
18185 if ( defined($K_last_last_nonblank)
18186 && $K_last_last_nonblank > $K_to_go[0] )
18188 $last_last_nonblank_type_in_batch =
18189 $rLL->[$K_last_last_nonblank]->[_TYPE_];
18194 ################################################################
18196 # Adjust levels if necessary to recycle whitespace:
18197 my $level = $level_abs;
18198 my $radjusted_levels = $self->[_radjusted_levels_];
18200 my $nws = @{$radjusted_levels};
18201 if ( defined($radjusted_levels) && @{$radjusted_levels} == @{$rLL} ) {
18202 $level = $radjusted_levels->[$Kj];
18203 if ( $level < 0 ) { $level = 0 } # note: this should not happen
18206 # The continued_quote flag means that this is the first token of a
18207 # line, and it is the continuation of some kind of multi-line quote
18208 # or pattern. It requires special treatment because it must have no
18209 # added leading whitespace. So we create a special indentation item
18210 # which is not in the stack.
18211 if ($in_continued_quote) {
18212 my $space_count = 0;
18213 my $available_space = 0;
18214 $level = -1; # flag to prevent storing in item_list
18215 $leading_spaces_to_go[$max_index_to_go] =
18216 $reduced_spaces_to_go[$max_index_to_go] =
18217 new_lp_indentation_item( $space_count, $level, $ci_level,
18218 $available_space, 0 );
18222 # get the top state from the stack
18223 my $space_count = $gnu_stack[$max_gnu_stack_index]->get_spaces();
18224 my $current_level = $gnu_stack[$max_gnu_stack_index]->get_level();
18225 my $current_ci_level = $gnu_stack[$max_gnu_stack_index]->get_ci_level();
18227 my $type = $types_to_go[$max_index_to_go];
18228 my $token = $tokens_to_go[$max_index_to_go];
18229 my $total_depth = $nesting_depth_to_go[$max_index_to_go];
18231 if ( $type eq '{' || $type eq '(' ) {
18233 $gnu_comma_count{ $total_depth + 1 } = 0;
18234 $gnu_arrow_count{ $total_depth + 1 } = 0;
18236 # If we come to an opening token after an '=' token of some type,
18237 # see if it would be helpful to 'break' after the '=' to save space
18238 my $last_equals = $last_gnu_equals{$total_depth};
18239 if ( $last_equals && $last_equals > $line_start_index_to_go ) {
18241 my $seqno = $type_sequence_to_go[$max_index_to_go];
18243 # find the position if we break at the '='
18244 my $i_test = $last_equals;
18245 if ( $types_to_go[ $i_test + 1 ] eq 'b' ) { $i_test++ }
18248 ##my $too_close = ($i_test==$max_index_to_go-1);
18250 my $test_position =
18251 total_line_length( $i_test, $max_index_to_go );
18253 $maximum_line_length_at_level[ $levels_to_go[$i_test] ];
18255 my $bbc_flag = $break_before_container_types{$token};
18259 # the equals is not just before an open paren (testing)
18262 # if we are beyond the midpoint
18263 $gnu_position_predictor >
18264 $mll - $rOpts_maximum_line_length / 2
18266 # if a -bbx flag WANTS a break before this opening token
18267 || ( $seqno && $rbreak_before_container_by_seqno->{$seqno} )
18269 # or if we MIGHT want a break (fixes case b826 b909 b989)
18270 || ( $bbc_flag && $bbc_flag >= 2 )
18272 # or we are beyond the 1/4 point and there was an old
18273 # break at an assignment (not '=>') [fix for b1035]
18275 $gnu_position_predictor >
18276 $mll - $rOpts_maximum_line_length * 3 / 4
18277 && $types_to_go[$last_equals] ne '=>'
18279 $old_breakpoint_to_go[$last_equals]
18280 || ( $last_equals > 0
18281 && $old_breakpoint_to_go[ $last_equals - 1 ] )
18282 || ( $last_equals > 1
18283 && $types_to_go[ $last_equals - 1 ] eq 'b'
18284 && $old_breakpoint_to_go[ $last_equals - 2 ] )
18290 # then make the switch -- note that we do not set a real
18291 # breakpoint here because we may not really need one; sub
18292 # scan_list will do that if necessary
18293 $line_start_index_to_go = $i_test + 1;
18294 $gnu_position_predictor = $test_position;
18300 $maximum_line_length_at_level[$level] -
18301 $rOpts_maximum_line_length / 2;
18303 # Check for decreasing depth ..
18304 # Note that one token may have both decreasing and then increasing
18305 # depth. For example, (level, ci) can go from (1,1) to (2,0). So,
18306 # in this example we would first go back to (1,0) then up to (2,0)
18307 # in a single call.
18308 if ( $level < $current_level || $ci_level < $current_ci_level ) {
18310 # loop to find the first entry at or completely below this level
18311 my ( $lev, $ci_lev );
18313 if ($max_gnu_stack_index) {
18315 # save index of token which closes this level
18316 $gnu_stack[$max_gnu_stack_index]
18317 ->set_closed($max_index_to_go);
18319 # Undo any extra indentation if we saw no commas
18320 my $available_spaces =
18321 $gnu_stack[$max_gnu_stack_index]->get_available_spaces();
18323 my $comma_count = 0;
18324 my $arrow_count = 0;
18325 if ( $type eq '}' || $type eq ')' ) {
18326 $comma_count = $gnu_comma_count{$total_depth};
18327 $arrow_count = $gnu_arrow_count{$total_depth};
18328 $comma_count = 0 unless $comma_count;
18329 $arrow_count = 0 unless $arrow_count;
18331 $gnu_stack[$max_gnu_stack_index]
18332 ->set_comma_count($comma_count);
18333 $gnu_stack[$max_gnu_stack_index]
18334 ->set_arrow_count($arrow_count);
18336 if ( $available_spaces > 0 ) {
18338 if ( $comma_count <= 0 || $arrow_count > 0 ) {
18341 $gnu_stack[$max_gnu_stack_index]->get_index();
18343 $gnu_stack[$max_gnu_stack_index]
18344 ->get_sequence_number();
18346 # Be sure this item was created in this batch. This
18347 # should be true because we delete any available
18348 # space from open items at the end of each batch.
18349 if ( $gnu_sequence_number != $seqno
18350 || $i > $max_gnu_item_index )
18353 "Program bug with -lp. seqno=$seqno should be $gnu_sequence_number and i=$i should be less than max=$max_gnu_item_index\n"
18355 report_definite_bug();
18359 if ( $arrow_count == 0 ) {
18361 ->permanently_decrease_available_spaces(
18362 $available_spaces);
18366 ->tentatively_decrease_available_spaces(
18367 $available_spaces);
18369 foreach my $j ( $i + 1 .. $max_gnu_item_index )
18372 ->decrease_SPACES($available_spaces);
18378 # go down one level
18379 --$max_gnu_stack_index;
18380 $lev = $gnu_stack[$max_gnu_stack_index]->get_level();
18381 $ci_lev = $gnu_stack[$max_gnu_stack_index]->get_ci_level();
18383 # stop when we reach a level at or below the current level
18384 if ( $lev <= $level && $ci_lev <= $ci_level ) {
18386 $gnu_stack[$max_gnu_stack_index]->get_spaces();
18387 $current_level = $lev;
18388 $current_ci_level = $ci_lev;
18393 # reached bottom of stack .. should never happen because
18394 # only negative levels can get here, and $level was forced
18395 # to be positive above.
18398 "program bug with -lp: stack_error. level=$level; lev=$lev; ci_level=$ci_level; ci_lev=$ci_lev; rerun with -nlp\n"
18400 report_definite_bug();
18406 # handle increasing depth
18407 if ( $level > $current_level || $ci_level > $current_ci_level ) {
18409 # Compute the standard incremental whitespace. This will be
18410 # the minimum incremental whitespace that will be used. This
18411 # choice results in a smooth transition between the gnu-style
18412 # and the standard style.
18413 my $standard_increment =
18414 ( $level - $current_level ) *
18415 $rOpts_indent_columns +
18416 ( $ci_level - $current_ci_level ) *
18417 $rOpts_continuation_indentation;
18419 # Now we have to define how much extra incremental space
18420 # ("$available_space") we want. This extra space will be
18421 # reduced as necessary when long lines are encountered or when
18422 # it becomes clear that we do not have a good list.
18423 my $available_space = 0;
18424 my $align_paren = 0;
18427 my $last_nonblank_seqno;
18428 if ( defined($K_last_nonblank) ) {
18429 $last_nonblank_seqno =
18430 $rLL->[$K_last_nonblank]->[_TYPE_SEQUENCE_];
18433 # initialization on empty stack..
18434 if ( $max_gnu_stack_index == 0 ) {
18435 $space_count = $level * $rOpts_indent_columns;
18438 # if this is a BLOCK, add the standard increment
18439 elsif ($last_nonblank_block_type) {
18440 $space_count += $standard_increment;
18443 # add the standard increment for containers excluded by user rules
18444 # or which contain here-docs or multiline qw text
18445 elsif ( defined($last_nonblank_seqno)
18446 && $ris_excluded_lp_container->{$last_nonblank_seqno} )
18448 $space_count += $standard_increment;
18451 # if last nonblank token was not structural indentation,
18452 # just use standard increment
18453 elsif ( $last_nonblank_type ne '{' ) {
18454 $space_count += $standard_increment;
18457 # otherwise use the space to the first non-blank level change token
18460 $space_count = $gnu_position_predictor;
18462 my $min_gnu_indentation =
18463 $gnu_stack[$max_gnu_stack_index]->get_spaces();
18465 $available_space = $space_count - $min_gnu_indentation;
18466 if ( $available_space >= $standard_increment ) {
18467 $min_gnu_indentation += $standard_increment;
18469 elsif ( $available_space > 1 ) {
18470 $min_gnu_indentation += $available_space + 1;
18472 elsif ( $last_nonblank_token =~ /^[\{\[\(]$/ ) {
18473 if ( ( $tightness{$last_nonblank_token} < 2 ) ) {
18474 $min_gnu_indentation += 2;
18477 $min_gnu_indentation += 1;
18481 $min_gnu_indentation += $standard_increment;
18483 $available_space = $space_count - $min_gnu_indentation;
18485 if ( $available_space < 0 ) {
18486 $space_count = $min_gnu_indentation;
18487 $available_space = 0;
18492 # update state, but not on a blank token
18493 if ( $types_to_go[$max_index_to_go] ne 'b' ) {
18495 $gnu_stack[$max_gnu_stack_index]->set_have_child(1);
18497 ++$max_gnu_stack_index;
18498 $gnu_stack[$max_gnu_stack_index] =
18499 new_lp_indentation_item( $space_count, $level, $ci_level,
18500 $available_space, $align_paren );
18502 # If the opening paren is beyond the half-line length, then
18503 # we will use the minimum (standard) indentation. This will
18504 # help avoid problems associated with running out of space
18505 # near the end of a line. As a result, in deeply nested
18506 # lists, there will be some indentations which are limited
18507 # to this minimum standard indentation. But the most deeply
18508 # nested container will still probably be able to shift its
18509 # parameters to the right for proper alignment, so in most
18510 # cases this will not be noticeable.
18511 if ( $available_space > 0 && $space_count > $halfway ) {
18512 $gnu_stack[$max_gnu_stack_index]
18513 ->tentatively_decrease_available_spaces($available_space);
18518 # Count commas and look for non-list characters. Once we see a
18519 # non-list character, we give up and don't look for any more commas.
18520 if ( $type eq '=>' ) {
18521 $gnu_arrow_count{$total_depth}++;
18523 # remember '=>' like '=' for estimating breaks (but see above note
18525 $last_gnu_equals{$total_depth} = $max_index_to_go;
18528 elsif ( $type eq ',' ) {
18529 $gnu_comma_count{$total_depth}++;
18532 elsif ( $is_assignment{$type} ) {
18533 $last_gnu_equals{$total_depth} = $max_index_to_go;
18536 # this token might start a new line
18537 # if this is a non-blank..
18538 if ( $type ne 'b' ) {
18543 # this is the first nonblank token of the line
18544 $max_index_to_go == 1 && $types_to_go[0] eq 'b'
18546 # or previous character was one of these:
18547 || $last_nonblank_type_in_batch =~ /^([\:\?\,f])$/
18549 # or previous character was opening and this does not close it
18550 || ( $last_nonblank_type_in_batch eq '{' && $type ne '}' )
18551 || ( $last_nonblank_type_in_batch eq '(' and $type ne ')' )
18553 # or this token is one of these:
18554 || $type =~ /^([\.]|\|\||\&\&)$/
18556 # or this is a closing structure
18557 || ( $last_nonblank_type_in_batch eq '}'
18558 && $last_nonblank_token_in_batch eq
18559 $last_nonblank_type_in_batch )
18561 # or previous token was keyword 'return'
18563 $last_nonblank_type_in_batch eq 'k'
18564 && ( $last_nonblank_token_in_batch eq 'return'
18568 # or starting a new line at certain keywords is fine
18570 && $is_if_unless_and_or_last_next_redo_return{$token} )
18572 # or this is after an assignment after a closing structure
18574 $is_assignment{$last_nonblank_type_in_batch}
18576 $last_last_nonblank_type_in_batch =~ /^[\}\)\]]$/
18578 # and it is significantly to the right
18579 || $gnu_position_predictor > $halfway
18584 check_for_long_gnu_style_lines($max_index_to_go);
18585 $line_start_index_to_go = $max_index_to_go;
18587 # back up 1 token if we want to break before that type
18588 # otherwise, we may strand tokens like '?' or ':' on a line
18589 if ( $line_start_index_to_go > 0 ) {
18590 if ( $last_nonblank_type_in_batch eq 'k' ) {
18592 if ( $want_break_before{$last_nonblank_token_in_batch} )
18594 $line_start_index_to_go--;
18597 elsif ( $want_break_before{$last_nonblank_type_in_batch} ) {
18598 $line_start_index_to_go--;
18604 # remember the predicted position of this token on the output line
18605 if ( $max_index_to_go > $line_start_index_to_go ) {
18606 $gnu_position_predictor =
18607 total_line_length( $line_start_index_to_go, $max_index_to_go );
18610 $gnu_position_predictor =
18611 $space_count + $token_lengths_to_go[$max_index_to_go];
18614 # store the indentation object for this token
18615 # this allows us to manipulate the leading whitespace
18616 # (in case we have to reduce indentation to fit a line) without
18617 # having to change any token values
18618 $leading_spaces_to_go[$max_index_to_go] =
18619 $gnu_stack[$max_gnu_stack_index];
18620 $reduced_spaces_to_go[$max_index_to_go] =
18621 ( $max_gnu_stack_index > 0 && $ci_level )
18622 ? $gnu_stack[ $max_gnu_stack_index - 1 ]
18623 : $gnu_stack[$max_gnu_stack_index];
18627 sub check_for_long_gnu_style_lines {
18629 # look at the current estimated maximum line length, and
18630 # remove some whitespace if it exceeds the desired maximum
18631 my ($mx_index_to_go) = @_;
18633 # this is only for the '-lp' style
18634 return unless ($rOpts_line_up_parentheses);
18636 # nothing can be done if no stack items defined for this line
18637 return if ( $max_gnu_item_index == UNDEFINED_INDEX );
18639 # see if we have exceeded the maximum desired line length
18640 # keep 2 extra free because they are needed in some cases
18641 # (result of trial-and-error testing)
18642 my $spaces_needed =
18643 $gnu_position_predictor -
18644 $maximum_line_length_at_level[ $levels_to_go[$mx_index_to_go] ] + 2;
18646 return if ( $spaces_needed <= 0 );
18648 # We are over the limit, so try to remove a requested number of
18649 # spaces from leading whitespace. We are only allowed to remove
18650 # from whitespace items created on this batch, since others have
18651 # already been used and cannot be undone.
18652 my @candidates = ();
18655 # loop over all whitespace items created for the current batch
18656 for ( $i = 0 ; $i <= $max_gnu_item_index ; $i++ ) {
18657 my $item = $gnu_item_list[$i];
18659 # item must still be open to be a candidate (otherwise it
18660 # cannot influence the current token)
18661 next if ( $item->get_closed() >= 0 );
18663 my $available_spaces = $item->get_available_spaces();
18665 if ( $available_spaces > 0 ) {
18666 push( @candidates, [ $i, $available_spaces ] );
18670 return unless (@candidates);
18672 # sort by available whitespace so that we can remove whitespace
18673 # from the maximum available first
18674 @candidates = sort { $b->[1] <=> $a->[1] } @candidates;
18676 # keep removing whitespace until we are done or have no more
18677 foreach my $candidate (@candidates) {
18678 my ( $i, $available_spaces ) = @{$candidate};
18679 my $deleted_spaces =
18680 ( $available_spaces > $spaces_needed )
18682 : $available_spaces;
18684 # remove the incremental space from this item
18685 $gnu_item_list[$i]->decrease_available_spaces($deleted_spaces);
18689 # update the leading whitespace of this item and all items
18690 # that came after it
18691 for ( ; $i <= $max_gnu_item_index ; $i++ ) {
18693 my $old_spaces = $gnu_item_list[$i]->get_spaces();
18694 if ( $old_spaces >= $deleted_spaces ) {
18695 $gnu_item_list[$i]->decrease_SPACES($deleted_spaces);
18698 # shouldn't happen except for code bug:
18700 my $level = $gnu_item_list[$i_debug]->get_level();
18701 my $ci_level = $gnu_item_list[$i_debug]->get_ci_level();
18702 my $old_level = $gnu_item_list[$i]->get_level();
18703 my $old_ci_level = $gnu_item_list[$i]->get_ci_level();
18705 "program bug with -lp: want to delete $deleted_spaces from item $i, but old=$old_spaces deleted: lev=$level ci=$ci_level deleted: level=$old_level ci=$ci_level\n"
18707 report_definite_bug();
18710 $gnu_position_predictor -= $deleted_spaces;
18711 $spaces_needed -= $deleted_spaces;
18712 last unless ( $spaces_needed > 0 );
18717 sub finish_lp_batch {
18719 # This routine is called once after each output stream batch is
18720 # finished to undo indentation for all incomplete -lp
18721 # indentation levels. It is too risky to leave a level open,
18722 # because then we can't backtrack in case of a long line to follow.
18723 # This means that comments and blank lines will disrupt this
18724 # indentation style. But the vertical aligner may be able to
18725 # get the space back if there are side comments.
18727 # this is only for the 'lp' style
18728 return unless ($rOpts_line_up_parentheses);
18730 # nothing can be done if no stack items defined for this line
18731 return if ( $max_gnu_item_index == UNDEFINED_INDEX );
18733 # loop over all whitespace items created for the current batch
18734 foreach my $i ( 0 .. $max_gnu_item_index ) {
18735 my $item = $gnu_item_list[$i];
18737 # only look for open items
18738 next if ( $item->get_closed() >= 0 );
18740 # Tentatively remove all of the available space
18741 # (The vertical aligner will try to get it back later)
18742 my $available_spaces = $item->get_available_spaces();
18743 if ( $available_spaces > 0 ) {
18745 # delete incremental space for this item
18747 ->tentatively_decrease_available_spaces($available_spaces);
18749 # Reduce the total indentation space of any nodes that follow
18750 # Note that any such nodes must necessarily be dependents
18752 foreach ( $i + 1 .. $max_gnu_item_index ) {
18753 $gnu_item_list[$_]->decrease_SPACES($available_spaces);
18759 } ## end closure set_leading_whitespace
18761 sub reduce_lp_indentation {
18763 # reduce the leading whitespace at token $i if possible by $spaces_needed
18764 # (a large value of $spaces_needed will remove all excess space)
18765 # NOTE: to be called from scan_list only for a sequence of tokens
18766 # contained between opening and closing parens/braces/brackets
18768 my ( $self, $i, $spaces_wanted ) = @_;
18769 my $deleted_spaces = 0;
18771 my $item = $leading_spaces_to_go[$i];
18772 my $available_spaces = $item->get_available_spaces();
18775 $available_spaces > 0
18776 && ( ( $spaces_wanted <= $available_spaces )
18777 || !$item->get_have_child() )
18781 # we'll remove these spaces, but mark them as recoverable
18783 $item->tentatively_decrease_available_spaces($spaces_wanted);
18786 return $deleted_spaces;
18789 ###########################################################
18790 # CODE SECTION 13: Preparing batches for vertical alignment
18791 ###########################################################
18793 sub send_lines_to_vertical_aligner {
18797 # This routine receives a batch of code for which the final line breaks
18798 # have been defined. Here we prepare the lines for passing to the vertical
18799 # aligner. We do the following tasks:
18800 # - mark certain vertical alignment tokens, such as '=', in each line
18801 # - make minor indentation adjustments
18802 # - do logical padding: insert extra blank spaces to help display certain
18803 # logical constructions
18805 my $this_batch = $self->[_this_batch_];
18806 my $rlines_K = $this_batch->[_rlines_K_];
18807 if ( !@{$rlines_K} ) {
18809 # This can't happen because sub grind_batch_of_CODE always receives
18810 # tokens which it turns into one or more lines. If we get here it means
18811 # that a programming error has caused those lines to be lost.
18812 Fault("Unexpected call with no lines");
18815 my $n_last_line = @{$rlines_K} - 1;
18817 my $do_not_pad = $this_batch->[_do_not_pad_];
18818 my $peak_batch_size = $this_batch->[_peak_batch_size_];
18819 my $starting_in_quote = $this_batch->[_starting_in_quote_];
18820 my $ending_in_quote = $this_batch->[_ending_in_quote_];
18821 my $is_static_block_comment = $this_batch->[_is_static_block_comment_];
18822 my $ibeg0 = $this_batch->[_ibeg0_];
18823 my $rK_to_go = $this_batch->[_rK_to_go_];
18824 my $batch_count = $this_batch->[_batch_count_];
18825 my $rix_seqno_controlling_ci = $this_batch->[_rix_seqno_controlling_ci_];
18827 my $rLL = $self->[_rLL_];
18828 my $Klimit = $self->[_Klimit_];
18830 my ( $Kbeg_next, $Kend_next ) = @{ $rlines_K->[0] };
18831 my $type_beg_next = $rLL->[$Kbeg_next]->[_TYPE_];
18832 my $token_beg_next = $rLL->[$Kbeg_next]->[_TOKEN_];
18833 my $type_end_next = $rLL->[$Kend_next]->[_TYPE_];
18835 # Construct indexes to the global_to_go arrays so that called routines can
18836 # still access those arrays. This might eventually be removed
18837 # when all called routines have been converted to access token values
18838 # in the rLL array instead.
18839 my $Kbeg0 = $Kbeg_next;
18840 my ( $ri_first, $ri_last );
18841 foreach my $rline ( @{$rlines_K} ) {
18842 my ( $Kbeg, $Kend ) = @{$rline};
18843 my $ibeg = $ibeg0 + $Kbeg - $Kbeg0;
18844 my $iend = $ibeg0 + $Kend - $Kbeg0;
18845 push @{$ri_first}, $ibeg;
18846 push @{$ri_last}, $iend;
18849 my ( $cscw_block_comment, $closing_side_comment );
18850 if ( $rOpts->{'closing-side-comments'} ) {
18851 ( $closing_side_comment, $cscw_block_comment ) =
18852 $self->add_closing_side_comment();
18855 my $rindentation_list = [0]; # ref to indentations for each line
18857 # define the array @{$ralignment_type_to_go} for the output tokens
18858 # which will be non-blank for each special token (such as =>)
18859 # for which alignment is required.
18860 my $ralignment_type_to_go =
18861 $self->set_vertical_alignment_markers( $ri_first, $ri_last );
18863 # flush before a long if statement to avoid unwanted alignment
18864 if ( $n_last_line > 0
18865 && $type_beg_next eq 'k'
18866 && $token_beg_next =~ /^(if|unless)$/ )
18868 $self->flush_vertical_aligner();
18871 $self->undo_ci( $ri_first, $ri_last, $rix_seqno_controlling_ci );
18873 $self->set_logical_padding( $ri_first, $ri_last, $peak_batch_size,
18874 $starting_in_quote )
18875 if ( $rOpts->{'logical-padding'} );
18877 # Resum lengths. We need accurate lengths for making alignment patterns,
18878 # and we may have unmasked a semicolon which was not included at the start.
18879 for ( 0 .. $max_index_to_go ) {
18880 $summed_lengths_to_go[ $_ + 1 ] =
18881 $summed_lengths_to_go[$_] + $token_lengths_to_go[$_];
18884 # loop to prepare each line for shipment
18885 my ( $Kbeg, $type_beg, $token_beg );
18886 my ( $Kend, $type_end );
18887 for my $n ( 0 .. $n_last_line ) {
18889 my $ibeg = $ri_first->[$n];
18890 my $iend = $ri_last->[$n];
18891 my $rline = $rlines_K->[$n];
18892 my $forced_breakpoint = $rline->[2];
18894 # we may need to look at variables on three consecutive lines ...
18896 # Some vars on line [n-1], if any:
18897 my $Kbeg_last = $Kbeg;
18898 my $type_beg_last = $type_beg;
18899 my $token_beg_last = $token_beg;
18900 my $Kend_last = $Kend;
18901 my $type_end_last = $type_end;
18903 # Some vars on line [n]:
18904 $Kbeg = $Kbeg_next;
18905 $type_beg = $type_beg_next;
18906 $token_beg = $token_beg_next;
18907 $Kend = $Kend_next;
18908 $type_end = $type_end_next;
18910 # Only forward ending K values of non-comments down the pipeline.
18911 # This is equivalent to checking that the last CODE_type is blank or
18912 # equal to 'VER'. See also sub resync_lines_and_tokens for related
18913 # coding. Note that '$batch_CODE_type' is the code type of the line
18914 # to which the ending token belongs.
18915 my $batch_CODE_type = $this_batch->[_batch_CODE_type_];
18917 $batch_CODE_type && $batch_CODE_type ne 'VER' ? undef : $Kend;
18919 # We use two slightly different definitions of level jump at the end
18921 # $ljump is the level jump needed by 'sub set_adjusted_indentation'
18922 # $level_jump is the level jump needed by the vertical aligner.
18923 my $ljump = 0; # level jump at end of line
18925 # Get some vars on line [n+1], if any:
18926 if ( $n < $n_last_line ) {
18927 ( $Kbeg_next, $Kend_next ) =
18928 @{ $rlines_K->[ $n + 1 ] };
18929 $type_beg_next = $rLL->[$Kbeg_next]->[_TYPE_];
18930 $token_beg_next = $rLL->[$Kbeg_next]->[_TOKEN_];
18931 $type_end_next = $rLL->[$Kend_next]->[_TYPE_];
18932 $ljump = $rLL->[$Kbeg_next]->[_LEVEL_] - $rLL->[$Kend]->[_LEVEL_];
18936 # Patch for git #51, a bare closing qw paren was not outdented
18937 # if the flag '-nodelete-old-newlines is set
18938 my $Kbeg_next = $self->K_next_code($Kend);
18939 if ( defined($Kbeg_next) ) {
18941 $rLL->[$Kbeg_next]->[_LEVEL_] - $rLL->[$Kend]->[_LEVEL_];
18945 # level jump at end of line for the vertical aligner:
18949 : $rLL->[ $Kend + 1 ]->[_SLEVEL_] - $rLL->[$Kbeg]->[_SLEVEL_];
18951 $self->delete_needless_alignments( $ibeg, $iend,
18952 $ralignment_type_to_go );
18954 my ( $rtokens, $rfields, $rpatterns, $rfield_lengths ) =
18955 $self->make_alignment_patterns( $ibeg, $iend,
18956 $ralignment_type_to_go );
18958 my ( $indentation, $lev, $level_end, $terminal_type,
18959 $terminal_block_type, $is_semicolon_terminated, $is_outdented_line )
18960 = $self->set_adjusted_indentation( $ibeg, $iend, $rfields,
18961 $rpatterns, $ri_first, $ri_last,
18962 $rindentation_list, $ljump, $starting_in_quote,
18963 $is_static_block_comment, );
18965 # we will allow outdenting of long lines..
18966 my $outdent_long_lines = (
18968 # which are long quotes, if allowed
18969 ( $type_beg eq 'Q' && $rOpts->{'outdent-long-quotes'} )
18971 # which are long block comments, if allowed
18974 && $rOpts->{'outdent-long-comments'}
18976 # but not if this is a static block comment
18977 && !$is_static_block_comment
18981 my $break_alignment_before = $is_outdented_line || $do_not_pad;
18982 my $break_alignment_after = $is_outdented_line;
18984 # flush at an 'if' which follows a line with (1) terminal semicolon
18985 # or (2) terminal block_type which is not an 'if'. This prevents
18986 # unwanted alignment between the lines.
18987 if ( $type_beg eq 'k' && $token_beg eq 'if' ) {
18988 my $Km = $self->K_previous_code($Kbeg);
18990 my $block_type_m = 'b';
18991 if ( defined($Km) ) {
18992 $type_m = $rLL->[$Km]->[_TYPE_];
18993 $block_type_m = $rLL->[$Km]->[_BLOCK_TYPE_];
18996 # break after anything that is not if-like
18997 $break_alignment_before ||= $type_m eq ';'
18998 || ( $type_m eq '}'
18999 && $block_type_m ne 'if'
19000 && $block_type_m ne 'unless'
19001 && $block_type_m ne 'elsif'
19002 && $block_type_m ne 'else' );
19005 my $rvertical_tightness_flags =
19006 $self->set_vertical_tightness_flags( $n, $n_last_line, $ibeg, $iend,
19007 $ri_first, $ri_last, $ending_in_quote, $closing_side_comment );
19009 # Set a flag at the final ':' of a ternary chain to request
19010 # vertical alignment of the final term. Here is a
19011 # slightly complex example:
19013 # $self->{_text} = (
19015 # : $type eq 'item' ? "the $section entry"
19016 # : "the section on $section"
19020 # ? ( $section ? ' in ' : '' ) . "the $page$page_ext manpage"
19021 # : ' elsewhere in this document'
19024 my $is_terminal_ternary = 0;
19026 if ( $type_beg eq ':' || $n > 0 && $type_end_last eq ':' ) {
19027 my $last_leading_type = $n > 0 ? $type_beg_last : ':';
19028 if ( $terminal_type ne ';'
19029 && $n_last_line > $n
19030 && $level_end == $lev )
19032 $level_end = $rLL->[$Kbeg_next]->[_LEVEL_];
19033 $terminal_type = $rLL->[$Kbeg_next]->[_TYPE_];
19036 $last_leading_type eq ':'
19037 && ( ( $terminal_type eq ';' && $level_end <= $lev )
19038 || ( $terminal_type ne ':' && $level_end < $lev ) )
19042 # the terminal term must not contain any ternary terms, as in
19044 # $Is_MSWin32 ? ".\\echo$$"
19045 # : $Is_MacOS ? ":echo$$"
19046 # : ( $Is_NetWare ? "echo$$" : "./echo$$" )
19048 $is_terminal_ternary = 1;
19050 my $KP = $rLL->[$Kbeg]->[_KNEXT_SEQ_ITEM_];
19051 while ( defined($KP) && $KP <= $Kend ) {
19052 my $type_KP = $rLL->[$KP]->[_TYPE_];
19053 if ( $type_KP eq '?' || $type_KP eq ':' ) {
19054 $is_terminal_ternary = 0;
19057 $KP = $rLL->[$KP]->[_KNEXT_SEQ_ITEM_];
19062 my $level_adj = $lev;
19063 my $radjusted_levels = $self->[_radjusted_levels_];
19064 if ( defined($radjusted_levels) && @{$radjusted_levels} == @{$rLL} ) {
19065 $level_adj = $radjusted_levels->[$Kbeg];
19066 if ( $level_adj < 0 ) { $level_adj = 0 }
19069 # add any new closing side comment to the last line
19070 if ( $closing_side_comment && $n == $n_last_line && @{$rfields} ) {
19071 $rfields->[-1] .= " $closing_side_comment";
19073 # NOTE: Patch for csc. We can just use 1 for the length of the csc
19074 # because its length should not be a limiting factor from here on.
19075 $rfield_lengths->[-1] += 2;
19078 # Programming check: (shouldn't happen)
19079 # The number of tokens which separate the fields must always be
19080 # one less than the number of fields. If this is not true then
19081 # an error has been introduced in sub make_alignment_patterns.
19082 if ( @{$rfields} && ( @{$rtokens} != ( @{$rfields} - 1 ) ) ) {
19083 my $nt = @{$rtokens};
19084 my $nf = @{$rfields};
19086 Program bug in Perl::Tidy::Formatter, probably in sub 'make_alignment_patterns':
19087 The number of tokens = $nt should be one less than number of fields: $nf
19092 # Set flag which tells if this line is contained in a multi-line list
19093 my $list_seqno = $self->is_list_by_K($Kbeg);
19095 # send this new line down the pipe
19096 my $rvalign_hash = {};
19097 $rvalign_hash->{level} = $lev;
19098 $rvalign_hash->{level_end} = $level_end;
19099 $rvalign_hash->{level_adj} = $level_adj;
19100 $rvalign_hash->{indentation} = $indentation;
19101 $rvalign_hash->{list_seqno} = $list_seqno;
19102 $rvalign_hash->{outdent_long_lines} = $outdent_long_lines;
19103 $rvalign_hash->{is_terminal_ternary} = $is_terminal_ternary;
19104 $rvalign_hash->{rvertical_tightness_flags} = $rvertical_tightness_flags;
19105 $rvalign_hash->{level_jump} = $level_jump;
19106 $rvalign_hash->{rfields} = $rfields;
19107 $rvalign_hash->{rpatterns} = $rpatterns;
19108 $rvalign_hash->{rtokens} = $rtokens;
19109 $rvalign_hash->{rfield_lengths} = $rfield_lengths;
19110 $rvalign_hash->{terminal_block_type} = $terminal_block_type;
19111 $rvalign_hash->{batch_count} = $batch_count;
19112 $rvalign_hash->{break_alignment_before} = $break_alignment_before;
19113 $rvalign_hash->{break_alignment_after} = $break_alignment_after;
19114 $rvalign_hash->{Kend} = $Kend_code;
19115 $rvalign_hash->{ci_level} = $ci_levels_to_go[$ibeg];
19117 my $vao = $self->[_vertical_aligner_object_];
19118 $vao->valign_input($rvalign_hash);
19122 # Set flag indicating if this line ends in an opening
19123 # token and is very short, so that a blank line is not
19124 # needed if the subsequent line is a comment.
19125 # Examples of what we are looking for:
19131 $self->[_last_output_short_opening_token_]
19133 # line ends in opening token
19135 = $is_opening_type{$type_end}
19139 # line has either single opening token
19142 # or is a single token followed by opening token.
19143 # Note that sub identifiers have blanks like 'sub doit'
19144 # $token_beg !~ /\s+/
19145 || ( $Kend - $Kbeg <= 2 && index( $token_beg, ' ' ) < 0 )
19148 # and limit total to 10 character widths
19149 && token_sequence_length( $ibeg, $iend ) <= 10;
19151 } # end of loop to output each line
19153 # remember indentation of lines containing opening containers for
19154 # later use by sub set_adjusted_indentation
19155 $self->save_opening_indentation( $ri_first, $ri_last, $rindentation_list );
19157 # output any new -cscw block comment
19158 if ($cscw_block_comment) {
19159 $self->flush_vertical_aligner();
19160 my $file_writer_object = $self->[_file_writer_object_];
19161 $file_writer_object->write_code_line( $cscw_block_comment . "\n" );
19166 { ## begin closure set_vertical_alignment_markers
19167 my %is_vertical_alignment_type;
19168 my %is_not_vertical_alignment_token;
19169 my %is_vertical_alignment_keyword;
19170 my %is_terminal_alignment_type;
19171 my %is_low_level_alignment_token;
19177 # Replaced =~ and // in the list. // had been removed in RT 119588
19179 = **= += *= &= <<= &&= -= /= |= >>= ||= //= .= %= ^= x=
19180 { ? : => && || ~~ !~~ =~ !~ // <=> ->
19182 @is_vertical_alignment_type{@q} = (1) x scalar(@q);
19184 # These 'tokens' are not aligned. We need this to remove [
19185 # from the above list because it has type ='{'
19187 @is_not_vertical_alignment_token{@q} = (1) x scalar(@q);
19189 # these are the only types aligned at a line end
19191 @is_terminal_alignment_type{@q} = (1) x scalar(@q);
19193 # these tokens only align at line level
19195 @is_low_level_alignment_token{@q} = (1) x scalar(@q);
19197 # eq and ne were removed from this list to improve alignment chances
19198 @q = qw(if unless and or err for foreach while until);
19199 @is_vertical_alignment_keyword{@q} = (1) x scalar(@q);
19202 sub set_vertical_alignment_markers {
19204 # This routine takes the first step toward vertical alignment of the
19205 # lines of output text. It looks for certain tokens which can serve as
19206 # vertical alignment markers (such as an '=').
19208 # Method: We look at each token $i in this output batch and set
19209 # $ralignment_type_to_go->[$i] equal to those tokens at which we would
19210 # accept vertical alignment.
19212 my ( $self, $ri_first, $ri_last ) = @_;
19213 my $rspecial_side_comment_type = $self->[_rspecial_side_comment_type_];
19215 my $ralignment_type_to_go;
19217 # Initialize the alignment array. Note that closing side comments can
19218 # insert up to 2 additional tokens beyond the original
19219 # $max_index_to_go, so we need to check ri_last for the last index.
19220 my $max_line = @{$ri_first} - 1;
19221 my $iend = $ri_last->[$max_line];
19222 if ( $iend < $max_index_to_go ) { $iend = $max_index_to_go }
19224 # nothing to do if we aren't allowed to change whitespace
19225 # or there is only 1 token
19226 if ( $iend == 0 || !$rOpts_add_whitespace ) {
19227 for my $i ( 0 .. $iend ) {
19228 $ralignment_type_to_go->[$i] = '';
19230 return $ralignment_type_to_go;
19233 # remember the index of last nonblank token before any sidecomment
19234 my $i_terminal = $max_index_to_go;
19235 if ( $types_to_go[$i_terminal] eq '#' ) {
19236 if ( $i_terminal > 0 && $types_to_go[ --$i_terminal ] eq 'b' ) {
19237 if ( $i_terminal > 0 ) { --$i_terminal }
19241 # look at each line of this batch..
19242 my $last_vertical_alignment_before_index;
19243 my $vert_last_nonblank_type;
19244 my $vert_last_nonblank_token;
19245 my $vert_last_nonblank_block_type;
19247 foreach my $line ( 0 .. $max_line ) {
19248 my $ibeg = $ri_first->[$line];
19249 my $iend = $ri_last->[$line];
19250 $last_vertical_alignment_before_index = -1;
19251 $vert_last_nonblank_type = '';
19252 $vert_last_nonblank_token = '';
19253 $vert_last_nonblank_block_type = '';
19255 # look at each token in this output line..
19256 my $level_beg = $levels_to_go[$ibeg];
19257 foreach my $i ( $ibeg .. $iend ) {
19258 my $alignment_type = '';
19259 my $type = $types_to_go[$i];
19260 my $block_type = $block_type_to_go[$i];
19261 my $token = $tokens_to_go[$i];
19263 # do not align tokens at lower level then start of line
19264 # except for side comments
19265 if ( $levels_to_go[$i] < $levels_to_go[$ibeg]
19268 $ralignment_type_to_go->[$i] = '';
19272 #--------------------------------------------------------
19273 # First see if we want to align BEFORE this token
19274 #--------------------------------------------------------
19276 # The first possible token that we can align before
19277 # is index 2 because: 1) it doesn't normally make sense to
19278 # align before the first token and 2) the second
19279 # token must be a blank if we are to align before
19281 if ( $i < $ibeg + 2 ) { }
19283 # must follow a blank token
19284 elsif ( $types_to_go[ $i - 1 ] ne 'b' ) { }
19286 # align a side comment --
19287 elsif ( $type eq '#' ) {
19289 my $KK = $K_to_go[$i];
19290 my $sc_type = $rspecial_side_comment_type->{$KK};
19294 # it is any specially marked side comment
19297 # or it is a static side comment
19298 || ( $rOpts->{'static-side-comments'}
19299 && $token =~ /$static_side_comment_pattern/ )
19301 # or a closing side comment
19302 || ( $vert_last_nonblank_block_type
19304 /$closing_side_comment_prefix_pattern/ )
19307 $alignment_type = $type;
19308 } ## Example of a static side comment
19311 # otherwise, do not align two in a row to create a
19313 elsif ( $last_vertical_alignment_before_index == $i - 2 ) { }
19315 # align before one of these keywords
19316 # (within a line, since $i>1)
19317 elsif ( $type eq 'k' ) {
19319 # /^(if|unless|and|or|eq|ne)$/
19320 if ( $is_vertical_alignment_keyword{$token} ) {
19321 $alignment_type = $token;
19325 # align before one of these types..
19326 # Note: add '.' after new vertical aligner is operational
19327 elsif ( $is_vertical_alignment_type{$type}
19328 && !$is_not_vertical_alignment_token{$token} )
19330 $alignment_type = $token;
19332 # Do not align a terminal token. Although it might
19333 # occasionally look ok to do this, this has been found to be
19334 # a good general rule. The main problems are:
19335 # (1) that the terminal token (such as an = or :) might get
19336 # moved far to the right where it is hard to see because
19337 # nothing follows it, and
19338 # (2) doing so may prevent other good alignments.
19339 # Current exceptions are && and || and =>
19340 if ( $i == $iend || $i >= $i_terminal ) {
19341 $alignment_type = ""
19342 unless ( $is_terminal_alignment_type{$type} );
19345 # Do not align leading ': (' or '. ('. This would prevent
19346 # alignment in something like the following:
19348 # ( $input_line_number < 10 ) ? " "
19349 # : ( $input_line_number < 100 ) ? " "
19353 # ( $case_matters ? $accessor : " lc($accessor) " )
19354 # . ( $yesno ? " eq " : " ne " )
19356 # Also, do not align a ( following a leading ? so we can
19357 # align something like this:
19358 # $converter{$_}->{ushortok} =
19359 # $PDL::IO::Pic::biggrays
19360 # ? ( m/GIF/ ? 0 : 1 )
19361 # : ( m/GIF|RAST|IFF/ ? 0 : 1 );
19364 && $types_to_go[ $i - 1 ] eq 'b'
19365 && ( $types_to_go[$ibeg] eq '.'
19366 || $types_to_go[$ibeg] eq ':'
19367 || $types_to_go[$ibeg] eq '?' )
19370 $alignment_type = "";
19373 # Certain tokens only align at the same level as the
19374 # initial line level
19375 if ( $is_low_level_alignment_token{$token}
19376 && $levels_to_go[$i] != $level_beg )
19378 $alignment_type = "";
19381 # For a paren after keyword, only align something like this:
19383 # elsif ( $b ) { &b }
19384 if ( $token eq '(' ) {
19386 if ( $vert_last_nonblank_type eq 'k' ) {
19387 $alignment_type = ""
19388 unless $vert_last_nonblank_token =~
19389 /^(if|unless|elsif)$/;
19392 # Do not align a spaced-function-paren if requested.
19393 # Issue git #53. Note that $i-1 is a blank token if we
19395 if ( !$rOpts_function_paren_vertical_alignment
19396 && $i > $ibeg + 1 )
19398 my $type_m = $types_to_go[ $i - 2 ];
19399 my $token_m = $tokens_to_go[ $i - 2 ];
19401 # this is the same test as 'space-function-paren'
19402 if ( $type_m =~ /^[wUG]$/
19404 || $type_m =~ /^[wi]$/
19405 && $token_m =~ /^(\&|->)/ )
19407 $alignment_type = "";
19412 # be sure the alignment tokens are unique
19413 # This didn't work well: reason not determined
19414 # if ($token ne $type) {$alignment_type .= $type}
19417 # NOTE: This is deactivated because it causes the previous
19418 # if/elsif alignment to fail
19419 #elsif ( $type eq '}' && $token eq '}' && $block_type_to_go[$i])
19420 #{ $alignment_type = $type; }
19422 if ($alignment_type) {
19423 $last_vertical_alignment_before_index = $i;
19426 #--------------------------------------------------------
19427 # Next see if we want to align AFTER the previous nonblank
19428 #--------------------------------------------------------
19430 # We want to line up ',' and interior ';' tokens, with the added
19431 # space AFTER these tokens. (Note: interior ';' is included
19432 # because it may occur in short blocks).
19435 # we haven't already set it
19438 # and its not the first token of the line
19441 # and it follows a blank
19442 && $types_to_go[ $i - 1 ] eq 'b'
19444 # and previous token IS one of these:
19445 && ( $vert_last_nonblank_type eq ','
19446 || $vert_last_nonblank_type eq ';' )
19448 # and it's NOT one of these
19451 && !$is_closing_token{$type} )
19453 # then go ahead and align
19457 $alignment_type = $vert_last_nonblank_type;
19460 #--------------------------------------------------------
19461 # Undo alignment in special cases
19462 #--------------------------------------------------------
19463 if ($alignment_type) {
19465 # do not align the opening brace of an anonymous sub
19466 if ( $token eq '{' && $block_type =~ /$ASUB_PATTERN/ ) {
19467 $alignment_type = "";
19471 #--------------------------------------------------------
19472 # then store the value
19473 #--------------------------------------------------------
19474 $ralignment_type_to_go->[$i] = $alignment_type;
19475 if ( $type ne 'b' ) {
19476 $vert_last_nonblank_type = $type;
19477 $vert_last_nonblank_token = $token;
19478 $vert_last_nonblank_block_type = $block_type;
19482 return $ralignment_type_to_go;
19484 } ## end closure set_vertical_alignment_markers
19488 # get opening and closing sequence numbers of a token for the vertical
19489 # aligner. Assign qw quotes a value to allow qw opening and closing tokens
19490 # to be treated somewhat like opening and closing tokens for stacking
19491 # tokens by the vertical aligner.
19492 my ( $self, $ii, $ending_in_quote ) = @_;
19494 my $rLL = $self->[_rLL_];
19495 my $this_batch = $self->[_this_batch_];
19496 my $rK_to_go = $this_batch->[_rK_to_go_];
19498 my $KK = $rK_to_go->[$ii];
19499 my $seqno = $rLL->[$KK]->[_TYPE_SEQUENCE_];
19501 if ( $rLL->[$KK]->[_TYPE_] eq 'q' ) {
19503 my $token = $rLL->[$KK]->[_TOKEN_];
19505 $seqno = $SEQ_QW if ( $token =~ /^qw\s*[\(\{\[]/ );
19508 if ( !$ending_in_quote ) {
19509 $seqno = $SEQ_QW if ( $token =~ /[\)\}\]]$/ );
19517 my %undo_extended_ci;
19519 sub initialize_undo_ci {
19520 %undo_extended_ci = ();
19526 # Undo continuation indentation in certain sequences
19527 my ( $self, $ri_first, $ri_last, $rix_seqno_controlling_ci ) = @_;
19528 my ( $line_1, $line_2, $lev_last );
19529 my $this_line_is_semicolon_terminated;
19530 my $max_line = @{$ri_first} - 1;
19532 my $rseqno_controlling_my_ci = $self->[_rseqno_controlling_my_ci_];
19534 # Prepare a list of controlling indexes for each line if required.
19535 # This is used for efficient processing below. Note: this is
19536 # critical for speed. In the initial implementation I just looped
19537 # through the @$rix_seqno_controlling_ci list below. Using NYT_prof, I
19538 # found that this routine was causing a huge run time in large lists.
19539 # On a very large list test case, this new coding dropped the run time
19540 # of this routine from 30 seconds to 169 milliseconds.
19541 my @i_controlling_ci;
19542 if ( @{$rix_seqno_controlling_ci} ) {
19543 my @tmp = reverse @{$rix_seqno_controlling_ci};
19544 my $ix_next = pop @tmp;
19545 foreach my $line ( 0 .. $max_line ) {
19546 my $iend = $ri_last->[$line];
19547 while ( defined($ix_next) && $ix_next <= $iend ) {
19548 push @{ $i_controlling_ci[$line] }, $ix_next;
19549 $ix_next = pop @tmp;
19554 # Loop over all lines of the batch ...
19556 # Workaround for problem c007, in which the combination -lp -xci
19557 # can produce a "Program bug" message in unusual circumstances.
19558 my $skip_SECTION_1 = $rOpts_line_up_parentheses
19559 && $rOpts->{'extended-continuation-indentation'};
19561 foreach my $line ( 0 .. $max_line ) {
19563 my $ibeg = $ri_first->[$line];
19564 my $iend = $ri_last->[$line];
19565 my $lev = $levels_to_go[$ibeg];
19567 ####################################
19568 # SECTION 1: Undo needless common CI
19569 ####################################
19571 # We are looking at leading tokens and looking for a sequence all
19572 # at the same level and all at a higher level than enclosing lines.
19574 # For example, we can undo continuation indentation in sort/map/grep
19577 # my $dat1 = pack( "n*",
19578 # map { $_, $lookup->{$_} }
19579 # sort { $a <=> $b }
19580 # grep { $lookup->{$_} ne $default } keys %$lookup );
19584 # my $dat1 = pack( "n*",
19585 # map { $_, $lookup->{$_} }
19586 # sort { $a <=> $b }
19587 # grep { $lookup->{$_} ne $default } keys %$lookup );
19589 if ( $line > 0 && !$skip_SECTION_1 ) {
19591 # if we have started a chain..
19594 # see if it continues..
19595 if ( $lev == $lev_last ) {
19596 if ( $types_to_go[$ibeg] eq 'k'
19597 && $is_sort_map_grep{ $tokens_to_go[$ibeg] } )
19600 # chain continues...
19601 # check for chain ending at end of a statement
19602 if ( $line == $max_line ) {
19604 # see of this line ends a statement
19605 $this_line_is_semicolon_terminated =
19606 $types_to_go[$iend] eq ';'
19608 # with possible side comment
19609 || ( $types_to_go[$iend] eq '#'
19610 && $iend - $ibeg >= 2
19611 && $types_to_go[ $iend - 2 ] eq ';'
19612 && $types_to_go[ $iend - 1 ] eq 'b' );
19615 if ($this_line_is_semicolon_terminated);
19623 elsif ( $lev < $lev_last ) {
19625 # chain ends with previous line
19626 $line_2 = $line - 1;
19628 elsif ( $lev > $lev_last ) {
19634 # undo the continuation indentation if a chain ends
19635 if ( defined($line_2) && defined($line_1) ) {
19636 my $continuation_line_count = $line_2 - $line_1 + 1;
19637 @ci_levels_to_go[ @{$ri_first}[ $line_1 .. $line_2 ] ]
19638 = (0) x ($continuation_line_count)
19639 if ( $continuation_line_count >= 0 );
19640 @leading_spaces_to_go[ @{$ri_first}
19641 [ $line_1 .. $line_2 ] ] =
19642 @reduced_spaces_to_go[ @{$ri_first}
19643 [ $line_1 .. $line_2 ] ];
19648 # not in a chain yet..
19651 # look for start of a new sort/map/grep chain
19652 if ( $lev > $lev_last ) {
19653 if ( $types_to_go[$ibeg] eq 'k'
19654 && $is_sort_map_grep{ $tokens_to_go[$ibeg] } )
19662 ######################################
19663 # SECTION 2: Undo ci at cuddled blocks
19664 ######################################
19666 # Note that sub set_adjusted_indentation will be called later to
19667 # actually do this, but for now we will tentatively mark cuddled
19668 # lines with ci=0 so that the the -xci loop which follows will be
19669 # correct at cuddles.
19671 $types_to_go[$ibeg] eq '}'
19672 && ( $nesting_depth_to_go[$iend] + 1 ==
19673 $nesting_depth_to_go[$ibeg] )
19676 my $terminal_type = $types_to_go[$iend];
19677 if ( $terminal_type eq '#' && $iend > $ibeg ) {
19678 $terminal_type = $types_to_go[ $iend - 1 ];
19679 if ( $terminal_type eq '#' && $iend - 1 > $ibeg ) {
19680 $terminal_type = $types_to_go[ $iend - 2 ];
19683 if ( $terminal_type eq '{' ) {
19684 my $Kbeg = $K_to_go[$ibeg];
19685 $ci_levels_to_go[$ibeg] = 0;
19689 #########################################################
19690 # SECTION 3: Undo ci set by sub extended_ci if not needed
19691 #########################################################
19693 # Undo the ci of the leading token if its controlling token
19694 # went out on a previous line without ci
19695 if ( $ci_levels_to_go[$ibeg] ) {
19696 my $Kbeg = $K_to_go[$ibeg];
19697 my $seqno = $rseqno_controlling_my_ci->{$Kbeg};
19698 if ( $seqno && $undo_extended_ci{$seqno} ) {
19700 # but do not undo ci set by the -lp flag
19701 if ( !ref( $reduced_spaces_to_go[$ibeg] ) ) {
19702 $ci_levels_to_go[$ibeg] = 0;
19703 $leading_spaces_to_go[$ibeg] =
19704 $reduced_spaces_to_go[$ibeg];
19709 # Flag any controlling opening tokens in lines without ci. This
19710 # will be used later in the above if statement to undo the ci which
19711 # they added. The array i_controlling_ci[$line] was prepared at
19712 # the top of this routine.
19713 if ( !$ci_levels_to_go[$ibeg]
19714 && defined( $i_controlling_ci[$line] ) )
19716 foreach my $i ( @{ $i_controlling_ci[$line] } ) {
19717 my $seqno = $type_sequence_to_go[$i];
19718 $undo_extended_ci{$seqno} = 1;
19729 { ## begin closure set_logical_padding
19734 my @q = qw( + - * / );
19735 @is_math_op{@q} = (1) x scalar(@q);
19738 sub set_logical_padding {
19740 # Look at a batch of lines and see if extra padding can improve the
19741 # alignment when there are certain leading operators. Here is an
19742 # example, in which some extra space is introduced before
19743 # '( $year' to make it line up with the subsequent lines:
19745 # if ( ( $Year < 1601 )
19746 # || ( $Year > 2899 )
19747 # || ( $EndYear < 1601 )
19748 # || ( $EndYear > 2899 ) )
19750 # &Error_OutOfRange;
19753 my ( $self, $ri_first, $ri_last, $peak_batch_size, $starting_in_quote )
19755 my $max_line = @{$ri_first} - 1;
19757 my ( $ibeg, $ibeg_next, $ibegm, $iend, $iendm, $ipad, $pad_spaces,
19758 $tok_next, $type_next, $has_leading_op_next, $has_leading_op );
19760 # Patch to produce padding in the first line of short code blocks.
19761 # This is part of an update to fix cases b562 .. b983.
19762 # This is needed to compensate for a change which was made in 'sub
19763 # starting_one_line_block' to prevent blinkers. Previously, that sub
19764 # would not look at the total block size and rely on sub
19765 # set_continuation_breaks to break up long blocks. Consequently, the
19766 # first line of those batches would end in the opening block brace of a
19767 # sort/map/grep/eval block. When this was changed to immediately check
19768 # for blocks which were too long, the opening block brace would go out
19769 # in a single batch, and the block contents would go out as the next
19770 # batch. This caused the logic in this routine which decides if the
19771 # first line should be padded to be incorrect. To fix this, we set a
19772 # flag if the previous batch ended in an opening sort/map/grep/eval
19773 # block brace, and use it to adjust the logic to compensate.
19775 # For example, the following would have previously been a single batch
19776 # but now is two batches. We want to pad the line starting in '$dir':
19777 # my (@indices) = # batch n-1 (prev batch n)
19778 # sort { # batch n-1 (prev batch n)
19779 # $dir eq 'left' # batch n
19780 # ? $cells[$a] <=> $cells[$b] # batch n
19781 # : $cells[$b] <=> $cells[$a]; # batch n
19782 # } ( 0 .. $#cells ); # batch n
19784 my $rLL = $self->[_rLL_];
19785 my $K0 = $K_to_go[0];
19786 my $Kprev = $self->K_previous_code($K0);
19787 my $is_short_block;
19788 if ( defined($Kprev)
19789 && $rLL->[$Kprev]->[_BLOCK_TYPE_] )
19791 my $block_type = $rLL->[$Kprev]->[_BLOCK_TYPE_];
19792 $is_short_block = $is_sort_map_grep_eval{$block_type};
19793 $is_short_block ||= $want_one_line_block{$block_type};
19796 # looking at each line of this batch..
19797 foreach my $line ( 0 .. $max_line - 1 ) {
19799 # see if the next line begins with a logical operator
19800 $ibeg = $ri_first->[$line];
19801 $iend = $ri_last->[$line];
19802 $ibeg_next = $ri_first->[ $line + 1 ];
19803 $tok_next = $tokens_to_go[$ibeg_next];
19804 $type_next = $types_to_go[$ibeg_next];
19806 $has_leading_op_next = ( $tok_next =~ /^\w/ )
19807 ? $is_chain_operator{$tok_next} # + - * / : ? && ||
19808 : $is_chain_operator{$type_next}; # and, or
19810 next unless ($has_leading_op_next);
19812 # next line must not be at lesser depth
19814 if ( $nesting_depth_to_go[$ibeg] >
19815 $nesting_depth_to_go[$ibeg_next] );
19817 # identify the token in this line to be padded on the left
19820 # handle lines at same depth...
19821 if ( $nesting_depth_to_go[$ibeg] ==
19822 $nesting_depth_to_go[$ibeg_next] )
19825 # if this is not first line of the batch ...
19828 # and we have leading operator..
19829 next if $has_leading_op;
19831 # Introduce padding if..
19832 # 1. the previous line is at lesser depth, or
19833 # 2. the previous line ends in an assignment
19834 # 3. the previous line ends in a 'return'
19835 # 4. the previous line ends in a comma
19836 # Example 1: previous line at lesser depth
19837 # if ( ( $Year < 1601 ) # <- we are here but
19838 # || ( $Year > 2899 ) # list has not yet
19839 # || ( $EndYear < 1601 ) # collapsed vertically
19840 # || ( $EndYear > 2899 ) )
19843 # Example 2: previous line ending in assignment:
19845 # $year % 4 ? 0 # <- We are here
19846 # : $year % 100 ? 1
19847 # : $year % 400 ? 0
19850 # Example 3: previous line ending in comma:
19857 # be sure levels agree (do not indent after an indented 'if')
19859 if ( $levels_to_go[$ibeg] ne $levels_to_go[$ibeg_next] );
19861 # allow padding on first line after a comma but only if:
19862 # (1) this is line 2 and
19863 # (2) there are at more than three lines and
19864 # (3) lines 3 and 4 have the same leading operator
19865 # These rules try to prevent padding within a long
19866 # comma-separated list.
19868 if ( $types_to_go[$iendm] eq ','
19872 my $ibeg_next_next = $ri_first->[ $line + 2 ];
19873 my $tok_next_next = $tokens_to_go[$ibeg_next_next];
19874 $ok_comma = $tok_next_next eq $tok_next;
19879 $is_assignment{ $types_to_go[$iendm] }
19881 || ( $nesting_depth_to_go[$ibegm] <
19882 $nesting_depth_to_go[$ibeg] )
19883 || ( $types_to_go[$iendm] eq 'k'
19884 && $tokens_to_go[$iendm] eq 'return' )
19887 # we will add padding before the first token
19891 # for first line of the batch..
19894 # WARNING: Never indent if first line is starting in a
19895 # continued quote, which would change the quote.
19896 next if $starting_in_quote;
19898 # if this is text after closing '}'
19899 # then look for an interior token to pad
19900 if ( $types_to_go[$ibeg] eq '}' ) {
19904 # otherwise, we might pad if it looks really good
19905 elsif ($is_short_block) {
19910 # we might pad token $ibeg, so be sure that it
19911 # is at the same depth as the next line.
19913 if ( $nesting_depth_to_go[$ibeg] !=
19914 $nesting_depth_to_go[$ibeg_next] );
19916 # We can pad on line 1 of a statement if at least 3
19917 # lines will be aligned. Otherwise, it
19918 # can look very confusing.
19920 # We have to be careful not to pad if there are too few
19921 # lines. The current rule is:
19922 # (1) in general we require at least 3 consecutive lines
19923 # with the same leading chain operator token,
19924 # (2) but an exception is that we only require two lines
19925 # with leading colons if there are no more lines. For example,
19926 # the first $i in the following snippet would get padding
19927 # by the second rule:
19929 # $i == 1 ? ( "First", "Color" )
19930 # : $i == 2 ? ( "Then", "Rarity" )
19931 # : ( "Then", "Name" );
19933 if ( $max_line > 1 ) {
19934 my $leading_token = $tokens_to_go[$ibeg_next];
19937 # never indent line 1 of a '.' series because
19938 # previous line is most likely at same level.
19939 # TODO: we should also look at the leading_spaces
19940 # of the last output line and skip if it is same
19942 next if ( $leading_token eq '.' );
19945 foreach my $l ( 2 .. 3 ) {
19946 last if ( $line + $l > $max_line );
19947 my $ibeg_next_next = $ri_first->[ $line + $l ];
19948 if ( $tokens_to_go[$ibeg_next_next] ne
19951 $tokens_differ = 1;
19956 next if ($tokens_differ);
19957 next if ( $count < 3 && $leading_token ne ':' );
19967 # find interior token to pad if necessary
19968 if ( !defined($ipad) ) {
19970 for ( my $i = $ibeg ; ( $i < $iend ) && !$ipad ; $i++ ) {
19972 # find any unclosed container
19974 unless ( $type_sequence_to_go[$i]
19975 && $mate_index_to_go[$i] > $iend );
19977 # find next nonblank token to pad
19978 $ipad = $inext_to_go[$i];
19979 last if ( $ipad > $iend );
19984 # We cannot pad the first leading token of a file because
19985 # it could cause a bug in which the starting indentation
19986 # level is guessed incorrectly each time the code is run
19987 # though perltidy, thus causing the code to march off to
19988 # the right. For example, the following snippet would have
19991 ## ov_method mycan( $package, '(""' ), $package
19992 ## or ov_method mycan( $package, '(0+' ), $package
19993 ## or ov_method mycan( $package, '(bool' ), $package
19994 ## or ov_method mycan( $package, '(nomethod' ), $package;
19996 # If this snippet is within a block this won't happen
19997 # unless the user just processes the snippet alone within
19998 # an editor. In that case either the user will see and
19999 # fix the problem or it will be corrected next time the
20000 # entire file is processed with perltidy.
20001 next if ( $ipad == 0 && $peak_batch_size <= 1 );
20003 ## THIS PATCH REMOVES THE FOLLOWING POOR PADDING (math.t) with -pbp, BUT
20004 ## IT DID MORE HARM THAN GOOD
20006 ## $font->{'loca'}->{'glyphs'}[$x]->read->{'xMin'} * 1000
20009 ##? # do not put leading padding for just 2 lines of math
20010 ##? if ( $ipad == $ibeg
20012 ##? && $levels_to_go[$ipad] > $levels_to_go[ $ipad - 1 ]
20013 ##? && $is_math_op{$type_next}
20014 ##? && $line + 2 <= $max_line )
20016 ##? my $ibeg_next_next = $ri_first->[ $line + 2 ];
20017 ##? my $type_next_next = $types_to_go[$ibeg_next_next];
20018 ##? next if !$is_math_op{$type_next_next};
20021 # next line must not be at greater depth
20022 my $iend_next = $ri_last->[ $line + 1 ];
20024 if ( $nesting_depth_to_go[ $iend_next + 1 ] >
20025 $nesting_depth_to_go[$ipad] );
20027 # lines must be somewhat similar to be padded..
20028 my $inext_next = $inext_to_go[$ibeg_next];
20029 my $type = $types_to_go[$ipad];
20030 my $type_next = $types_to_go[ $ipad + 1 ];
20032 # see if there are multiple continuation lines
20033 my $logical_continuation_lines = 1;
20034 if ( $line + 2 <= $max_line ) {
20035 my $leading_token = $tokens_to_go[$ibeg_next];
20036 my $ibeg_next_next = $ri_first->[ $line + 2 ];
20037 if ( $tokens_to_go[$ibeg_next_next] eq $leading_token
20038 && $nesting_depth_to_go[$ibeg_next] eq
20039 $nesting_depth_to_go[$ibeg_next_next] )
20041 $logical_continuation_lines++;
20045 # see if leading types match
20046 my $types_match = $types_to_go[$inext_next] eq $type;
20047 my $matches_without_bang;
20049 # if first line has leading ! then compare the following token
20050 if ( !$types_match && $type eq '!' ) {
20051 $types_match = $matches_without_bang =
20052 $types_to_go[$inext_next] eq $types_to_go[ $ipad + 1 ];
20056 # either we have multiple continuation lines to follow
20057 # and we are not padding the first token
20059 $logical_continuation_lines > 1
20060 && ( $ipad > 0 || $is_short_block )
20069 # and keywords must match if keyword
20072 && $tokens_to_go[$ipad] ne $tokens_to_go[$inext_next]
20078 #----------------------begin special checks--------------
20081 # A check is needed before we can make the pad.
20082 # If we are in a list with some long items, we want each
20083 # item to stand out. So in the following example, the
20084 # first line beginning with '$casefold->' would look good
20085 # padded to align with the next line, but then it
20086 # would be indented more than the last line, so we
20090 # $casefold->{code} eq '0041'
20091 # && $casefold->{status} eq 'C'
20092 # && $casefold->{mapping} eq '0061',
20097 # It would be faster, and almost as good, to use a comma
20098 # count, and not pad if comma_count > 1 and the previous
20099 # line did not end with a comma.
20103 my $ibg = $ri_first->[ $line + 1 ];
20104 my $depth = $nesting_depth_to_go[ $ibg + 1 ];
20106 # just use simplified formula for leading spaces to avoid
20107 # needless sub calls
20108 my $lsp = $levels_to_go[$ibg] + $ci_levels_to_go[$ibg];
20110 # look at each line beyond the next ..
20112 foreach my $ltest ( $line + 2 .. $max_line ) {
20114 my $ibg = $ri_first->[$l];
20116 # quit looking at the end of this container
20118 if ( $nesting_depth_to_go[ $ibg + 1 ] < $depth )
20119 || ( $nesting_depth_to_go[$ibg] < $depth );
20121 # cannot do the pad if a later line would be
20123 if ( $levels_to_go[$ibg] + $ci_levels_to_go[$ibg] < $lsp ) {
20129 # don't pad if we end in a broken list
20130 if ( $l == $max_line ) {
20131 my $i2 = $ri_last->[$l];
20132 if ( $types_to_go[$i2] eq '#' ) {
20133 my $i1 = $ri_first->[$l];
20134 next if terminal_type_i( $i1, $i2 ) eq ',';
20139 # a minus may introduce a quoted variable, and we will
20140 # add the pad only if this line begins with a bare word,
20141 # such as for the word 'Button' here:
20143 # Button => "Print letter \"~$_\"",
20144 # -command => [ sub { print "$_[0]\n" }, $_ ],
20145 # -accelerator => "Meta+$_"
20148 # On the other hand, if 'Button' is quoted, it looks best
20151 # 'Button' => "Print letter \"~$_\"",
20152 # -command => [ sub { print "$_[0]\n" }, $_ ],
20153 # -accelerator => "Meta+$_"
20155 if ( $types_to_go[$ibeg_next] eq 'm' ) {
20156 $ok_to_pad = 0 if $types_to_go[$ibeg] eq 'Q';
20159 next unless $ok_to_pad;
20161 #----------------------end special check---------------
20163 my $length_1 = total_line_length( $ibeg, $ipad - 1 );
20164 my $length_2 = total_line_length( $ibeg_next, $inext_next - 1 );
20165 $pad_spaces = $length_2 - $length_1;
20167 # If the first line has a leading ! and the second does
20168 # not, then remove one space to try to align the next
20169 # leading characters, which are often the same. For example:
20171 # || $ts == $self->Holder
20172 # || $self->Holder->Type eq "Arena" )
20174 # This usually helps readability, but if there are subsequent
20175 # ! operators things will still get messed up. For example:
20177 # if ( !exists $Net::DNS::typesbyname{$qtype}
20178 # && exists $Net::DNS::classesbyname{$qtype}
20179 # && !exists $Net::DNS::classesbyname{$qclass}
20180 # && exists $Net::DNS::typesbyname{$qclass} )
20181 # We can't fix that.
20182 if ($matches_without_bang) { $pad_spaces-- }
20184 # make sure this won't change if -lp is used
20185 my $indentation_1 = $leading_spaces_to_go[$ibeg];
20186 if ( ref($indentation_1) ) {
20187 if ( $indentation_1->get_recoverable_spaces() == 0 ) {
20188 my $indentation_2 = $leading_spaces_to_go[$ibeg_next];
20189 unless ( $indentation_2->get_recoverable_spaces() == 0 )
20196 # we might be able to handle a pad of -1 by removing a blank
20198 if ( $pad_spaces < 0 ) {
20200 # Deactivated for -kpit due to conflict. This block deletes
20201 # a space in an attempt to improve alignment in some cases,
20202 # but it may conflict with user spacing requests. For now
20203 # it is just deactivated if the -kpit option is used.
20204 if ( $pad_spaces == -1 ) {
20206 && $types_to_go[ $ipad - 1 ] eq 'b'
20207 && !%keyword_paren_inner_tightness )
20209 $self->pad_token( $ipad - 1, $pad_spaces );
20215 # now apply any padding for alignment
20216 if ( $ipad >= 0 && $pad_spaces ) {
20218 my $length_t = total_line_length( $ibeg, $iend );
20219 if ( $pad_spaces + $length_t <=
20220 $maximum_line_length_at_level[ $levels_to_go[$ibeg] ] )
20222 $self->pad_token( $ipad, $pad_spaces );
20230 $has_leading_op = $has_leading_op_next;
20231 } # end of loop over lines
20234 } ## end closure set_logical_padding
20238 # insert $pad_spaces before token number $ipad
20239 my ( $self, $ipad, $pad_spaces ) = @_;
20240 my $rLL = $self->[_rLL_];
20241 my $KK = $K_to_go[$ipad];
20242 my $tok = $rLL->[$KK]->[_TOKEN_];
20243 my $tok_len = $rLL->[$KK]->[_TOKEN_LENGTH_];
20245 if ( $pad_spaces > 0 ) {
20246 $tok = ' ' x $pad_spaces . $tok;
20247 $tok_len += $pad_spaces;
20249 elsif ( $pad_spaces == -1 && $tokens_to_go[$ipad] eq ' ' ) {
20259 $tok = $rLL->[$KK]->[_TOKEN_] = $tok;
20260 $tok_len = $rLL->[$KK]->[_TOKEN_LENGTH_] = $tok_len;
20262 $token_lengths_to_go[$ipad] += $pad_spaces;
20263 $tokens_to_go[$ipad] = $tok;
20265 foreach my $i ( $ipad .. $max_index_to_go ) {
20266 $summed_lengths_to_go[ $i + 1 ] += $pad_spaces;
20271 { ## begin closure make_alignment_patterns
20273 my %block_type_map;
20280 # map related block names into a common name to
20282 %block_type_map = (
20293 # map certain keywords to the same 'if' class to align
20294 # long if/elsif sequences. [elsif.pl]
20300 'default' => 'given',
20301 'case' => 'switch',
20303 # treat an 'undef' similar to numbers and quotes
20307 # map certain operators to the same class for pattern matching
20323 sub delete_needless_alignments {
20324 my ( $self, $ibeg, $iend, $ralignment_type_to_go ) = @_;
20326 # Remove unwanted alignments. This routine is a place to remove
20327 # alignments which might cause problems at later stages. There are
20328 # currently two types of fixes:
20330 # 1. Remove excess parens
20331 # 2. Remove alignments within 'elsif' conditions
20333 # Patch #1: Excess alignment of parens can prevent other good
20334 # alignments. For example, note the parens in the first two rows of
20335 # the following snippet. They would normally get marked for alignment
20336 # and aligned as follows:
20338 # my $w = $columns * $cell_w + ( $columns + 1 ) * $border;
20339 # my $h = $rows * $cell_h + ( $rows + 1 ) * $border;
20340 # my $img = new Gimp::Image( $w, $h, RGB );
20342 # This causes unnecessary paren alignment and prevents the third equals
20343 # from aligning. If we remove the unwanted alignments we get:
20345 # my $w = $columns * $cell_w + ( $columns + 1 ) * $border;
20346 # my $h = $rows * $cell_h + ( $rows + 1 ) * $border;
20347 # my $img = new Gimp::Image( $w, $h, RGB );
20349 # A rule for doing this which works well is to remove alignment of
20350 # parens whose containers do not contain other aligning tokens, with
20351 # the exception that we always keep alignment of the first opening
20352 # paren on a line (for things like 'if' and 'elsif' statements).
20354 # Setup needed constants
20355 my $i_good_paren = -1;
20356 my $imin_match = $iend + 1;
20357 my $i_elsif_close = $ibeg - 1;
20358 my $i_elsif_open = $iend + 1;
20359 if ( $iend > $ibeg ) {
20360 if ( $types_to_go[$ibeg] eq 'k' ) {
20362 # Paren patch: mark a location of a paren we should keep, such
20363 # as one following something like a leading 'if', 'elsif',..
20364 $i_good_paren = $ibeg + 1;
20365 if ( $types_to_go[$i_good_paren] eq 'b' ) {
20369 # 'elsif' patch: remember the range of the parens of an elsif,
20370 # and do not make alignments within them because this can cause
20371 # loss of padding and overall brace alignment in the vertical
20373 if ( $tokens_to_go[$ibeg] eq 'elsif'
20374 && $i_good_paren < $iend
20375 && $tokens_to_go[$i_good_paren] eq '(' )
20377 $i_elsif_open = $i_good_paren;
20378 $i_elsif_close = $mate_index_to_go[$i_good_paren];
20383 # Loop to make the fixes on this line
20385 for my $i ( $ibeg .. $iend ) {
20387 if ( $ralignment_type_to_go->[$i] ) {
20389 # Patch #2: undo alignment within elsif parens
20390 if ( $i > $i_elsif_open && $i < $i_elsif_close ) {
20391 $ralignment_type_to_go->[$i] = '';
20394 push @imatch_list, $i;
20397 if ( $tokens_to_go[$i] eq ')' ) {
20399 # Patch #1: undo the corresponding opening paren if:
20400 # - it is at the top of the stack
20401 # - and not the first overall opening paren
20402 # - does not follow a leading keyword on this line
20403 my $imate = $mate_index_to_go[$i];
20405 && $imatch_list[-1] eq $imate
20406 && ( $ibeg > 1 || @imatch_list > 1 )
20407 && $imate > $i_good_paren )
20409 $ralignment_type_to_go->[$imate] = '';
20417 sub make_alignment_patterns {
20419 # Here we do some important preliminary work for the
20420 # vertical aligner. We create three arrays for one
20421 # output line. These arrays contain strings that can
20422 # be tested by the vertical aligner to see if
20423 # consecutive lines can be aligned vertically.
20425 # The three arrays are indexed on the vertical
20426 # alignment fields and are:
20427 # @tokens - a list of any vertical alignment tokens for this line.
20428 # These are tokens, such as '=' '&&' '#' etc which
20429 # we want to might align vertically. These are
20430 # decorated with various information such as
20431 # nesting depth to prevent unwanted vertical
20432 # alignment matches.
20433 # @fields - the actual text of the line between the vertical alignment
20435 # @patterns - a modified list of token types, one for each alignment
20436 # field. These should normally each match before alignment is
20437 # allowed, even when the alignment tokens match.
20438 my ( $self, $ibeg, $iend, $ralignment_type_to_go ) = @_;
20442 my @field_lengths = ();
20443 my $i_start = $ibeg;
20445 # For a 'use' statement, use the module name as container name.
20446 # Fixes issue rt136416.
20448 if ( $types_to_go[$ibeg] eq 'k' && $tokens_to_go[$ibeg] eq 'use' ) {
20449 my $inext = $inext_to_go[$ibeg];
20450 if ( $inext <= $iend ) { $cname = $tokens_to_go[$inext] }
20454 my %container_name = ( 0 => "$cname" );
20456 my $j = 0; # field index
20460 for my $i ( $ibeg .. $iend ) {
20462 # Keep track of containers balanced on this line only.
20463 # These are used below to prevent unwanted cross-line alignments.
20464 # Unbalanced containers already avoid aligning across
20465 # container boundaries.
20467 my $type = $types_to_go[$i];
20468 my $token = $tokens_to_go[$i];
20469 my $depth_last = $depth;
20470 if ( $type_sequence_to_go[$i] ) {
20471 if ( $is_opening_type{$token} ) {
20473 # if container is balanced on this line...
20474 my $i_mate = $mate_index_to_go[$i];
20475 if ( $i_mate > $i && $i_mate <= $iend ) {
20478 # Append the previous token name to make the container name
20479 # more unique. This name will also be given to any commas
20480 # within this container, and it helps avoid undesirable
20481 # alignments of different types of containers.
20483 # Containers beginning with { and [ are given those names
20484 # for uniqueness. That way commas in different containers
20485 # will not match. Here is an example of what this prevents:
20486 # a => [ 1, 2, 3 ],
20487 # b => { b1 => 4, b2 => 5 },
20488 # Here is another example of what we avoid by labeling the
20491 # is_d( [ $a, $a ], [ $b, $c ] );
20492 # is_d( { foo => $a, bar => $a }, { foo => $b, bar => $c } );
20493 # is_d( [ \$a, \$a ], [ \$b, \$c ] );
20496 if ( $token eq '(' ) {
20497 $name = $self->make_paren_name($i);
20499 $container_name{$depth} = "+" . $name;
20501 # Make the container name even more unique if necessary.
20502 # If we are not vertically aligning this opening paren,
20503 # append a character count to avoid bad alignment because
20504 # it usually looks bad to align commas within containers
20505 # for which the opening parens do not align. Here
20506 # is an example very BAD alignment of commas (because
20507 # the atan2 functions are not all aligned):
20509 # $X * $RTYSQP1 * atan2( $X, $RTYSQP1 ) +
20510 # $Y * $RTXSQP1 * atan2( $Y, $RTXSQP1 ) -
20511 # $X * atan2( $X, 1 ) -
20512 # $Y * atan2( $Y, 1 );
20514 # On the other hand, it is usually okay to align commas
20515 # if opening parens align, such as:
20516 # glVertex3d( $cx + $s * $xs, $cy, $z );
20517 # glVertex3d( $cx, $cy + $s * $ys, $z );
20518 # glVertex3d( $cx - $s * $xs, $cy, $z );
20519 # glVertex3d( $cx, $cy - $s * $ys, $z );
20521 # To distinguish between these situations, we will append
20522 # the length of the line from the previous matching
20523 # token, or beginning of line, to the function name.
20524 # This will allow the vertical aligner to reject
20525 # undesirable matches.
20527 # if we are not aligning on this paren...
20528 if ( !$ralignment_type_to_go->[$i] ) {
20530 # Sum length from previous alignment
20531 my $len = token_sequence_length( $i_start, $i - 1 );
20533 # Minor patch: do not include the length of any '!'.
20534 # Otherwise, commas in the following line will not
20536 # ok( 20, tapprox( ( pdl 2, 3 ), ( pdl 2, 3 ) ) );
20537 # ok( 21, !tapprox( ( pdl 2, 3 ), ( pdl 2, 4 ) ) );
20538 if ( grep { $_ eq '!' }
20539 @types_to_go[ $i_start .. $i - 1 ] )
20544 if ( $i_start == $ibeg ) {
20546 # For first token, use distance from start of line
20547 # but subtract off the indentation due to level.
20548 # Otherwise, results could vary with indentation.
20550 leading_spaces_to_go($ibeg) -
20551 $levels_to_go[$i_start] *
20552 $rOpts_indent_columns;
20553 if ( $len < 0 ) { $len = 0 }
20556 # tack this length onto the container name to try
20557 # to make a unique token name
20558 $container_name{$depth} .= "-" . $len;
20562 elsif ( $is_closing_type{$token} ) {
20563 $depth-- if $depth > 0;
20567 # if we find a new synchronization token, we are done with
20569 if ( $i > $i_start && $ralignment_type_to_go->[$i] ) {
20571 my $tok = my $raw_tok = $ralignment_type_to_go->[$i];
20573 # map similar items
20574 my $tok_map = $operator_map{$tok};
20575 $tok = $tok_map if ($tok_map);
20577 # make separators in different nesting depths unique
20578 # by appending the nesting depth digit.
20579 if ( $raw_tok ne '#' ) {
20580 $tok .= "$nesting_depth_to_go[$i]";
20583 # also decorate commas with any container name to avoid
20584 # unwanted cross-line alignments.
20585 if ( $raw_tok eq ',' || $raw_tok eq '=>' ) {
20587 # If we are at an opening token which increased depth, we have
20588 # to use the name from the previous depth.
20590 ( $depth_last < $depth ? $depth_last : $depth );
20591 if ( $container_name{$depth_p} ) {
20592 $tok .= $container_name{$depth_p};
20596 # Patch to avoid aligning leading and trailing if, unless.
20597 # Mark trailing if, unless statements with container names.
20598 # This makes them different from leading if, unless which
20599 # are not so marked at present. If we ever need to name
20600 # them too, we could use ci to distinguish them.
20601 # Example problem to avoid:
20602 # return ( 2, "DBERROR" )
20603 # if ( $retval == 2 );
20604 # if ( scalar @_ ) {
20605 # my ( $a, $b, $c, $d, $e, $f ) = @_;
20607 if ( $raw_tok eq '(' ) {
20608 if ( $ci_levels_to_go[$ibeg]
20609 && $container_name{$depth} =~ /^\+(if|unless)/ )
20611 $tok .= $container_name{$depth};
20615 # Decorate block braces with block types to avoid
20616 # unwanted alignments such as the following:
20617 # foreach ( @{$routput_array} ) { $fh->print($_) }
20618 # eval { $fh->close() };
20619 if ( $raw_tok eq '{' && $block_type_to_go[$i] ) {
20620 my $block_type = $block_type_to_go[$i];
20622 # map certain related block types to allow
20623 # else blocks to align
20624 $block_type = $block_type_map{$block_type}
20625 if ( defined( $block_type_map{$block_type} ) );
20627 # remove sub names to allow one-line sub braces to align
20628 # regardless of name
20629 if ( $block_type =~ /$SUB_PATTERN/ ) { $block_type = 'sub' }
20631 # allow all control-type blocks to align
20632 if ( $block_type =~ /^[A-Z]+$/ ) { $block_type = 'BEGIN' }
20634 $tok .= $block_type;
20637 # Mark multiple copies of certain tokens with the copy number
20638 # This will allow the aligner to decide if they are matched.
20639 # For now, only do this for equals. For example, the two
20640 # equals on the next line will be labeled '=0' and '=0.2'.
20641 # Later, the '=0.2' will be ignored in alignment because it
20644 # $| = $debug = 1 if $opt_d;
20645 # $full_index = 1 if $opt_i;
20647 if ( $raw_tok eq '=' || $raw_tok eq '=>' ) {
20648 $token_count{$tok}++;
20649 if ( $token_count{$tok} > 1 ) {
20650 $tok .= '.' . $token_count{$tok};
20654 # concatenate the text of the consecutive tokens to form
20657 join( '', @tokens_to_go[ $i_start .. $i - 1 ] ) );
20659 push @field_lengths,
20660 $summed_lengths_to_go[$i] - $summed_lengths_to_go[$i_start];
20662 # store the alignment token for this field
20663 push( @tokens, $tok );
20665 # get ready for the next batch
20668 $patterns[$j] = "";
20671 # continue accumulating tokens
20673 # for keywords we have to use the actual text
20674 if ( $type eq 'k' ) {
20676 my $tok_fix = $tokens_to_go[$i];
20678 # but map certain keywords to a common string to allow
20680 $tok_fix = $keyword_map{$tok_fix}
20681 if ( defined( $keyword_map{$tok_fix} ) );
20682 $patterns[$j] .= $tok_fix;
20685 elsif ( $type eq 'b' ) {
20686 $patterns[$j] .= $type;
20689 # handle non-keywords..
20692 my $type_fix = $type;
20694 # Mark most things before arrows as a quote to
20695 # get them to line up. Testfile: mixed.pl.
20696 # $type =~ /^[wnC]$/
20697 if ( $i < $iend - 1 && $is_w_n_C{$type} ) {
20698 my $next_type = $types_to_go[ $i + 1 ];
20699 my $i_next_nonblank =
20700 ( ( $next_type eq 'b' ) ? $i + 2 : $i + 1 );
20702 if ( $types_to_go[$i_next_nonblank] eq '=>' ) {
20705 # Patch to ignore leading minus before words,
20706 # by changing pattern 'mQ' into just 'Q',
20707 # so that we can align things like this:
20708 # Button => "Print letter \"~$_\"",
20709 # -command => [ sub { print "$_[0]\n" }, $_ ],
20710 if ( $patterns[$j] eq 'm' ) { $patterns[$j] = "" }
20714 # Convert a bareword within braces into a quote for matching.
20715 # This will allow alignment of expressions like this:
20716 # local ( $SIG{'INT'} ) = IGNORE;
20717 # local ( $SIG{ALRM} ) = 'POSTMAN';
20721 && $types_to_go[ $i - 1 ] eq 'L'
20722 && $types_to_go[ $i + 1 ] eq 'R' )
20727 # patch to make numbers and quotes align
20728 if ( $type eq 'n' ) { $type_fix = 'Q' }
20730 # patch to ignore any ! in patterns
20731 if ( $type eq '!' ) { $type_fix = '' }
20733 $patterns[$j] .= $type_fix;
20735 # remove any zero-level name at first fat comma
20736 if ( $depth == 0 && $type eq '=>' ) {
20737 $container_name{$depth} = "";
20743 # done with this line .. join text of tokens to make the last field
20744 push( @fields, join( '', @tokens_to_go[ $i_start .. $iend ] ) );
20745 push @field_lengths,
20746 $summed_lengths_to_go[ $iend + 1 ] - $summed_lengths_to_go[$i_start];
20748 return ( \@tokens, \@fields, \@patterns, \@field_lengths );
20751 } ## end closure make_alignment_patterns
20753 sub make_paren_name {
20754 my ( $self, $i ) = @_;
20756 # The token at index $i is a '('.
20757 # Create an alignment name for it to avoid incorrect alignments.
20759 # Start with the name of the previous nonblank token...
20762 return "" if ( $im < 0 );
20763 if ( $types_to_go[$im] eq 'b' ) { $im--; }
20764 return "" if ( $im < 0 );
20765 $name = $tokens_to_go[$im];
20767 # Prepend any sub name to an isolated -> to avoid unwanted alignments
20768 # [test case is test8/penco.pl]
20769 if ( $name eq '->' ) {
20771 if ( $im >= 0 && $types_to_go[$im] ne 'b' ) {
20772 $name = $tokens_to_go[$im] . $name;
20776 # Finally, remove any leading arrows
20777 if ( substr( $name, 0, 2 ) eq '->' ) {
20778 $name = substr( $name, 2 );
20783 { ## begin closure set_adjusted_indentation
20785 my ( $last_indentation_written, $last_unadjusted_indentation,
20786 $last_leading_token );
20788 sub initialize_adjusted_indentation {
20789 $last_indentation_written = 0;
20790 $last_unadjusted_indentation = 0;
20791 $last_leading_token = "";
20795 sub set_adjusted_indentation {
20797 # This routine has the final say regarding the actual indentation of
20798 # a line. It starts with the basic indentation which has been
20799 # defined for the leading token, and then takes into account any
20800 # options that the user has set regarding special indenting and
20803 # This routine has to resolve a number of complex interacting issues,
20805 # 1. The various -cti=n type flags, which contain the desired change in
20806 # indentation for lines ending in commas and semicolons, should be
20808 # 2. qw quotes require special processing and do not fit perfectly
20809 # with normal containers,
20810 # 3. formatting with -wn can complicate things, especially with qw
20812 # 4. formatting with the -lp option is complicated, and does not
20813 # work well with qw quotes and with -wn formatting.
20814 # 5. a number of special situations, such as 'cuddled' formatting.
20815 # 6. This routine is mainly concerned with outdenting closing tokens
20816 # but note that there is some overlap with the functions of sub
20817 # undo_ci, which was processed earlier, so care has to be taken to
20818 # keep them coordinated.
20823 $rpatterns, $ri_first,
20824 $ri_last, $rindentation_list,
20825 $level_jump, $starting_in_quote,
20826 $is_static_block_comment,
20829 my $rLL = $self->[_rLL_];
20830 my $ris_bli_container = $self->[_ris_bli_container_];
20831 my $rseqno_controlling_my_ci = $self->[_rseqno_controlling_my_ci_];
20832 my $rwant_reduced_ci = $self->[_rwant_reduced_ci_];
20833 my $rK_weld_left = $self->[_rK_weld_left_];
20835 # we need to know the last token of this line
20836 my ( $terminal_type, $i_terminal ) = terminal_type_i( $ibeg, $iend );
20838 my $terminal_block_type = $block_type_to_go[$i_terminal];
20839 my $is_outdented_line = 0;
20841 my $terminal_is_in_list = $self->is_in_list_by_i($i_terminal);
20843 my $type_beg = $types_to_go[$ibeg];
20844 my $token_beg = $tokens_to_go[$ibeg];
20845 my $K_beg = $K_to_go[$ibeg];
20846 my $ibeg_weld_fix = $ibeg;
20847 my $seqno_beg = $type_sequence_to_go[$ibeg];
20848 my $is_bli_beg = $seqno_beg ? $ris_bli_container->{$seqno_beg} : 0;
20850 # QW INDENTATION PATCH 3:
20851 my $seqno_qw_closing;
20852 if ( $type_beg eq 'q' && $ibeg == 0 ) {
20853 my $KK = $K_to_go[$ibeg];
20854 $seqno_qw_closing =
20855 $self->[_rending_multiline_qw_seqno_by_K_]->{$KK};
20858 my $is_semicolon_terminated = $terminal_type eq ';'
20859 && ( $nesting_depth_to_go[$iend] < $nesting_depth_to_go[$ibeg]
20860 || $seqno_qw_closing );
20862 # NOTE: A future improvement would be to make it semicolon terminated
20863 # even if it does not have a semicolon but is followed by a closing
20864 # block brace. This would undo ci even for something like the
20865 # following, in which the final paren does not have a semicolon because
20866 # it is a possible weld location:
20868 # if ($BOLD_MATH) {
20870 # $labels, $comment,
20871 # join( '', '<B>', &make_math( $mode, '', '', $_ ), '</B>' )
20876 # MOJO: Set a flag if this lines begins with ')->'
20877 my $leading_paren_arrow = (
20878 $types_to_go[$ibeg] eq '}'
20879 && $tokens_to_go[$ibeg] eq ')'
20881 ( $ibeg < $i_terminal && $types_to_go[ $ibeg + 1 ] eq '->' )
20882 || ( $ibeg < $i_terminal - 1
20883 && $types_to_go[ $ibeg + 1 ] eq 'b'
20884 && $types_to_go[ $ibeg + 2 ] eq '->' )
20888 ##########################################################
20889 # Section 1: set a flag and a default indentation
20891 # Most lines are indented according to the initial token.
20892 # But it is common to outdent to the level just after the
20893 # terminal token in certain cases...
20894 # adjust_indentation flag:
20895 # 0 - do not adjust
20897 # 2 - vertically align with opening token
20899 ##########################################################
20900 my $adjust_indentation = 0;
20901 my $default_adjust_indentation = $adjust_indentation;
20904 $opening_indentation, $opening_offset,
20905 $is_leading, $opening_exists
20908 # Honor any flag to reduce -ci set by the -bbxi=n option
20909 if ( $seqno_beg && $rwant_reduced_ci->{$seqno_beg} ) {
20911 # if this is an opening, it must be alone on the line ...
20912 if ( $is_closing_type{$type_beg} || $ibeg == $i_terminal ) {
20913 $adjust_indentation = 1;
20916 # ... or a single welded unit (fix for b1173)
20917 elsif ($total_weld_count) {
20918 my $Kterm = $K_to_go[$i_terminal];
20919 my $Kterm_test = $rK_weld_left->{$Kterm};
20920 if ( defined($Kterm_test) && $Kterm_test >= $K_beg ) {
20921 $Kterm = $Kterm_test;
20923 if ( $Kterm == $K_beg ) { $adjust_indentation = 1 }
20927 # Update the $is_bli flag as we go. It is initially 1.
20928 # We note seeing a leading opening brace by setting it to 2.
20929 # If we get to the closing brace without seeing the opening then we
20930 # turn it off. This occurs if the opening brace did not get output
20931 # at the start of a line, so we will then indent the closing brace
20932 # in the default way.
20933 if ( $is_bli_beg && $is_bli_beg == 1 ) {
20934 my $K_opening_container = $self->[_K_opening_container_];
20935 my $K_opening = $K_opening_container->{$seqno_beg};
20936 if ( $K_beg eq $K_opening ) {
20937 $ris_bli_container->{$seqno_beg} = $is_bli_beg = 2;
20939 else { $is_bli_beg = 0 }
20942 # QW PATCH for the combination -lp -wn
20943 # For -lp formatting use $ibeg_weld_fix to get around the problem
20944 # that with -lp type formatting the opening and closing tokens to not
20945 # have sequence numbers.
20946 if ( $seqno_qw_closing && $total_weld_count ) {
20947 my $K_next_nonblank = $self->K_next_code($K_beg);
20948 if ( defined($K_next_nonblank)
20949 && defined( $rK_weld_left->{$K_next_nonblank} ) )
20951 my $itest = $ibeg + ( $K_next_nonblank - $K_beg );
20952 if ( $itest <= $max_index_to_go ) {
20953 $ibeg_weld_fix = $itest;
20958 # if we are at a closing token of some type..
20959 if ( $is_closing_type{$type_beg} || $seqno_qw_closing ) {
20961 # get the indentation of the line containing the corresponding
20964 $opening_indentation, $opening_offset,
20965 $is_leading, $opening_exists
20967 = $self->get_opening_indentation( $ibeg_weld_fix, $ri_first,
20968 $ri_last, $rindentation_list, $seqno_qw_closing );
20970 # First set the default behavior:
20973 # default behavior is to outdent closing lines
20974 # of the form: "); }; ]; )->xxx;"
20975 $is_semicolon_terminated
20977 # and 'cuddled parens' of the form: ")->pack("
20978 # Bug fix for RT #123749]: the types here were
20979 # incorrectly '(' and ')'. Corrected to be '{' and '}'
20981 $terminal_type eq '{'
20982 && $type_beg eq '}'
20983 && ( $nesting_depth_to_go[$iend] + 1 ==
20984 $nesting_depth_to_go[$ibeg] )
20987 # remove continuation indentation for any line like
20989 # or without ending '{' and unbalanced, such as
20990 # such as '}->{$operator}'
20994 && ( $types_to_go[$iend] eq '{'
20995 || $levels_to_go[$iend] < $levels_to_go[$ibeg] )
20998 # and when the next line is at a lower indentation level...
21000 # PATCH #1: and only if the style allows undoing continuation
21001 # for all closing token types. We should really wait until
21002 # the indentation of the next line is known and then make
21003 # a decision, but that would require another pass.
21005 # PATCH #2: and not if this token is under -xci control
21006 || ( $level_jump < 0
21007 && !$some_closing_token_indentation
21008 && !$rseqno_controlling_my_ci->{$K_beg} )
21010 # Patch for -wn=2, multiple welded closing tokens
21011 || ( $i_terminal > $ibeg
21012 && $is_closing_type{ $types_to_go[$iend] } )
21014 # Alternate Patch for git #51, isolated closing qw token not
21015 # outdented if no-delete-old-newlines is set. This works, but
21016 # a more general patch elsewhere fixes the real problem: ljump.
21017 # || ( $seqno_qw_closing && $ibeg == $i_terminal )
21021 $adjust_indentation = 1;
21024 # outdent something like '),'
21026 $terminal_type eq ','
21028 # Removed this constraint for -wn
21029 # OLD: allow just one character before the comma
21030 # && $i_terminal == $ibeg + 1
21032 # require LIST environment; otherwise, we may outdent too much -
21033 # this can happen in calls without parentheses (overload.t);
21034 && $terminal_is_in_list
21037 $adjust_indentation = 1;
21040 # undo continuation indentation of a terminal closing token if
21041 # it is the last token before a level decrease. This will allow
21042 # a closing token to line up with its opening counterpart, and
21043 # avoids an indentation jump larger than 1 level.
21044 if ( $types_to_go[$i_terminal] =~ /^[\}\]\)R]$/
21045 && $i_terminal == $ibeg
21046 && defined($K_beg) )
21048 my $K_next_nonblank = $self->K_next_code($K_beg);
21050 if ( !$is_bli_beg && defined($K_next_nonblank) ) {
21051 my $lev = $rLL->[$K_beg]->[_LEVEL_];
21052 my $level_next = $rLL->[$K_next_nonblank]->[_LEVEL_];
21054 # and do not undo ci if it was set by the -xci option
21055 $adjust_indentation = 1
21056 if ( $level_next < $lev
21057 && !$rseqno_controlling_my_ci->{$K_beg} );
21060 # Patch for RT #96101, in which closing brace of anonymous subs
21061 # was not outdented. We should look ahead and see if there is
21062 # a level decrease at the next token (i.e., a closing token),
21063 # but right now we do not have that information. For now
21064 # we see if we are in a list, and this works well.
21065 # See test files 'sub*.t' for good test cases.
21066 if ( $block_type_to_go[$ibeg] =~ /$ASUB_PATTERN/
21067 && $terminal_is_in_list
21068 && !$rOpts->{'indent-closing-brace'} )
21071 $opening_indentation, $opening_offset,
21072 $is_leading, $opening_exists
21074 = $self->get_opening_indentation( $ibeg, $ri_first,
21075 $ri_last, $rindentation_list );
21076 my $indentation = $leading_spaces_to_go[$ibeg];
21077 if ( defined($opening_indentation)
21078 && get_spaces($indentation) >
21079 get_spaces($opening_indentation) )
21081 $adjust_indentation = 1;
21086 # YVES patch 1 of 2:
21087 # Undo ci of line with leading closing eval brace,
21088 # but not beyond the indention of the line with
21089 # the opening brace.
21090 if ( $block_type_to_go[$ibeg] eq 'eval'
21091 && !$rOpts->{'line-up-parentheses'}
21092 && !$rOpts->{'indent-closing-brace'} )
21095 $opening_indentation, $opening_offset,
21096 $is_leading, $opening_exists
21098 = $self->get_opening_indentation( $ibeg, $ri_first, $ri_last,
21099 $rindentation_list );
21100 my $indentation = $leading_spaces_to_go[$ibeg];
21101 if ( defined($opening_indentation)
21102 && get_spaces($indentation) >
21103 get_spaces($opening_indentation) )
21105 $adjust_indentation = 1;
21109 # patch for issue git #40: -bli setting has priority
21110 $adjust_indentation = 0 if ($is_bli_beg);
21112 $default_adjust_indentation = $adjust_indentation;
21114 # Now modify default behavior according to user request:
21115 # handle option to indent non-blocks of the form ); }; ];
21116 # But don't do special indentation to something like ')->pack('
21117 if ( !$block_type_to_go[$ibeg] ) {
21119 # Note that logical padding has already been applied, so we may
21120 # need to remove some spaces to get a valid hash key.
21121 my $tok = $tokens_to_go[$ibeg];
21122 my $cti = $closing_token_indentation{$tok};
21124 # Fix the value of 'cti' for an isloated non-welded closing qw
21126 if ( $seqno_qw_closing && $ibeg_weld_fix == $ibeg ) {
21128 # A quote delimiter which is not a container will not have
21129 # a cti value defined. In this case use the style of a
21130 # paren. For example
21138 if ( !defined($cti) && length($tok) == 1 ) {
21140 # something other than ')', '}', ']' ; use flag for ')'
21141 $cti = $closing_token_indentation{')'};
21143 # But for now, do not outdent non-container qw
21144 # delimiters because it would would change existing
21146 if ( $tok ne '>' ) { $cti = 3 }
21149 # A non-welded closing qw cannot currently use -cti=1
21150 # because that option requires a sequence number to find
21151 # the opening indentation, and qw quote delimiters are not
21153 if ( defined($cti) && $cti == 1 ) { $cti = 0 }
21156 if ( !defined($cti) ) {
21158 # $cti may not be defined for several reasons.
21159 # -padding may have been applied so the character
21161 # - we may have welded to a closing quote token.
21162 # Here is an example (perltidy -wn):
21163 # __PACKAGE__->load_components( qw(
21167 $adjust_indentation = 0;
21170 elsif ( $cti == 1 ) {
21171 if ( $i_terminal <= $ibeg + 1
21172 || $is_semicolon_terminated )
21174 $adjust_indentation = 2;
21177 $adjust_indentation = 0;
21180 elsif ( $cti == 2 ) {
21181 if ($is_semicolon_terminated) {
21182 $adjust_indentation = 3;
21185 $adjust_indentation = 0;
21188 elsif ( $cti == 3 ) {
21189 $adjust_indentation = 3;
21193 # handle option to indent blocks
21196 $rOpts->{'indent-closing-brace'}
21198 $i_terminal == $ibeg # isolated terminal '}'
21199 || $is_semicolon_terminated
21203 $adjust_indentation = 3;
21208 # if at ');', '};', '>;', and '];' of a terminal qw quote
21209 elsif ($rpatterns->[0] =~ /^qb*;$/
21210 && $rfields->[0] =~ /^([\)\}\]\>]);$/ )
21212 if ( $closing_token_indentation{$1} == 0 ) {
21213 $adjust_indentation = 1;
21216 $adjust_indentation = 3;
21220 # if line begins with a ':', align it with any
21221 # previous line leading with corresponding ?
21222 elsif ( $types_to_go[$ibeg] eq ':' ) {
21224 $opening_indentation, $opening_offset,
21225 $is_leading, $opening_exists
21227 = $self->get_opening_indentation( $ibeg, $ri_first, $ri_last,
21228 $rindentation_list );
21229 if ($is_leading) { $adjust_indentation = 2; }
21232 ##########################################################
21233 # Section 2: set indentation according to flag set above
21235 # Select the indentation object to define leading
21236 # whitespace. If we are outdenting something like '} } );'
21237 # then we want to use one level below the last token
21238 # ($i_terminal) in order to get it to fully outdent through
21240 ##########################################################
21243 my $level_end = $levels_to_go[$iend];
21245 if ( $adjust_indentation == 0 ) {
21246 $indentation = $leading_spaces_to_go[$ibeg];
21247 $lev = $levels_to_go[$ibeg];
21249 elsif ( $adjust_indentation == 1 ) {
21251 # Change the indentation to be that of a different token on the line
21252 # Previously, the indentation of the terminal token was used:
21254 # $indentation = $reduced_spaces_to_go[$i_terminal];
21255 # $lev = $levels_to_go[$i_terminal];
21257 # Generalization for MOJO:
21258 # Use the lowest level indentation of the tokens on the line.
21259 # For example, here we can use the indentation of the ending ';':
21260 # } until ($selection > 0 and $selection < 10); # ok to use ';'
21261 # But this will not outdent if we use the terminal indentation:
21262 # )->then( sub { # use indentation of the ->, not the {
21263 # Warning: reduced_spaces_to_go[] may be a reference, do not
21264 # do numerical checks with it
21267 $indentation = $reduced_spaces_to_go[$i_ind];
21268 $lev = $levels_to_go[$i_ind];
21269 while ( $i_ind < $i_terminal ) {
21271 if ( $levels_to_go[$i_ind] < $lev ) {
21272 $indentation = $reduced_spaces_to_go[$i_ind];
21273 $lev = $levels_to_go[$i_ind];
21278 # handle indented closing token which aligns with opening token
21279 elsif ( $adjust_indentation == 2 ) {
21281 # handle option to align closing token with opening token
21282 $lev = $levels_to_go[$ibeg];
21284 # calculate spaces needed to align with opening token
21286 get_spaces($opening_indentation) + $opening_offset;
21288 # Indent less than the previous line.
21290 # Problem: For -lp we don't exactly know what it was if there
21291 # were recoverable spaces sent to the aligner. A good solution
21292 # would be to force a flush of the vertical alignment buffer, so
21293 # that we would know. For now, this rule is used for -lp:
21295 # When the last line did not start with a closing token we will
21296 # be optimistic that the aligner will recover everything wanted.
21298 # This rule will prevent us from breaking a hierarchy of closing
21299 # tokens, and in a worst case will leave a closing paren too far
21300 # indented, but this is better than frequently leaving it not
21302 my $last_spaces = get_spaces($last_indentation_written);
21303 if ( !$is_closing_token{$last_leading_token} ) {
21305 get_recoverable_spaces($last_indentation_written);
21308 # reset the indentation to the new space count if it works
21309 # only options are all or none: nothing in-between looks good
21310 $lev = $levels_to_go[$ibeg];
21311 if ( $space_count < $last_spaces ) {
21312 if ($rOpts_line_up_parentheses) {
21313 my $lev = $levels_to_go[$ibeg];
21315 new_lp_indentation_item( $space_count, $lev, 0, 0, 0 );
21318 $indentation = $space_count;
21322 # revert to default if it doesn't work
21324 $space_count = leading_spaces_to_go($ibeg);
21325 if ( $default_adjust_indentation == 0 ) {
21326 $indentation = $leading_spaces_to_go[$ibeg];
21328 elsif ( $default_adjust_indentation == 1 ) {
21329 $indentation = $reduced_spaces_to_go[$i_terminal];
21330 $lev = $levels_to_go[$i_terminal];
21335 # Full indentaion of closing tokens (-icb and -icp or -cti=2)
21338 # handle -icb (indented closing code block braces)
21339 # Updated method for indented block braces: indent one full level if
21340 # there is no continuation indentation. This will occur for major
21341 # structures such as sub, if, else, but not for things like map
21344 # Note: only code blocks without continuation indentation are
21345 # handled here (if, else, unless, ..). In the following snippet,
21346 # the terminal brace of the sort block will have continuation
21347 # indentation as shown so it will not be handled by the coding
21348 # here. We would have to undo the continuation indentation to do
21349 # this, but it probably looks ok as is. This is a possible future
21350 # update for semicolon terminated lines.
21352 # if ($sortby eq 'date' or $sortby eq 'size') {
21354 # $file_data{$a}{$sortby} <=> $file_data{$b}{$sortby}
21359 if ( $block_type_to_go[$ibeg]
21360 && $ci_levels_to_go[$i_terminal] == 0 )
21362 my $spaces = get_spaces( $leading_spaces_to_go[$i_terminal] );
21363 $indentation = $spaces + $rOpts_indent_columns;
21365 # NOTE: for -lp we could create a new indentation object, but
21366 # there is probably no need to do it
21369 # handle -icp and any -icb block braces which fall through above
21370 # test such as the 'sort' block mentioned above.
21373 # There are currently two ways to handle -icp...
21374 # One way is to use the indentation of the previous line:
21375 # $indentation = $last_indentation_written;
21377 # The other way is to use the indentation that the previous line
21378 # would have had if it hadn't been adjusted:
21379 $indentation = $last_unadjusted_indentation;
21381 # Current method: use the minimum of the two. This avoids
21382 # inconsistent indentation.
21383 if ( get_spaces($last_indentation_written) <
21384 get_spaces($indentation) )
21386 $indentation = $last_indentation_written;
21390 # use previous indentation but use own level
21391 # to cause list to be flushed properly
21392 $lev = $levels_to_go[$ibeg];
21395 # remember indentation except for multi-line quotes, which get
21397 unless ( $ibeg == 0 && $starting_in_quote ) {
21398 $last_indentation_written = $indentation;
21399 $last_unadjusted_indentation = $leading_spaces_to_go[$ibeg];
21400 $last_leading_token = $tokens_to_go[$ibeg];
21402 # Patch to make a line which is the end of a qw quote work with the
21403 # -lp option. Make $token_beg look like a closing token as some
21404 # type even if it is not. This veriable will become
21405 # $last_leading_token at the end of this loop. Then, if the -lp
21406 # style is selected, and the next line is also a
21407 # closing token, it will not get more indentation than this line.
21408 # We need to do this because qw quotes (at present) only get
21409 # continuation indentation, not one level of indentation, so we
21410 # need to turn off the -lp indentation.
21412 # ... a picture is worth a thousand words:
21414 # perltidy -wn -gnu (Without this patch):
21416 # $seqio = $gb->get_Stream_by_batch([qw(J00522 AF303112
21420 # perltidy -wn -gnu (With this patch):
21422 # $seqio = $gb->get_Stream_by_batch([qw(J00522 AF303112
21425 ## if ($seqno_qw_closing) { $last_leading_token = ')' }
21426 if ( $seqno_qw_closing
21427 && ( length($token_beg) > 1 || $token_beg eq '>' ) )
21429 $last_leading_token = ')';
21433 # be sure lines with leading closing tokens are not outdented more
21434 # than the line which contained the corresponding opening token.
21436 #############################################################
21437 # updated per bug report in alex_bug.pl: we must not
21438 # mess with the indentation of closing logical braces so
21439 # we must treat something like '} else {' as if it were
21440 # an isolated brace
21441 #############################################################
21442 my $is_isolated_block_brace = $block_type_to_go[$ibeg]
21444 $i_terminal == $ibeg
21445 || $is_if_elsif_else_unless_while_until_for_foreach{
21446 $block_type_to_go[$ibeg]
21450 # only do this for a ':; which is aligned with its leading '?'
21451 my $is_unaligned_colon = $types_to_go[$ibeg] eq ':' && !$is_leading;
21454 defined($opening_indentation)
21455 && !$leading_paren_arrow # MOJO
21456 && !$is_isolated_block_brace
21457 && !$is_unaligned_colon
21460 if ( get_spaces($opening_indentation) > get_spaces($indentation) ) {
21461 $indentation = $opening_indentation;
21465 # remember the indentation of each line of this batch
21466 push @{$rindentation_list}, $indentation;
21468 # outdent lines with certain leading tokens...
21471 # must be first word of this batch
21477 # certain leading keywords if requested
21479 $rOpts->{'outdent-keywords'}
21480 && $types_to_go[$ibeg] eq 'k'
21481 && $outdent_keyword{ $tokens_to_go[$ibeg] }
21484 # or labels if requested
21485 || ( $rOpts->{'outdent-labels'} && $types_to_go[$ibeg] eq 'J' )
21487 # or static block comments if requested
21488 || ( $types_to_go[$ibeg] eq '#'
21489 && $rOpts->{'outdent-static-block-comments'}
21490 && $is_static_block_comment )
21495 my $space_count = leading_spaces_to_go($ibeg);
21496 if ( $space_count > 0 ) {
21497 $space_count -= $rOpts_continuation_indentation;
21498 $is_outdented_line = 1;
21499 if ( $space_count < 0 ) { $space_count = 0 }
21501 # do not promote a spaced static block comment to non-spaced;
21502 # this is not normally necessary but could be for some
21503 # unusual user inputs (such as -ci = -i)
21504 if ( $types_to_go[$ibeg] eq '#' && $space_count == 0 ) {
21508 if ($rOpts_line_up_parentheses) {
21510 new_lp_indentation_item( $space_count, $lev, 0, 0, 0 );
21513 $indentation = $space_count;
21518 return ( $indentation, $lev, $level_end, $terminal_type,
21519 $terminal_block_type, $is_semicolon_terminated,
21520 $is_outdented_line );
21522 } ## end closure set_adjusted_indentation
21524 sub get_opening_indentation {
21526 # get the indentation of the line which output the opening token
21527 # corresponding to a given closing token in the current output batch.
21530 # $i_closing - index in this line of a closing token ')' '}' or ']'
21532 # $ri_first - reference to list of the first index $i for each output
21533 # line in this batch
21534 # $ri_last - reference to list of the last index $i for each output line
21536 # $rindentation_list - reference to a list containing the indentation
21537 # used for each line.
21538 # $qw_seqno - optional sequence number to use if normal seqno not defined
21539 # (TODO: would be more general to just look this up from index i)
21542 # -the indentation of the line which contained the opening token
21543 # which matches the token at index $i_opening
21544 # -and its offset (number of columns) from the start of the line
21546 my ( $self, $i_closing, $ri_first, $ri_last, $rindentation_list, $qw_seqno )
21549 # first, see if the opening token is in the current batch
21550 my $i_opening = $mate_index_to_go[$i_closing];
21551 my ( $indent, $offset, $is_leading, $exists );
21553 if ( defined($i_opening) && $i_opening >= 0 ) {
21555 # it is..look up the indentation
21556 ( $indent, $offset, $is_leading ) =
21557 lookup_opening_indentation( $i_opening, $ri_first, $ri_last,
21558 $rindentation_list );
21561 # if not, it should have been stored in the hash by a previous batch
21563 my $seqno = $type_sequence_to_go[$i_closing];
21564 $seqno = $qw_seqno unless ($seqno);
21565 ( $indent, $offset, $is_leading, $exists ) =
21566 get_saved_opening_indentation($seqno);
21568 return ( $indent, $offset, $is_leading, $exists );
21571 sub set_vertical_tightness_flags {
21573 my ( $self, $n, $n_last_line, $ibeg, $iend, $ri_first, $ri_last,
21574 $ending_in_quote, $closing_side_comment )
21577 # Define vertical tightness controls for the nth line of a batch.
21578 # We create an array of parameters which tell the vertical aligner
21579 # if we should combine this line with the next line to achieve the
21580 # desired vertical tightness. The array of parameters contains:
21582 # [0] type: 1=opening non-block 2=closing non-block
21583 # 3=opening block brace 4=closing block brace
21585 # [1] flag: if opening: 1=no multiple steps, 2=multiple steps ok
21586 # if closing: spaces of padding to use
21587 # [2] sequence number of container
21588 # [3] valid flag: do not append if this flag is false. Will be
21589 # true if appropriate -vt flag is set. Otherwise, Will be
21590 # made true only for 2 line container in parens with -lp
21592 # These flags are used by sub set_leading_whitespace in
21593 # the vertical aligner
21595 my $rvertical_tightness_flags = [ 0, 0, 0, 0, 0, 0 ];
21597 # The vertical tightness mechanism can add whitespace, so whitespace can
21598 # continually increase if we allowed it when the -fws flag is set.
21599 # See case b499 for an example.
21600 return $rvertical_tightness_flags if ($rOpts_freeze_whitespace);
21602 # Uses these parameters:
21603 # $rOpts_block_brace_tightness
21604 # $rOpts_block_brace_vertical_tightness
21605 # $rOpts_stack_closing_block_brace
21606 # %opening_vertical_tightness
21607 # %closing_vertical_tightness
21608 # %opening_token_right
21609 # %stack_closing_token
21610 # %stack_opening_token
21612 #--------------------------------------------------------------
21613 # Vertical Tightness Flags Section 1:
21614 # Handle Lines 1 .. n-1 but not the last line
21615 # For non-BLOCK tokens, we will need to examine the next line
21616 # too, so we won't consider the last line.
21617 #--------------------------------------------------------------
21618 if ( $n < $n_last_line ) {
21620 #--------------------------------------------------------------
21621 # Vertical Tightness Flags Section 1a:
21622 # Look for Type 1, last token of this line is a non-block opening token
21623 #--------------------------------------------------------------
21624 my $ibeg_next = $ri_first->[ $n + 1 ];
21625 my $token_end = $tokens_to_go[$iend];
21626 my $iend_next = $ri_last->[ $n + 1 ];
21628 $type_sequence_to_go[$iend]
21629 && !$block_type_to_go[$iend]
21630 && $is_opening_token{$token_end}
21632 $opening_vertical_tightness{$token_end} > 0
21634 # allow 2-line method call to be closed up
21635 || ( $rOpts_line_up_parentheses
21636 && $token_end eq '('
21638 && $types_to_go[ $iend - 1 ] ne 'b' )
21643 # avoid multiple jumps in nesting depth in one line if
21645 my $ovt = $opening_vertical_tightness{$token_end};
21646 my $iend_next = $ri_last->[ $n + 1 ];
21649 && ( $nesting_depth_to_go[ $iend_next + 1 ] !=
21650 $nesting_depth_to_go[$ibeg_next] )
21654 # If -vt flag has not been set, mark this as invalid
21655 # and aligner will validate it if it sees the closing paren
21657 my $valid_flag = $ovt;
21658 @{$rvertical_tightness_flags} =
21659 ( 1, $ovt, $type_sequence_to_go[$iend], $valid_flag );
21663 #--------------------------------------------------------------
21664 # Vertical Tightness Flags Section 1b:
21665 # Look for Type 2, first token of next line is a non-block closing
21666 # token .. and be sure this line does not have a side comment
21667 #--------------------------------------------------------------
21668 my $token_next = $tokens_to_go[$ibeg_next];
21669 if ( $type_sequence_to_go[$ibeg_next]
21670 && !$block_type_to_go[$ibeg_next]
21671 && $is_closing_token{$token_next}
21672 && $types_to_go[$iend] ne '#' ) # for safety, shouldn't happen!
21674 my $ovt = $opening_vertical_tightness{$token_next};
21675 my $cvt = $closing_vertical_tightness{$token_next};
21677 # Implement cvt=3: like cvt=0 for assigned structures, like cvt=1
21678 # otherwise. Added for rt136417.
21680 my $seqno = $type_sequence_to_go[$ibeg_next];
21681 $cvt = $self->[_ris_assigned_structure_]->{$seqno} ? 0 : 1;
21686 # Never append a trailing line like ')->pack(' because it
21687 # will throw off later alignment. So this line must start at a
21688 # deeper level than the next line (fix1 for welding, git #45).
21690 $nesting_depth_to_go[$ibeg_next] >=
21691 $nesting_depth_to_go[ $iend_next + 1 ] + 1
21696 !$self->is_in_list_by_i($ibeg_next)
21700 # allow closing up 2-line method calls
21701 || ( $rOpts_line_up_parentheses
21702 && $token_next eq ')' )
21709 # decide which trailing closing tokens to append..
21711 if ( $cvt == 2 || $iend_next == $ibeg_next ) { $ok = 1 }
21713 my $str = join( '',
21714 @types_to_go[ $ibeg_next + 1 .. $ibeg_next + 2 ] );
21716 # append closing token if followed by comment or ';'
21717 # or another closing token (fix2 for welding, git #45)
21718 if ( $str =~ /^b?[\)\]\}R#;]/ ) { $ok = 1 }
21722 my $valid_flag = $cvt;
21723 @{$rvertical_tightness_flags} = (
21725 $tightness{$token_next} == 2 ? 0 : 1,
21726 $type_sequence_to_go[$ibeg_next], $valid_flag,
21732 #--------------------------------------------------------------
21733 # Vertical Tightness Flags Section 1c:
21734 # Implement the Opening Token Right flag (Type 2)..
21735 # If requested, move an isolated trailing opening token to the end of
21736 # the previous line which ended in a comma. We could do this
21737 # in sub recombine_breakpoints but that would cause problems
21738 # with -lp formatting. The problem is that indentation will
21739 # quickly move far to the right in nested expressions. By
21740 # doing it after indentation has been set, we avoid changes
21741 # to the indentation. Actual movement of the token takes place
21742 # in sub valign_output_step_B.
21744 # Note added 4 May 2021: the man page suggests that the -otr flags
21745 # are mainly for opening tokens following commas. But this seems
21746 # to have been generalized long ago to include other situations.
21747 # I checked the coding back to 2012 and it is essentially the same
21748 # as here, so it is best to leave this unchanged for now.
21749 #--------------------------------------------------------------
21751 $opening_token_right{ $tokens_to_go[$ibeg_next] }
21753 # previous line is not opening
21754 # (use -sot to combine with it)
21755 && !$is_opening_token{$token_end}
21757 # previous line ended in one of these
21758 # (add other cases if necessary; '=>' and '.' are not necessary
21759 && !$block_type_to_go[$ibeg_next]
21761 # this is a line with just an opening token
21762 && ( $iend_next == $ibeg_next
21763 || $iend_next == $ibeg_next + 2
21764 && $types_to_go[$iend_next] eq '#' )
21766 # Fix for case b1060 when both -baoo and -otr are set:
21767 # to avoid blinking, honor the -baoo flag over the -otr flag.
21768 && $token_end ne '||' && $token_end ne '&&'
21770 # Keep break after '=' if -lp. Fixes b964 b1040 b1062 b1083 b1089.
21771 && !( $token_end eq '=' && $rOpts_line_up_parentheses )
21773 # looks bad if we align vertically with the wrong container
21774 && $tokens_to_go[$ibeg] ne $tokens_to_go[$ibeg_next]
21777 my $valid_flag = 1;
21778 my $spaces = ( $types_to_go[ $ibeg_next - 1 ] eq 'b' ) ? 1 : 0;
21779 @{$rvertical_tightness_flags} =
21780 ( 2, $spaces, $type_sequence_to_go[$ibeg_next], $valid_flag, );
21783 #--------------------------------------------------------------
21784 # Vertical Tightness Flags Section 1d:
21785 # Stacking of opening and closing tokens (Type 2)
21786 #--------------------------------------------------------------
21788 my $token_beg_next = $tokens_to_go[$ibeg_next];
21790 # patch to make something like 'qw(' behave like an opening paren
21792 if ( $types_to_go[$ibeg_next] eq 'q' ) {
21793 if ( $token_beg_next =~ /^qw\s*([\[\(\{])$/ ) {
21794 $token_beg_next = $1;
21798 if ( $is_closing_token{$token_end}
21799 && $is_closing_token{$token_beg_next} )
21801 $stackable = $stack_closing_token{$token_beg_next}
21802 unless ( $block_type_to_go[$ibeg_next] )
21803 ; # shouldn't happen; just checking
21805 elsif ($is_opening_token{$token_end}
21806 && $is_opening_token{$token_beg_next} )
21808 $stackable = $stack_opening_token{$token_beg_next}
21809 unless ( $block_type_to_go[$ibeg_next] )
21810 ; # shouldn't happen; just checking
21815 my $is_semicolon_terminated;
21816 if ( $n + 1 == $n_last_line ) {
21817 my ( $terminal_type, $i_terminal ) =
21818 terminal_type_i( $ibeg_next, $iend_next );
21819 $is_semicolon_terminated = $terminal_type eq ';'
21820 && $nesting_depth_to_go[$iend_next] <
21821 $nesting_depth_to_go[$ibeg_next];
21824 # this must be a line with just an opening token
21825 # or end in a semicolon
21827 $is_semicolon_terminated
21828 || ( $iend_next == $ibeg_next
21829 || $iend_next == $ibeg_next + 2
21830 && $types_to_go[$iend_next] eq '#' )
21833 my $valid_flag = 1;
21834 my $spaces = ( $types_to_go[ $ibeg_next - 1 ] eq 'b' ) ? 1 : 0;
21835 @{$rvertical_tightness_flags} = (
21836 2, $spaces, $type_sequence_to_go[$ibeg_next], $valid_flag,
21842 #--------------------------------------------------------------
21843 # Vertical Tightness Flags Section 2:
21844 # Handle type 3, opening block braces on last line of the batch
21845 # Check for a last line with isolated opening BLOCK curly
21846 #--------------------------------------------------------------
21847 elsif ($rOpts_block_brace_vertical_tightness
21849 && $types_to_go[$iend] eq '{'
21850 && $block_type_to_go[$iend] =~
21851 /$block_brace_vertical_tightness_pattern/ )
21853 @{$rvertical_tightness_flags} =
21854 ( 3, $rOpts_block_brace_vertical_tightness, 0, 1 );
21857 #--------------------------------------------------------------
21858 # Vertical Tightness Flags Section 3:
21859 # Handle type 4, a closing block brace on the last line of the batch Check
21860 # for a last line with isolated closing BLOCK curly
21861 # Patch: added a check for any new closing side comment which the
21862 # -csc option may generate. If it exists, there will be a side comment
21863 # so we cannot combine with a brace on the next line. This issue
21864 # occurs for the combination -scbb and -csc is used.
21865 #--------------------------------------------------------------
21866 elsif ($rOpts_stack_closing_block_brace
21868 && $block_type_to_go[$iend]
21869 && $types_to_go[$iend] eq '}'
21870 && ( !$closing_side_comment || $n < $n_last_line ) )
21872 my $spaces = $rOpts_block_brace_tightness == 2 ? 0 : 1;
21873 @{$rvertical_tightness_flags} =
21874 ( 4, $spaces, $type_sequence_to_go[$iend], 1 );
21877 # pack in the sequence numbers of the ends of this line
21878 my $seqno_beg = $type_sequence_to_go[$ibeg];
21879 if ( !$seqno_beg && $types_to_go[$ibeg] eq 'q' ) {
21880 $seqno_beg = $self->get_seqno( $ibeg, $ending_in_quote );
21882 my $seqno_end = $type_sequence_to_go[$iend];
21883 if ( !$seqno_end && $types_to_go[$iend] eq 'q' ) {
21884 $seqno_end = $self->get_seqno( $iend, $ending_in_quote );
21886 $rvertical_tightness_flags->[4] = $seqno_beg;
21887 $rvertical_tightness_flags->[5] = $seqno_end;
21888 return $rvertical_tightness_flags;
21891 ##########################################################
21892 # CODE SECTION 14: Code for creating closing side comments
21893 ##########################################################
21895 { ## begin closure accumulate_csc_text
21897 # These routines are called once per batch when the --closing-side-comments flag
21900 my %block_leading_text;
21901 my %block_opening_line_number;
21902 my $csc_new_statement_ok;
21903 my $csc_last_label;
21904 my %csc_block_label;
21905 my $accumulating_text_for_block;
21906 my $leading_block_text;
21907 my $rleading_block_if_elsif_text;
21908 my $leading_block_text_level;
21909 my $leading_block_text_length_exceeded;
21910 my $leading_block_text_line_length;
21911 my $leading_block_text_line_number;
21913 sub initialize_csc_vars {
21914 %block_leading_text = ();
21915 %block_opening_line_number = ();
21916 $csc_new_statement_ok = 1;
21917 $csc_last_label = "";
21918 %csc_block_label = ();
21919 $rleading_block_if_elsif_text = [];
21920 $accumulating_text_for_block = "";
21921 reset_block_text_accumulator();
21925 sub reset_block_text_accumulator {
21927 # save text after 'if' and 'elsif' to append after 'else'
21928 if ($accumulating_text_for_block) {
21930 if ( $accumulating_text_for_block =~ /^(if|elsif)$/ ) {
21931 push @{$rleading_block_if_elsif_text}, $leading_block_text;
21934 $accumulating_text_for_block = "";
21935 $leading_block_text = "";
21936 $leading_block_text_level = 0;
21937 $leading_block_text_length_exceeded = 0;
21938 $leading_block_text_line_number = 0;
21939 $leading_block_text_line_length = 0;
21943 sub set_block_text_accumulator {
21944 my ( $self, $i ) = @_;
21945 $accumulating_text_for_block = $tokens_to_go[$i];
21946 if ( $accumulating_text_for_block !~ /^els/ ) {
21947 $rleading_block_if_elsif_text = [];
21949 $leading_block_text = "";
21950 $leading_block_text_level = $levels_to_go[$i];
21951 $leading_block_text_line_number = $self->get_output_line_number();
21952 $leading_block_text_length_exceeded = 0;
21954 # this will contain the column number of the last character
21955 # of the closing side comment
21956 $leading_block_text_line_length =
21957 length($csc_last_label) +
21958 length($accumulating_text_for_block) +
21959 length( $rOpts->{'closing-side-comment-prefix'} ) +
21960 $leading_block_text_level * $rOpts_indent_columns + 3;
21964 sub accumulate_block_text {
21965 my ( $self, $i ) = @_;
21967 # accumulate leading text for -csc, ignoring any side comments
21968 if ( $accumulating_text_for_block
21969 && !$leading_block_text_length_exceeded
21970 && $types_to_go[$i] ne '#' )
21973 my $added_length = $token_lengths_to_go[$i];
21974 $added_length += 1 if $i == 0;
21975 my $new_line_length =
21976 $leading_block_text_line_length + $added_length;
21978 # we can add this text if we don't exceed some limits..
21981 # we must not have already exceeded the text length limit
21982 length($leading_block_text) <
21983 $rOpts_closing_side_comment_maximum_text
21986 # the new total line length must be below the line length limit
21987 # or the new length must be below the text length limit
21988 # (ie, we may allow one token to exceed the text length limit)
21991 $maximum_line_length_at_level[$leading_block_text_level]
21993 || length($leading_block_text) + $added_length <
21994 $rOpts_closing_side_comment_maximum_text
21997 # UNLESS: we are adding a closing paren before the brace we seek.
21998 # This is an attempt to avoid situations where the ... to be
21999 # added are longer than the omitted right paren, as in:
22001 # foreach my $item (@a_rather_long_variable_name_here) {
22003 # } ## end foreach my $item (@a_rather_long_variable_name_here...
22006 $tokens_to_go[$i] eq ')'
22009 $i + 1 <= $max_index_to_go
22010 && $block_type_to_go[ $i + 1 ] eq
22011 $accumulating_text_for_block
22013 || ( $i + 2 <= $max_index_to_go
22014 && $block_type_to_go[ $i + 2 ] eq
22015 $accumulating_text_for_block )
22021 # add an extra space at each newline
22022 if ( $i == 0 && $types_to_go[$i] ne 'b' ) {
22023 $leading_block_text .= ' ';
22026 # add the token text
22027 $leading_block_text .= $tokens_to_go[$i];
22028 $leading_block_text_line_length = $new_line_length;
22031 # show that text was truncated if necessary
22032 elsif ( $types_to_go[$i] ne 'b' ) {
22033 $leading_block_text_length_exceeded = 1;
22034 $leading_block_text .= '...';
22040 sub accumulate_csc_text {
22044 # called once per output buffer when -csc is used. Accumulates
22045 # the text placed after certain closing block braces.
22046 # Defines and returns the following for this buffer:
22048 my $block_leading_text = ""; # the leading text of the last '}'
22049 my $rblock_leading_if_elsif_text;
22050 my $i_block_leading_text =
22051 -1; # index of token owning block_leading_text
22052 my $block_line_count = 100; # how many lines the block spans
22053 my $terminal_type = 'b'; # type of last nonblank token
22054 my $i_terminal = 0; # index of last nonblank token
22055 my $terminal_block_type = "";
22057 # update most recent statement label
22058 $csc_last_label = "" unless ($csc_last_label);
22059 if ( $types_to_go[0] eq 'J' ) { $csc_last_label = $tokens_to_go[0] }
22060 my $block_label = $csc_last_label;
22062 # Loop over all tokens of this batch
22063 for my $i ( 0 .. $max_index_to_go ) {
22064 my $type = $types_to_go[$i];
22065 my $block_type = $block_type_to_go[$i];
22066 my $token = $tokens_to_go[$i];
22068 # remember last nonblank token type
22069 if ( $type ne '#' && $type ne 'b' ) {
22070 $terminal_type = $type;
22071 $terminal_block_type = $block_type;
22075 my $type_sequence = $type_sequence_to_go[$i];
22076 if ( $block_type && $type_sequence ) {
22078 if ( $token eq '}' ) {
22080 # restore any leading text saved when we entered this block
22081 if ( defined( $block_leading_text{$type_sequence} ) ) {
22082 ( $block_leading_text, $rblock_leading_if_elsif_text )
22083 = @{ $block_leading_text{$type_sequence} };
22084 $i_block_leading_text = $i;
22085 delete $block_leading_text{$type_sequence};
22086 $rleading_block_if_elsif_text =
22087 $rblock_leading_if_elsif_text;
22090 if ( defined( $csc_block_label{$type_sequence} ) ) {
22091 $block_label = $csc_block_label{$type_sequence};
22092 delete $csc_block_label{$type_sequence};
22095 # if we run into a '}' then we probably started accumulating
22096 # at something like a trailing 'if' clause..no harm done.
22097 if ( $accumulating_text_for_block
22098 && $levels_to_go[$i] <= $leading_block_text_level )
22100 my $lev = $levels_to_go[$i];
22101 reset_block_text_accumulator();
22104 if ( defined( $block_opening_line_number{$type_sequence} ) )
22106 my $output_line_number =
22107 $self->get_output_line_number();
22108 $block_line_count =
22109 $output_line_number -
22110 $block_opening_line_number{$type_sequence} + 1;
22111 delete $block_opening_line_number{$type_sequence};
22115 # Error: block opening line undefined for this line..
22116 # This shouldn't be possible, but it is not a
22117 # significant problem.
22121 elsif ( $token eq '{' ) {
22123 my $line_number = $self->get_output_line_number();
22124 $block_opening_line_number{$type_sequence} = $line_number;
22126 # set a label for this block, except for
22127 # a bare block which already has the label
22128 # A label can only be used on the next {
22129 if ( $block_type =~ /:$/ ) { $csc_last_label = "" }
22130 $csc_block_label{$type_sequence} = $csc_last_label;
22131 $csc_last_label = "";
22133 if ( $accumulating_text_for_block
22134 && $levels_to_go[$i] == $leading_block_text_level )
22137 if ( $accumulating_text_for_block eq $block_type ) {
22139 # save any leading text before we enter this block
22140 $block_leading_text{$type_sequence} = [
22141 $leading_block_text,
22142 $rleading_block_if_elsif_text
22144 $block_opening_line_number{$type_sequence} =
22145 $leading_block_text_line_number;
22146 reset_block_text_accumulator();
22150 # shouldn't happen, but not a serious error.
22151 # We were accumulating -csc text for block type
22152 # $accumulating_text_for_block and unexpectedly
22153 # encountered a '{' for block type $block_type.
22160 && $csc_new_statement_ok
22161 && $is_if_elsif_else_unless_while_until_for_foreach{$token}
22162 && $token =~ /$closing_side_comment_list_pattern/ )
22164 $self->set_block_text_accumulator($i);
22168 # note: ignoring type 'q' because of tricks being played
22169 # with 'q' for hanging side comments
22170 if ( $type ne 'b' && $type ne '#' && $type ne 'q' ) {
22171 $csc_new_statement_ok =
22172 ( $block_type || $type eq 'J' || $type eq ';' );
22175 && $accumulating_text_for_block
22176 && $levels_to_go[$i] == $leading_block_text_level )
22178 reset_block_text_accumulator();
22181 $self->accumulate_block_text($i);
22186 # Treat an 'else' block specially by adding preceding 'if' and
22187 # 'elsif' text. Otherwise, the 'end else' is not helpful,
22188 # especially for cuddled-else formatting.
22189 if ( $terminal_block_type =~ /^els/ && $rblock_leading_if_elsif_text ) {
22190 $block_leading_text =
22191 $self->make_else_csc_text( $i_terminal, $terminal_block_type,
22192 $block_leading_text, $rblock_leading_if_elsif_text );
22195 # if this line ends in a label then remember it for the next pass
22196 $csc_last_label = "";
22197 if ( $terminal_type eq 'J' ) {
22198 $csc_last_label = $tokens_to_go[$i_terminal];
22201 return ( $terminal_type, $i_terminal, $i_block_leading_text,
22202 $block_leading_text, $block_line_count, $block_label );
22205 sub make_else_csc_text {
22207 # create additional -csc text for an 'else' and optionally 'elsif',
22208 # depending on the value of switch
22210 # = 0 add 'if' text to trailing else
22211 # = 1 same as 0 plus:
22212 # add 'if' to 'elsif's if can fit in line length
22213 # add last 'elsif' to trailing else if can fit in one line
22214 # = 2 same as 1 but do not check if exceed line length
22216 # $rif_elsif_text = a reference to a list of all previous closing
22217 # side comments created for this if block
22219 my ( $self, $i_terminal, $block_type, $block_leading_text,
22222 my $csc_text = $block_leading_text;
22224 if ( $block_type eq 'elsif'
22225 && $rOpts_closing_side_comment_else_flag == 0 )
22230 my $count = @{$rif_elsif_text};
22231 return $csc_text unless ($count);
22233 my $if_text = '[ if' . $rif_elsif_text->[0];
22235 # always show the leading 'if' text on 'else'
22236 if ( $block_type eq 'else' ) {
22237 $csc_text .= $if_text;
22240 # see if that's all
22241 if ( $rOpts_closing_side_comment_else_flag == 0 ) {
22245 my $last_elsif_text = "";
22246 if ( $count > 1 ) {
22247 $last_elsif_text = ' [elsif' . $rif_elsif_text->[ $count - 1 ];
22248 if ( $count > 2 ) { $last_elsif_text = ' [...' . $last_elsif_text; }
22251 # tentatively append one more item
22252 my $saved_text = $csc_text;
22253 if ( $block_type eq 'else' ) {
22254 $csc_text .= $last_elsif_text;
22257 $csc_text .= ' ' . $if_text;
22260 # all done if no length checks requested
22261 if ( $rOpts_closing_side_comment_else_flag == 2 ) {
22265 # undo it if line length exceeded
22267 length($csc_text) +
22268 length($block_type) +
22269 length( $rOpts->{'closing-side-comment-prefix'} ) +
22270 $levels_to_go[$i_terminal] * $rOpts_indent_columns + 3;
22272 $length > $maximum_line_length_at_level[$leading_block_text_level] )
22274 $csc_text = $saved_text;
22278 } ## end closure accumulate_csc_text
22280 { ## begin closure balance_csc_text
22282 # Some additional routines for handling the --closing-side-comments option
22297 sub balance_csc_text {
22299 # Append characters to balance a closing side comment so that editors
22300 # such as vim can correctly jump through code.
22302 # input = ## end foreach my $foo ( sort { $b ...
22303 # output = ## end foreach my $foo ( sort { $b ...})
22305 # NOTE: This routine does not currently filter out structures within
22306 # quoted text because the bounce algorithms in text editors do not
22307 # necessarily do this either (a version of vim was checked and
22308 # did not do this).
22310 # Some complex examples which will cause trouble for some editors:
22311 # while ( $mask_string =~ /\{[^{]*?\}/g ) {
22312 # if ( $mask_str =~ /\}\s*els[^\{\}]+\{$/ ) {
22313 # if ( $1 eq '{' ) {
22314 # test file test1/braces.pl has many such examples.
22318 # loop to examine characters one-by-one, RIGHT to LEFT and
22319 # build a balancing ending, LEFT to RIGHT.
22320 for ( my $pos = length($csc) - 1 ; $pos >= 0 ; $pos-- ) {
22322 my $char = substr( $csc, $pos, 1 );
22324 # ignore everything except structural characters
22325 next unless ( $matching_char{$char} );
22327 # pop most recently appended character
22328 my $top = chop($csc);
22330 # push it back plus the mate to the newest character
22331 # unless they balance each other.
22332 $csc = $csc . $top . $matching_char{$char} unless $top eq $char;
22335 # return the balanced string
22338 } ## end closure balance_csc_text
22340 sub add_closing_side_comment {
22343 my $rLL = $self->[_rLL_];
22345 # add closing side comments after closing block braces if -csc used
22346 my ( $closing_side_comment, $cscw_block_comment );
22348 #---------------------------------------------------------------
22349 # Step 1: loop through all tokens of this line to accumulate
22350 # the text needed to create the closing side comments. Also see
22351 # how the line ends.
22352 #---------------------------------------------------------------
22354 my ( $terminal_type, $i_terminal, $i_block_leading_text,
22355 $block_leading_text, $block_line_count, $block_label )
22356 = $self->accumulate_csc_text();
22358 #---------------------------------------------------------------
22359 # Step 2: make the closing side comment if this ends a block
22360 #---------------------------------------------------------------
22361 my $have_side_comment = $types_to_go[$max_index_to_go] eq '#';
22363 # if this line might end in a block closure..
22365 $terminal_type eq '}'
22370 # the block is long enough
22371 ( $block_line_count >= $rOpts->{'closing-side-comment-interval'} )
22373 # or there is an existing comment to check
22374 || ( $have_side_comment
22375 && $rOpts->{'closing-side-comment-warnings'} )
22378 # .. and if this is one of the types of interest
22379 && $block_type_to_go[$i_terminal] =~
22380 /$closing_side_comment_list_pattern/
22382 # .. but not an anonymous sub
22383 # These are not normally of interest, and their closing braces are
22384 # often followed by commas or semicolons anyway. This also avoids
22385 # possible erratic output due to line numbering inconsistencies
22386 # in the cases where their closing braces terminate a line.
22387 && $block_type_to_go[$i_terminal] ne 'sub'
22389 # ..and the corresponding opening brace must is not in this batch
22390 # (because we do not need to tag one-line blocks, although this
22391 # should also be caught with a positive -csci value)
22392 && $mate_index_to_go[$i_terminal] < 0
22397 # this is the last token (line doesn't have a side comment)
22398 !$have_side_comment
22400 # or the old side comment is a closing side comment
22401 || $tokens_to_go[$max_index_to_go] =~
22402 /$closing_side_comment_prefix_pattern/
22407 # then make the closing side comment text
22408 if ($block_label) { $block_label .= " " }
22410 "$rOpts->{'closing-side-comment-prefix'} $block_label$block_type_to_go[$i_terminal]";
22412 # append any extra descriptive text collected above
22413 if ( $i_block_leading_text == $i_terminal ) {
22414 $token .= $block_leading_text;
22417 $token = balance_csc_text($token)
22418 if $rOpts->{'closing-side-comments-balanced'};
22420 $token =~ s/\s*$//; # trim any trailing whitespace
22422 # handle case of existing closing side comment
22423 if ($have_side_comment) {
22425 # warn if requested and tokens differ significantly
22426 if ( $rOpts->{'closing-side-comment-warnings'} ) {
22427 my $old_csc = $tokens_to_go[$max_index_to_go];
22428 my $new_csc = $token;
22429 $new_csc =~ s/\s+//g; # trim all whitespace
22430 $old_csc =~ s/\s+//g; # trim all whitespace
22431 $new_csc =~ s/[\]\)\}\s]*$//; # trim trailing structures
22432 $old_csc =~ s/[\]\)\}\s]*$//; # trim trailing structures
22433 $new_csc =~ s/(\.\.\.)$//; # trim trailing '...'
22434 my $new_trailing_dots = $1;
22435 $old_csc =~ s/(\.\.\.)\s*$//; # trim trailing '...'
22437 # Patch to handle multiple closing side comments at
22438 # else and elsif's. These have become too complicated
22439 # to check, so if we see an indication of
22440 # '[ if' or '[ # elsif', then assume they were made
22442 if ( $block_type_to_go[$i_terminal] eq 'else' ) {
22443 if ( $old_csc =~ /\[\s*elsif/ ) { $old_csc = $new_csc }
22445 elsif ( $block_type_to_go[$i_terminal] eq 'elsif' ) {
22446 if ( $old_csc =~ /\[\s*if/ ) { $old_csc = $new_csc }
22449 # if old comment is contained in new comment,
22450 # only compare the common part.
22451 if ( length($new_csc) > length($old_csc) ) {
22452 $new_csc = substr( $new_csc, 0, length($old_csc) );
22455 # if the new comment is shorter and has been limited,
22456 # only compare the common part.
22457 if ( length($new_csc) < length($old_csc)
22458 && $new_trailing_dots )
22460 $old_csc = substr( $old_csc, 0, length($new_csc) );
22463 # any remaining difference?
22464 if ( $new_csc ne $old_csc ) {
22466 # just leave the old comment if we are below the threshold
22467 # for creating side comments
22468 if ( $block_line_count <
22469 $rOpts->{'closing-side-comment-interval'} )
22474 # otherwise we'll make a note of it
22478 "perltidy -cscw replaced: $tokens_to_go[$max_index_to_go]\n"
22481 # save the old side comment in a new trailing block
22483 my $timestamp = "";
22484 if ( $rOpts->{'timestamp'} ) {
22485 my ( $day, $month, $year ) = (localtime)[ 3, 4, 5 ];
22488 $timestamp = "$year-$month-$day";
22490 $cscw_block_comment =
22491 "## perltidy -cscw $timestamp: $tokens_to_go[$max_index_to_go]";
22492 ## "## perltidy -cscw $year-$month-$day: $tokens_to_go[$max_index_to_go]";
22497 # No differences.. we can safely delete old comment if we
22498 # are below the threshold
22499 if ( $block_line_count <
22500 $rOpts->{'closing-side-comment-interval'} )
22503 $self->unstore_token_to_go()
22504 if ( $types_to_go[$max_index_to_go] eq '#' );
22505 $self->unstore_token_to_go()
22506 if ( $types_to_go[$max_index_to_go] eq 'b' );
22511 # switch to the new csc (unless we deleted it!)
22513 $tokens_to_go[$max_index_to_go] = $token;
22514 my $K = $K_to_go[$max_index_to_go];
22515 $rLL->[$K]->[_TOKEN_] = $token;
22516 $rLL->[$K]->[_TOKEN_LENGTH_] =
22517 length($token); # NOTE: length no longer important
22521 # handle case of NO existing closing side comment
22524 # To avoid inserting a new token in the token arrays, we
22525 # will just return the new side comment so that it can be
22526 # inserted just before it is needed in the call to the
22527 # vertical aligner.
22528 $closing_side_comment = $token;
22531 return ( $closing_side_comment, $cscw_block_comment );
22534 ############################
22535 # CODE SECTION 15: Summarize
22536 ############################
22540 # This is the last routine called when a file is formatted.
22541 # Flush buffer and write any informative messages
22545 my $file_writer_object = $self->[_file_writer_object_];
22546 $file_writer_object->decrement_output_line_number()
22547 ; # fix up line number since it was incremented
22548 we_are_at_the_last_line();
22549 my $added_semicolon_count = $self->[_added_semicolon_count_];
22550 my $first_added_semicolon_at = $self->[_first_added_semicolon_at_];
22551 my $last_added_semicolon_at = $self->[_last_added_semicolon_at_];
22553 if ( $added_semicolon_count > 0 ) {
22554 my $first = ( $added_semicolon_count > 1 ) ? "First" : "";
22556 ( $added_semicolon_count > 1 ) ? "semicolons were" : "semicolon was";
22557 write_logfile_entry("$added_semicolon_count $what added:\n");
22558 write_logfile_entry(
22559 " $first at input line $first_added_semicolon_at\n");
22561 if ( $added_semicolon_count > 1 ) {
22562 write_logfile_entry(
22563 " Last at input line $last_added_semicolon_at\n");
22565 write_logfile_entry(" (Use -nasc to prevent semicolon addition)\n");
22566 write_logfile_entry("\n");
22569 my $deleted_semicolon_count = $self->[_deleted_semicolon_count_];
22570 my $first_deleted_semicolon_at = $self->[_first_deleted_semicolon_at_];
22571 my $last_deleted_semicolon_at = $self->[_last_deleted_semicolon_at_];
22572 if ( $deleted_semicolon_count > 0 ) {
22573 my $first = ( $deleted_semicolon_count > 1 ) ? "First" : "";
22575 ( $deleted_semicolon_count > 1 )
22576 ? "semicolons were"
22578 write_logfile_entry(
22579 "$deleted_semicolon_count unnecessary $what deleted:\n");
22580 write_logfile_entry(
22581 " $first at input line $first_deleted_semicolon_at\n");
22583 if ( $deleted_semicolon_count > 1 ) {
22584 write_logfile_entry(
22585 " Last at input line $last_deleted_semicolon_at\n");
22587 write_logfile_entry(" (Use -ndsm to prevent semicolon deletion)\n");
22588 write_logfile_entry("\n");
22591 my $embedded_tab_count = $self->[_embedded_tab_count_];
22592 my $first_embedded_tab_at = $self->[_first_embedded_tab_at_];
22593 my $last_embedded_tab_at = $self->[_last_embedded_tab_at_];
22594 if ( $embedded_tab_count > 0 ) {
22595 my $first = ( $embedded_tab_count > 1 ) ? "First" : "";
22597 ( $embedded_tab_count > 1 )
22598 ? "quotes or patterns"
22599 : "quote or pattern";
22600 write_logfile_entry("$embedded_tab_count $what had embedded tabs:\n");
22601 write_logfile_entry(
22602 "This means the display of this script could vary with device or software\n"
22604 write_logfile_entry(" $first at input line $first_embedded_tab_at\n");
22606 if ( $embedded_tab_count > 1 ) {
22607 write_logfile_entry(
22608 " Last at input line $last_embedded_tab_at\n");
22610 write_logfile_entry("\n");
22613 my $first_tabbing_disagreement = $self->[_first_tabbing_disagreement_];
22614 my $last_tabbing_disagreement = $self->[_last_tabbing_disagreement_];
22615 my $tabbing_disagreement_count = $self->[_tabbing_disagreement_count_];
22616 my $in_tabbing_disagreement = $self->[_in_tabbing_disagreement_];
22618 if ($first_tabbing_disagreement) {
22619 write_logfile_entry(
22620 "First indentation disagreement seen at input line $first_tabbing_disagreement\n"
22624 my $first_btd = $self->[_first_brace_tabbing_disagreement_];
22627 "First closing brace indentation disagreement started at input line $first_btd\n";
22628 write_logfile_entry($msg);
22630 # leave a hint in the .ERR file if there was a brace error
22631 if ( get_saw_brace_error() ) { warning("NOTE: $msg") }
22634 my $in_btd = $self->[_in_brace_tabbing_disagreement_];
22637 "Ending with brace indentation disagreement which started at input line $in_btd\n";
22638 write_logfile_entry($msg);
22640 # leave a hint in the .ERR file if there was a brace error
22641 if ( get_saw_brace_error() ) { warning("NOTE: $msg") }
22644 if ($in_tabbing_disagreement) {
22646 "Ending with indentation disagreement which started at input line $in_tabbing_disagreement\n";
22647 write_logfile_entry($msg);
22651 if ($last_tabbing_disagreement) {
22653 write_logfile_entry(
22654 "Last indentation disagreement seen at input line $last_tabbing_disagreement\n"
22658 write_logfile_entry("No indentation disagreement seen\n");
22662 if ($first_tabbing_disagreement) {
22663 write_logfile_entry(
22664 "Note: Indentation disagreement detection is not accurate for outdenting and -lp.\n"
22667 write_logfile_entry("\n");
22669 my $vao = $self->[_vertical_aligner_object_];
22670 $vao->report_anything_unusual();
22672 $file_writer_object->report_line_length_errors();
22674 $self->[_converged_] = $file_writer_object->get_convergence_check()
22675 || $rOpts->{'indent-only'};
22680 } ## end package Perl::Tidy::Formatter