1 #####################################################################
3 # The Perl::Tidy::Formatter package adds indentation, whitespace, and
4 # line breaks to the token stream
6 # WARNING: This is not a real class for speed reasons. Only one
7 # Formatter may be used.
9 #####################################################################
11 package Perl::Tidy::Formatter;
15 our $VERSION = '20200110';
17 # The Tokenizer will be loaded with the Formatter
18 ##use Perl::Tidy::Tokenizer; # for is_keyword()
22 Perl::Tidy::Die($msg);
23 croak "unexpected return from Perl::Tidy::Die";
28 Perl::Tidy::Warn($msg);
34 Perl::Tidy::Exit($msg);
35 croak "unexpected return from Perl::Tidy::Exit";
40 # Codes for insertion and deletion of blanks
41 use constant DELETE => 0;
42 use constant STABLE => 1;
43 use constant INSERT => 2;
45 # Caution: these debug flags produce a lot of output
46 # They should all be 0 except when debugging small scripts
47 use constant FORMATTER_DEBUG_FLAG_RECOMBINE => 0;
48 use constant FORMATTER_DEBUG_FLAG_BOND_TABLES => 0;
49 use constant FORMATTER_DEBUG_FLAG_BOND => 0;
50 use constant FORMATTER_DEBUG_FLAG_BREAK => 0;
51 use constant FORMATTER_DEBUG_FLAG_CI => 0;
52 use constant FORMATTER_DEBUG_FLAG_FLUSH => 0;
53 use constant FORMATTER_DEBUG_FLAG_FORCE => 0;
54 use constant FORMATTER_DEBUG_FLAG_LIST => 0;
55 use constant FORMATTER_DEBUG_FLAG_NOBREAK => 0;
56 use constant FORMATTER_DEBUG_FLAG_OUTPUT => 0;
57 use constant FORMATTER_DEBUG_FLAG_SPARSE => 0;
58 use constant FORMATTER_DEBUG_FLAG_STORE => 0;
59 use constant FORMATTER_DEBUG_FLAG_UNDOBP => 0;
60 use constant FORMATTER_DEBUG_FLAG_WHITE => 0;
62 my $debug_warning = sub {
63 print STDOUT "FORMATTER_DEBUGGING with key $_[0]\n";
66 FORMATTER_DEBUG_FLAG_RECOMBINE && $debug_warning->('RECOMBINE');
67 FORMATTER_DEBUG_FLAG_BOND_TABLES && $debug_warning->('BOND_TABLES');
68 FORMATTER_DEBUG_FLAG_BOND && $debug_warning->('BOND');
69 FORMATTER_DEBUG_FLAG_BREAK && $debug_warning->('BREAK');
70 FORMATTER_DEBUG_FLAG_CI && $debug_warning->('CI');
71 FORMATTER_DEBUG_FLAG_FLUSH && $debug_warning->('FLUSH');
72 FORMATTER_DEBUG_FLAG_FORCE && $debug_warning->('FORCE');
73 FORMATTER_DEBUG_FLAG_LIST && $debug_warning->('LIST');
74 FORMATTER_DEBUG_FLAG_NOBREAK && $debug_warning->('NOBREAK');
75 FORMATTER_DEBUG_FLAG_OUTPUT && $debug_warning->('OUTPUT');
76 FORMATTER_DEBUG_FLAG_SPARSE && $debug_warning->('SPARSE');
77 FORMATTER_DEBUG_FLAG_STORE && $debug_warning->('STORE');
78 FORMATTER_DEBUG_FLAG_UNDOBP && $debug_warning->('UNDOBP');
79 FORMATTER_DEBUG_FLAG_WHITE && $debug_warning->('WHITE');
86 $gnu_position_predictor
87 $line_start_index_to_go
88 $last_indentation_written
89 $last_unadjusted_indentation
91 $last_output_short_opening_token
94 $saw_VERSION_in_this_file
100 $last_output_indentation
107 @container_environment_to_go
109 @forced_breakpoint_to_go
111 @summed_lengths_to_go
113 @leading_spaces_to_go
114 @reduced_spaces_to_go
119 @old_breakpoint_to_go
126 %saved_opening_indentation
129 $comma_count_in_batch
130 $last_nonblank_index_to_go
131 $last_nonblank_type_to_go
132 $last_nonblank_token_to_go
133 $last_last_nonblank_index_to_go
134 $last_last_nonblank_type_to_go
135 $last_last_nonblank_token_to_go
136 @nonblank_lines_at_depth
139 @whitespace_level_stack
140 $whitespace_last_level
142 $format_skipping_pattern_begin
143 $format_skipping_pattern_end
145 $forced_breakpoint_count
146 $forced_breakpoint_undo_count
147 @forced_breakpoint_undo_stack
148 %postponed_breakpoint
152 $first_embedded_tab_at
153 $last_embedded_tab_at
154 $deleted_semicolon_count
155 $first_deleted_semicolon_at
156 $last_deleted_semicolon_at
157 $added_semicolon_count
158 $first_added_semicolon_at
159 $last_added_semicolon_at
160 $first_tabbing_disagreement
161 $last_tabbing_disagreement
162 $in_tabbing_disagreement
163 $tabbing_disagreement_count
166 $last_line_leading_type
167 $last_line_leading_level
168 $last_last_line_leading_level
171 %block_opening_line_number
172 $csc_new_statement_ok
175 $accumulating_text_for_block
177 $rleading_block_if_elsif_text
178 $leading_block_text_level
179 $leading_block_text_length_exceeded
180 $leading_block_text_line_length
181 $leading_block_text_line_number
182 $closing_side_comment_prefix_pattern
183 $closing_side_comment_list_pattern
185 $blank_lines_after_opening_block_pattern
186 $blank_lines_before_closing_block_pattern
190 $last_last_nonblank_token
191 $last_last_nonblank_type
192 $last_nonblank_block_type
195 %is_if_brace_follower
199 %is_last_next_redo_return
200 %is_other_brace_follower
201 %is_else_brace_follower
202 %is_anon_sub_brace_follower
203 %is_anon_sub_1_brace_follower
205 %is_sort_map_grep_eval
207 %is_sort_map_grep_eval_do
208 %is_block_without_semicolon
213 %is_if_unless_and_or_last_next_redo_return
214 %ok_to_add_semicolon_for_block_type
220 $is_static_block_comment
221 $index_start_one_line_block
222 $semicolons_before_block_self_destruct
223 $index_max_forced_break
226 $vertical_aligner_object
233 $static_block_comment_pattern
234 $static_side_comment_pattern
235 %opening_vertical_tightness
236 %closing_vertical_tightness
237 %closing_token_indentation
238 $some_closing_token_indentation
244 $block_brace_vertical_tightness_pattern
245 $keyword_group_list_pattern
246 $keyword_group_list_comment_pattern
249 $rOpts_add_whitespace
250 $rOpts_block_brace_tightness
251 $rOpts_block_brace_vertical_tightness
252 $rOpts_brace_left_and_indent
253 $rOpts_comma_arrow_breakpoints
254 $rOpts_break_at_old_keyword_breakpoints
255 $rOpts_break_at_old_comma_breakpoints
256 $rOpts_break_at_old_logical_breakpoints
257 $rOpts_break_at_old_method_breakpoints
258 $rOpts_break_at_old_ternary_breakpoints
259 $rOpts_break_at_old_attribute_breakpoints
260 $rOpts_closing_side_comment_else_flag
261 $rOpts_closing_side_comment_maximum_text
262 $rOpts_continuation_indentation
263 $rOpts_delete_old_whitespace
264 $rOpts_fuzzy_line_length
265 $rOpts_indent_columns
266 $rOpts_line_up_parentheses
267 $rOpts_maximum_fields_per_table
268 $rOpts_maximum_line_length
269 $rOpts_variable_maximum_line_length
270 $rOpts_short_concatenation_item_length
271 $rOpts_keep_old_blank_lines
272 $rOpts_ignore_old_breakpoints
273 $rOpts_format_skipping
274 $rOpts_space_function_paren
275 $rOpts_space_keyword_paren
276 $rOpts_keep_interior_semicolons
277 $rOpts_ignore_side_comment_lengths
278 $rOpts_stack_closing_block_brace
279 $rOpts_space_backslash_quote
280 $rOpts_whitespace_cycle
281 $rOpts_one_line_block_semicolons
285 %is_keyword_returning_list
303 %weld_len_left_closing
304 %weld_len_right_closing
305 %weld_len_left_opening
306 %weld_len_right_opening
308 $rcuddled_block_types
319 # Array index names for token variables
322 _BLOCK_TYPE_ => $i++,
324 _CONTAINER_ENVIRONMENT_ => $i++,
325 _CONTAINER_TYPE_ => $i++,
326 _CUMULATIVE_LENGTH_ => $i++,
327 _LINE_INDEX_ => $i++,
328 _KNEXT_SEQ_ITEM_ => $i++,
330 _LEVEL_TRUE_ => $i++,
334 _TYPE_SEQUENCE_ => $i++,
336 $NVARS = 1 + _TYPE_SEQUENCE_;
338 # default list of block types for which -bli would apply
339 $bli_list_string = 'if else elsif unless while for foreach do : sub';
344 .. :: << >> ** && .. || // -> => += -= .= %= &= |= ^= *= <>
345 <= >= == =~ !~ != ++ -- /= x=
347 @is_digraph{@q} = (1) x scalar(@q);
349 @q = qw( ... **= <<= >>= &&= ||= //= <=> <<~ );
350 @is_trigraph{@q} = (1) x scalar(@q);
353 = **= += *= &= <<= &&=
358 @is_assignment{@q} = (1) x scalar(@q);
368 @is_keyword_returning_list{@q} = (1) x scalar(@q);
370 @q = qw(is if unless and or err last next redo return);
371 @is_if_unless_and_or_last_next_redo_return{@q} = (1) x scalar(@q);
373 @q = qw(last next redo return);
374 @is_last_next_redo_return{@q} = (1) x scalar(@q);
376 @q = qw(sort map grep);
377 @is_sort_map_grep{@q} = (1) x scalar(@q);
379 @q = qw(sort map grep eval);
380 @is_sort_map_grep_eval{@q} = (1) x scalar(@q);
382 @q = qw(sort map grep eval do);
383 @is_sort_map_grep_eval_do{@q} = (1) x scalar(@q);
386 @is_if_unless{@q} = (1) x scalar(@q);
389 @is_and_or{@q} = (1) x scalar(@q);
391 # Identify certain operators which often occur in chains.
392 # Note: the minus (-) causes a side effect of padding of the first line in
393 # something like this (by sub set_logical_padding):
394 # Checkbutton => 'Transmission checked',
395 # -variable => \$TRANS
396 # This usually improves appearance so it seems ok.
397 @q = qw(&& || and or : ? . + - * /);
398 @is_chain_operator{@q} = (1) x scalar(@q);
400 # We can remove semicolons after blocks preceded by these keywords
402 qw(BEGIN END CHECK INIT AUTOLOAD DESTROY UNITCHECK continue if elsif else
403 unless while until for foreach given when default);
404 @is_block_without_semicolon{@q} = (1) x scalar(@q);
406 # We will allow semicolons to be added within these block types
407 # as well as sub and package blocks.
409 # 1. Note that these keywords are omitted:
410 # switch case given when default sort map grep
411 # 2. It is also ok to add for sub and package blocks and a labeled block
412 # 3. But not okay for other perltidy types including:
414 # 4. Test files: blktype.t, blktype1.t, semicolon.t
416 qw( BEGIN END CHECK INIT AUTOLOAD DESTROY UNITCHECK continue if elsif else
417 unless do while until eval for foreach );
418 @ok_to_add_semicolon_for_block_type{@q} = (1) x scalar(@q);
420 # 'L' is token for opening { at hash key
422 @is_opening_type{@q} = (1) x scalar(@q);
424 # 'R' is token for closing } at hash key
426 @is_closing_type{@q} = (1) x scalar(@q);
429 @is_opening_token{@q} = (1) x scalar(@q);
432 @is_closing_token{@q} = (1) x scalar(@q);
434 # Patterns for standardizing matches to block types for regular subs and
435 # anonymous subs. Examples
436 # 'sub process' is a named sub
437 # 'sub ::m' is a named sub
438 # 'sub' is an anonymous sub
439 # 'sub:' is a label, not a sub
440 # 'substr' is a keyword
441 $SUB_PATTERN = '^sub\s+(::|\w)';
442 $ASUB_PATTERN = '^sub$';
446 use constant WS_YES => 1;
447 use constant WS_OPTIONAL => 0;
448 use constant WS_NO => -1;
450 # Token bond strengths.
451 use constant NO_BREAK => 10000;
452 use constant VERY_STRONG => 100;
453 use constant STRONG => 2.1;
454 use constant NOMINAL => 1.1;
455 use constant WEAK => 0.8;
456 use constant VERY_WEAK => 0.55;
458 # values for testing indexes in output array
459 use constant UNDEFINED_INDEX => -1;
461 # Maximum number of little messages; probably need not be changed.
462 use constant MAX_NAG_MESSAGES => 6;
464 # increment between sequence numbers for each type
465 # For example, ?: pairs might have numbers 7,11,15,...
466 use constant TYPE_SEQUENCE_INCREMENT => 4;
470 # methods to count instances
472 sub get_count { return $_count; }
473 sub _increment_count { return ++$_count }
474 sub _decrement_count { return --$_count }
479 # trim leading and trailing whitespace from a string
488 my $max = shift @vals;
489 foreach my $val (@vals) {
490 $max = ( $max < $val ) ? $val : $max;
497 my $min = shift @vals;
498 foreach my $val (@vals) {
499 $min = ( $min > $val ) ? $val : $min;
506 # given a string containing words separated by whitespace,
507 # return the list of words
512 return split( /\s+/, $str );
516 my ( $rtest, $rvalid, $msg, $exact_match ) = @_;
518 # Check the keys of a hash:
519 # $rtest = ref to hash to test
520 # $rvalid = ref to hash with valid keys
522 # $msg = a message to write in case of error
523 # $exact_match defines the type of check:
524 # = false: test hash must not have unknown key
525 # = true: test hash must have exactly same keys as known hash
527 grep { !exists $rvalid->{$_} } keys %{$rtest};
529 grep { !exists $rtest->{$_} } keys %{$rvalid};
530 my $error = @unknown_keys;
531 if ($exact_match) { $error ||= @missing_keys }
534 my @expected_keys = sort keys %{$rvalid};
535 @unknown_keys = sort @unknown_keys;
537 ------------------------------------------------------------------------
538 Program error detected checking hash keys
540 Expected keys: (@expected_keys)
541 Unknown key(s): (@unknown_keys)
542 Missing key(s): (@missing_keys)
543 ------------------------------------------------------------------------
549 # interface to Perl::Tidy::Logger routines
552 if ($logger_object) { $logger_object->warning($msg); }
558 if ($logger_object) {
559 $logger_object->complain($msg);
564 sub write_logfile_entry {
566 if ($logger_object) {
567 $logger_object->write_logfile_entry(@msg);
574 if ($logger_object) { $logger_object->black_box(@msg); }
578 sub report_definite_bug {
579 if ($logger_object) {
580 $logger_object->report_definite_bug();
585 sub get_saw_brace_error {
586 if ($logger_object) {
587 return $logger_object->get_saw_brace_error();
592 sub we_are_at_the_last_line {
593 if ($logger_object) {
594 $logger_object->we_are_at_the_last_line();
599 # interface to Perl::Tidy::Diagnostics routine
600 sub write_diagnostics {
602 if ($diagnostics_object) { $diagnostics_object->write_diagnostics($msg); }
606 sub get_added_semicolon_count {
608 return $added_semicolon_count;
613 $self->_decrement_count();
617 sub get_output_line_number {
618 return $vertical_aligner_object->get_output_line_number();
623 my ( $class, @args ) = @_;
625 # we are given an object with a write_line() method to take lines
627 sink_object => undef,
628 diagnostics_object => undef,
629 logger_object => undef,
631 my %args = ( %defaults, @args );
633 $logger_object = $args{logger_object};
634 $diagnostics_object = $args{diagnostics_object};
636 # we create another object with a get_line() and peek_ahead() method
637 my $sink_object = $args{sink_object};
638 $file_writer_object =
639 Perl::Tidy::FileWriter->new( $sink_object, $rOpts, $logger_object );
641 # initialize the leading whitespace stack to negative levels
642 # so that we can never run off the end of the stack
643 $peak_batch_size = 0; # flag to determine if we have output code
644 $gnu_position_predictor = 0; # where the current token is predicted to be
645 $max_gnu_stack_index = 0;
646 $max_gnu_item_index = -1;
647 $gnu_stack[0] = new_lp_indentation_item( 0, -1, -1, 0, 0 );
649 $last_output_indentation = 0;
650 $last_indentation_written = 0;
651 $last_unadjusted_indentation = 0;
652 $last_leading_token = "";
653 $last_output_short_opening_token = 0;
655 $saw_VERSION_in_this_file = !$rOpts->{'pass-version-line'};
656 $saw_END_or_DATA_ = 0;
658 @block_type_to_go = ();
659 @type_sequence_to_go = ();
660 @container_environment_to_go = ();
661 @bond_strength_to_go = ();
662 @forced_breakpoint_to_go = ();
663 @summed_lengths_to_go = (); # line length to start of ith token
664 @token_lengths_to_go = ();
666 @mate_index_to_go = ();
667 @ci_levels_to_go = ();
668 @nesting_depth_to_go = (0);
670 @old_breakpoint_to_go = ();
674 @leading_spaces_to_go = ();
675 @reduced_spaces_to_go = ();
679 @whitespace_level_stack = ();
680 $whitespace_last_level = -1;
683 @has_broken_sublist = ();
684 @want_comma_break = ();
687 $first_tabbing_disagreement = 0;
688 $last_tabbing_disagreement = 0;
689 $tabbing_disagreement_count = 0;
690 $in_tabbing_disagreement = 0;
691 $input_line_tabbing = undef;
693 $last_last_line_leading_level = 0;
694 $last_line_leading_level = 0;
695 $last_line_leading_type = '#';
697 $last_nonblank_token = ';';
698 $last_nonblank_type = ';';
699 $last_last_nonblank_token = ';';
700 $last_last_nonblank_type = ';';
701 $last_nonblank_block_type = "";
702 $last_output_level = 0;
703 $looking_for_else = 0;
704 $embedded_tab_count = 0;
705 $first_embedded_tab_at = 0;
706 $last_embedded_tab_at = 0;
707 $deleted_semicolon_count = 0;
708 $first_deleted_semicolon_at = 0;
709 $last_deleted_semicolon_at = 0;
710 $added_semicolon_count = 0;
711 $first_added_semicolon_at = 0;
712 $last_added_semicolon_at = 0;
713 $is_static_block_comment = 0;
714 %postponed_breakpoint = ();
716 # variables for adding side comments
717 %block_leading_text = ();
718 %block_opening_line_number = ();
719 $csc_new_statement_ok = 1;
720 %csc_block_label = ();
722 %saved_opening_indentation = ();
724 reset_block_text_accumulator();
726 prepare_for_new_input_lines();
728 $vertical_aligner_object =
729 Perl::Tidy::VerticalAligner->initialize( $rOpts, $file_writer_object,
730 $logger_object, $diagnostics_object );
732 if ( $rOpts->{'entab-leading-whitespace'} ) {
734 "Leading whitespace will be entabbed with $rOpts->{'entab-leading-whitespace'} spaces per tab\n"
737 elsif ( $rOpts->{'tabs'} ) {
738 write_logfile_entry("Indentation will be with a tab character\n");
742 "Indentation will be with $rOpts->{'indent-columns'} spaces\n");
745 # This hash holds the main data structures for formatting
746 # All hash keys must be defined here.
748 rlines => [], # = ref to array of lines of the file
749 rlines_new => [], # = ref to array of output lines
750 # (FOR FUTURE DEVELOPMENT)
751 rLL => [], # = ref to array with all tokens
752 # in the file. LL originally meant
753 # 'Linked List'. Linked lists were a
754 # bad idea but LL is easy to type.
755 Klimit => undef, # = maximum K index for rLL. This is
756 # needed to catch any autovivification
758 rnested_pairs => [], # for welding decisions
759 K_opening_container => {}, # for quickly traversing structure
760 K_closing_container => {}, # for quickly traversing structure
761 K_opening_ternary => {}, # for quickly traversing structure
762 K_closing_ternary => {}, # for quickly traversing structure
763 rcontainer_map => {}, # hierarchical map of containers
764 rK_phantom_semicolons =>
765 undef, # for undoing phantom semicolons if iterating
766 rpaired_to_inner_container => {},
767 rbreak_container => {}, # prevent one-line blocks
768 rshort_nested => {}, # blocks not forced open
769 rvalid_self_keys => [], # for checking
770 valign_batch_count => 0,
772 my @valid_keys = keys %{$formatter_self};
773 $formatter_self->{rvalid_self_keys} = \@valid_keys;
775 bless $formatter_self, $class;
777 # Safety check..this is not a class yet
778 if ( _increment_count() > 1 ) {
780 "Attempt to create more than 1 object in $class, which is not a true class yet\n";
782 return $formatter_self;
785 # Future routines for storing new lines
787 my ( $self, $rline ) = @_;
789 # my $rline = $rlines->[$index_old];
790 # push @{$rlines_new}, $rline;
795 my ( $self, $index_old ) = @_;
797 # TODO: This will copy line with index $index_old to the new line array
798 # my $rlines = $self->{rlines};
799 # my $rline = $rlines->[$index_old];
800 # $self->push_line($rline);
804 sub push_blank_line {
808 # $self->push_line($rline);
813 my ( $self, $Kmin, $Kmax ) = @_;
815 # TODO: This will store the values for one new line of CODE
816 # CHECK TOKEN RANGE HERE
817 # $self->push_line($rline);
821 sub increment_valign_batch_count {
823 return ++$self->{valign_batch_count};
826 sub get_valign_batch_count {
828 return $self->{valign_batch_count};
834 # This routine is called for errors that really should not occur
835 # except if there has been a bug introduced by a recent program change
836 my ( $package0, $filename0, $line0, $subroutine0 ) = caller(0);
837 my ( $package1, $filename1, $line1, $subroutine1 ) = caller(1);
838 my ( $package2, $filename2, $line2, $subroutine2 ) = caller(2);
839 my $input_stream_name = $logger_object->get_input_stream_name();
842 ==============================================================================
843 While operating on input stream with name: '$input_stream_name'
844 A fault was detected at line $line0 of sub '$subroutine1'
846 which was called from line $line1 of sub '$subroutine2'
848 This is probably an error introduced by a recent programming change.
849 ==============================================================================
852 # This is for Perl-Critic
856 sub check_self_hash {
858 my @valid_self_keys = @{ $self->{rvalid_self_keys} };
860 @valid_self_hash{@valid_self_keys} = (1) x scalar(@valid_self_keys);
861 check_keys( $self, \%valid_self_hash, "Checkpoint: self error", 1 );
865 sub check_token_array {
868 # Check for errors in the array of tokens
869 # Uses package variable $NVARS
870 $self->check_self_hash();
871 my $rLL = $self->{rLL};
872 for ( my $KK = 0 ; $KK < @{$rLL} ; $KK++ ) {
873 my $nvars = @{ $rLL->[$KK] };
874 if ( $nvars != $NVARS ) {
875 my $type = $rLL->[$KK]->[_TYPE_];
876 $type = '*' unless defined($type);
878 "number of vars for node $KK, type '$type', is $nvars but should be $NVARS"
881 foreach my $var ( _TOKEN_, _TYPE_ ) {
882 if ( !defined( $rLL->[$KK]->[$var] ) ) {
883 my $iline = $rLL->[$KK]->[_LINE_INDEX_];
884 Fault("Undefined variable $var for K=$KK, line=$iline\n");
891 sub set_rLL_max_index {
894 # Set the limit of the rLL array, assuming that it is correct.
895 # This should only be called by routines after they make changes
897 my $rLL = $self->{rLL};
898 if ( !defined($rLL) ) {
900 # Shouldn't happen because rLL was initialized to be an array ref
901 Fault("Undefined Memory rLL");
903 my $Klimit_old = $self->{Klimit};
906 if ( $num > 0 ) { $Klimit = $num - 1 }
907 $self->{Klimit} = $Klimit;
911 sub get_rLL_max_index {
914 # the memory location $rLL and number of tokens should be obtained
915 # from this routine so that any autovivication can be immediately caught.
916 my $rLL = $self->{rLL};
917 my $Klimit = $self->{Klimit};
918 if ( !defined($rLL) ) {
920 # Shouldn't happen because rLL was initialized to be an array ref
921 Fault("Undefined Memory rLL");
924 if ( $num == 0 && defined($Klimit)
925 || $num > 0 && !defined($Klimit)
926 || $num > 0 && $Klimit != $num - 1 )
929 # Possible autovivification problem...
930 if ( !defined($Klimit) ) { $Klimit = '*' }
931 Fault("Error getting rLL: Memory items=$num and Klimit=$Klimit");
936 sub prepare_for_new_input_lines {
938 # Remember the largest batch size processed. This is needed
939 # by the pad routine to avoid padding the first nonblank token
940 if ( $max_index_to_go && $max_index_to_go > $peak_batch_size ) {
941 $peak_batch_size = $max_index_to_go;
944 $gnu_sequence_number++; # increment output batch counter
945 %last_gnu_equals = ();
946 %gnu_comma_count = ();
947 %gnu_arrow_count = ();
948 $line_start_index_to_go = 0;
949 $max_gnu_item_index = UNDEFINED_INDEX;
950 $index_max_forced_break = UNDEFINED_INDEX;
951 $max_index_to_go = UNDEFINED_INDEX;
952 $last_nonblank_index_to_go = UNDEFINED_INDEX;
953 $last_nonblank_type_to_go = '';
954 $last_nonblank_token_to_go = '';
955 $last_last_nonblank_index_to_go = UNDEFINED_INDEX;
956 $last_last_nonblank_type_to_go = '';
957 $last_last_nonblank_token_to_go = '';
958 $forced_breakpoint_count = 0;
959 $forced_breakpoint_undo_count = 0;
960 $rbrace_follower = undef;
961 $summed_lengths_to_go[0] = 0;
962 $comma_count_in_batch = 0;
963 $starting_in_quote = 0;
965 destroy_one_line_block();
969 sub keyword_group_scan {
972 # Manipulate blank lines around keyword groups (kgb* flags)
973 # Scan all lines looking for runs of consecutive lines beginning with
974 # selected keywords. Example keywords are 'my', 'our', 'local', ... but
975 # they may be anything. We will set flags requesting that blanks be
976 # inserted around and within them according to input parameters. Note
977 # that we are scanning the lines as they came in in the input stream, so
978 # they are not necessarily well formatted.
980 # The output of this sub is a return hash ref whose keys are the indexes of
981 # lines after which we desire a blank line. For line index i:
982 # $rhash_of_desires->{$i} = 1 means we want a blank line AFTER line $i
983 # $rhash_of_desires->{$i} = 2 means we want blank line $i removed
984 my $rhash_of_desires = {};
986 my $Opt_blanks_before = $rOpts->{'keyword-group-blanks-before'}; # '-kgbb'
987 my $Opt_blanks_after = $rOpts->{'keyword-group-blanks-after'}; # '-kgba'
988 my $Opt_blanks_inside = $rOpts->{'keyword-group-blanks-inside'}; # '-kgbi'
989 my $Opt_blanks_delete = $rOpts->{'keyword-group-blanks-delete'}; # '-kgbd'
990 my $Opt_size = $rOpts->{'keyword-group-blanks-size'}; # '-kgbs'
992 # A range of sizes can be input with decimal notation like 'min.max' with
993 # any number of dots between the two numbers. Examples:
994 # string => min max matches
1002 my ( $Opt_size_min, $Opt_size_max ) = split /\.+/, $Opt_size;
1003 if ( $Opt_size_min && $Opt_size_min !~ /^\d+$/
1004 || $Opt_size_max && $Opt_size_max !~ /^\d+$/ )
1007 Unexpected value for -kgbs: '$Opt_size'; expecting 'min' or 'min.max';
1008 ignoring all -kgb flags
1010 return $rhash_of_desires;
1012 $Opt_size_min = 1 unless ($Opt_size_min);
1014 if ( $Opt_size_max && $Opt_size_max < $Opt_size_min ) {
1015 return $rhash_of_desires;
1018 # codes for $Opt_blanks_before and $Opt_blanks_after:
1019 # 0 = never (delete if exist)
1020 # 1 = stable (keep unchanged)
1021 # 2 = always (insert if missing)
1023 return $rhash_of_desires
1024 unless $Opt_size_min > 0
1025 && ( $Opt_blanks_before != 1
1026 || $Opt_blanks_after != 1
1027 || $Opt_blanks_inside
1028 || $Opt_blanks_delete );
1030 my $Opt_pattern = $keyword_group_list_pattern;
1031 my $Opt_comment_pattern = $keyword_group_list_comment_pattern;
1032 my $Opt_repeat_count =
1033 $rOpts->{'keyword-group-blanks-repeat-count'}; # '-kgbr'
1035 my $rlines = $self->{rlines};
1036 my $rLL = $self->{rLL};
1037 my $K_closing_container = $self->{K_closing_container};
1039 # variables for the current group and subgroups:
1040 my ( $ibeg, $iend, $count, $level_beg, $K_closing, @iblanks, @group,
1044 # ($ibeg, $iend) = starting and ending line indexes of this entire group
1045 # $count = total number of keywords seen in this entire group
1046 # $level_beg = indententation level of this group
1047 # @group = [ $i, $token, $count ] =list of all keywords & blanks
1048 # @subgroup = $j, index of group where token changes
1049 # @iblanks = line indexes of blank lines in input stream in this group
1050 # where i=starting line index
1051 # token (the keyword)
1052 # count = number of this token in this subgroup
1053 # j = index in group where token changes
1055 # These vars will contain values for the most recently seen line:
1056 my ( $line_type, $CODE_type, $K_first, $K_last );
1058 my $number_of_groups_seen = 0;
1060 ####################
1061 # helper subroutines
1062 ####################
1064 my $insert_blank_after = sub {
1066 $rhash_of_desires->{$i} = 1;
1068 if ( defined( $rhash_of_desires->{$ip} )
1069 && $rhash_of_desires->{$ip} == 2 )
1071 $rhash_of_desires->{$ip} = 0;
1076 my $split_into_sub_groups = sub {
1078 # place blanks around long sub-groups of keywords
1080 return unless ($Opt_blanks_inside);
1082 # loop over sub-groups, index k
1083 push @subgroup, scalar @group;
1085 my $kend = @subgroup - 1;
1086 for ( my $k = $kbeg ; $k <= $kend ; $k++ ) {
1088 # index j runs through all keywords found
1089 my $j_b = $subgroup[ $k - 1 ];
1090 my $j_e = $subgroup[$k] - 1;
1092 # index i is the actual line number of a keyword
1093 my ( $i_b, $tok_b, $count_b ) = @{ $group[$j_b] };
1094 my ( $i_e, $tok_e, $count_e ) = @{ $group[$j_e] };
1095 my $num = $count_e - $count_b + 1;
1097 # This subgroup runs from line $ib to line $ie-1, but may contain
1099 if ( $num >= $Opt_size_min ) {
1101 # if there are blank lines, we require that at least $num lines
1102 # be non-blank up to the boundary with the next subgroup.
1103 my $nog_b = my $nog_e = 1;
1104 if ( @iblanks && !$Opt_blanks_delete ) {
1105 my $j_bb = $j_b + $num - 1;
1106 my ( $i_bb, $tok_bb, $count_bb ) = @{ $group[$j_bb] };
1107 $nog_b = $count_bb - $count_b + 1 == $num;
1109 my $j_ee = $j_e - ( $num - 1 );
1110 my ( $i_ee, $tok_ee, $count_ee ) = @{ $group[$j_ee] };
1111 $nog_e = $count_e - $count_ee + 1 == $num;
1113 if ( $nog_b && $k > $kbeg ) {
1114 $insert_blank_after->( $i_b - 1 );
1116 if ( $nog_e && $k < $kend ) {
1117 my ( $i_ep, $tok_ep, $count_ep ) = @{ $group[ $j_e + 1 ] };
1118 $insert_blank_after->( $i_ep - 1 );
1124 my $delete_if_blank = sub {
1127 # delete line $i if it is blank
1128 return unless ( $i >= 0 && $i < @{$rlines} );
1129 my $line_type = $rlines->[$i]->{_line_type};
1130 return if ( $line_type ne 'CODE' );
1131 my $code_type = $rlines->[$i]->{_code_type};
1132 if ( $code_type eq 'BL' ) { $rhash_of_desires->{$i} = 2; }
1136 my $delete_inner_blank_lines = sub {
1138 # always remove unwanted trailing blank lines from our list
1139 return unless (@iblanks);
1140 while ( my $ibl = pop(@iblanks) ) {
1141 if ( $ibl < $iend ) { push @iblanks, $ibl; last }
1145 # now mark mark interior blank lines for deletion if requested
1146 return unless ($Opt_blanks_delete);
1148 while ( my $ibl = pop(@iblanks) ) { $rhash_of_desires->{$ibl} = 2 }
1152 my $end_group = sub {
1154 # end a group of keywords
1155 my ($bad_ending) = @_;
1156 if ( defined($ibeg) && $ibeg >= 0 ) {
1158 # then handle sufficiently large groups
1159 if ( $count >= $Opt_size_min ) {
1161 $number_of_groups_seen++;
1163 # do any blank deletions regardless of the count
1164 $delete_inner_blank_lines->();
1167 my $code_type = $rlines->[ $ibeg - 1 ]->{_code_type};
1169 # patch for hash bang line which is not currently marked as
1170 # a comment; mark it as a comment
1171 if ( $ibeg == 1 && !$code_type ) {
1172 my $line_text = $rlines->[ $ibeg - 1 ]->{_line_text};
1174 if ( $line_text && $line_text =~ /^#/ );
1177 # Do not insert a blank after a comment
1178 # (this could be subject to a flag in the future)
1179 if ( $code_type !~ /(BC|SBC|SBCX)/ ) {
1180 if ( $Opt_blanks_before == INSERT ) {
1181 $insert_blank_after->( $ibeg - 1 );
1184 elsif ( $Opt_blanks_before == DELETE ) {
1185 $delete_if_blank->( $ibeg - 1 );
1190 # We will only put blanks before code lines. We could loosen
1191 # this rule a little, but we have to be very careful because
1192 # for example we certainly don't want to drop a blank line
1193 # after a line like this:
1195 if ( $line_type eq 'CODE' && defined($K_first) ) {
1197 # - Do not put a blank before a line of different level
1198 # - Do not put a blank line if we ended the search badly
1199 # - Do not put a blank at the end of the file
1200 # - Do not put a blank line before a hanging side comment
1201 my $level = $rLL->[$K_first]->[_LEVEL_];
1202 my $ci_level = $rLL->[$K_first]->[_CI_LEVEL_];
1204 if ( $level == $level_beg
1207 && $iend < @{$rlines}
1208 && $CODE_type ne 'HSC' )
1210 if ( $Opt_blanks_after == INSERT ) {
1211 $insert_blank_after->($iend);
1213 elsif ( $Opt_blanks_after == DELETE ) {
1214 $delete_if_blank->( $iend + 1 );
1219 $split_into_sub_groups->();
1222 # reset for another group
1232 my $find_container_end = sub {
1234 # If the keyword lines ends with an open token, find the closing token
1235 # '$K_closing' so that we can easily skip past the contents of the
1237 return if ( $K_last <= $K_first );
1239 my $type_last = $rLL->[$KK]->[_TYPE_];
1240 my $tok_last = $rLL->[$KK]->[_TOKEN_];
1241 if ( $type_last eq '#' ) {
1242 $KK = $self->K_previous_nonblank($KK);
1243 $tok_last = $rLL->[$KK]->[_TOKEN_];
1245 if ( $KK > $K_first && $tok_last =~ /^[\(\{\[]$/ ) {
1247 my $type_sequence = $rLL->[$KK]->[_TYPE_SEQUENCE_];
1248 my $lev = $rLL->[$KK]->[_LEVEL_];
1249 if ( $lev == $level_beg ) {
1250 $K_closing = $K_closing_container->{$type_sequence};
1255 my $add_to_group = sub {
1256 my ( $i, $token, $level ) = @_;
1258 # End the previous group if we have reached the maximum
1260 if ( $Opt_size_max && @group >= $Opt_size_max ) {
1264 if ( @group == 0 ) {
1266 $level_beg = $level;
1274 if ( !@group || $token ne $group[-1]->[1] ) {
1275 push @subgroup, scalar(@group);
1277 push @group, [ $i, $token, $count ];
1279 # remember if this line ends in an open container
1280 $find_container_end->();
1285 ###################################
1286 # loop over all lines of the source
1287 ###################################
1290 foreach my $line_of_tokens ( @{$rlines} ) {
1294 if ( $Opt_repeat_count > 0
1295 && $number_of_groups_seen >= $Opt_repeat_count );
1300 $line_type = $line_of_tokens->{_line_type};
1302 # always end a group at non-CODE
1303 if ( $line_type ne 'CODE' ) { $end_group->(); next }
1305 $CODE_type = $line_of_tokens->{_code_type};
1307 # end any group at a format skipping line
1308 if ( $CODE_type && $CODE_type eq 'FS' ) {
1313 # continue in a verbatim (VB) type; it may be quoted text
1314 if ( $CODE_type eq 'VB' ) {
1315 if ( $ibeg >= 0 ) { $iend = $i; }
1319 # and continue in blank (BL) types
1320 if ( $CODE_type eq 'BL' ) {
1323 push @{iblanks}, $i;
1325 # propagate current subgroup token
1326 my $tok = $group[-1]->[1];
1327 push @group, [ $i, $tok, $count ];
1332 # examine the first token of this line
1333 my $rK_range = $line_of_tokens->{_rK_range};
1334 ( $K_first, $K_last ) = @{$rK_range};
1335 if ( !defined($K_first) ) {
1337 # Unexpected blank line..shouldn't happen
1338 # $rK_range should be defined for line type CODE
1340 "Programming Error: Unexpected Blank Line in sub 'keyword_group_scan'. Ignoring"
1342 return $rhash_of_desires;
1345 my $level = $rLL->[$K_first]->[_LEVEL_];
1346 my $type = $rLL->[$K_first]->[_TYPE_];
1347 my $token = $rLL->[$K_first]->[_TOKEN_];
1348 my $ci_level = $rLL->[$K_first]->[_CI_LEVEL_];
1350 # see if this is a code type we seek (i.e. comment)
1352 && $Opt_comment_pattern
1353 && $CODE_type =~ /$Opt_comment_pattern/o )
1356 my $tok = $CODE_type;
1358 # Continuing a group
1359 if ( $ibeg >= 0 && $level == $level_beg ) {
1360 $add_to_group->( $i, $tok, $level );
1366 # first end old group if any; we might be starting new
1367 # keywords at different level
1368 if ( $ibeg > 0 ) { $end_group->(); }
1369 $add_to_group->( $i, $tok, $level );
1374 # See if it is a keyword we seek, but never start a group in a
1375 # continuation line; the code may be badly formatted.
1378 && $token =~ /$Opt_pattern/o )
1381 # Continuing a keyword group
1382 if ( $ibeg >= 0 && $level == $level_beg ) {
1383 $add_to_group->( $i, $token, $level );
1386 # Start new keyword group
1389 # first end old group if any; we might be starting new
1390 # keywords at different level
1391 if ( $ibeg > 0 ) { $end_group->(); }
1392 $add_to_group->( $i, $token, $level );
1397 # This is not one of our keywords, but we are in a keyword group
1398 # so see if we should continue or quit
1399 elsif ( $ibeg >= 0 ) {
1401 # - bail out on a large level change; we may have walked into a
1402 # data structure or anoymous sub code.
1403 if ( $level > $level_beg + 1 || $level < $level_beg ) {
1408 # - keep going on a continuation line of the same level, since
1409 # it is probably a continuation of our previous keyword,
1410 # - and keep going past hanging side comments because we never
1411 # want to interrupt them.
1412 if ( ( ( $level == $level_beg ) && $ci_level > 0 )
1413 || $CODE_type eq 'HSC' )
1419 # - continue if if we are within in a container which started with
1420 # the line of the previous keyword.
1421 if ( defined($K_closing) && $K_first <= $K_closing ) {
1423 # continue if entire line is within container
1424 if ( $K_last <= $K_closing ) { $iend = $i; next }
1426 # continue at ); or }; or ];
1427 my $KK = $K_closing + 1;
1428 if ( $rLL->[$KK]->[_TYPE_] eq ';' ) {
1429 if ( $KK < $K_last ) {
1430 if ( $rLL->[ ++$KK ]->[_TYPE_] eq 'b' ) { ++$KK }
1431 if ( $KK > $K_last || $rLL->[$KK]->[_TYPE_] ne '#' ) {
1444 # - end the group if none of the above
1449 # not in a keyword group; continue
1453 # end of loop over all lines
1455 return $rhash_of_desires;
1460 # Loop over old lines to set new line break points
1463 my $rlines = $self->{rlines};
1465 # Note for RT#118553, leave only one newline at the end of a file.
1466 # Example code to do this is in comments below:
1467 # my $Opt_trim_ending_blank_lines = 0;
1468 # if ($Opt_trim_ending_blank_lines) {
1469 # while ( my $line_of_tokens = pop @{$rlines} ) {
1470 # my $line_type = $line_of_tokens->{_line_type};
1471 # if ( $line_type eq 'CODE' ) {
1472 # my $CODE_type = $line_of_tokens->{_code_type};
1473 # next if ( $CODE_type eq 'BL' );
1475 # push @{$rlines}, $line_of_tokens;
1480 # But while this would be a trivial update, it would have very undesirable
1481 # side effects when perltidy is run from within an editor on a small snippet.
1482 # So this is best done with a separate filter, such
1483 # as 'delete_ending_blank_lines.pl' in the examples folder.
1485 # Flag to prevent blank lines when POD occurs in a format skipping sect.
1486 my $in_format_skipping_section;
1488 # set locations for blanks around long runs of keywords
1489 my $rwant_blank_line_after = $self->keyword_group_scan();
1493 foreach my $line_of_tokens ( @{$rlines} ) {
1496 # insert blank lines requested for keyword sequences
1498 && defined( $rwant_blank_line_after->{ $i - 1 } )
1499 && $rwant_blank_line_after->{ $i - 1 } == 1 )
1501 $self->want_blank_line();
1504 my $last_line_type = $line_type;
1505 $line_type = $line_of_tokens->{_line_type};
1506 my $input_line = $line_of_tokens->{_line_text};
1508 # _line_type codes are:
1509 # SYSTEM - system-specific code before hash-bang line
1510 # CODE - line of perl code (including comments)
1511 # POD_START - line starting pod, such as '=head'
1512 # POD - pod documentation text
1513 # POD_END - last line of pod section, '=cut'
1514 # HERE - text of here-document
1515 # HERE_END - last line of here-doc (target word)
1516 # FORMAT - format section
1517 # FORMAT_END - last line of format section, '.'
1518 # DATA_START - __DATA__ line
1519 # DATA - unidentified text following __DATA__
1520 # END_START - __END__ line
1521 # END - unidentified text following __END__
1522 # ERROR - we are in big trouble, probably not a perl script
1524 # put a blank line after an =cut which comes before __END__ and __DATA__
1525 # (required by podchecker)
1526 if ( $last_line_type eq 'POD_END' && !$saw_END_or_DATA_ ) {
1527 $file_writer_object->reset_consecutive_blank_lines();
1528 if ( !$in_format_skipping_section && $input_line !~ /^\s*$/ ) {
1529 $self->want_blank_line();
1533 # handle line of code..
1534 if ( $line_type eq 'CODE' ) {
1536 my $CODE_type = $line_of_tokens->{_code_type};
1537 $in_format_skipping_section = $CODE_type eq 'FS';
1539 # Handle blank lines
1540 if ( $CODE_type eq 'BL' ) {
1542 # If keep-old-blank-lines is zero, we delete all
1543 # old blank lines and let the blank line rules generate any
1546 # We also delete lines requested by the keyword-group logic
1547 my $kgb_keep = !( defined( $rwant_blank_line_after->{$i} )
1548 && $rwant_blank_line_after->{$i} == 2 );
1550 # But the keep-old-blank-lines flag has priority over kgb flags
1551 $kgb_keep = 1 if ( $rOpts_keep_old_blank_lines == 2 );
1553 if ( $rOpts_keep_old_blank_lines && $kgb_keep ) {
1555 $file_writer_object->write_blank_code_line(
1556 $rOpts_keep_old_blank_lines == 2 );
1557 $last_line_leading_type = 'b';
1563 # let logger see all non-blank lines of code
1564 my $output_line_number = get_output_line_number();
1565 black_box( $line_of_tokens, $output_line_number );
1568 # Handle Format Skipping (FS) and Verbatim (VB) Lines
1569 if ( $CODE_type eq 'VB' || $CODE_type eq 'FS' ) {
1570 $self->write_unindented_line("$input_line");
1571 $file_writer_object->reset_consecutive_blank_lines();
1575 # Handle block comment to be deleted
1576 elsif ( $CODE_type eq 'DEL' ) {
1581 # Handle all other lines of code
1582 $self->print_line_of_tokens($line_of_tokens);
1585 # handle line of non-code..
1591 if ( $line_type =~ /^POD/ ) {
1593 # Pod docs should have a preceding blank line. But stay
1594 # out of __END__ and __DATA__ sections, because
1595 # the user may be using this section for any purpose whatsoever
1596 if ( $rOpts->{'delete-pod'} ) { $skip_line = 1; }
1597 if ( $rOpts->{'tee-pod'} ) { $tee_line = 1; }
1598 if ( $rOpts->{'trim-pod'} ) { $input_line =~ s/\s+$// }
1600 && !$in_format_skipping_section
1601 && $line_type eq 'POD_START'
1602 && !$saw_END_or_DATA_ )
1604 $self->want_blank_line();
1608 # leave the blank counters in a predictable state
1609 # after __END__ or __DATA__
1610 elsif ( $line_type =~ /^(END_START|DATA_START)$/ ) {
1611 $file_writer_object->reset_consecutive_blank_lines();
1612 $saw_END_or_DATA_ = 1;
1615 # write unindented non-code line
1616 if ( !$skip_line ) {
1617 if ($tee_line) { $file_writer_object->tee_on() }
1618 $self->write_unindented_line($input_line);
1619 if ($tee_line) { $file_writer_object->tee_off() }
1626 { ## Beginning of routine to check line hashes
1628 my %valid_line_hash;
1632 # These keys are defined for each line in the formatter
1633 # Each line must have exactly these quantities
1634 my @valid_line_keys = qw(
1637 _guessed_indentation_level
1644 _square_bracket_depth
1646 _ended_in_blank_token
1655 @valid_line_hash{@valid_line_keys} = (1) x scalar(@valid_line_keys);
1658 sub check_line_hashes {
1660 $self->check_self_hash();
1661 my $rlines = $self->{rlines};
1662 foreach my $rline ( @{$rlines} ) {
1663 my $iline = $rline->{_line_number};
1664 my $line_type = $rline->{_line_type};
1665 check_keys( $rline, \%valid_line_hash,
1666 "Checkpoint: line number =$iline, line_type=$line_type", 1 );
1671 } ## End check line hashes
1675 # We are caching tokenized lines as they arrive and converting them to the
1676 # format needed for the final formatting.
1677 my ( $self, $line_of_tokens_old ) = @_;
1678 my $rLL = $self->{rLL};
1679 my $Klimit = $self->{Klimit};
1680 my $rlines_new = $self->{rlines};
1683 my $line_of_tokens = {};
1688 _guessed_indentation_level
1694 _square_bracket_depth
1699 $line_of_tokens->{$key} = $line_of_tokens_old->{$key};
1702 # Data needed by Logger
1703 $line_of_tokens->{_level_0} = 0;
1704 $line_of_tokens->{_ci_level_0} = 0;
1705 $line_of_tokens->{_nesting_blocks_0} = "";
1706 $line_of_tokens->{_nesting_tokens_0} = "";
1708 # Needed to avoid trimming quotes
1709 $line_of_tokens->{_ended_in_blank_token} = undef;
1711 my $line_type = $line_of_tokens_old->{_line_type};
1712 my $input_line_no = $line_of_tokens_old->{_line_number} - 1;
1713 if ( $line_type eq 'CODE' ) {
1715 my $rtokens = $line_of_tokens_old->{_rtokens};
1716 my $rtoken_type = $line_of_tokens_old->{_rtoken_type};
1717 my $rblock_type = $line_of_tokens_old->{_rblock_type};
1718 my $rcontainer_type = $line_of_tokens_old->{_rcontainer_type};
1719 my $rcontainer_environment =
1720 $line_of_tokens_old->{_rcontainer_environment};
1721 my $rtype_sequence = $line_of_tokens_old->{_rtype_sequence};
1722 my $rlevels = $line_of_tokens_old->{_rlevels};
1723 my $rslevels = $line_of_tokens_old->{_rslevels};
1724 my $rci_levels = $line_of_tokens_old->{_rci_levels};
1725 my $rnesting_blocks = $line_of_tokens_old->{_rnesting_blocks};
1726 my $rnesting_tokens = $line_of_tokens_old->{_rnesting_tokens};
1728 my $jmax = @{$rtokens} - 1;
1730 $Kfirst = defined($Klimit) ? $Klimit + 1 : 0;
1731 foreach my $j ( 0 .. $jmax ) {
1733 # Clip negative nesting depths to zero to avoid problems.
1734 # Negative values can occur in files with unbalanced containers
1735 my $slevel = $rslevels->[$j];
1736 if ( $slevel < 0 ) { $slevel = 0 }
1741 _BLOCK_TYPE_, _CONTAINER_TYPE_,
1742 _CONTAINER_ENVIRONMENT_, _TYPE_SEQUENCE_,
1743 _LEVEL_, _LEVEL_TRUE_,
1744 _SLEVEL_, _CI_LEVEL_,
1748 $rtokens->[$j], $rtoken_type->[$j],
1749 $rblock_type->[$j], $rcontainer_type->[$j],
1750 $rcontainer_environment->[$j], $rtype_sequence->[$j],
1751 $rlevels->[$j], $rlevels->[$j],
1752 $slevel, $rci_levels->[$j],
1755 push @{$rLL}, \@tokary;
1758 $Klimit = @{$rLL} - 1;
1760 # Need to remember if we can trim the input line
1761 $line_of_tokens->{_ended_in_blank_token} =
1762 $rtoken_type->[$jmax] eq 'b';
1764 $line_of_tokens->{_level_0} = $rlevels->[0];
1765 $line_of_tokens->{_ci_level_0} = $rci_levels->[0];
1766 $line_of_tokens->{_nesting_blocks_0} = $rnesting_blocks->[0];
1767 $line_of_tokens->{_nesting_tokens_0} = $rnesting_tokens->[0];
1771 $line_of_tokens->{_rK_range} = [ $Kfirst, $Klimit ];
1772 $line_of_tokens->{_code_type} = "";
1773 $self->{Klimit} = $Klimit;
1775 push @{$rlines_new}, $line_of_tokens;
1779 sub initialize_whitespace_hashes {
1781 # initialize these global hashes, which control the use of
1782 # whitespace around tokens:
1787 # %space_after_keyword
1789 # Many token types are identical to the tokens themselves.
1790 # See the tokenizer for a complete list. Here are some special types:
1792 # f = semicolon in for statement
1795 # Note that :: is excluded since it should be contained in an identifier
1796 # Note that '->' is excluded because it never gets space
1797 # parentheses and brackets are excluded since they are handled specially
1798 # curly braces are included but may be overridden by logic, such as
1801 # NEW_TOKENS: create a whitespace rule here. This can be as
1802 # simple as adding your new letter to @spaces_both_sides, for
1805 my @opening_type = qw< L { ( [ >;
1806 @is_opening_type{@opening_type} = (1) x scalar(@opening_type);
1808 my @closing_type = qw< R } ) ] >;
1809 @is_closing_type{@closing_type} = (1) x scalar(@closing_type);
1811 my @spaces_both_sides = qw#
1812 + - * / % ? = . : x < > | & ^ .. << >> ** && .. || // => += -=
1813 .= %= x= &= |= ^= *= <> <= >= == =~ !~ /= != ... <<= >>= ~~ !~~
1814 &&= ||= //= <=> A k f w F n C Y U G v
1817 my @spaces_left_side = qw<
1818 t ! ~ m p { \ h pp mm Z j
1820 push( @spaces_left_side, '#' ); # avoids warning message
1822 my @spaces_right_side = qw<
1823 ; } ) ] R J ++ -- **=
1825 push( @spaces_right_side, ',' ); # avoids warning message
1827 # Note that we are in a BEGIN block here. Later in processing
1828 # the values of %want_left_space and %want_right_space
1829 # may be overridden by any user settings specified by the
1830 # -wls and -wrs parameters. However the binary_whitespace_rules
1831 # are hardwired and have priority.
1832 @want_left_space{@spaces_both_sides} =
1833 (1) x scalar(@spaces_both_sides);
1834 @want_right_space{@spaces_both_sides} =
1835 (1) x scalar(@spaces_both_sides);
1836 @want_left_space{@spaces_left_side} =
1837 (1) x scalar(@spaces_left_side);
1838 @want_right_space{@spaces_left_side} =
1839 (-1) x scalar(@spaces_left_side);
1840 @want_left_space{@spaces_right_side} =
1841 (-1) x scalar(@spaces_right_side);
1842 @want_right_space{@spaces_right_side} =
1843 (1) x scalar(@spaces_right_side);
1844 $want_left_space{'->'} = WS_NO;
1845 $want_right_space{'->'} = WS_NO;
1846 $want_left_space{'**'} = WS_NO;
1847 $want_right_space{'**'} = WS_NO;
1848 $want_right_space{'CORE::'} = WS_NO;
1850 # These binary_ws_rules are hardwired and have priority over the above
1851 # settings. It would be nice to allow adjustment by the user,
1852 # but it would be complicated to specify.
1854 # hash type information must stay tightly bound
1856 $binary_ws_rules{'i'}{'L'} = WS_NO;
1857 $binary_ws_rules{'i'}{'{'} = WS_YES;
1858 $binary_ws_rules{'k'}{'{'} = WS_YES;
1859 $binary_ws_rules{'U'}{'{'} = WS_YES;
1860 $binary_ws_rules{'i'}{'['} = WS_NO;
1861 $binary_ws_rules{'R'}{'L'} = WS_NO;
1862 $binary_ws_rules{'R'}{'{'} = WS_NO;
1863 $binary_ws_rules{'t'}{'L'} = WS_NO;
1864 $binary_ws_rules{'t'}{'{'} = WS_NO;
1865 $binary_ws_rules{'}'}{'L'} = WS_NO;
1866 $binary_ws_rules{'}'}{'{'} = WS_OPTIONAL; # RT#129850; was WS_NO
1867 $binary_ws_rules{'$'}{'L'} = WS_NO;
1868 $binary_ws_rules{'$'}{'{'} = WS_NO;
1869 $binary_ws_rules{'@'}{'L'} = WS_NO;
1870 $binary_ws_rules{'@'}{'{'} = WS_NO;
1871 $binary_ws_rules{'='}{'L'} = WS_YES;
1872 $binary_ws_rules{'J'}{'J'} = WS_YES;
1874 # the following includes ') {'
1875 # as in : if ( xxx ) { yyy }
1876 $binary_ws_rules{']'}{'L'} = WS_NO;
1877 $binary_ws_rules{']'}{'{'} = WS_NO;
1878 $binary_ws_rules{')'}{'{'} = WS_YES;
1879 $binary_ws_rules{')'}{'['} = WS_NO;
1880 $binary_ws_rules{']'}{'['} = WS_NO;
1881 $binary_ws_rules{']'}{'{'} = WS_NO;
1882 $binary_ws_rules{'}'}{'['} = WS_NO;
1883 $binary_ws_rules{'R'}{'['} = WS_NO;
1885 $binary_ws_rules{']'}{'++'} = WS_NO;
1886 $binary_ws_rules{']'}{'--'} = WS_NO;
1887 $binary_ws_rules{')'}{'++'} = WS_NO;
1888 $binary_ws_rules{')'}{'--'} = WS_NO;
1890 $binary_ws_rules{'R'}{'++'} = WS_NO;
1891 $binary_ws_rules{'R'}{'--'} = WS_NO;
1893 $binary_ws_rules{'i'}{'Q'} = WS_YES;
1894 $binary_ws_rules{'n'}{'('} = WS_YES; # occurs in 'use package n ()'
1896 # FIXME: we could to split 'i' into variables and functions
1897 # and have no space for functions but space for variables. For now,
1898 # I have a special patch in the special rules below
1899 $binary_ws_rules{'i'}{'('} = WS_NO;
1901 $binary_ws_rules{'w'}{'('} = WS_NO;
1902 $binary_ws_rules{'w'}{'{'} = WS_YES;
1905 } ## end initialize_whitespace_hashes
1907 sub set_whitespace_flags {
1909 # This routine examines each pair of nonblank tokens and
1910 # sets a flag indicating if white space is needed.
1912 # $rwhitespace_flags->[$j] is a flag indicating whether a white space
1913 # BEFORE token $j is needed, with the following values:
1915 # WS_NO = -1 do not want a space before token $j
1916 # WS_OPTIONAL= 0 optional space or $j is a whitespace
1917 # WS_YES = 1 want a space before token $j
1921 my $rLL = $self->{rLL};
1923 my $rwhitespace_flags = [];
1925 my ( $last_token, $last_type, $last_block_type, $last_input_line_no,
1926 $token, $type, $block_type, $input_line_no );
1927 my $j_tight_closing_paren = -1;
1935 $last_block_type = '';
1936 $last_input_line_no = 0;
1938 my $jmax = @{$rLL} - 1;
1942 # This is some logic moved to a sub to avoid deep nesting of if stmts
1943 my $ws_in_container = sub {
1947 if ( $j + 1 > $jmax ) { return (WS_NO) }
1949 # Patch to count '-foo' as single token so that
1950 # each of $a{-foo} and $a{foo} and $a{'foo'} do
1951 # not get spaces with default formatting.
1955 && $last_token eq '{'
1956 && $rLL->[ $j + 1 ]->[_TYPE_] eq 'w' );
1958 # $j_next is where a closing token should be if
1959 # the container has a single token
1960 if ( $j_here + 1 > $jmax ) { return (WS_NO) }
1962 ( $rLL->[ $j_here + 1 ]->[_TYPE_] eq 'b' )
1966 if ( $j_next > $jmax ) { return WS_NO }
1967 my $tok_next = $rLL->[$j_next]->[_TOKEN_];
1968 my $type_next = $rLL->[$j_next]->[_TYPE_];
1970 # for tightness = 1, if there is just one token
1971 # within the matching pair, we will keep it tight
1973 $tok_next eq $matching_token{$last_token}
1975 # but watch out for this: [ [ ] (misc.t)
1976 && $last_token ne $token
1978 # double diamond is usually spaced
1984 # remember where to put the space for the closing paren
1985 $j_tight_closing_paren = $j_next;
1991 # main loop over all tokens to define the whitespace flags
1992 for ( my $j = 0 ; $j <= $jmax ; $j++ ) {
1994 my $rtokh = $rLL->[$j];
1997 $rwhitespace_flags->[$j] = WS_OPTIONAL;
1999 if ( $rtokh->[_TYPE_] eq 'b' ) {
2003 # set a default value, to be changed as needed
2005 $last_token = $token;
2007 $last_block_type = $block_type;
2008 $last_input_line_no = $input_line_no;
2009 $token = $rtokh->[_TOKEN_];
2010 $type = $rtokh->[_TYPE_];
2011 $block_type = $rtokh->[_BLOCK_TYPE_];
2012 $input_line_no = $rtokh->[_LINE_INDEX_];
2014 #---------------------------------------------------------------
2015 # Whitespace Rules Section 1:
2016 # Handle space on the inside of opening braces.
2017 #---------------------------------------------------------------
2020 if ( $is_opening_type{$last_type} ) {
2022 $j_tight_closing_paren = -1;
2024 # let us keep empty matched braces together: () {} []
2026 if ( $token eq $matching_token{$last_token} ) {
2036 # we're considering the right of an opening brace
2037 # tightness = 0 means always pad inside with space
2038 # tightness = 1 means pad inside if "complex"
2039 # tightness = 2 means never pad inside with space
2042 if ( $last_type eq '{'
2043 && $last_token eq '{'
2044 && $last_block_type )
2046 $tightness = $rOpts_block_brace_tightness;
2048 else { $tightness = $tightness{$last_token} }
2050 #=============================================================
2051 # Patch for test problem <<snippets/fabrice_bug.in>>
2052 # We must always avoid spaces around a bare word beginning
2054 # my $before = ${^PREMATCH};
2055 # Because all of the following cause an error in perl:
2056 # my $before = ${ ^PREMATCH };
2057 # my $before = ${ ^PREMATCH};
2058 # my $before = ${^PREMATCH };
2059 # So if brace tightness flag is -bt=0 we must temporarily reset
2060 # to bt=1. Note that here we must set tightness=1 and not 2 so
2061 # that the closing space
2062 # is also avoided (via the $j_tight_closing_paren flag in coding)
2063 if ( $type eq 'w' && $token =~ /^\^/ ) { $tightness = 1 }
2065 #=============================================================
2067 if ( $tightness <= 0 ) {
2070 elsif ( $tightness > 1 ) {
2074 $ws = $ws_in_container->($j);
2077 } # end setting space flag inside opening tokens
2080 if FORMATTER_DEBUG_FLAG_WHITE;
2082 #---------------------------------------------------------------
2083 # Whitespace Rules Section 2:
2084 # Handle space on inside of closing brace pairs.
2085 #---------------------------------------------------------------
2088 if ( $is_closing_type{$type} ) {
2090 if ( $j == $j_tight_closing_paren ) {
2092 $j_tight_closing_paren = -1;
2097 if ( !defined($ws) ) {
2100 if ( $type eq '}' && $token eq '}' && $block_type ) {
2101 $tightness = $rOpts_block_brace_tightness;
2103 else { $tightness = $tightness{$token} }
2105 $ws = ( $tightness > 1 ) ? WS_NO : WS_YES;
2108 } # end setting space flag inside closing tokens
2112 if FORMATTER_DEBUG_FLAG_WHITE;
2114 #---------------------------------------------------------------
2115 # Whitespace Rules Section 3:
2116 # Use the binary rule table.
2117 #---------------------------------------------------------------
2118 if ( !defined($ws) ) {
2119 $ws = $binary_ws_rules{$last_type}{$type};
2123 if FORMATTER_DEBUG_FLAG_WHITE;
2125 #---------------------------------------------------------------
2126 # Whitespace Rules Section 4:
2127 # Handle some special cases.
2128 #---------------------------------------------------------------
2129 if ( $token eq '(' ) {
2131 # This will have to be tweaked as tokenization changes.
2132 # We usually want a space at '} (', for example:
2133 # <<snippets/space1.in>>
2134 # map { 1 * $_; } ( $y, $M, $w, $d, $h, $m, $s );
2137 # &{ $_->[1] }( delete $_[$#_]{ $_->[0] } );
2138 # At present, the above & block is marked as type L/R so this case
2139 # won't go through here.
2140 if ( $last_type eq '}' ) { $ws = WS_YES }
2142 # NOTE: some older versions of Perl had occasional problems if
2143 # spaces are introduced between keywords or functions and opening
2144 # parens. So the default is not to do this except is certain
2145 # cases. The current Perl seems to tolerate spaces.
2147 # Space between keyword and '('
2148 elsif ( $last_type eq 'k' ) {
2150 unless ( $rOpts_space_keyword_paren
2151 || $space_after_keyword{$last_token} );
2154 # Space between function and '('
2155 # -----------------------------------------------------
2156 # 'w' and 'i' checks for something like:
2157 # myfun( &myfun( ->myfun(
2158 # -----------------------------------------------------
2159 elsif (( $last_type =~ /^[wUG]$/ )
2160 || ( $last_type =~ /^[wi]$/ && $last_token =~ /^(\&|->)/ ) )
2162 $ws = WS_NO unless ($rOpts_space_function_paren);
2165 # space between something like $i and ( in <<snippets/space2.in>>
2166 # for $i ( 0 .. 20 ) {
2167 # FIXME: eventually, type 'i' needs to be split into multiple
2168 # token types so this can be a hardwired rule.
2169 elsif ( $last_type eq 'i' && $last_token =~ /^[\$\%\@]/ ) {
2173 # allow constant function followed by '()' to retain no space
2174 elsif ($last_type eq 'C'
2175 && $rLL->[ $j + 1 ]->[_TOKEN_] eq ')' )
2181 # patch for SWITCH/CASE: make space at ']{' optional
2182 # since the '{' might begin a case or when block
2183 elsif ( ( $token eq '{' && $type ne 'L' ) && $last_token eq ']' ) {
2187 # keep space between 'sub' and '{' for anonymous sub definition
2188 if ( $type eq '{' ) {
2189 if ( $last_token eq 'sub' ) {
2193 # this is needed to avoid no space in '){'
2194 if ( $last_token eq ')' && $token eq '{' ) { $ws = WS_YES }
2196 # avoid any space before the brace or bracket in something like
2197 # @opts{'a','b',...}
2198 if ( $last_type eq 'i' && $last_token =~ /^\@/ ) {
2203 elsif ( $type eq 'i' ) {
2205 # never a space before ->
2206 if ( $token =~ /^\-\>/ ) {
2211 # retain any space between '-' and bare word
2212 elsif ( $type eq 'w' || $type eq 'C' ) {
2213 $ws = WS_OPTIONAL if $last_type eq '-';
2215 # never a space before ->
2216 if ( $token =~ /^\-\>/ ) {
2221 # retain any space between '-' and bare word; for example
2222 # avoid space between 'USER' and '-' here: <<snippets/space2.in>>
2223 # $myhash{USER-NAME}='steve';
2224 elsif ( $type eq 'm' || $type eq '-' ) {
2225 $ws = WS_OPTIONAL if ( $last_type eq 'w' );
2228 # always space before side comment
2229 elsif ( $type eq '#' ) { $ws = WS_YES if $j > 0 }
2231 # always preserver whatever space was used after a possible
2232 # filehandle (except _) or here doc operator
2235 && ( ( $last_type eq 'Z' && $last_token ne '_' )
2236 || $last_type eq 'h' )
2242 # space_backslash_quote; RT #123774 <<snippets/rt123774.in>>
2243 # allow a space between a backslash and single or double quote
2244 # to avoid fooling html formatters
2245 elsif ( $last_type eq '\\' && $type eq 'Q' && $token =~ /^[\"\']/ ) {
2246 if ($rOpts_space_backslash_quote) {
2247 if ( $rOpts_space_backslash_quote == 1 ) {
2250 elsif ( $rOpts_space_backslash_quote == 2 ) { $ws = WS_YES }
2251 else { } # shouldnt happen
2260 if FORMATTER_DEBUG_FLAG_WHITE;
2262 #---------------------------------------------------------------
2263 # Whitespace Rules Section 5:
2264 # Apply default rules not covered above.
2265 #---------------------------------------------------------------
2267 # If we fall through to here, look at the pre-defined hash tables for
2268 # the two tokens, and:
2269 # if (they are equal) use the common value
2270 # if (either is zero or undef) use the other
2271 # if (either is -1) use it
2285 if ( !defined($ws) ) {
2286 my $wl = $want_left_space{$type};
2287 my $wr = $want_right_space{$last_type};
2288 if ( !defined($wl) ) { $wl = 0 }
2289 if ( !defined($wr) ) { $wr = 0 }
2290 $ws = ( ( $wl == $wr ) || ( $wl == -1 ) || !$wr ) ? $wl : $wr;
2293 if ( !defined($ws) ) {
2296 "WS flag is undefined for tokens $last_token $token\n");
2299 # Treat newline as a whitespace. Otherwise, we might combine
2300 # 'Send' and '-recipients' here according to the above rules:
2301 # <<snippets/space3.in>>
2302 # my $msg = new Fax::Send
2303 # -recipients => $to,
2305 if ( $ws == 0 && $input_line_no != $last_input_line_no ) { $ws = 1 }
2310 && ( $last_type !~ /^[Zh]$/ ) )
2313 # If this happens, we have a non-fatal but undesirable
2314 # hole in the above rules which should be patched.
2316 "WS flag is zero for tokens $last_token $token\n");
2319 $rwhitespace_flags->[$j] = $ws;
2321 FORMATTER_DEBUG_FLAG_WHITE && do {
2322 my $str = substr( $last_token, 0, 15 );
2323 $str .= ' ' x ( 16 - length($str) );
2324 if ( !defined($ws_1) ) { $ws_1 = "*" }
2325 if ( !defined($ws_2) ) { $ws_2 = "*" }
2326 if ( !defined($ws_3) ) { $ws_3 = "*" }
2327 if ( !defined($ws_4) ) { $ws_4 = "*" }
2329 "NEW WHITE: i=$j $str $last_type $type $ws_1 : $ws_2 : $ws_3 : $ws_4 : $ws \n";
2333 if ( $rOpts->{'tight-secret-operators'} ) {
2334 new_secret_operator_whitespace( $rLL, $rwhitespace_flags );
2336 return $rwhitespace_flags;
2337 } ## end sub set_whitespace_flags
2339 sub respace_tokens {
2342 return if $rOpts->{'indent-only'};
2344 # This routine makes all necessary changes to the tokenization after the
2345 # file has been read. This consists mostly of inserting and deleting spaces
2346 # according to the selected parameters. In a few cases non-space characters
2347 # are added, deleted or modified.
2349 # The old tokens are copied one-by-one, with changes, from the old
2350 # linear storage array to a new array.
2352 my $rLL = $self->{rLL};
2353 my $Klimit_old = $self->{Klimit};
2354 my $rlines = $self->{rlines};
2355 my $rpaired_to_inner_container = $self->{rpaired_to_inner_container};
2357 my $rLL_new = []; # This is the new array
2360 my $Kmax = @{$rLL} - 1;
2362 # Set the whitespace flags, which indicate the token spacing preference.
2363 my $rwhitespace_flags = $self->set_whitespace_flags();
2365 # we will be setting token lengths as we go
2366 my $cumulative_length = 0;
2368 # We also define these hash indexes giving container token array indexes
2369 # as a function of the container sequence numbers. For example,
2370 my $K_opening_container = {}; # opening [ { or (
2371 my $K_closing_container = {}; # closing ] } or )
2372 my $K_opening_ternary = {}; # opening ? of ternary
2373 my $K_closing_ternary = {}; # closing : of ternary
2375 # List of new K indexes of phantom semicolons
2376 # This will be needed if we want to undo them for iterations
2377 my $rK_phantom_semicolons = [];
2379 # Temporary hashes for adding semicolons
2380 ##my $rKfirst_new = {};
2382 # a sub to link preceding nodes forward to a new node type
2383 my $link_back = sub {
2384 my ( $Ktop, $key ) = @_;
2386 my $Kprev = $Ktop - 1;
2388 && !defined( $rLL_new->[$Kprev]->[$key] ) )
2390 $rLL_new->[$Kprev]->[$key] = $Ktop;
2395 # A sub to store one token in the new array
2396 # All new tokens must be stored by this sub so that it can update
2397 # all data structures on the fly.
2398 my $last_nonblank_type = ';';
2399 my $last_nonblank_token = ';';
2400 my $last_nonblank_block_type = '';
2401 my $store_token = sub {
2404 # This will be the index of this item in the new array
2405 my $KK_new = @{$rLL_new};
2407 # check for a sequenced item (i.e., container or ?/:)
2408 my $type_sequence = $item->[_TYPE_SEQUENCE_];
2409 if ($type_sequence) {
2411 $link_back->( $KK_new, _KNEXT_SEQ_ITEM_ );
2413 my $token = $item->[_TOKEN_];
2414 if ( $is_opening_token{$token} ) {
2416 $K_opening_container->{$type_sequence} = $KK_new;
2418 elsif ( $is_closing_token{$token} ) {
2420 $K_closing_container->{$type_sequence} = $KK_new;
2423 # These are not yet used but could be useful
2425 if ( $token eq '?' ) {
2426 $K_opening_ternary->{$type_sequence} = $KK_new;
2428 elsif ( $token eq ':' ) {
2429 $K_closing_ternary->{$type_sequence} = $KK_new;
2433 Fault("Ugh: shouldn't happen");
2438 # find the length of this token
2439 my $token_length = length( $item->[_TOKEN_] );
2441 # and update the cumulative length
2442 $cumulative_length += $token_length;
2444 # Save the length sum to just AFTER this token
2445 $item->[_CUMULATIVE_LENGTH_] = $cumulative_length;
2447 my $type = $item->[_TYPE_];
2449 # trim side comments
2450 if ( $type eq '#' ) {
2451 $item->[_TOKEN_] =~ s/\s*$//;
2454 if ( $type && $type ne 'b' && $type ne '#' ) {
2455 $last_nonblank_type = $type;
2456 $last_nonblank_token = $item->[_TOKEN_];
2457 $last_nonblank_block_type = $item->[_BLOCK_TYPE_];
2460 # and finally, add this item to the new array
2461 push @{$rLL_new}, $item;
2464 my $store_token_and_space = sub {
2465 my ( $item, $want_space ) = @_;
2467 # store a token with preceding space if requested and needed
2469 # First store the space
2472 && $rLL_new->[-1]->[_TYPE_] ne 'b'
2473 && $rOpts_add_whitespace )
2475 my $rcopy = copy_token_as_type( $item, 'b', ' ' );
2476 $rcopy->[_LINE_INDEX_] =
2477 $rLL_new->[-1]->[_LINE_INDEX_];
2478 $store_token->($rcopy);
2482 $store_token->($item);
2488 my $Kn = $self->K_next_nonblank($KK);
2489 while ( defined($Kn) && $rLL->[$Kn]->[_TYPE_] eq 'q' ) {
2491 $Kn = $self->K_next_nonblank($Kn);
2496 my $add_phantom_semicolon = sub {
2500 my $Kp = $self->K_previous_nonblank( undef, $rLL_new );
2501 return unless ( defined($Kp) );
2503 # we are only adding semicolons for certain block types
2504 my $block_type = $rLL->[$KK]->[_BLOCK_TYPE_];
2506 unless ( $ok_to_add_semicolon_for_block_type{$block_type}
2507 || $block_type =~ /^(sub|package)/
2508 || $block_type =~ /^\w+\:$/ );
2510 my $type_sequence = $rLL->[$KK]->[_TYPE_SEQUENCE_];
2512 my $previous_nonblank_type = $rLL_new->[$Kp]->[_TYPE_];
2513 my $previous_nonblank_token = $rLL_new->[$Kp]->[_TOKEN_];
2515 # Do not add a semicolon if...
2519 # it would follow a comment (and be isolated)
2520 $previous_nonblank_type eq '#'
2522 # it follows a code block ( because they are not always wanted
2523 # there and may add clutter)
2524 || $rLL_new->[$Kp]->[_BLOCK_TYPE_]
2526 # it would follow a label
2527 || $previous_nonblank_type eq 'J'
2529 # it would be inside a 'format' statement (and cause syntax error)
2530 || ( $previous_nonblank_type eq 'k'
2531 && $previous_nonblank_token =~ /format/ )
2533 # if it would prevent welding two containers
2534 || $rpaired_to_inner_container->{$type_sequence}
2538 # We will insert an empty semicolon here as a placeholder. Later, if
2539 # it becomes the last token on a line, we will bring it to life. The
2540 # advantage of doing this is that (1) we just have to check line
2541 # endings, and (2) the phantom semicolon has zero width and therefore
2542 # won't cause needless breaks of one-line blocks.
2544 if ( $rLL_new->[$Ktop]->[_TYPE_] eq 'b'
2545 && $want_left_space{';'} == WS_NO )
2548 # convert the blank into a semicolon..
2549 # be careful: we are working on the new stack top
2550 # on a token which has been stored.
2551 my $rcopy = copy_token_as_type( $rLL_new->[$Ktop], 'b', ' ' );
2553 # Convert the existing blank to:
2554 # a phantom semicolon for one_line_block option = 0 or 1
2555 # a real semicolon for one_line_block option = 2
2556 my $tok = $rOpts_one_line_block_semicolons == 2 ? ';' : '';
2558 $rLL_new->[$Ktop]->[_TOKEN_] = $tok; # zero length if phantom
2559 $rLL_new->[$Ktop]->[_TYPE_] = ';';
2560 $rLL_new->[$Ktop]->[_SLEVEL_] =
2561 $rLL->[$KK]->[_SLEVEL_];
2563 push @{$rK_phantom_semicolons}, @{$rLL_new} - 1;
2565 # Then store a new blank
2566 $store_token->($rcopy);
2570 # insert a new token
2571 my $rcopy = copy_token_as_type( $rLL_new->[$Kp], ';', '' );
2572 $rcopy->[_SLEVEL_] = $rLL->[$KK]->[_SLEVEL_];
2573 $store_token->($rcopy);
2574 push @{$rK_phantom_semicolons}, @{$rLL_new} - 1;
2580 # Check that a quote looks okay
2581 # This sub works but needs to by sync'd with the log file output
2582 # before it can be used.
2583 my ( $KK, $Kfirst ) = @_;
2584 my $token = $rLL->[$KK]->[_TOKEN_];
2585 note_embedded_tab() if ( $token =~ "\t" );
2587 my $Kp = $self->K_previous_nonblank( undef, $rLL_new );
2588 return unless ( defined($Kp) );
2589 my $previous_nonblank_type = $rLL_new->[$Kp]->[_TYPE_];
2590 my $previous_nonblank_token = $rLL_new->[$Kp]->[_TOKEN_];
2592 my $previous_nonblank_type_2 = 'b';
2593 my $previous_nonblank_token_2 = "";
2594 my $Kpp = $self->K_previous_nonblank( $Kp, $rLL_new );
2595 if ( defined($Kpp) ) {
2596 $previous_nonblank_type_2 = $rLL_new->[$Kpp]->[_TYPE_];
2597 $previous_nonblank_token_2 = $rLL_new->[$Kpp]->[_TOKEN_];
2600 my $Kn = $self->K_next_nonblank($KK);
2601 my $next_nonblank_token = "";
2602 if ( defined($Kn) ) {
2603 $next_nonblank_token = $rLL->[$Kn]->[_TOKEN_];
2606 my $token_0 = $rLL->[$Kfirst]->[_TOKEN_];
2607 my $type_0 = $rLL->[$Kfirst]->[_TYPE_];
2609 # make note of something like '$var = s/xxx/yyy/;'
2610 # in case it should have been '$var =~ s/xxx/yyy/;'
2612 $token =~ /^(s|tr|y|m|\/)/
2613 && $previous_nonblank_token =~ /^(=|==|!=)$/
2615 # preceded by simple scalar
2616 && $previous_nonblank_type_2 eq 'i'
2617 && $previous_nonblank_token_2 =~ /^\$/
2619 # followed by some kind of termination
2620 # (but give complaint if we can not see far enough ahead)
2621 && $next_nonblank_token =~ /^[; \)\}]$/
2623 # scalar is not declared
2624 && !( $type_0 eq 'k' && $token_0 =~ /^(my|our|local)$/ )
2627 my $guess = substr( $last_nonblank_token, 0, 1 ) . '~';
2629 "Note: be sure you want '$previous_nonblank_token' instead of '$guess' here\n"
2634 # Main loop over all lines of the file
2639 # Testing option to break qw. Do not use; it can make a mess.
2640 my $ALLOW_BREAK_MULTILINE_QW = 0;
2641 my $in_multiline_qw;
2642 foreach my $line_of_tokens ( @{$rlines} ) {
2644 $input_line_number = $line_of_tokens->{_line_number};
2645 my $last_line_type = $line_type;
2646 $line_type = $line_of_tokens->{_line_type};
2647 next unless ( $line_type eq 'CODE' );
2648 my $last_CODE_type = $CODE_type;
2649 $CODE_type = $line_of_tokens->{_code_type};
2650 my $rK_range = $line_of_tokens->{_rK_range};
2651 my ( $Kfirst, $Klast ) = @{$rK_range};
2652 next unless defined($Kfirst);
2654 # Check for correct sequence of token indexes...
2655 # An error here means that sub write_line() did not correctly
2656 # package the tokenized lines as it received them.
2657 if ( defined($last_K_out) ) {
2658 if ( $Kfirst != $last_K_out + 1 ) {
2660 "Program Bug: last K out was $last_K_out but Kfirst=$Kfirst"
2665 if ( $Kfirst != 0 ) {
2666 Fault("Program Bug: first K is $Kfirst but should be 0");
2669 $last_K_out = $Klast;
2671 # Handle special lines of code
2672 if ( $CODE_type && $CODE_type ne 'NIN' && $CODE_type ne 'VER' ) {
2674 # CODE_types are as follows.
2676 # 'VB' = Verbatim - line goes out verbatim
2677 # 'FS' = Format Skipping - line goes out verbatim, no blanks
2678 # 'IO' = Indent Only - only indentation may be changed
2679 # 'NIN' = No Internal Newlines - line does not get broken
2680 # 'HSC'=Hanging Side Comment - fix this hanging side comment
2681 # 'BC'=Block Comment - an ordinary full line comment
2682 # 'SBC'=Static Block Comment - a block comment which does not get
2684 # 'SBCX'=Static Block Comment Without Leading Space
2685 # 'DEL'=Delete this line
2686 # 'VER'=VERSION statement
2687 # '' or (undefined) - no restructions
2689 # For a hanging side comment we insert an empty quote before
2690 # the comment so that it becomes a normal side comment and
2691 # will be aligned by the vertical aligner
2692 if ( $CODE_type eq 'HSC' ) {
2694 # Safety Check: This must be a line with one token (a comment)
2695 my $rtoken_vars = $rLL->[$Kfirst];
2696 if ( $Kfirst == $Klast && $rtoken_vars->[_TYPE_] eq '#' ) {
2698 # Note that even if the flag 'noadd-whitespace' is set, we
2699 # will make an exception here and allow a blank to be
2700 # inserted to push the comment to the right. We can think
2701 # of this as an adjustment of indentation rather than
2702 # whitespace between tokens. This will also prevent the
2703 # hanging side comment from getting converted to a block
2704 # comment if whitespace gets deleted, as for example with
2705 # the -extrude and -mangle options.
2706 my $rcopy = copy_token_as_type( $rtoken_vars, 'q', '' );
2707 $store_token->($rcopy);
2708 $rcopy = copy_token_as_type( $rtoken_vars, 'b', ' ' );
2709 $store_token->($rcopy);
2710 $store_token->($rtoken_vars);
2715 # This line was mis-marked by sub scan_comment
2717 "Program bug. A hanging side comment has been mismarked"
2722 # Copy tokens unchanged
2723 foreach my $KK ( $Kfirst .. $Klast ) {
2724 $store_token->( $rLL->[$KK] );
2729 # Handle normal line..
2731 # Insert any essential whitespace between lines
2732 # if last line was normal CODE.
2733 # Patch for rt #125012: use K_previous_code rather than '_nonblank'
2734 # because comments may disappear.
2735 my $type_next = $rLL->[$Kfirst]->[_TYPE_];
2736 my $token_next = $rLL->[$Kfirst]->[_TOKEN_];
2737 my $Kp = $self->K_previous_code( undef, $rLL_new );
2738 if ( $last_line_type eq 'CODE'
2739 && $type_next ne 'b'
2742 my $token_p = $rLL_new->[$Kp]->[_TOKEN_];
2743 my $type_p = $rLL_new->[$Kp]->[_TYPE_];
2745 my ( $token_pp, $type_pp );
2746 my $Kpp = $self->K_previous_code( $Kp, $rLL_new );
2747 if ( defined($Kpp) ) {
2748 $token_pp = $rLL_new->[$Kpp]->[_TOKEN_];
2749 $type_pp = $rLL_new->[$Kpp]->[_TYPE_];
2757 is_essential_whitespace(
2758 $token_pp, $type_pp, $token_p,
2759 $type_p, $token_next, $type_next,
2764 # Copy this first token as blank, but use previous line number
2765 my $rcopy = copy_token_as_type( $rLL->[$Kfirst], 'b', ' ' );
2766 $rcopy->[_LINE_INDEX_] =
2767 $rLL_new->[-1]->[_LINE_INDEX_];
2768 $store_token->($rcopy);
2772 # loop to copy all tokens on this line, with any changes
2774 for ( my $KK = $Kfirst ; $KK <= $Klast ; $KK++ ) {
2775 $rtoken_vars = $rLL->[$KK];
2776 my $token = $rtoken_vars->[_TOKEN_];
2777 my $type = $rtoken_vars->[_TYPE_];
2778 my $last_type_sequence = $type_sequence;
2779 $type_sequence = $rtoken_vars->[_TYPE_SEQUENCE_];
2781 # Handle a blank space ...
2782 if ( $type eq 'b' ) {
2784 # Delete it if not wanted by whitespace rules
2785 # or we are deleting all whitespace
2786 # Note that whitespace flag is a flag indicating whether a
2787 # white space BEFORE the token is needed
2788 next if ( $KK >= $Klast ); # skip terminal blank
2789 my $Knext = $KK + 1;
2790 my $ws = $rwhitespace_flags->[$Knext];
2792 || $rOpts_delete_old_whitespace )
2795 # FIXME: maybe switch to using _new
2796 my $Kp = $self->K_previous_nonblank($KK);
2797 next unless defined($Kp);
2798 my $token_p = $rLL->[$Kp]->[_TOKEN_];
2799 my $type_p = $rLL->[$Kp]->[_TYPE_];
2801 my ( $token_pp, $type_pp );
2803 #my $Kpp = $K_previous_nonblank->($Kp);
2804 my $Kpp = $self->K_previous_nonblank($Kp);
2805 if ( defined($Kpp) ) {
2806 $token_pp = $rLL->[$Kpp]->[_TOKEN_];
2807 $type_pp = $rLL->[$Kpp]->[_TYPE_];
2813 my $token_next = $rLL->[$Knext]->[_TOKEN_];
2814 my $type_next = $rLL->[$Knext]->[_TYPE_];
2816 my $do_not_delete = is_essential_whitespace(
2817 $token_pp, $type_pp, $token_p,
2818 $type_p, $token_next, $type_next,
2821 next unless ($do_not_delete);
2824 # make it just one character if allowed
2825 if ($rOpts_add_whitespace) {
2826 $rtoken_vars->[_TOKEN_] = ' ';
2828 $store_token->($rtoken_vars);
2832 # Handle a nonblank token...
2834 # check for a qw quote
2835 if ( $type eq 'q' ) {
2837 # trim blanks from right of qw quotes
2838 # (To avoid trimming qw quotes use -ntqw; the tokenizer handles
2841 $rtoken_vars->[_TOKEN_] = $token;
2842 note_embedded_tab() if ( $token =~ "\t" );
2844 if ($in_multiline_qw) {
2846 # If we are at the end of a multiline qw ..
2847 if ( $in_multiline_qw == $KK ) {
2849 # Split off the closing delimiter character
2850 # so that the formatter can put a line break there if necessary
2852 my $part2 = substr( $part1, -1, 1, "" );
2856 copy_token_as_type( $rtoken_vars, 'q', $part1 );
2857 $store_token->($rcopy);
2859 $rtoken_vars->[_TOKEN_] = $token;
2862 $in_multiline_qw = undef;
2864 # store without preceding blank
2865 $store_token->($rtoken_vars);
2869 # continuing a multiline qw
2870 $store_token->($rtoken_vars);
2877 # we are encountered new qw token...see if multiline
2878 my $K_end = $K_end_q->($KK);
2879 if ( $ALLOW_BREAK_MULTILINE_QW && $K_end != $KK ) {
2881 # Starting multiline qw...
2882 # set flag equal to the ending K
2883 $in_multiline_qw = $K_end;
2885 # Split off the leading part
2886 # so that the formatter can put a line break there if necessary
2887 if ( $token =~ /^(qw\s*.)(.*)$/ ) {
2892 copy_token_as_type( $rtoken_vars, 'q',
2894 $store_token_and_space->(
2895 $rcopy, $rwhitespace_flags->[$KK] == WS_YES
2898 $rtoken_vars->[_TOKEN_] = $token;
2900 # Second part goes without intermediate blank
2901 $store_token->($rtoken_vars);
2908 # this is a new single token qw -
2909 # store with possible preceding blank
2910 $store_token_and_space->(
2911 $rtoken_vars, $rwhitespace_flags->[$KK] == WS_YES
2916 } ## end if ( $type eq 'q' )
2918 # Modify certain tokens here for whitespace
2919 # The following is not yet done, but could be:
2921 elsif ( $type =~ /^[wit]$/ ) {
2923 # Examples: <<snippets/space1.in>>
2924 # change '$ var' to '$var' etc
2925 # '-> new' to '->new'
2926 if ( $token =~ /^([\$\&\%\*\@]|\-\>)\s/ ) {
2928 $rtoken_vars->[_TOKEN_] = $token;
2931 # Split identifiers with leading arrows, inserting blanks if
2932 # necessary. It is easier and safer here than in the
2933 # tokenizer. For example '->new' becomes two tokens, '->' and
2934 # 'new' with a possible blank between.
2936 # Note: there is a related patch in sub set_whitespace_flags
2937 if ( $token =~ /^\-\>(.*)$/ && $1 ) {
2938 my $token_save = $1;
2939 my $type_save = $type;
2941 # store a blank to left of arrow if necessary
2942 my $Kprev = $self->K_previous_nonblank($KK);
2943 if ( defined($Kprev)
2944 && $rLL->[$Kprev]->[_TYPE_] ne 'b'
2945 && $rOpts_add_whitespace
2946 && $want_left_space{'->'} == WS_YES )
2949 copy_token_as_type( $rtoken_vars, 'b', ' ' );
2950 $store_token->($rcopy);
2953 # then store the arrow
2954 my $rcopy = copy_token_as_type( $rtoken_vars, '->', '->' );
2955 $store_token->($rcopy);
2957 # then reset the current token to be the remainder,
2958 # and reset the whitespace flag according to the arrow
2959 $token = $rtoken_vars->[_TOKEN_] = $token_save;
2960 $type = $rtoken_vars->[_TYPE_] = $type_save;
2961 $store_token->($rtoken_vars);
2965 if ( $token =~ /$SUB_PATTERN/ ) {
2967 # -spp = 0 : no space before opening prototype paren
2968 # -spp = 1 : stable (follow input spacing)
2969 # -spp = 2 : always space before opening prototype paren
2970 my $spp = $rOpts->{'space-prototype-paren'};
2971 if ( defined($spp) ) {
2972 if ( $spp == 0 ) { $token =~ s/\s+\(/\(/; }
2973 elsif ( $spp == 2 ) { $token =~ s/\(/ (/; }
2976 # one space max, and no tabs
2977 $token =~ s/\s+/ /g;
2978 $rtoken_vars->[_TOKEN_] = $token;
2981 # trim identifiers of trailing blanks which can occur
2982 # under some unusual circumstances, such as if the
2983 # identifier 'witch' has trailing blanks on input here:
2987 # () # prototype may be on new line ...
2989 if ( $type eq 'i' ) {
2990 $token =~ s/\s+$//g;
2991 $rtoken_vars->[_TOKEN_] = $token;
2995 # change 'LABEL :' to 'LABEL:'
2996 elsif ( $type eq 'J' ) {
2998 $rtoken_vars->[_TOKEN_] = $token;
3001 # patch to add space to something like "x10"
3002 # This avoids having to split this token in the pre-tokenizer
3003 elsif ( $type eq 'n' ) {
3004 if ( $token =~ /^x\d+/ ) {
3006 $rtoken_vars->[_TOKEN_] = $token;
3010 # check a quote for problems
3011 elsif ( $type eq 'Q' ) {
3012 $check_Q->( $KK, $Kfirst );
3016 elsif ( $type eq ';' ) {
3018 # Remove unnecessary semicolons, but not after bare
3019 # blocks, where it could be unsafe if the brace is
3022 $rOpts->{'delete-semicolons'}
3025 $last_nonblank_type eq '}'
3027 $is_block_without_semicolon{
3028 $last_nonblank_block_type}
3029 || $last_nonblank_block_type =~ /$SUB_PATTERN/
3030 || $last_nonblank_block_type =~ /^\w+:$/ )
3032 || $last_nonblank_type eq ';'
3037 # This looks like a deletable semicolon, but even if a
3038 # semicolon can be deleted it is necessarily best to do so.
3039 # We apply these additional rules for deletion:
3040 # - Always ok to delete a ';' at the end of a line
3041 # - Never delete a ';' before a '#' because it would
3042 # promote it to a block comment.
3043 # - If a semicolon is not at the end of line, then only
3044 # delete if it is followed by another semicolon or closing
3045 # token. This includes the comment rule. It may take
3046 # two passes to get to a final state, but it is a little
3047 # safer. For example, keep the first semicolon here:
3048 # eval { sub bubba { ok(0) }; ok(0) } || ok(1);
3049 # It is not required but adds some clarity.
3050 my $ok_to_delete = 1;
3051 if ( $KK < $Klast ) {
3052 my $Kn = $self->K_next_nonblank($KK);
3053 if ( defined($Kn) && $Kn <= $Klast ) {
3054 my $next_nonblank_token_type =
3055 $rLL->[$Kn]->[_TYPE_];
3056 $ok_to_delete = $next_nonblank_token_type eq ';'
3057 || $next_nonblank_token_type eq '}';
3061 if ($ok_to_delete) {
3062 note_deleted_semicolon();
3066 write_logfile_entry("Extra ';'\n");
3071 elsif ($type_sequence) {
3073 # if ( $is_opening_token{$token} ) {
3076 if ( $is_closing_token{$token} ) {
3078 # Insert a tentative missing semicolon if the next token is
3079 # a closing block brace
3084 # not preceded by a ';'
3085 && $last_nonblank_type ne ';'
3087 # and this is not a VERSION stmt (is all one line, we are not
3088 # inserting semicolons on one-line blocks)
3089 && $CODE_type ne 'VER'
3091 # and we are allowed to add semicolons
3092 && $rOpts->{'add-semicolons'}
3095 $add_phantom_semicolon->($KK);
3100 # Store this token with possible previous blank
3101 $store_token_and_space->(
3102 $rtoken_vars, $rwhitespace_flags->[$KK] == WS_YES
3108 # Reset memory to be the new array
3109 $self->{rLL} = $rLL_new;
3110 $self->set_rLL_max_index();
3111 $self->{K_opening_container} = $K_opening_container;
3112 $self->{K_closing_container} = $K_closing_container;
3113 $self->{K_opening_ternary} = $K_opening_ternary;
3114 $self->{K_closing_ternary} = $K_closing_ternary;
3115 $self->{rK_phantom_semicolons} = $rK_phantom_semicolons;
3117 # make sure the new array looks okay
3118 $self->check_token_array();
3120 # reset the token limits of each line
3121 $self->resync_lines_and_tokens();
3128 my $Last_line_had_side_comment;
3129 my $In_format_skipping_section;
3130 my $Saw_VERSION_in_this_file;
3134 my $rlines = $self->{rlines};
3136 $Last_line_had_side_comment = undef;
3137 $In_format_skipping_section = undef;
3138 $Saw_VERSION_in_this_file = undef;
3140 # Loop over all lines
3141 foreach my $line_of_tokens ( @{$rlines} ) {
3142 my $line_type = $line_of_tokens->{_line_type};
3143 next unless ( $line_type eq 'CODE' );
3144 my $CODE_type = $self->get_CODE_type($line_of_tokens);
3145 $line_of_tokens->{_code_type} = $CODE_type;
3151 my ( $self, $line_of_tokens ) = @_;
3153 # We are looking at a line of code and setting a flag to
3154 # describe any special processing that it requires
3156 # Possible CODE_types are as follows.
3158 # 'VB' = Verbatim - line goes out verbatim
3159 # 'IO' = Indent Only - line goes out unchanged except for indentation
3160 # 'NIN' = No Internal Newlines - line does not get broken
3161 # 'HSC'=Hanging Side Comment - fix this hanging side comment
3162 # 'BC'=Block Comment - an ordinary full line comment
3163 # 'SBC'=Static Block Comment - a block comment which does not get
3165 # 'SBCX'=Static Block Comment Without Leading Space
3166 # 'DEL'=Delete this line
3167 # 'VER'=VERSION statement
3168 # '' or (undefined) - no restructions
3170 my $rLL = $self->{rLL};
3171 my $Klimit = $self->{Klimit};
3173 my $CODE_type = $rOpts->{'indent-only'} ? 'IO' : "";
3174 my $no_internal_newlines = 1 - $rOpts_add_newlines;
3175 if ( !$CODE_type && $no_internal_newlines ) { $CODE_type = 'NIN' }
3177 # extract what we need for this line..
3179 # Global value for error messages:
3180 $input_line_number = $line_of_tokens->{_line_number};
3182 my $rK_range = $line_of_tokens->{_rK_range};
3183 my ( $Kfirst, $Klast ) = @{$rK_range};
3185 if ( defined($Kfirst) ) { $jmax = $Klast - $Kfirst }
3186 my $input_line = $line_of_tokens->{_line_text};
3187 my $in_continued_quote = my $starting_in_quote =
3188 $line_of_tokens->{_starting_in_quote};
3189 my $in_quote = $line_of_tokens->{_ending_in_quote};
3190 my $ending_in_quote = $in_quote;
3191 my $guessed_indentation_level =
3192 $line_of_tokens->{_guessed_indentation_level};
3194 my $is_static_block_comment = 0;
3196 # Handle a continued quote..
3197 if ($in_continued_quote) {
3199 # A line which is entirely a quote or pattern must go out
3200 # verbatim. Note: the \n is contained in $input_line.
3202 if ( ( $input_line =~ "\t" ) ) {
3203 note_embedded_tab();
3205 $Last_line_had_side_comment = 0;
3210 my $is_block_comment =
3211 ( $jmax == 0 && $rLL->[$Kfirst]->[_TYPE_] eq '#' );
3213 # Write line verbatim if we are in a formatting skip section
3214 if ($In_format_skipping_section) {
3215 $Last_line_had_side_comment = 0;
3217 # Note: extra space appended to comment simplifies pattern matching
3218 if ( $is_block_comment
3219 && ( $rLL->[$Kfirst]->[_TOKEN_] . " " ) =~
3220 /$format_skipping_pattern_end/o )
3222 $In_format_skipping_section = 0;
3223 write_logfile_entry("Exiting formatting skip section\n");
3228 # See if we are entering a formatting skip section
3229 if ( $rOpts_format_skipping
3230 && $is_block_comment
3231 && ( $rLL->[$Kfirst]->[_TOKEN_] . " " ) =~
3232 /$format_skipping_pattern_begin/o )
3234 $In_format_skipping_section = 1;
3235 write_logfile_entry("Entering formatting skip section\n");
3236 $Last_line_had_side_comment = 0;
3240 # ignore trailing blank tokens (they will get deleted later)
3241 if ( $jmax > 0 && $rLL->[$Klast]->[_TYPE_] eq 'b' ) {
3245 # Handle a blank line..
3247 $Last_line_had_side_comment = 0;
3251 # see if this is a static block comment (starts with ## by default)
3252 my $is_static_block_comment_without_leading_space = 0;
3253 if ( $is_block_comment
3254 && $rOpts->{'static-block-comments'}
3255 && $input_line =~ /$static_block_comment_pattern/o )
3257 $is_static_block_comment = 1;
3258 $is_static_block_comment_without_leading_space =
3259 substr( $input_line, 0, 1 ) eq '#';
3262 # Check for comments which are line directives
3263 # Treat exactly as static block comments without leading space
3264 # reference: perlsyn, near end, section Plain Old Comments (Not!)
3265 # example: '# line 42 "new_filename.plx"'
3268 && $input_line =~ /^\# \s*
3270 (?:\s("?)([^"]+)\2)? \s*
3274 $is_static_block_comment = 1;
3275 $is_static_block_comment_without_leading_space = 1;
3278 # look for hanging side comment
3281 && $Last_line_had_side_comment # last line had side comment
3282 && $input_line =~ /^\s/ # there is some leading space
3283 && !$is_static_block_comment # do not make static comment hanging
3284 && $rOpts->{'hanging-side-comments'} # user is allowing
3285 # hanging side comments
3289 $Last_line_had_side_comment = 1;
3293 # remember if this line has a side comment
3294 $Last_line_had_side_comment =
3295 ( $jmax > 0 && $rLL->[$Klast]->[_TYPE_] eq '#' );
3297 # Handle a block (full-line) comment..
3298 if ($is_block_comment) {
3300 if ( $rOpts->{'delete-block-comments'} ) { return 'DEL' }
3302 # TRIM COMMENTS -- This could be turned off as a option
3303 $rLL->[$Kfirst]->[_TOKEN_] =~ s/\s*$//; # trim right end
3305 if ($is_static_block_comment_without_leading_space) {
3308 elsif ($is_static_block_comment) {
3316 # Patch needed for MakeMaker. Do not break a statement
3317 # in which $VERSION may be calculated. See MakeMaker.pm;
3318 # this is based on the coding in it.
3319 # The first line of a file that matches this will be eval'd:
3320 # /([\$*])(([\w\:\']*)\bVERSION)\b.*\=/
3322 # *VERSION = \'1.01';
3323 # ( $VERSION ) = '$Revision: 1.74 $ ' =~ /\$Revision:\s+([^\s]+)/;
3324 # We will pass such a line straight through without breaking
3325 # it unless -npvl is used.
3327 # Patch for problem reported in RT #81866, where files
3328 # had been flattened into a single line and couldn't be
3329 # tidied without -npvl. There are two parts to this patch:
3330 # First, it is not done for a really long line (80 tokens for now).
3331 # Second, we will only allow up to one semicolon
3332 # before the VERSION. We need to allow at least one semicolon
3333 # for statements like this:
3334 # require Exporter; our $VERSION = $Exporter::VERSION;
3335 # where both statements must be on a single line for MakeMaker
3337 my $is_VERSION_statement = 0;
3338 if ( !$Saw_VERSION_in_this_file
3341 /^[^;]*;?[^;]*([\$*])(([\w\:\']*)\bVERSION)\b.*\=/ )
3343 $Saw_VERSION_in_this_file = 1;
3344 write_logfile_entry("passing VERSION line; -npvl deactivates\n");
3351 sub find_nested_pairs {
3354 my $rLL = $self->{rLL};
3355 return unless ( defined($rLL) && @{$rLL} );
3357 # We define an array of pairs of nested containers
3360 # We also set the following hash values to identify container pairs for
3361 # which the opening and closing tokens are adjacent in the token stream:
3362 # $rpaired_to_inner_container->{$seqno_out}=$seqno_in where $seqno_out and
3363 # $seqno_in are the seqence numbers of the outer and inner containers of
3364 # the pair We need these later to decide if we can insert a missing
3366 my $rpaired_to_inner_container = {};
3368 # This local hash remembers if an outer container has a close following
3370 # The key is the outer sequence number
3371 # The value is the token_hash of the inner container
3373 my %has_close_following_opening;
3375 # Names of calling routines can either be marked as 'i' or 'w',
3376 # and they may invoke a sub call with an '->'. We will consider
3377 # any consecutive string of such types as a single unit when making
3378 # weld decisions. We also allow a leading !
3379 my $is_name_type = {
3389 return $type && $is_name_type->{$type};
3393 my $last_last_container;
3394 my $last_nonblank_token_vars;
3397 my $nonblank_token_count = 0;
3399 # loop over all tokens
3400 foreach my $rtoken_vars ( @{$rLL} ) {
3402 my $type = $rtoken_vars->[_TYPE_];
3404 next if ( $type eq 'b' );
3406 # long identifier-like items are counted as a single item
3407 $nonblank_token_count++
3408 unless ( $is_name->($type)
3409 && $is_name->( $last_nonblank_token_vars->[_TYPE_] ) );
3411 my $type_sequence = $rtoken_vars->[_TYPE_SEQUENCE_];
3412 if ($type_sequence) {
3414 my $token = $rtoken_vars->[_TOKEN_];
3416 if ( $is_opening_token{$token} ) {
3418 # following previous opening token ...
3419 if ( $last_container
3420 && $is_opening_token{ $last_container->[_TOKEN_] } )
3423 # adjacent to this one
3424 my $tok_diff = $nonblank_token_count - $last_count;
3426 my $last_tok = $last_nonblank_token_vars->[_TOKEN_];
3429 || $tok_diff == 2 && $last_container->[_TOKEN_] eq '(' )
3432 # remember this pair...
3433 my $outer_seqno = $last_container->[_TYPE_SEQUENCE_];
3434 my $inner_seqno = $type_sequence;
3435 $has_close_following_opening{$outer_seqno} =
3441 elsif ( $is_closing_token{$token} ) {
3443 # if the corresponding opening token had an adjacent opening
3444 if ( $has_close_following_opening{$type_sequence}
3445 && $is_closing_token{ $last_container->[_TOKEN_] }
3446 && $has_close_following_opening{$type_sequence}
3447 ->[_TYPE_SEQUENCE_] == $last_container->[_TYPE_SEQUENCE_] )
3450 # The closing weld tokens must be adjacent
3451 # NOTE: so intermediate commas and semicolons
3452 # can currently block a weld. This is something
3453 # that could be fixed in the future by including
3454 # a flag to delete un-necessary commas and semicolons.
3455 my $tok_diff = $nonblank_token_count - $last_count;
3457 if ( $tok_diff == 1 ) {
3459 # This is a closely nested pair ..
3460 my $inner_seqno = $last_container->[_TYPE_SEQUENCE_];
3461 my $outer_seqno = $type_sequence;
3462 $rpaired_to_inner_container->{$outer_seqno} =
3465 push @nested_pairs, [ $inner_seqno, $outer_seqno ];
3470 $last_last_container = $last_container;
3471 $last_container = $rtoken_vars;
3472 $last_count = $nonblank_token_count;
3474 $last_nonblank_token_vars = $rtoken_vars;
3476 $self->{rnested_pairs} = \@nested_pairs;
3477 $self->{rpaired_to_inner_container} = $rpaired_to_inner_container;
3483 # a debug routine, not normally used
3484 my ( $self, $msg ) = @_;
3485 my $rLL = $self->{rLL};
3486 my $nvars = @{$rLL};
3487 print STDERR "$msg\n";
3488 print STDERR "ntokens=$nvars\n";
3489 print STDERR "K\t_TOKEN_\t_TYPE_\n";
3492 foreach my $item ( @{$rLL} ) {
3493 print STDERR "$K\t$item->[_TOKEN_]\t$item->[_TYPE_]\n";
3499 sub get_old_line_index {
3500 my ( $self, $K ) = @_;
3501 my $rLL = $self->{rLL};
3502 return 0 unless defined($K);
3503 return $rLL->[$K]->[_LINE_INDEX_];
3506 sub get_old_line_count {
3507 my ( $self, $Kbeg, $Kend ) = @_;
3508 my $rLL = $self->{rLL};
3509 return 0 unless defined($Kbeg);
3510 return 0 unless defined($Kend);
3511 return $rLL->[$Kend]->[_LINE_INDEX_] - $rLL->[$Kbeg]->[_LINE_INDEX_] + 1;
3515 my ( $self, $KK, $rLL ) = @_;
3517 # return the index K of the next nonblank, non-comment token
3518 return unless ( defined($KK) && $KK >= 0 );
3520 # use the standard array unless given otherwise
3521 $rLL = $self->{rLL} unless ( defined($rLL) );
3524 while ( $Knnb < $Num ) {
3525 if ( !defined( $rLL->[$Knnb] ) ) {
3526 Fault("Undefined entry for k=$Knnb");
3528 if ( $rLL->[$Knnb]->[_TYPE_] ne 'b'
3529 && $rLL->[$Knnb]->[_TYPE_] ne '#' )
3538 sub K_next_nonblank {
3539 my ( $self, $KK, $rLL ) = @_;
3541 # return the index K of the next nonblank token
3542 return unless ( defined($KK) && $KK >= 0 );
3544 # use the standard array unless given otherwise
3545 $rLL = $self->{rLL} unless ( defined($rLL) );
3548 while ( $Knnb < $Num ) {
3549 if ( !defined( $rLL->[$Knnb] ) ) {
3550 Fault("Undefined entry for k=$Knnb");
3552 if ( $rLL->[$Knnb]->[_TYPE_] ne 'b' ) { return $Knnb }
3558 sub K_previous_code {
3560 # return the index K of the previous nonblank, non-comment token
3561 # Call with $KK=undef to start search at the top of the array
3562 my ( $self, $KK, $rLL ) = @_;
3564 # use the standard array unless given otherwise
3565 $rLL = $self->{rLL} unless ( defined($rLL) );
3567 if ( !defined($KK) ) { $KK = $Num }
3568 elsif ( $KK > $Num ) {
3570 # The caller should make the first call with KK_new=undef to
3573 "Program Bug: K_previous_nonblank_new called with K=$KK which exceeds $Num"
3577 while ( $Kpnb >= 0 ) {
3578 if ( $rLL->[$Kpnb]->[_TYPE_] ne 'b'
3579 && $rLL->[$Kpnb]->[_TYPE_] ne '#' )
3588 sub K_previous_nonblank {
3590 # return index of previous nonblank token before item K;
3591 # Call with $KK=undef to start search at the top of the array
3592 my ( $self, $KK, $rLL ) = @_;
3594 # use the standard array unless given otherwise
3595 $rLL = $self->{rLL} unless ( defined($rLL) );
3597 if ( !defined($KK) ) { $KK = $Num }
3598 elsif ( $KK > $Num ) {
3600 # The caller should make the first call with KK_new=undef to
3603 "Program Bug: K_previous_nonblank_new called with K=$KK which exceeds $Num"
3607 while ( $Kpnb >= 0 ) {
3608 if ( $rLL->[$Kpnb]->[_TYPE_] ne 'b' ) { return $Kpnb }
3614 sub map_containers {
3616 # Maps the container hierarchy
3618 my $rLL = $self->{rLL};
3619 return unless ( defined($rLL) && @{$rLL} );
3621 my $K_opening_container = $self->{K_opening_container};
3622 my $K_closing_container = $self->{K_closing_container};
3623 my $rcontainer_map = $self->{rcontainer_map};
3625 # loop over containers
3626 my @stack; # stack of container sequence numbers
3628 while ( defined($KNEXT) ) {
3630 $KNEXT = $rLL->[$KNEXT]->[_KNEXT_SEQ_ITEM_];
3631 my $rtoken_vars = $rLL->[$KK];
3632 my $type_sequence = $rtoken_vars->[_TYPE_SEQUENCE_];
3633 if ( !$type_sequence ) {
3634 next if ( $KK == 0 ); # first token in file may not be container
3635 Fault("sequence = $type_sequence not defined at K=$KK");
3638 my $token = $rtoken_vars->[_TOKEN_];
3639 if ( $is_opening_token{$token} ) {
3641 $rcontainer_map->{$type_sequence} = $stack[-1];
3643 push @stack, $type_sequence;
3645 if ( $is_closing_token{$token} ) {
3647 my $seqno = pop @stack;
3648 if ( $seqno != $type_sequence ) {
3650 # shouldn't happen unless file is garbage
3656 # the stack should be empty for a good file
3659 # unbalanced containers; file probably bad
3667 sub mark_short_nested_blocks {
3669 # This routine looks at the entire file and marks any short nested blocks
3670 # which should not be broken. The results are stored in the hash
3671 # $rshort_nested->{$type_sequence}
3672 # which will be true if the container should remain intact.
3674 # For example, consider the following line:
3676 # sub cxt_two { sort { $a <=> $b } test_if_list() }
3678 # The 'sort' block is short and nested within an outer sub block.
3679 # Normally, the existance of the 'sort' block will force the sub block to
3680 # break open, but this is not always desirable. Here we will set a flag for
3681 # the sort block to prevent this. To give the user control, we will
3682 # follow the input file formatting. If either of the blocks is broken in
3683 # the input file then we will allow it to remain broken. Otherwise we will
3684 # set a flag to keep it together in later formatting steps.
3686 # The flag which is set here will be checked in two places:
3687 # 'sub print_line_of_tokens' and 'sub starting_one_line_block'
3690 my $rLL = $self->{rLL};
3691 return unless ( defined($rLL) && @{$rLL} );
3693 return unless ( $rOpts->{'one-line-block-nesting'} );
3695 my $K_opening_container = $self->{K_opening_container};
3696 my $K_closing_container = $self->{K_closing_container};
3697 my $rbreak_container = $self->{rbreak_container};
3698 my $rshort_nested = $self->{rshort_nested};
3699 my $rcontainer_map = $self->{rcontainer_map};
3700 my $rlines = $self->{rlines};
3702 # Variables needed for estimating line lengths
3703 my $starting_indent;
3704 my $starting_lentot;
3707 my $excess_length_to_K = sub {
3710 # Estimate the length from the line start to a given token
3711 my $length = $self->cumulative_length_before_K($K) - $starting_lentot;
3713 $starting_indent + $length + $length_tol - $rOpts_maximum_line_length;
3714 return ($excess_length);
3717 my $is_broken_block = sub {
3719 # a block is broken if the input line numbers of the braces differ
3721 my $K_opening = $K_opening_container->{$seqno};
3722 return unless ( defined($K_opening) );
3723 my $K_closing = $K_closing_container->{$seqno};
3724 return unless ( defined($K_closing) );
3725 return $rbreak_container->{$seqno}
3726 || $rLL->[$K_closing]->[_LINE_INDEX_] !=
3727 $rLL->[$K_opening]->[_LINE_INDEX_];
3730 # loop over all containers
3731 my @open_block_stack;
3734 while ( defined($KNEXT) ) {
3736 $KNEXT = $rLL->[$KNEXT]->[_KNEXT_SEQ_ITEM_];
3737 my $rtoken_vars = $rLL->[$KK];
3738 my $type_sequence = $rtoken_vars->[_TYPE_SEQUENCE_];
3739 if ( !$type_sequence ) {
3740 next if ( $KK == 0 ); # first token in file may not be container
3742 # an error here is most likely due to a recent programming change
3743 Fault("sequence = $type_sequence not defined at K=$KK");
3746 # We are just looking at code blocks
3747 my $token = $rtoken_vars->[_TOKEN_];
3748 my $type = $rtoken_vars->[_TYPE_];
3749 next unless ( $type eq $token );
3750 my $block_type = $rtoken_vars->[_BLOCK_TYPE_];
3751 next unless ($block_type);
3753 # Keep a stack of all acceptable block braces seen.
3754 # Only consider blocks entirely on one line so dump the stack when line
3756 my $iline_last = $iline;
3757 $iline = $rLL->[$KK]->[_LINE_INDEX_];
3758 if ( $iline != $iline_last ) { @open_block_stack = () }
3760 if ( $token eq '}' ) {
3761 if (@open_block_stack) { pop @open_block_stack }
3763 next unless ( $token eq '{' );
3765 # block must be balanced (bad scripts may be unbalanced)
3766 my $K_opening = $K_opening_container->{$type_sequence};
3767 my $K_closing = $K_closing_container->{$type_sequence};
3768 next unless ( defined($K_opening) && defined($K_closing) );
3770 # require that this block be entirely on one line
3771 next if ( $is_broken_block->($type_sequence) );
3773 # See if this block fits on one line of allowed length (which may
3774 # be different from the input script)
3776 $KK <= 0 ? 0 : $rLL->[ $KK - 1 ]->[_CUMULATIVE_LENGTH_];
3777 $starting_indent = 0;
3778 if ( !$rOpts_variable_maximum_line_length ) {
3779 my $level = $rLL->[$KK]->[_LEVEL_];
3780 $starting_indent = $rOpts_indent_columns * $level;
3783 # Dump the stack if block is too long and skip this block
3784 if ( $excess_length_to_K->($K_closing) > 0 ) {
3785 @open_block_stack = ();
3789 # OK, Block passes tests, remember it
3790 push @open_block_stack, $type_sequence;
3792 # We are only marking nested code blocks,
3793 # so check for a previous block on the stack
3794 next unless ( @open_block_stack > 1 );
3796 # Looks OK, mark this as a short nested block
3797 $rshort_nested->{$type_sequence} = 1;
3803 sub weld_containers {
3805 # do any welding operations
3808 # initialize weld length hashes needed later for checking line lengths
3809 # TODO: These should eventually be stored in $self rather than be package vars
3810 %weld_len_left_closing = ();
3811 %weld_len_right_closing = ();
3812 %weld_len_left_opening = ();
3813 %weld_len_right_opening = ();
3815 return if ( $rOpts->{'indent-only'} );
3816 return unless ($rOpts_add_newlines);
3818 if ( $rOpts->{'weld-nested-containers'} ) {
3820 # if called, weld_nested_containers must be called before other weld
3821 # operations. # This is because weld_nested_containers could overwrite
3822 # hash values written by weld_cuddled_blocks and weld_nested_quotes.
3823 $self->weld_nested_containers();
3825 $self->weld_nested_quotes();
3828 # Note that weld_nested_containers() changes the _LEVEL_ values, so
3829 # weld_cuddled_blocks must use the _TRUE_LEVEL_ values instead.
3831 # Here is a good test case to Be sure that both cuddling and welding
3832 # are working and not interfering with each other: <<snippets/ce_wn1.in>>
3836 # if ($BOLD_MATH) { (
3837 # $labels, $comment,
3838 # join( '', '<B>', &make_math( $mode, '', '', $_ ), '</B>' )
3840 # &process_math_in_latex( $mode, $math_style, $slevel, "\\mbox{$text}" ),
3844 $self->weld_cuddled_blocks();
3849 sub cumulative_length_before_K {
3850 my ( $self, $KK ) = @_;
3851 my $rLL = $self->{rLL};
3852 return ( $KK <= 0 ) ? 0 : $rLL->[ $KK - 1 ]->[_CUMULATIVE_LENGTH_];
3855 sub cumulative_length_after_K {
3856 my ( $self, $KK ) = @_;
3857 my $rLL = $self->{rLL};
3858 return $rLL->[$KK]->[_CUMULATIVE_LENGTH_];
3861 sub weld_cuddled_blocks {
3864 # This routine implements the -cb flag by finding the appropriate
3865 # closing and opening block braces and welding them together.
3866 return unless ( %{$rcuddled_block_types} );
3868 my $rLL = $self->{rLL};
3869 return unless ( defined($rLL) && @{$rLL} );
3870 my $rbreak_container = $self->{rbreak_container};
3872 my $K_opening_container = $self->{K_opening_container};
3873 my $K_closing_container = $self->{K_closing_container};
3875 my $length_to_opening_seqno = sub {
3877 my $KK = $K_opening_container->{$seqno};
3878 my $lentot = $KK <= 0 ? 0 : $rLL->[ $KK - 1 ]->[_CUMULATIVE_LENGTH_];
3881 my $length_to_closing_seqno = sub {
3883 my $KK = $K_closing_container->{$seqno};
3884 my $lentot = $KK <= 0 ? 0 : $rLL->[ $KK - 1 ]->[_CUMULATIVE_LENGTH_];
3888 my $is_broken_block = sub {
3890 # a block is broken if the input line numbers of the braces differ
3891 # we can only cuddle between broken blocks
3893 my $K_opening = $K_opening_container->{$seqno};
3894 return unless ( defined($K_opening) );
3895 my $K_closing = $K_closing_container->{$seqno};
3896 return unless ( defined($K_closing) );
3897 return $rbreak_container->{$seqno}
3898 || $rLL->[$K_closing]->[_LINE_INDEX_] !=
3899 $rLL->[$K_opening]->[_LINE_INDEX_];
3902 # A stack to remember open chains at all levels:
3903 # $in_chain[$level] = [$chain_type, $type_sequence];
3905 my $CBO = $rOpts->{'cuddled-break-option'};
3907 # loop over structure items to find cuddled pairs
3910 while ( defined($KNEXT) ) {
3912 $KNEXT = $rLL->[$KNEXT]->[_KNEXT_SEQ_ITEM_];
3913 my $rtoken_vars = $rLL->[$KK];
3914 my $type_sequence = $rtoken_vars->[_TYPE_SEQUENCE_];
3915 if ( !$type_sequence ) {
3916 next if ( $KK == 0 ); # first token in file may not be container
3917 Fault("sequence = $type_sequence not defined at K=$KK");
3920 # We use the original levels because they get changed by sub
3921 # 'weld_nested_containers'. So if this were to be called before that
3922 # routine, the levels would be wrong and things would go bad.
3923 my $last_level = $level;
3924 $level = $rtoken_vars->[_LEVEL_TRUE_];
3926 if ( $level < $last_level ) { $in_chain[$last_level] = undef }
3927 elsif ( $level > $last_level ) { $in_chain[$level] = undef }
3929 # We are only looking at code blocks
3930 my $token = $rtoken_vars->[_TOKEN_];
3931 my $type = $rtoken_vars->[_TYPE_];
3932 next unless ( $type eq $token );
3934 if ( $token eq '{' ) {
3936 my $block_type = $rtoken_vars->[_BLOCK_TYPE_];
3937 if ( !$block_type ) {
3939 # patch for unrecognized block types which may not be labeled
3940 my $Kp = $self->K_previous_nonblank($KK);
3941 while ( $Kp && $rLL->[$Kp]->[_TYPE_] eq '#' ) {
3942 $Kp = $self->K_previous_nonblank($Kp);
3945 $block_type = $rLL->[$Kp]->[_TOKEN_];
3947 if ( $in_chain[$level] ) {
3949 # we are in a chain and are at an opening block brace.
3950 # See if we are welding this opening brace with the previous
3951 # block brace. Get their identification numbers:
3952 my $closing_seqno = $in_chain[$level]->[1];
3953 my $opening_seqno = $type_sequence;
3955 # The preceding block must be on multiple lines so that its
3956 # closing brace will start a new line.
3957 if ( !$is_broken_block->($closing_seqno) ) {
3958 next unless ( $CBO == 2 );
3959 $rbreak_container->{$closing_seqno} = 1;
3962 # we will let the trailing block be either broken or intact
3963 ## && $is_broken_block->($opening_seqno);
3965 # We can weld the closing brace to its following word ..
3966 my $Ko = $K_closing_container->{$closing_seqno};
3967 my $Kon = $self->K_next_nonblank($Ko);
3969 # ..unless it is a comment
3970 if ( $rLL->[$Kon]->[_TYPE_] ne '#' ) {
3972 $rLL->[$Kon]->[_CUMULATIVE_LENGTH_] -
3973 $rLL->[ $Ko - 1 ]->[_CUMULATIVE_LENGTH_];
3974 $weld_len_right_closing{$closing_seqno} = $dlen;
3976 # Set flag that we want to break the next container
3977 # so that the cuddled line is balanced.
3978 $rbreak_container->{$opening_seqno} = 1
3985 # We are not in a chain. Start a new chain if we see the
3986 # starting block type.
3987 if ( $rcuddled_block_types->{$block_type} ) {
3988 $in_chain[$level] = [ $block_type, $type_sequence ];
3992 $in_chain[$level] = [ $block_type, $type_sequence ];
3996 elsif ( $token eq '}' ) {
3997 if ( $in_chain[$level] ) {
3999 # We are in a chain at a closing brace. See if this chain
4001 my $Knn = $self->K_next_code($KK);
4004 my $chain_type = $in_chain[$level]->[0];
4005 my $next_nonblank_token = $rLL->[$Knn]->[_TOKEN_];
4007 $rcuddled_block_types->{$chain_type}->{$next_nonblank_token}
4011 # Note that we do not weld yet because we must wait until
4012 # we we are sure that an opening brace for this follows.
4013 $in_chain[$level]->[1] = $type_sequence;
4015 else { $in_chain[$level] = undef }
4023 sub weld_nested_containers {
4026 # This routine implements the -wn flag by "welding together"
4027 # the nested closing and opening tokens which were previously
4028 # identified by sub 'find_nested_pairs'. "welding" simply
4029 # involves setting certain hash values which will be checked
4030 # later during formatting.
4032 my $rLL = $self->{rLL};
4033 my $Klimit = $self->get_rLL_max_index();
4034 my $rnested_pairs = $self->{rnested_pairs};
4035 my $rlines = $self->{rlines};
4036 my $K_opening_container = $self->{K_opening_container};
4037 my $K_closing_container = $self->{K_closing_container};
4039 # Return unless there are nested pairs to weld
4040 return unless defined($rnested_pairs) && @{$rnested_pairs};
4042 # This array will hold the sequence numbers of the tokens to be welded.
4045 # Variables needed for estimating line lengths
4046 my $starting_indent;
4047 my $starting_lentot;
4049 # A tolerance to the length for length estimates. In some rare cases
4050 # this can avoid problems where a final weld slightly exceeds the
4051 # line length and gets broken in a bad spot.
4054 my $excess_length_to_K = sub {
4057 # Estimate the length from the line start to a given token
4058 my $length = $self->cumulative_length_before_K($K) - $starting_lentot;
4060 $starting_indent + $length + $length_tol - $rOpts_maximum_line_length;
4061 return ($excess_length);
4064 my $length_to_opening_seqno = sub {
4066 my $KK = $K_opening_container->{$seqno};
4067 my $lentot = $KK <= 0 ? 0 : $rLL->[ $KK - 1 ]->[_CUMULATIVE_LENGTH_];
4071 my $length_to_closing_seqno = sub {
4073 my $KK = $K_closing_container->{$seqno};
4074 my $lentot = $KK <= 0 ? 0 : $rLL->[ $KK - 1 ]->[_CUMULATIVE_LENGTH_];
4079 # _oo=outer opening, i.e. first of { {
4080 # _io=inner opening, i.e. second of { {
4081 # _oc=outer closing, i.e. second of } {
4082 # _ic=inner closing, i.e. first of } }
4086 # We are working from outermost to innermost pairs so that
4087 # level changes will be complete when we arrive at the inner pairs.
4089 while ( my $item = pop( @{$rnested_pairs} ) ) {
4090 my ( $inner_seqno, $outer_seqno ) = @{$item};
4092 my $Kouter_opening = $K_opening_container->{$outer_seqno};
4093 my $Kinner_opening = $K_opening_container->{$inner_seqno};
4094 my $Kouter_closing = $K_closing_container->{$outer_seqno};
4095 my $Kinner_closing = $K_closing_container->{$inner_seqno};
4097 my $outer_opening = $rLL->[$Kouter_opening];
4098 my $inner_opening = $rLL->[$Kinner_opening];
4099 my $outer_closing = $rLL->[$Kouter_closing];
4100 my $inner_closing = $rLL->[$Kinner_closing];
4102 my $iline_oo = $outer_opening->[_LINE_INDEX_];
4103 my $iline_io = $inner_opening->[_LINE_INDEX_];
4105 # Set flag saying if this pair starts a new weld
4106 my $starting_new_weld = !( @welds && $outer_seqno == $welds[-1]->[0] );
4108 # Set flag saying if this pair is adjacent to the previous nesting pair
4109 # (even if previous pair was rejected as a weld)
4110 my $touch_previous_pair =
4111 defined($previous_pair) && $outer_seqno == $previous_pair->[0];
4112 $previous_pair = $item;
4114 # Set a flag if we should not weld. It sometimes looks best not to weld
4115 # when the opening and closing tokens are very close. However, there
4116 # is a danger that we will create a "blinker", which oscillates between
4117 # two semi-stable states, if we do not weld. So the rules for
4118 # not welding have to be carefully defined and tested.
4120 if ( !$touch_previous_pair ) {
4122 # If this pair is not adjacent to the previous pair (skipped or
4123 # not), then measure lengths from the start of line of oo
4125 my $rK_range = $rlines->[$iline_oo]->{_rK_range};
4126 my ( $Kfirst, $Klast ) = @{$rK_range};
4128 $Kfirst <= 0 ? 0 : $rLL->[ $Kfirst - 1 ]->[_CUMULATIVE_LENGTH_];
4129 $starting_indent = 0;
4130 if ( !$rOpts_variable_maximum_line_length ) {
4131 my $level = $rLL->[$Kfirst]->[_LEVEL_];
4132 $starting_indent = $rOpts_indent_columns * $level;
4135 # DO-NOT-WELD RULE 1:
4136 # Do not weld something that looks like the start of a two-line
4137 # function call, like this: <<snippets/wn6.in>>
4138 # $trans->add_transformation(
4139 # PDL::Graphics::TriD::Scale->new( $sx, $sy, $sz ) );
4140 # We will look for a semicolon after the closing paren.
4142 # We want to weld something complex, like this though
4143 # my $compass = uc( opposite_direction( line_to_canvas_direction(
4144 # @{ $coords[0] }, @{ $coords[1] } ) ) );
4145 # Otherwise we will get a 'blinker'
4147 my $iline_oc = $outer_closing->[_LINE_INDEX_];
4148 if ( $iline_oc <= $iline_oo + 1 ) {
4150 # Look for following semicolon...
4151 my $Knext_nonblank = $self->K_next_nonblank($Kouter_closing);
4152 my $next_nonblank_type =
4153 defined($Knext_nonblank)
4154 ? $rLL->[$Knext_nonblank]->[_TYPE_]
4156 if ( $next_nonblank_type eq ';' ) {
4158 # Then do not weld if no other containers between inner
4159 # opening and closing.
4160 my $Knext_seq_item = $inner_opening->[_KNEXT_SEQ_ITEM_];
4161 if ( $Knext_seq_item == $Kinner_closing ) {
4168 my $iline_ic = $inner_closing->[_LINE_INDEX_];
4170 # DO-NOT-WELD RULE 2:
4171 # Do not weld an opening paren to an inner one line brace block
4172 # We will just use old line numbers for this test and require
4173 # iterations if necessary for convergence
4175 # For example, otherwise we could cause the opening paren
4176 # in the following example to separate from the caller name
4179 # $_[0]->code_handler
4180 # ( sub { $more .= $_[1] . ":" . $_[0] . "\n" } );
4182 # Here is another example where we do not want to weld:
4183 # $wrapped->add_around_modifier(
4184 # sub { push @tracelog => 'around 1'; $_[0]->(); } );
4186 # If the one line sub block gets broken due to length or by the
4187 # user, then we can weld. The result will then be:
4188 # $wrapped->add_around_modifier( sub {
4189 # push @tracelog => 'around 1';
4193 if ( $iline_ic == $iline_io ) {
4195 my $token_oo = $outer_opening->[_TOKEN_];
4196 my $block_type_io = $inner_opening->[_BLOCK_TYPE_];
4197 my $token_io = $inner_opening->[_TOKEN_];
4198 $do_not_weld ||= $token_oo eq '(' && $token_io eq '{';
4201 # DO-NOT-WELD RULE 3:
4202 # Do not weld if this makes our line too long
4203 $do_not_weld ||= $excess_length_to_K->($Kinner_opening) > 0;
4205 # DO-NOT-WELD RULE 4; implemented for git#10:
4206 # Do not weld an opening -ce brace if the next container is on a single
4207 # line, different from the opening brace. (This is very rare). For
4208 # example, given the following with -ce, we will avoid joining the {
4212 # [ $_, length($_) ]
4215 # because this would produce a terminal one-line block:
4217 # } else { [ $_, length($_) ] }
4219 # which may not be what is desired. But given this input:
4221 # } else { [ $_, length($_) ] }
4223 # then we will do the weld and retain the one-line block
4224 if ( $rOpts->{'cuddled-else'} ) {
4225 my $block_type = $rLL->[$Kouter_opening]->[_BLOCK_TYPE_];
4226 if ( $block_type && $rcuddled_block_types->{'*'}->{$block_type} ) {
4227 my $io_line = $inner_opening->[_LINE_INDEX_];
4228 my $ic_line = $inner_closing->[_LINE_INDEX_];
4229 my $oo_line = $outer_opening->[_LINE_INDEX_];
4231 ( $oo_line < $io_line && $ic_line == $io_line );
4237 # After neglecting a pair, we start measuring from start of point io
4239 $self->cumulative_length_before_K($Kinner_opening);
4240 $starting_indent = 0;
4241 if ( !$rOpts_variable_maximum_line_length ) {
4242 my $level = $inner_opening->[_LEVEL_];
4243 $starting_indent = $rOpts_indent_columns * $level;
4246 # Normally, a broken pair should not decrease indentation of
4247 # intermediate tokens:
4248 ## if ( $last_pair_broken ) { next }
4249 # However, for long strings of welded tokens, such as '{{{{{{...'
4250 # we will allow broken pairs to also remove indentation.
4251 # This will keep very long strings of opening and closing
4252 # braces from marching off to the right. We will do this if the
4253 # number of tokens in a weld before the broken weld is 4 or more.
4254 # This rule will mainly be needed for test scripts, since typical
4255 # welds have fewer than about 4 welded tokens.
4256 if ( !@welds || @{ $welds[-1] } < 4 ) { next }
4259 # otherwise start new weld ...
4260 elsif ($starting_new_weld) {
4264 # ... or extend current weld
4266 unshift @{ $welds[-1] }, $inner_seqno;
4269 # After welding, reduce the indentation level if all intermediate tokens
4270 my $dlevel = $outer_opening->[_LEVEL_] - $inner_opening->[_LEVEL_];
4271 if ( $dlevel != 0 ) {
4272 my $Kstart = $Kinner_opening;
4273 my $Kstop = $Kinner_closing;
4274 for ( my $KK = $Kstart ; $KK <= $Kstop ; $KK++ ) {
4275 $rLL->[$KK]->[_LEVEL_] += $dlevel;
4280 # Define weld lengths needed later to set line breaks
4281 foreach my $item (@welds) {
4283 # sweep from inner to outer
4288 foreach my $outer_seqno ( @{$item} ) {
4292 $length_to_opening_seqno->($inner_seqno) -
4293 $length_to_opening_seqno->($outer_seqno);
4296 $length_to_closing_seqno->($outer_seqno) -
4297 $length_to_closing_seqno->($inner_seqno);
4299 $len_open += $dlen_opening;
4300 $len_close += $dlen_closing;
4304 $weld_len_left_closing{$outer_seqno} = $len_close;
4305 $weld_len_right_opening{$outer_seqno} = $len_open;
4307 $inner_seqno = $outer_seqno;
4310 # sweep from outer to inner
4311 foreach my $seqno ( reverse @{$item} ) {
4312 $weld_len_right_closing{$seqno} =
4313 $len_close - $weld_len_left_closing{$seqno};
4314 $weld_len_left_opening{$seqno} =
4315 $len_open - $weld_len_right_opening{$seqno};
4319 #####################################
4321 #####################################
4325 foreach my $weld (@welds) {
4326 print "\nWeld number $count has seq: (@{$weld})\n";
4327 foreach my $seq ( @{$weld} ) {
4330 left_opening=$weld_len_left_opening{$seq};
4331 right_opening=$weld_len_right_opening{$seq};
4332 left_closing=$weld_len_left_closing{$seq};
4333 right_closing=$weld_len_right_closing{$seq};
4343 sub weld_nested_quotes {
4346 my $rLL = $self->{rLL};
4347 return unless ( defined($rLL) && @{$rLL} );
4349 my $K_opening_container = $self->{K_opening_container};
4350 my $K_closing_container = $self->{K_closing_container};
4351 my $rlines = $self->{rlines};
4353 my $is_single_quote = sub {
4354 my ( $Kbeg, $Kend, $quote_type ) = @_;
4355 foreach my $K ( $Kbeg .. $Kend ) {
4356 my $test_type = $rLL->[$K]->[_TYPE_];
4357 next if ( $test_type eq 'b' );
4358 return if ( $test_type ne $quote_type );
4363 my $excess_line_length = sub {
4364 my ( $KK, $Ktest ) = @_;
4366 # what is the excess length if we add token $Ktest to the line with $KK?
4367 my $iline = $rLL->[$KK]->[_LINE_INDEX_];
4368 my $rK_range = $rlines->[$iline]->{_rK_range};
4369 my ( $Kfirst, $Klast ) = @{$rK_range};
4370 my $starting_lentot =
4371 $Kfirst <= 0 ? 0 : $rLL->[ $Kfirst - 1 ]->[_CUMULATIVE_LENGTH_];
4372 my $starting_indent = 0;
4374 if ( !$rOpts_variable_maximum_line_length ) {
4375 my $level = $rLL->[$Kfirst]->[_LEVEL_];
4376 $starting_indent = $rOpts_indent_columns * $level;
4379 my $length = $rLL->[$Ktest]->[_CUMULATIVE_LENGTH_] - $starting_lentot;
4381 $starting_indent + $length + $length_tol - $rOpts_maximum_line_length;
4382 return $excess_length;
4385 # look for single qw quotes nested in containers
4387 while ( defined($KNEXT) ) {
4389 $KNEXT = $rLL->[$KNEXT]->[_KNEXT_SEQ_ITEM_];
4390 my $rtoken_vars = $rLL->[$KK];
4391 my $outer_seqno = $rtoken_vars->[_TYPE_SEQUENCE_];
4392 if ( !$outer_seqno ) {
4393 next if ( $KK == 0 ); # first token in file may not be container
4394 Fault("sequence = $outer_seqno not defined at K=$KK");
4397 my $token = $rtoken_vars->[_TOKEN_];
4398 if ( $is_opening_token{$token} ) {
4400 # see if the next token is a quote of some type
4401 my $Kn = $self->K_next_nonblank($KK);
4403 my $next_token = $rLL->[$Kn]->[_TOKEN_];
4404 my $next_type = $rLL->[$Kn]->[_TYPE_];
4406 unless ( ( $next_type eq 'q' || $next_type eq 'Q' )
4407 && $next_token =~ /^q/ );
4409 # The token before the closing container must also be a quote
4410 my $K_closing = $K_closing_container->{$outer_seqno};
4411 my $Kt_end = $self->K_previous_nonblank($K_closing);
4412 next unless $rLL->[$Kt_end]->[_TYPE_] eq $next_type;
4414 # Do not weld to single-line quotes. Nothing is gained, and it may
4416 next if ( $Kt_end == $Kn );
4418 # Only weld to quotes delimited with container tokens. This is
4419 # because welding to arbitrary quote delimiters can produce code
4420 # which is less readable than without welding.
4421 my $closing_delimiter = substr( $rLL->[$Kt_end]->[_TOKEN_], -1, 1 );
4423 unless ( $is_closing_token{$closing_delimiter}
4424 || $closing_delimiter eq '>' );
4426 # Now make sure that there is just a single quote in the container
4428 unless ( $is_single_quote->( $Kn + 1, $Kt_end - 1, $next_type ) );
4430 # If welded, the line must not exceed allowed line length
4431 # Assume old line breaks for this estimate.
4432 next if ( $excess_line_length->( $KK, $Kn ) > 0 );
4435 # FIXME: Are these always correct?
4436 $weld_len_left_closing{$outer_seqno} = 1;
4437 $weld_len_right_opening{$outer_seqno} = 2;
4439 # QW PATCH 1 (Testing)
4440 # undo CI for welded quotes
4441 foreach my $K ( $Kn .. $Kt_end ) {
4442 $rLL->[$K]->[_CI_LEVEL_] = 0;
4445 # Change the level of a closing qw token to be that of the outer
4446 # containing token. This will allow -lp indentation to function
4447 # correctly in the vertical aligner.
4448 $rLL->[$Kt_end]->[_LEVEL_] = $rLL->[$K_closing]->[_LEVEL_];
4456 my ( $seqno, $type_or_tok ) = @_;
4458 # Given the sequence number of a token, and the token or its type,
4459 # return the length of any weld to its left
4463 if ( $is_closing_type{$type_or_tok} ) {
4464 $weld_len = $weld_len_left_closing{$seqno};
4466 elsif ( $is_opening_type{$type_or_tok} ) {
4467 $weld_len = $weld_len_left_opening{$seqno};
4470 if ( !defined($weld_len) ) { $weld_len = 0 }
4474 sub weld_len_right {
4476 my ( $seqno, $type_or_tok ) = @_;
4478 # Given the sequence number of a token, and the token or its type,
4479 # return the length of any weld to its right
4483 if ( $is_closing_type{$type_or_tok} ) {
4484 $weld_len = $weld_len_right_closing{$seqno};
4486 elsif ( $is_opening_type{$type_or_tok} ) {
4487 $weld_len = $weld_len_right_opening{$seqno};
4490 if ( !defined($weld_len) ) { $weld_len = 0 }
4494 sub weld_len_left_to_go {
4497 # Given the index of a token in the 'to_go' array
4498 # return the length of any weld to its left
4499 return if ( $i < 0 );
4501 weld_len_left( $type_sequence_to_go[$i], $types_to_go[$i] );
4505 sub weld_len_right_to_go {
4508 # Given the index of a token in the 'to_go' array
4509 # return the length of any weld to its right
4510 return if ( $i < 0 );
4511 if ( $i > 0 && $types_to_go[$i] eq 'b' ) { $i-- }
4513 weld_len_right( $type_sequence_to_go[$i], $types_to_go[$i] );
4517 sub link_sequence_items {
4519 # This has been merged into 'respace_tokens' but retained for reference
4521 my $rlines = $self->{rlines};
4522 my $rLL = $self->{rLL};
4524 # We walk the token list and make links to the next sequence item.
4525 # We also define these hashes to container tokens using sequence number as
4527 my $K_opening_container = {}; # opening [ { or (
4528 my $K_closing_container = {}; # closing ] } or )
4529 my $K_opening_ternary = {}; # opening ? of ternary
4530 my $K_closing_ternary = {}; # closing : of ternary
4532 # sub to link preceding nodes forward to a new node type
4533 my $link_back = sub {
4534 my ( $Ktop, $key ) = @_;
4536 my $Kprev = $Ktop - 1;
4538 && !defined( $rLL->[$Kprev]->[$key] ) )
4540 $rLL->[$Kprev]->[$key] = $Ktop;
4545 for ( my $KK = 0 ; $KK < @{$rLL} ; $KK++ ) {
4547 $rLL->[$KK]->[_KNEXT_SEQ_ITEM_] = undef;
4549 my $type = $rLL->[$KK]->[_TYPE_];
4551 next if ( $type eq 'b' );
4553 my $type_sequence = $rLL->[$KK]->[_TYPE_SEQUENCE_];
4554 if ($type_sequence) {
4556 $link_back->( $KK, _KNEXT_SEQ_ITEM_ );
4558 my $token = $rLL->[$KK]->[_TOKEN_];
4559 if ( $is_opening_token{$token} ) {
4561 $K_opening_container->{$type_sequence} = $KK;
4563 elsif ( $is_closing_token{$token} ) {
4565 $K_closing_container->{$type_sequence} = $KK;
4568 # These are not yet used but could be useful
4570 if ( $token eq '?' ) {
4571 $K_opening_ternary->{$type_sequence} = $KK;
4573 elsif ( $token eq ':' ) {
4574 $K_closing_ternary->{$type_sequence} = $KK;
4578 Unknown sequenced token type '$type'. Expecting one of '{[(?:)]}'
4585 $self->{K_opening_container} = $K_opening_container;
4586 $self->{K_closing_container} = $K_closing_container;
4587 $self->{K_opening_ternary} = $K_opening_ternary;
4588 $self->{K_closing_ternary} = $K_closing_ternary;
4592 sub sum_token_lengths {
4595 # This has been merged into 'respace_tokens' but retained for reference
4596 my $rLL = $self->{rLL};
4597 my $cumulative_length = 0;
4598 for ( my $KK = 0 ; $KK < @{$rLL} ; $KK++ ) {
4600 # now set the length of this token
4601 my $token_length = length( $rLL->[$KK]->[_TOKEN_] );
4603 $cumulative_length += $token_length;
4605 # Save the length sum to just AFTER this token
4606 $rLL->[$KK]->[_CUMULATIVE_LENGTH_] = $cumulative_length;
4612 sub resync_lines_and_tokens {
4615 my $rLL = $self->{rLL};
4616 my $Klimit = $self->{Klimit};
4617 my $rlines = $self->{rlines};
4619 # Re-construct the arrays of tokens associated with the original input lines
4620 # since they have probably changed due to inserting and deleting blanks
4621 # and a few other tokens.
4625 # This is the next token and its line index:
4628 if ( defined($rLL) && @{$rLL} ) {
4629 $Kmax = @{$rLL} - 1;
4630 $inext = $rLL->[$Knext]->[_LINE_INDEX_];
4633 my $get_inext = sub {
4634 if ( $Knext < 0 || $Knext > $Kmax ) { $inext = undef }
4636 $inext = $rLL->[$Knext]->[_LINE_INDEX_];
4641 # Remember the most recently output token index
4645 foreach my $line_of_tokens ( @{$rlines} ) {
4647 my $line_type = $line_of_tokens->{_line_type};
4648 if ( $line_type eq 'CODE' ) {
4652 $inext = $get_inext->();
4653 while ( defined($inext) && $inext <= $iline ) {
4654 push @{K_array}, $Knext;
4656 $inext = $get_inext->();
4659 # Delete any terminal blank token
4661 if ( $rLL->[ $K_array[-1] ]->[_TYPE_] eq 'b' ) {
4666 # Define the range of K indexes for the line:
4667 # $Kfirst = index of first token on line
4668 # $Klast_out = index of last token on line
4669 my ( $Kfirst, $Klast );
4671 $Kfirst = $K_array[0];
4672 $Klast = $K_array[-1];
4673 $Klast_out = $Klast;
4676 # It is only safe to trim the actual line text if the input
4677 # line had a terminal blank token. Otherwise, we may be
4679 if ( $line_of_tokens->{_ended_in_blank_token} ) {
4680 $line_of_tokens->{_line_text} =~ s/\s+$//;
4682 $line_of_tokens->{_rK_range} = [ $Kfirst, $Klast ];
4684 # Deleting semicolons can create new empty code lines
4685 # which should be marked as blank
4686 if ( !defined($Kfirst) ) {
4687 my $code_type = $line_of_tokens->{_code_type};
4688 if ( !$code_type ) {
4689 $line_of_tokens->{_code_type} = 'BL';
4695 # There shouldn't be any nodes beyond the last one unless we start
4696 # allowing 'link_after' calls
4697 if ( defined($inext) ) {
4699 Fault("unexpected tokens at end of file when reconstructing lines");
4707 my $rlines = $self->{rlines};
4708 foreach my $line ( @{$rlines} ) {
4709 my $input_line = $line->{_line_text};
4710 $self->write_unindented_line($input_line);
4715 sub finish_formatting {
4717 my ( $self, $severe_error ) = @_;
4719 # The file has been tokenized and is ready to be formatted.
4720 # All of the relevant data is stored in $self, ready to go.
4722 # output file verbatim if severe error or no formatting requested
4723 if ( $severe_error || $rOpts->{notidy} ) {
4724 $self->dump_verbatim();
4729 # Make a pass through the lines, looking at lines of CODE and identifying
4730 # special processing needs, such format skipping sections marked by
4732 $self->scan_comments();
4734 # Find nested pairs of container tokens for any welding. This information
4735 # is also needed for adding semicolons, so it is split apart from the
4737 $self->find_nested_pairs();
4739 # Make sure everything looks good
4740 $self->check_line_hashes();
4742 # Future: Place to Begin future Iteration Loop
4743 # foreach my $it_count(1..$maxit) {
4745 # Future: We must reset some things after the first iteration.
4747 # - resetting levels if there was any welding
4748 # - resetting any phantom semicolons
4749 # - dealing with any line numbering issues so we can relate final lines
4750 # line numbers with input line numbers.
4752 # If ($it_count>1) {
4753 # Copy {level_raw} to [_LEVEL_] if ($it_count>1)
4757 # Make a pass through all tokens, adding or deleting any whitespace as
4758 # required. Also make any other changes, such as adding semicolons.
4759 # All token changes must be made here so that the token data structure
4760 # remains fixed for the rest of this iteration.
4761 $self->respace_tokens();
4763 # Make a hierarchical map of the containers
4764 $self->map_containers();
4766 # Implement any welding needed for the -wn or -cb options
4767 $self->weld_containers();
4769 # Locate small nested blocks which should not be broken
4770 $self->mark_short_nested_blocks();
4772 # Finishes formatting and write the result to the line sink.
4773 # Eventually this call should just change the 'rlines' data according to the
4774 # new line breaks and then return so that we can do an internal iteration
4775 # before continuing with the next stages of formatting.
4776 $self->break_lines();
4778 ############################################################
4779 # A possible future decomposition of 'break_lines()' follows.
4781 # - allow perltidy to do an internal iteration which eliminates
4782 # many unnecessary steps, such as re-parsing and vertical alignment.
4783 # This will allow iterations to be automatic.
4784 # - consolidate all length calculations to allow utf8 alignment
4785 ############################################################
4787 # Future: Check for convergence of beginning tokens on CODE lines
4789 # Future: End of Iteration Loop
4791 # Future: add_padding($rargs);
4793 # Future: add_closing_side_comments($rargs);
4795 # Future: vertical_alignment($rargs);
4797 # Future: output results
4799 # A final routine to tie up any loose ends
4804 sub create_one_line_block {
4805 ( $index_start_one_line_block, $semicolons_before_block_self_destruct ) =
4810 sub destroy_one_line_block {
4811 $index_start_one_line_block = UNDEFINED_INDEX;
4812 $semicolons_before_block_self_destruct = 0;
4816 sub leading_spaces_to_go {
4818 # return the number of indentation spaces for a token in the output stream;
4819 # these were previously stored by 'set_leading_whitespace'.
4822 if ( $ii < 0 ) { $ii = 0 }
4823 return get_spaces( $leading_spaces_to_go[$ii] );
4829 # return the number of leading spaces associated with an indentation
4830 # variable $indentation is either a constant number of spaces or an object
4831 # with a get_spaces method.
4832 my $indentation = shift;
4833 return ref($indentation) ? $indentation->get_spaces() : $indentation;
4836 sub get_recoverable_spaces {
4838 # return the number of spaces (+ means shift right, - means shift left)
4839 # that we would like to shift a group of lines with the same indentation
4840 # to get them to line up with their opening parens
4841 my $indentation = shift;
4842 return ref($indentation) ? $indentation->get_recoverable_spaces() : 0;
4845 sub get_available_spaces_to_go {
4848 my $item = $leading_spaces_to_go[$ii];
4850 # return the number of available leading spaces associated with an
4851 # indentation variable. $indentation is either a constant number of
4852 # spaces or an object with a get_available_spaces method.
4853 return ref($item) ? $item->get_available_spaces() : 0;
4856 sub new_lp_indentation_item {
4858 # this is an interface to the IndentationItem class
4859 my ( $spaces, $level, $ci_level, $available_spaces, $align_paren ) = @_;
4861 # A negative level implies not to store the item in the item_list
4863 if ( $level >= 0 ) { $index = ++$max_gnu_item_index; }
4865 my $item = Perl::Tidy::IndentationItem->new(
4867 $ci_level, $available_spaces,
4868 $index, $gnu_sequence_number,
4869 $align_paren, $max_gnu_stack_index,
4870 $line_start_index_to_go,
4873 if ( $level >= 0 ) {
4874 $gnu_item_list[$max_gnu_item_index] = $item;
4880 sub set_leading_whitespace {
4882 # This routine defines leading whitespace
4883 # given: the level and continuation_level of a token,
4884 # define: space count of leading string which would apply if it
4885 # were the first token of a new line.
4887 my ( $level_abs, $ci_level, $in_continued_quote ) = @_;
4889 # Adjust levels if necessary to recycle whitespace:
4890 # given $level_abs, the absolute level
4891 # define $level, a possibly reduced level for whitespace
4892 my $level = $level_abs;
4893 if ( $rOpts_whitespace_cycle && $rOpts_whitespace_cycle > 0 ) {
4894 if ( $level_abs < $whitespace_last_level ) {
4895 pop(@whitespace_level_stack);
4897 if ( !@whitespace_level_stack ) {
4898 push @whitespace_level_stack, $level_abs;
4900 elsif ( $level_abs > $whitespace_last_level ) {
4901 $level = $whitespace_level_stack[-1] +
4902 ( $level_abs - $whitespace_last_level );
4905 # 1 Try to break at a block brace
4907 $level > $rOpts_whitespace_cycle
4908 && $last_nonblank_type eq '{'
4909 && $last_nonblank_token eq '{'
4912 # 2 Then either a brace or bracket
4913 || ( $level > $rOpts_whitespace_cycle + 1
4914 && $last_nonblank_token =~ /^[\{\[]$/ )
4916 # 3 Then a paren too
4917 || $level > $rOpts_whitespace_cycle + 2
4922 push @whitespace_level_stack, $level;
4924 $level = $whitespace_level_stack[-1];
4926 $whitespace_last_level = $level_abs;
4928 # modify for -bli, which adds one continuation indentation for
4930 if ( $rOpts_brace_left_and_indent
4931 && $max_index_to_go == 0
4932 && $block_type_to_go[$max_index_to_go] =~ /$bli_pattern/o )
4937 # patch to avoid trouble when input file has negative indentation.
4938 # other logic should catch this error.
4939 if ( $level < 0 ) { $level = 0 }
4941 #-------------------------------------------
4942 # handle the standard indentation scheme
4943 #-------------------------------------------
4944 unless ($rOpts_line_up_parentheses) {
4946 $ci_level * $rOpts_continuation_indentation +
4947 $level * $rOpts_indent_columns;
4949 ( $ci_level == 0 ) ? 0 : $rOpts_continuation_indentation;
4951 if ($in_continued_quote) {
4955 $leading_spaces_to_go[$max_index_to_go] = $space_count;
4956 $reduced_spaces_to_go[$max_index_to_go] = $space_count - $ci_spaces;
4960 #-------------------------------------------------------------
4961 # handle case of -lp indentation..
4962 #-------------------------------------------------------------
4964 # The continued_quote flag means that this is the first token of a
4965 # line, and it is the continuation of some kind of multi-line quote
4966 # or pattern. It requires special treatment because it must have no
4967 # added leading whitespace. So we create a special indentation item
4968 # which is not in the stack.
4969 if ($in_continued_quote) {
4970 my $space_count = 0;
4971 my $available_space = 0;
4972 $level = -1; # flag to prevent storing in item_list
4973 $leading_spaces_to_go[$max_index_to_go] =
4974 $reduced_spaces_to_go[$max_index_to_go] =
4975 new_lp_indentation_item( $space_count, $level, $ci_level,
4976 $available_space, 0 );
4980 # get the top state from the stack
4981 my $space_count = $gnu_stack[$max_gnu_stack_index]->get_spaces();
4982 my $current_level = $gnu_stack[$max_gnu_stack_index]->get_level();
4983 my $current_ci_level = $gnu_stack[$max_gnu_stack_index]->get_ci_level();
4985 my $type = $types_to_go[$max_index_to_go];
4986 my $token = $tokens_to_go[$max_index_to_go];
4987 my $total_depth = $nesting_depth_to_go[$max_index_to_go];
4989 if ( $type eq '{' || $type eq '(' ) {
4991 $gnu_comma_count{ $total_depth + 1 } = 0;
4992 $gnu_arrow_count{ $total_depth + 1 } = 0;
4994 # If we come to an opening token after an '=' token of some type,
4995 # see if it would be helpful to 'break' after the '=' to save space
4996 my $last_equals = $last_gnu_equals{$total_depth};
4997 if ( $last_equals && $last_equals > $line_start_index_to_go ) {
4999 # find the position if we break at the '='
5000 my $i_test = $last_equals;
5001 if ( $types_to_go[ $i_test + 1 ] eq 'b' ) { $i_test++ }
5004 ##my $too_close = ($i_test==$max_index_to_go-1);
5006 my $test_position = total_line_length( $i_test, $max_index_to_go );
5007 my $mll = maximum_line_length($i_test);
5011 # the equals is not just before an open paren (testing)
5014 # if we are beyond the midpoint
5015 $gnu_position_predictor > $mll - $rOpts_maximum_line_length / 2
5017 # or we are beyond the 1/4 point and there was an old
5018 # break at the equals
5020 $gnu_position_predictor >
5021 $mll - $rOpts_maximum_line_length * 3 / 4
5023 $old_breakpoint_to_go[$last_equals]
5024 || ( $last_equals > 0
5025 && $old_breakpoint_to_go[ $last_equals - 1 ] )
5026 || ( $last_equals > 1
5027 && $types_to_go[ $last_equals - 1 ] eq 'b'
5028 && $old_breakpoint_to_go[ $last_equals - 2 ] )
5034 # then make the switch -- note that we do not set a real
5035 # breakpoint here because we may not really need one; sub
5036 # scan_list will do that if necessary
5037 $line_start_index_to_go = $i_test + 1;
5038 $gnu_position_predictor = $test_position;
5044 maximum_line_length_for_level($level) - $rOpts_maximum_line_length / 2;
5046 # Check for decreasing depth ..
5047 # Note that one token may have both decreasing and then increasing
5048 # depth. For example, (level, ci) can go from (1,1) to (2,0). So,
5049 # in this example we would first go back to (1,0) then up to (2,0)
5051 if ( $level < $current_level || $ci_level < $current_ci_level ) {
5053 # loop to find the first entry at or completely below this level
5054 my ( $lev, $ci_lev );
5056 if ($max_gnu_stack_index) {
5058 # save index of token which closes this level
5059 $gnu_stack[$max_gnu_stack_index]->set_closed($max_index_to_go);
5061 # Undo any extra indentation if we saw no commas
5062 my $available_spaces =
5063 $gnu_stack[$max_gnu_stack_index]->get_available_spaces();
5065 my $comma_count = 0;
5066 my $arrow_count = 0;
5067 if ( $type eq '}' || $type eq ')' ) {
5068 $comma_count = $gnu_comma_count{$total_depth};
5069 $arrow_count = $gnu_arrow_count{$total_depth};
5070 $comma_count = 0 unless $comma_count;
5071 $arrow_count = 0 unless $arrow_count;
5073 $gnu_stack[$max_gnu_stack_index]->set_comma_count($comma_count);
5074 $gnu_stack[$max_gnu_stack_index]->set_arrow_count($arrow_count);
5076 if ( $available_spaces > 0 ) {
5078 if ( $comma_count <= 0 || $arrow_count > 0 ) {
5080 my $i = $gnu_stack[$max_gnu_stack_index]->get_index();
5082 $gnu_stack[$max_gnu_stack_index]
5083 ->get_sequence_number();
5085 # Be sure this item was created in this batch. This
5086 # should be true because we delete any available
5087 # space from open items at the end of each batch.
5088 if ( $gnu_sequence_number != $seqno
5089 || $i > $max_gnu_item_index )
5092 "Program bug with -lp. seqno=$seqno should be $gnu_sequence_number and i=$i should be less than max=$max_gnu_item_index\n"
5094 report_definite_bug();
5098 if ( $arrow_count == 0 ) {
5100 ->permanently_decrease_available_spaces(
5105 ->tentatively_decrease_available_spaces(
5108 foreach my $j ( $i + 1 .. $max_gnu_item_index ) {
5110 ->decrease_SPACES($available_spaces);
5117 --$max_gnu_stack_index;
5118 $lev = $gnu_stack[$max_gnu_stack_index]->get_level();
5119 $ci_lev = $gnu_stack[$max_gnu_stack_index]->get_ci_level();
5121 # stop when we reach a level at or below the current level
5122 if ( $lev <= $level && $ci_lev <= $ci_level ) {
5124 $gnu_stack[$max_gnu_stack_index]->get_spaces();
5125 $current_level = $lev;
5126 $current_ci_level = $ci_lev;
5131 # reached bottom of stack .. should never happen because
5132 # only negative levels can get here, and $level was forced
5133 # to be positive above.
5136 "program bug with -lp: stack_error. level=$level; lev=$lev; ci_level=$ci_level; ci_lev=$ci_lev; rerun with -nlp\n"
5138 report_definite_bug();
5144 # handle increasing depth
5145 if ( $level > $current_level || $ci_level > $current_ci_level ) {
5147 # Compute the standard incremental whitespace. This will be
5148 # the minimum incremental whitespace that will be used. This
5149 # choice results in a smooth transition between the gnu-style
5150 # and the standard style.
5151 my $standard_increment =
5152 ( $level - $current_level ) * $rOpts_indent_columns +
5153 ( $ci_level - $current_ci_level ) * $rOpts_continuation_indentation;
5155 # Now we have to define how much extra incremental space
5156 # ("$available_space") we want. This extra space will be
5157 # reduced as necessary when long lines are encountered or when
5158 # it becomes clear that we do not have a good list.
5159 my $available_space = 0;
5160 my $align_paren = 0;
5163 # initialization on empty stack..
5164 if ( $max_gnu_stack_index == 0 ) {
5165 $space_count = $level * $rOpts_indent_columns;
5168 # if this is a BLOCK, add the standard increment
5169 elsif ($last_nonblank_block_type) {
5170 $space_count += $standard_increment;
5173 # if last nonblank token was not structural indentation,
5174 # just use standard increment
5175 elsif ( $last_nonblank_type ne '{' ) {
5176 $space_count += $standard_increment;
5179 # otherwise use the space to the first non-blank level change token
5182 $space_count = $gnu_position_predictor;
5184 my $min_gnu_indentation =
5185 $gnu_stack[$max_gnu_stack_index]->get_spaces();
5187 $available_space = $space_count - $min_gnu_indentation;
5188 if ( $available_space >= $standard_increment ) {
5189 $min_gnu_indentation += $standard_increment;
5191 elsif ( $available_space > 1 ) {
5192 $min_gnu_indentation += $available_space + 1;
5194 elsif ( $last_nonblank_token =~ /^[\{\[\(]$/ ) {
5195 if ( ( $tightness{$last_nonblank_token} < 2 ) ) {
5196 $min_gnu_indentation += 2;
5199 $min_gnu_indentation += 1;
5203 $min_gnu_indentation += $standard_increment;
5205 $available_space = $space_count - $min_gnu_indentation;
5207 if ( $available_space < 0 ) {
5208 $space_count = $min_gnu_indentation;
5209 $available_space = 0;
5214 # update state, but not on a blank token
5215 if ( $types_to_go[$max_index_to_go] ne 'b' ) {
5217 $gnu_stack[$max_gnu_stack_index]->set_have_child(1);
5219 ++$max_gnu_stack_index;
5220 $gnu_stack[$max_gnu_stack_index] =
5221 new_lp_indentation_item( $space_count, $level, $ci_level,
5222 $available_space, $align_paren );
5224 # If the opening paren is beyond the half-line length, then
5225 # we will use the minimum (standard) indentation. This will
5226 # help avoid problems associated with running out of space
5227 # near the end of a line. As a result, in deeply nested
5228 # lists, there will be some indentations which are limited
5229 # to this minimum standard indentation. But the most deeply
5230 # nested container will still probably be able to shift its
5231 # parameters to the right for proper alignment, so in most
5232 # cases this will not be noticeable.
5233 if ( $available_space > 0 && $space_count > $halfway ) {
5234 $gnu_stack[$max_gnu_stack_index]
5235 ->tentatively_decrease_available_spaces($available_space);
5240 # Count commas and look for non-list characters. Once we see a
5241 # non-list character, we give up and don't look for any more commas.
5242 if ( $type eq '=>' ) {
5243 $gnu_arrow_count{$total_depth}++;
5245 # tentatively treating '=>' like '=' for estimating breaks
5246 # TODO: this could use some experimentation
5247 $last_gnu_equals{$total_depth} = $max_index_to_go;
5250 elsif ( $type eq ',' ) {
5251 $gnu_comma_count{$total_depth}++;
5254 elsif ( $is_assignment{$type} ) {
5255 $last_gnu_equals{$total_depth} = $max_index_to_go;
5258 # this token might start a new line
5259 # if this is a non-blank..
5260 if ( $type ne 'b' ) {
5265 # this is the first nonblank token of the line
5266 $max_index_to_go == 1 && $types_to_go[0] eq 'b'
5268 # or previous character was one of these:
5269 || $last_nonblank_type_to_go =~ /^([\:\?\,f])$/
5271 # or previous character was opening and this does not close it
5272 || ( $last_nonblank_type_to_go eq '{' && $type ne '}' )
5273 || ( $last_nonblank_type_to_go eq '(' and $type ne ')' )
5275 # or this token is one of these:
5276 || $type =~ /^([\.]|\|\||\&\&)$/
5278 # or this is a closing structure
5279 || ( $last_nonblank_type_to_go eq '}'
5280 && $last_nonblank_token_to_go eq $last_nonblank_type_to_go )
5282 # or previous token was keyword 'return'
5283 || ( $last_nonblank_type_to_go eq 'k'
5284 && ( $last_nonblank_token_to_go eq 'return' && $type ne '{' ) )
5286 # or starting a new line at certain keywords is fine
5288 && $is_if_unless_and_or_last_next_redo_return{$token} )
5290 # or this is after an assignment after a closing structure
5292 $is_assignment{$last_nonblank_type_to_go}
5294 $last_last_nonblank_type_to_go =~ /^[\}\)\]]$/
5296 # and it is significantly to the right
5297 || $gnu_position_predictor > $halfway
5302 check_for_long_gnu_style_lines();
5303 $line_start_index_to_go = $max_index_to_go;
5305 # back up 1 token if we want to break before that type
5306 # otherwise, we may strand tokens like '?' or ':' on a line
5307 if ( $line_start_index_to_go > 0 ) {
5308 if ( $last_nonblank_type_to_go eq 'k' ) {
5310 if ( $want_break_before{$last_nonblank_token_to_go} ) {
5311 $line_start_index_to_go--;
5314 elsif ( $want_break_before{$last_nonblank_type_to_go} ) {
5315 $line_start_index_to_go--;
5321 # remember the predicted position of this token on the output line
5322 if ( $max_index_to_go > $line_start_index_to_go ) {
5323 $gnu_position_predictor =
5324 total_line_length( $line_start_index_to_go, $max_index_to_go );
5327 $gnu_position_predictor =
5328 $space_count + $token_lengths_to_go[$max_index_to_go];
5331 # store the indentation object for this token
5332 # this allows us to manipulate the leading whitespace
5333 # (in case we have to reduce indentation to fit a line) without
5334 # having to change any token values
5335 $leading_spaces_to_go[$max_index_to_go] = $gnu_stack[$max_gnu_stack_index];
5336 $reduced_spaces_to_go[$max_index_to_go] =
5337 ( $max_gnu_stack_index > 0 && $ci_level )
5338 ? $gnu_stack[ $max_gnu_stack_index - 1 ]
5339 : $gnu_stack[$max_gnu_stack_index];
5343 sub check_for_long_gnu_style_lines {
5345 # look at the current estimated maximum line length, and
5346 # remove some whitespace if it exceeds the desired maximum
5348 # this is only for the '-lp' style
5349 return unless ($rOpts_line_up_parentheses);
5351 # nothing can be done if no stack items defined for this line
5352 return if ( $max_gnu_item_index == UNDEFINED_INDEX );
5354 # see if we have exceeded the maximum desired line length
5355 # keep 2 extra free because they are needed in some cases
5356 # (result of trial-and-error testing)
5358 $gnu_position_predictor - maximum_line_length($max_index_to_go) + 2;
5360 return if ( $spaces_needed <= 0 );
5362 # We are over the limit, so try to remove a requested number of
5363 # spaces from leading whitespace. We are only allowed to remove
5364 # from whitespace items created on this batch, since others have
5365 # already been used and cannot be undone.
5366 my @candidates = ();
5369 # loop over all whitespace items created for the current batch
5370 for ( $i = 0 ; $i <= $max_gnu_item_index ; $i++ ) {
5371 my $item = $gnu_item_list[$i];
5373 # item must still be open to be a candidate (otherwise it
5374 # cannot influence the current token)
5375 next if ( $item->get_closed() >= 0 );
5377 my $available_spaces = $item->get_available_spaces();
5379 if ( $available_spaces > 0 ) {
5380 push( @candidates, [ $i, $available_spaces ] );
5384 return unless (@candidates);
5386 # sort by available whitespace so that we can remove whitespace
5387 # from the maximum available first
5388 @candidates = sort { $b->[1] <=> $a->[1] } @candidates;
5390 # keep removing whitespace until we are done or have no more
5391 foreach my $candidate (@candidates) {
5392 my ( $i, $available_spaces ) = @{$candidate};
5393 my $deleted_spaces =
5394 ( $available_spaces > $spaces_needed )
5396 : $available_spaces;
5398 # remove the incremental space from this item
5399 $gnu_item_list[$i]->decrease_available_spaces($deleted_spaces);
5403 # update the leading whitespace of this item and all items
5404 # that came after it
5405 for ( ; $i <= $max_gnu_item_index ; $i++ ) {
5407 my $old_spaces = $gnu_item_list[$i]->get_spaces();
5408 if ( $old_spaces >= $deleted_spaces ) {
5409 $gnu_item_list[$i]->decrease_SPACES($deleted_spaces);
5412 # shouldn't happen except for code bug:
5414 my $level = $gnu_item_list[$i_debug]->get_level();
5415 my $ci_level = $gnu_item_list[$i_debug]->get_ci_level();
5416 my $old_level = $gnu_item_list[$i]->get_level();
5417 my $old_ci_level = $gnu_item_list[$i]->get_ci_level();
5419 "program bug with -lp: want to delete $deleted_spaces from item $i, but old=$old_spaces deleted: lev=$level ci=$ci_level deleted: level=$old_level ci=$ci_level\n"
5421 report_definite_bug();
5424 $gnu_position_predictor -= $deleted_spaces;
5425 $spaces_needed -= $deleted_spaces;
5426 last unless ( $spaces_needed > 0 );
5431 sub finish_lp_batch {
5433 # This routine is called once after each output stream batch is
5434 # finished to undo indentation for all incomplete -lp
5435 # indentation levels. It is too risky to leave a level open,
5436 # because then we can't backtrack in case of a long line to follow.
5437 # This means that comments and blank lines will disrupt this
5438 # indentation style. But the vertical aligner may be able to
5439 # get the space back if there are side comments.
5441 # this is only for the 'lp' style
5442 return unless ($rOpts_line_up_parentheses);
5444 # nothing can be done if no stack items defined for this line
5445 return if ( $max_gnu_item_index == UNDEFINED_INDEX );
5447 # loop over all whitespace items created for the current batch
5448 foreach my $i ( 0 .. $max_gnu_item_index ) {
5449 my $item = $gnu_item_list[$i];
5451 # only look for open items
5452 next if ( $item->get_closed() >= 0 );
5454 # Tentatively remove all of the available space
5455 # (The vertical aligner will try to get it back later)
5456 my $available_spaces = $item->get_available_spaces();
5457 if ( $available_spaces > 0 ) {
5459 # delete incremental space for this item
5461 ->tentatively_decrease_available_spaces($available_spaces);
5463 # Reduce the total indentation space of any nodes that follow
5464 # Note that any such nodes must necessarily be dependents
5466 foreach ( $i + 1 .. $max_gnu_item_index ) {
5467 $gnu_item_list[$_]->decrease_SPACES($available_spaces);
5474 sub reduce_lp_indentation {
5476 # reduce the leading whitespace at token $i if possible by $spaces_needed
5477 # (a large value of $spaces_needed will remove all excess space)
5478 # NOTE: to be called from scan_list only for a sequence of tokens
5479 # contained between opening and closing parens/braces/brackets
5481 my ( $i, $spaces_wanted ) = @_;
5482 my $deleted_spaces = 0;
5484 my $item = $leading_spaces_to_go[$i];
5485 my $available_spaces = $item->get_available_spaces();
5488 $available_spaces > 0
5489 && ( ( $spaces_wanted <= $available_spaces )
5490 || !$item->get_have_child() )
5494 # we'll remove these spaces, but mark them as recoverable
5496 $item->tentatively_decrease_available_spaces($spaces_wanted);
5499 return $deleted_spaces;
5502 sub token_sequence_length {
5504 # return length of tokens ($ibeg .. $iend) including $ibeg & $iend
5505 # returns 0 if $ibeg > $iend (shouldn't happen)
5506 my ( $ibeg, $iend ) = @_;
5507 return 0 if ( $iend < 0 || $ibeg > $iend );
5508 return $summed_lengths_to_go[ $iend + 1 ] if ( $ibeg < 0 );
5509 return $summed_lengths_to_go[ $iend + 1 ] - $summed_lengths_to_go[$ibeg];
5512 sub total_line_length {
5514 # return length of a line of tokens ($ibeg .. $iend)
5515 my ( $ibeg, $iend ) = @_;
5516 return leading_spaces_to_go($ibeg) + token_sequence_length( $ibeg, $iend );
5519 sub maximum_line_length_for_level {
5521 # return maximum line length for line starting with a given level
5522 my $maximum_line_length = $rOpts_maximum_line_length;
5524 # Modify if -vmll option is selected
5525 if ($rOpts_variable_maximum_line_length) {
5527 if ( $level < 0 ) { $level = 0 }
5528 $maximum_line_length += $level * $rOpts_indent_columns;
5530 return $maximum_line_length;
5533 sub maximum_line_length {
5535 # return maximum line length for line starting with the token at given index
5537 return maximum_line_length_for_level( $levels_to_go[$ii] );
5540 sub excess_line_length {
5542 # return number of characters by which a line of tokens ($ibeg..$iend)
5543 # exceeds the allowable line length.
5544 my ( $ibeg, $iend, $ignore_left_weld, $ignore_right_weld ) = @_;
5546 # Include left and right weld lengths unless requested not to
5547 my $wl = $ignore_left_weld ? 0 : weld_len_left_to_go($iend);
5548 my $wr = $ignore_right_weld ? 0 : weld_len_right_to_go($iend);
5550 return total_line_length( $ibeg, $iend ) + $wl + $wr -
5551 maximum_line_length($ibeg);
5556 # flush buffer and write any informative messages
5560 $file_writer_object->decrement_output_line_number()
5561 ; # fix up line number since it was incremented
5562 we_are_at_the_last_line();
5563 if ( $added_semicolon_count > 0 ) {
5564 my $first = ( $added_semicolon_count > 1 ) ? "First" : "";
5566 ( $added_semicolon_count > 1 ) ? "semicolons were" : "semicolon was";
5567 write_logfile_entry("$added_semicolon_count $what added:\n");
5568 write_logfile_entry(
5569 " $first at input line $first_added_semicolon_at\n");
5571 if ( $added_semicolon_count > 1 ) {
5572 write_logfile_entry(
5573 " Last at input line $last_added_semicolon_at\n");
5575 write_logfile_entry(" (Use -nasc to prevent semicolon addition)\n");
5576 write_logfile_entry("\n");
5579 if ( $deleted_semicolon_count > 0 ) {
5580 my $first = ( $deleted_semicolon_count > 1 ) ? "First" : "";
5582 ( $deleted_semicolon_count > 1 )
5585 write_logfile_entry(
5586 "$deleted_semicolon_count unnecessary $what deleted:\n");
5587 write_logfile_entry(
5588 " $first at input line $first_deleted_semicolon_at\n");
5590 if ( $deleted_semicolon_count > 1 ) {
5591 write_logfile_entry(
5592 " Last at input line $last_deleted_semicolon_at\n");
5594 write_logfile_entry(" (Use -ndsm to prevent semicolon deletion)\n");
5595 write_logfile_entry("\n");
5598 if ( $embedded_tab_count > 0 ) {
5599 my $first = ( $embedded_tab_count > 1 ) ? "First" : "";
5601 ( $embedded_tab_count > 1 )
5602 ? "quotes or patterns"
5603 : "quote or pattern";
5604 write_logfile_entry("$embedded_tab_count $what had embedded tabs:\n");
5605 write_logfile_entry(
5606 "This means the display of this script could vary with device or software\n"
5608 write_logfile_entry(" $first at input line $first_embedded_tab_at\n");
5610 if ( $embedded_tab_count > 1 ) {
5611 write_logfile_entry(
5612 " Last at input line $last_embedded_tab_at\n");
5614 write_logfile_entry("\n");
5617 if ($first_tabbing_disagreement) {
5618 write_logfile_entry(
5619 "First indentation disagreement seen at input line $first_tabbing_disagreement\n"
5623 if ($in_tabbing_disagreement) {
5624 write_logfile_entry(
5625 "Ending with indentation disagreement which started at input line $in_tabbing_disagreement\n"
5630 if ($last_tabbing_disagreement) {
5632 write_logfile_entry(
5633 "Last indentation disagreement seen at input line $last_tabbing_disagreement\n"
5637 write_logfile_entry("No indentation disagreement seen\n");
5640 if ($first_tabbing_disagreement) {
5641 write_logfile_entry(
5642 "Note: Indentation disagreement detection is not accurate for outdenting and -lp.\n"
5645 write_logfile_entry("\n");
5647 $vertical_aligner_object->report_anything_unusual();
5649 $file_writer_object->report_line_length_errors();
5656 # This routine is called to check the Opts hash after it is defined
5659 initialize_whitespace_hashes();
5660 initialize_bond_strength_hashes();
5662 make_static_block_comment_pattern();
5663 make_static_side_comment_pattern();
5664 make_closing_side_comment_prefix();
5665 make_closing_side_comment_list_pattern();
5666 $format_skipping_pattern_begin =
5667 make_format_skipping_pattern( 'format-skipping-begin', '#<<<' );
5668 $format_skipping_pattern_end =
5669 make_format_skipping_pattern( 'format-skipping-end', '#>>>' );
5671 # If closing side comments ARE selected, then we can safely
5672 # delete old closing side comments unless closing side comment
5673 # warnings are requested. This is a good idea because it will
5674 # eliminate any old csc's which fall below the line count threshold.
5675 # We cannot do this if warnings are turned on, though, because we
5676 # might delete some text which has been added. So that must
5677 # be handled when comments are created.
5678 if ( $rOpts->{'closing-side-comments'} ) {
5679 if ( !$rOpts->{'closing-side-comment-warnings'} ) {
5680 $rOpts->{'delete-closing-side-comments'} = 1;
5684 # If closing side comments ARE NOT selected, but warnings ARE
5685 # selected and we ARE DELETING csc's, then we will pretend to be
5686 # adding with a huge interval. This will force the comments to be
5687 # generated for comparison with the old comments, but not added.
5688 elsif ( $rOpts->{'closing-side-comment-warnings'} ) {
5689 if ( $rOpts->{'delete-closing-side-comments'} ) {
5690 $rOpts->{'delete-closing-side-comments'} = 0;
5691 $rOpts->{'closing-side-comments'} = 1;
5692 $rOpts->{'closing-side-comment-interval'} = 100000000;
5696 make_sub_matching_pattern();
5698 make_block_brace_vertical_tightness_pattern();
5699 make_blank_line_pattern();
5700 make_keyword_group_list_pattern();
5702 # Make initial list of desired one line block types
5703 # They will be modified by 'prepare_cuddled_block_types'
5704 %want_one_line_block = %is_sort_map_grep_eval;
5706 prepare_cuddled_block_types();
5707 if ( $rOpts->{'dump-cuddled-block-list'} ) {
5708 dump_cuddled_block_list(*STDOUT);
5712 if ( $rOpts->{'line-up-parentheses'} ) {
5714 if ( $rOpts->{'indent-only'}
5715 || !$rOpts->{'add-newlines'}
5716 || !$rOpts->{'delete-old-newlines'} )
5719 -----------------------------------------------------------------------
5720 Conflict: -lp conflicts with -io, -fnl, -nanl, or -ndnl; ignoring -lp
5722 The -lp indentation logic requires that perltidy be able to coordinate
5723 arbitrarily large numbers of line breakpoints. This isn't possible
5724 with these flags. Sometimes an acceptable workaround is to use -wocb=3
5725 -----------------------------------------------------------------------
5727 $rOpts->{'line-up-parentheses'} = 0;
5731 # At present, tabs are not compatible with the line-up-parentheses style
5732 # (it would be possible to entab the total leading whitespace
5733 # just prior to writing the line, if desired).
5734 if ( $rOpts->{'line-up-parentheses'} && $rOpts->{'tabs'} ) {
5736 Conflict: -t (tabs) cannot be used with the -lp option; ignoring -t; see -et.
5738 $rOpts->{'tabs'} = 0;
5741 # Likewise, tabs are not compatible with outdenting..
5742 if ( $rOpts->{'outdent-keywords'} && $rOpts->{'tabs'} ) {
5744 Conflict: -t (tabs) cannot be used with the -okw options; ignoring -t; see -et.
5746 $rOpts->{'tabs'} = 0;
5749 if ( $rOpts->{'outdent-labels'} && $rOpts->{'tabs'} ) {
5751 Conflict: -t (tabs) cannot be used with the -ola option; ignoring -t; see -et.
5753 $rOpts->{'tabs'} = 0;
5756 if ( !$rOpts->{'space-for-semicolon'} ) {
5757 $want_left_space{'f'} = -1;
5760 if ( $rOpts->{'space-terminal-semicolon'} ) {
5761 $want_left_space{';'} = 1;
5764 # implement outdenting preferences for keywords
5765 %outdent_keyword = ();
5766 my @okw = split_words( $rOpts->{'outdent-keyword-okl'} );
5768 @okw = qw(next last redo goto return); # defaults
5771 # FUTURE: if not a keyword, assume that it is an identifier
5773 if ( $Perl::Tidy::Tokenizer::is_keyword{$_} ) {
5774 $outdent_keyword{$_} = 1;
5777 Warn("ignoring '$_' in -okwl list; not a perl keyword");
5781 # implement user whitespace preferences
5782 if ( my @q = split_words( $rOpts->{'want-left-space'} ) ) {
5783 @want_left_space{@q} = (1) x scalar(@q);
5786 if ( my @q = split_words( $rOpts->{'want-right-space'} ) ) {
5787 @want_right_space{@q} = (1) x scalar(@q);
5790 if ( my @q = split_words( $rOpts->{'nowant-left-space'} ) ) {
5791 @want_left_space{@q} = (-1) x scalar(@q);
5794 if ( my @q = split_words( $rOpts->{'nowant-right-space'} ) ) {
5795 @want_right_space{@q} = (-1) x scalar(@q);
5797 if ( $rOpts->{'dump-want-left-space'} ) {
5798 dump_want_left_space(*STDOUT);
5802 if ( $rOpts->{'dump-want-right-space'} ) {
5803 dump_want_right_space(*STDOUT);
5807 # default keywords for which space is introduced before an opening paren
5808 # (at present, including them messes up vertical alignment)
5809 my @sak = qw(my local our and or err eq ne if else elsif until
5810 unless while for foreach return switch case given when catch);
5811 @space_after_keyword{@sak} = (1) x scalar(@sak);
5813 # first remove any or all of these if desired
5814 if ( my @q = split_words( $rOpts->{'nospace-after-keyword'} ) ) {
5816 # -nsak='*' selects all the above keywords
5817 if ( @q == 1 && $q[0] eq '*' ) { @q = keys(%space_after_keyword) }
5818 @space_after_keyword{@q} = (0) x scalar(@q);
5821 # then allow user to add to these defaults
5822 if ( my @q = split_words( $rOpts->{'space-after-keyword'} ) ) {
5823 @space_after_keyword{@q} = (1) x scalar(@q);
5826 # implement user break preferences
5827 my @all_operators = qw(% + - * / x != == >= <= =~ !~ < > | &
5828 = **= += *= &= <<= &&= -= /= |= >>= ||= //= .= %= ^= x=
5829 . : ? && || and or err xor
5832 my $break_after = sub {
5834 foreach my $tok (@toks) {
5835 if ( $tok eq '?' ) { $tok = ':' } # patch to coordinate ?/:
5836 my $lbs = $left_bond_strength{$tok};
5837 my $rbs = $right_bond_strength{$tok};
5838 if ( defined($lbs) && defined($rbs) && $lbs < $rbs ) {
5839 ( $right_bond_strength{$tok}, $left_bond_strength{$tok} ) =
5845 my $break_before = sub {
5847 foreach my $tok (@toks) {
5848 my $lbs = $left_bond_strength{$tok};
5849 my $rbs = $right_bond_strength{$tok};
5850 if ( defined($lbs) && defined($rbs) && $rbs < $lbs ) {
5851 ( $right_bond_strength{$tok}, $left_bond_strength{$tok} ) =
5857 $break_after->(@all_operators) if ( $rOpts->{'break-after-all-operators'} );
5858 $break_before->(@all_operators)
5859 if ( $rOpts->{'break-before-all-operators'} );
5861 $break_after->( split_words( $rOpts->{'want-break-after'} ) );
5862 $break_before->( split_words( $rOpts->{'want-break-before'} ) );
5864 # make note if breaks are before certain key types
5865 %want_break_before = ();
5866 foreach my $tok ( @all_operators, ',' ) {
5867 $want_break_before{$tok} =
5868 $left_bond_strength{$tok} < $right_bond_strength{$tok};
5871 # Coordinate ?/: breaks, which must be similar
5872 if ( !$want_break_before{':'} ) {
5873 $want_break_before{'?'} = $want_break_before{':'};
5874 $right_bond_strength{'?'} = $right_bond_strength{':'} + 0.01;
5875 $left_bond_strength{'?'} = NO_BREAK;
5878 # Define here tokens which may follow the closing brace of a do statement
5879 # on the same line, as in:
5880 # } while ( $something);
5881 my @dof = qw(until while unless if ; : );
5883 @is_do_follower{@dof} = (1) x scalar(@dof);
5885 # What tokens may follow the closing brace of an if or elsif block?
5886 # Not used. Previously used for cuddled else, but no longer needed.
5887 %is_if_brace_follower = ();
5889 # nothing can follow the closing curly of an else { } block:
5890 %is_else_brace_follower = ();
5892 # what can follow a multi-line anonymous sub definition closing curly:
5893 my @asf = qw# ; : => or and && || ~~ !~~ ) #;
5895 @is_anon_sub_brace_follower{@asf} = (1) x scalar(@asf);
5897 # what can follow a one-line anonymous sub closing curly:
5898 # one-line anonymous subs also have ']' here...
5899 # see tk3.t and PP.pm
5900 my @asf1 = qw# ; : => or and && || ) ] ~~ !~~ #;
5902 @is_anon_sub_1_brace_follower{@asf1} = (1) x scalar(@asf1);
5904 # What can follow a closing curly of a block
5905 # which is not an if/elsif/else/do/sort/map/grep/eval/sub
5906 # Testfiles: 'Toolbar.pm', 'Menubar.pm', bless.t, '3rules.pl'
5907 my @obf = qw# ; : => or and && || ) #;
5909 @is_other_brace_follower{@obf} = (1) x scalar(@obf);
5911 $right_bond_strength{'{'} = WEAK;
5912 $left_bond_strength{'{'} = VERY_STRONG;
5914 # make -l=0 equal to -l=infinite
5915 if ( !$rOpts->{'maximum-line-length'} ) {
5916 $rOpts->{'maximum-line-length'} = 1000000;
5919 # make -lbl=0 equal to -lbl=infinite
5920 if ( !$rOpts->{'long-block-line-count'} ) {
5921 $rOpts->{'long-block-line-count'} = 1000000;
5924 my $enc = $rOpts->{'character-encoding'};
5925 if ( $enc && $enc !~ /^(none|utf8)$/i ) {
5927 Unrecognized character-encoding '$enc'; expecting one of: (none, utf8)
5931 my $ole = $rOpts->{'output-line-ending'};
5940 # Patch for RT #99514, a memoization issue.
5941 # Normally, the user enters one of 'dos', 'win', etc, and we change the
5942 # value in the options parameter to be the corresponding line ending
5943 # character. But, if we are using memoization, on later passes through
5944 # here the option parameter will already have the desired ending
5945 # character rather than the keyword 'dos', 'win', etc. So
5946 # we must check to see if conversion has already been done and, if so,
5947 # bypass the conversion step.
5948 my %endings_inverted = (
5949 "\015\012" => 'dos',
5950 "\015\012" => 'win',
5955 if ( defined( $endings_inverted{$ole} ) ) {
5957 # we already have valid line ending, nothing more to do
5961 unless ( $rOpts->{'output-line-ending'} = $endings{$ole} ) {
5962 my $str = join " ", keys %endings;
5964 Unrecognized line ending '$ole'; expecting one of: $str
5967 if ( $rOpts->{'preserve-line-endings'} ) {
5968 Warn("Ignoring -ple; conflicts with -ole\n");
5969 $rOpts->{'preserve-line-endings'} = undef;
5974 # hashes used to simplify setting whitespace
5976 '{' => $rOpts->{'brace-tightness'},
5977 '}' => $rOpts->{'brace-tightness'},
5978 '(' => $rOpts->{'paren-tightness'},
5979 ')' => $rOpts->{'paren-tightness'},
5980 '[' => $rOpts->{'square-bracket-tightness'},
5981 ']' => $rOpts->{'square-bracket-tightness'},
5990 if ( $rOpts->{'ignore-old-breakpoints'} ) {
5991 if ( $rOpts->{'break-at-old-method-breakpoints'} ) {
5992 Warn("Conflicting parameters: -iob and -bom; -bom will be ignored\n"
5995 if ( $rOpts->{'break-at-old-comma-breakpoints'} ) {
5996 Warn("Conflicting parameters: -iob and -boc; -boc will be ignored\n"
6000 # Note: there are additional parameters that can be made inactive by
6001 # -iob, but they are on by default so we would generate excessive
6002 # warnings if we noted them. They are:
6003 # $rOpts->{'break-at-old-keyword-breakpoints'}
6004 # $rOpts->{'break-at-old-logical-breakpoints'}
6005 # $rOpts->{'break-at-old-ternary-breakpoints'}
6006 # $rOpts->{'break-at-old-attribute-breakpoints'}
6009 # frequently used parameters
6010 $rOpts_add_newlines = $rOpts->{'add-newlines'};
6011 $rOpts_add_whitespace = $rOpts->{'add-whitespace'};
6012 $rOpts_block_brace_tightness = $rOpts->{'block-brace-tightness'};
6013 $rOpts_block_brace_vertical_tightness =
6014 $rOpts->{'block-brace-vertical-tightness'};
6015 $rOpts_brace_left_and_indent = $rOpts->{'brace-left-and-indent'};
6016 $rOpts_comma_arrow_breakpoints = $rOpts->{'comma-arrow-breakpoints'};
6017 $rOpts_break_at_old_ternary_breakpoints =
6018 $rOpts->{'break-at-old-ternary-breakpoints'};
6019 $rOpts_break_at_old_attribute_breakpoints =
6020 $rOpts->{'break-at-old-attribute-breakpoints'};
6021 $rOpts_break_at_old_comma_breakpoints =
6022 $rOpts->{'break-at-old-comma-breakpoints'};
6023 $rOpts_break_at_old_keyword_breakpoints =
6024 $rOpts->{'break-at-old-keyword-breakpoints'};
6025 $rOpts_break_at_old_logical_breakpoints =
6026 $rOpts->{'break-at-old-logical-breakpoints'};
6027 $rOpts_break_at_old_method_breakpoints =
6028 $rOpts->{'break-at-old-method-breakpoints'};
6029 $rOpts_closing_side_comment_else_flag =
6030 $rOpts->{'closing-side-comment-else-flag'};
6031 $rOpts_closing_side_comment_maximum_text =
6032 $rOpts->{'closing-side-comment-maximum-text'};
6033 $rOpts_continuation_indentation = $rOpts->{'continuation-indentation'};
6034 $rOpts_delete_old_whitespace = $rOpts->{'delete-old-whitespace'};
6035 $rOpts_fuzzy_line_length = $rOpts->{'fuzzy-line-length'};
6036 $rOpts_indent_columns = $rOpts->{'indent-columns'};
6037 $rOpts_line_up_parentheses = $rOpts->{'line-up-parentheses'};
6038 $rOpts_maximum_fields_per_table = $rOpts->{'maximum-fields-per-table'};
6039 $rOpts_maximum_line_length = $rOpts->{'maximum-line-length'};
6040 $rOpts_whitespace_cycle = $rOpts->{'whitespace-cycle'};
6041 $rOpts_one_line_block_semicolons = $rOpts->{'one-line-block-semicolons'};
6043 $rOpts_variable_maximum_line_length =
6044 $rOpts->{'variable-maximum-line-length'};
6045 $rOpts_short_concatenation_item_length =
6046 $rOpts->{'short-concatenation-item-length'};
6048 $rOpts_keep_old_blank_lines = $rOpts->{'keep-old-blank-lines'};
6049 $rOpts_ignore_old_breakpoints = $rOpts->{'ignore-old-breakpoints'};
6050 $rOpts_format_skipping = $rOpts->{'format-skipping'};
6051 $rOpts_space_function_paren = $rOpts->{'space-function-paren'};
6052 $rOpts_space_keyword_paren = $rOpts->{'space-keyword-paren'};
6053 $rOpts_keep_interior_semicolons = $rOpts->{'keep-interior-semicolons'};
6054 $rOpts_ignore_side_comment_lengths =
6055 $rOpts->{'ignore-side-comment-lengths'};
6057 # Note that both opening and closing tokens can access the opening
6058 # and closing flags of their container types.
6059 %opening_vertical_tightness = (
6060 '(' => $rOpts->{'paren-vertical-tightness'},
6061 '{' => $rOpts->{'brace-vertical-tightness'},
6062 '[' => $rOpts->{'square-bracket-vertical-tightness'},
6063 ')' => $rOpts->{'paren-vertical-tightness'},
6064 '}' => $rOpts->{'brace-vertical-tightness'},
6065 ']' => $rOpts->{'square-bracket-vertical-tightness'},
6068 %closing_vertical_tightness = (
6069 '(' => $rOpts->{'paren-vertical-tightness-closing'},
6070 '{' => $rOpts->{'brace-vertical-tightness-closing'},
6071 '[' => $rOpts->{'square-bracket-vertical-tightness-closing'},
6072 ')' => $rOpts->{'paren-vertical-tightness-closing'},
6073 '}' => $rOpts->{'brace-vertical-tightness-closing'},
6074 ']' => $rOpts->{'square-bracket-vertical-tightness-closing'},
6077 # assume flag for '>' same as ')' for closing qw quotes
6078 %closing_token_indentation = (
6079 ')' => $rOpts->{'closing-paren-indentation'},
6080 '}' => $rOpts->{'closing-brace-indentation'},
6081 ']' => $rOpts->{'closing-square-bracket-indentation'},
6082 '>' => $rOpts->{'closing-paren-indentation'},
6085 # flag indicating if any closing tokens are indented
6086 $some_closing_token_indentation =
6087 $rOpts->{'closing-paren-indentation'}
6088 || $rOpts->{'closing-brace-indentation'}
6089 || $rOpts->{'closing-square-bracket-indentation'}
6090 || $rOpts->{'indent-closing-brace'};
6092 %opening_token_right = (
6093 '(' => $rOpts->{'opening-paren-right'},
6094 '{' => $rOpts->{'opening-hash-brace-right'},
6095 '[' => $rOpts->{'opening-square-bracket-right'},
6098 %stack_opening_token = (
6099 '(' => $rOpts->{'stack-opening-paren'},
6100 '{' => $rOpts->{'stack-opening-hash-brace'},
6101 '[' => $rOpts->{'stack-opening-square-bracket'},
6104 %stack_closing_token = (
6105 ')' => $rOpts->{'stack-closing-paren'},
6106 '}' => $rOpts->{'stack-closing-hash-brace'},
6107 ']' => $rOpts->{'stack-closing-square-bracket'},
6109 $rOpts_stack_closing_block_brace = $rOpts->{'stack-closing-block-brace'};
6110 $rOpts_space_backslash_quote = $rOpts->{'space-backslash-quote'};
6116 # See if a pattern will compile. We have to use a string eval here,
6117 # but it should be safe because the pattern has been constructed
6120 eval "'##'=~/$pattern/";
6127 # Add keywords here which really should not be cuddled
6129 my @q = qw(if unless for foreach while);
6130 @no_cuddle{@q} = (1) x scalar(@q);
6133 sub prepare_cuddled_block_types {
6135 # the cuddled-else style, if used, is controlled by a hash that
6138 # Include keywords here which should not be cuddled
6140 my $cuddled_string = "";
6141 if ( $rOpts->{'cuddled-else'} ) {
6144 $cuddled_string = 'elsif else continue catch finally'
6145 unless ( $rOpts->{'cuddled-block-list-exclusive'} );
6147 # This is the old equivalent but more complex version
6148 # $cuddled_string = 'if-elsif-else unless-elsif-else -continue ';
6150 # Add users other blocks to be cuddled
6151 my $cuddled_block_list = $rOpts->{'cuddled-block-list'};
6152 if ($cuddled_block_list) {
6153 $cuddled_string .= " " . $cuddled_block_list;
6158 # If we have a cuddled string of the form
6159 # 'try-catch-finally'
6161 # we want to prepare a hash of the form
6163 # $rcuddled_block_types = {
6170 # use -dcbl to dump this hash
6172 # Multiple such strings are input as a space or comma separated list
6174 # If we get two lists with the same leading type, such as
6175 # -cbl = "-try-catch-finally -try-catch-otherwise"
6176 # then they will get merged as follows:
6177 # $rcuddled_block_types = {
6184 # This will allow either type of chain to be followed.
6186 $cuddled_string =~ s/,/ /g; # allow space or comma separated lists
6187 my @cuddled_strings = split /\s+/, $cuddled_string;
6189 $rcuddled_block_types = {};
6191 # process each dash-separated string...
6192 my $string_count = 0;
6193 foreach my $string (@cuddled_strings) {
6194 next unless $string;
6195 my @words = split /-+/, $string; # allow multiple dashes
6197 # we could look for and report possible errors here...
6198 next unless ( @words > 0 );
6200 # allow either '-continue' or *-continue' for arbitrary starting type
6203 # a single word without dashes is a secondary block type
6205 $start = shift @words;
6208 # always make an entry for the leading word. If none follow, this
6209 # will still prevent a wildcard from matching this word.
6210 if ( !defined( $rcuddled_block_types->{$start} ) ) {
6211 $rcuddled_block_types->{$start} = {};
6214 # The count gives the original word order in case we ever want it.
6217 foreach my $word (@words) {
6219 if ( $no_cuddle{$word} ) {
6221 "## Ignoring keyword '$word' in -cbl; does not seem right\n"
6226 $rcuddled_block_types->{$start}->{$word} =
6227 1; #"$string_count.$word_count";
6229 # git#9: Remove this word from the list of desired one-line
6231 $want_one_line_block{$word} = 0;
6238 sub dump_cuddled_block_list {
6241 # ORIGINAL METHOD: Here is the format of the cuddled block type hash
6242 # which controls this routine
6243 # my $rcuddled_block_types = {
6254 # SIMPLFIED METHOD: the simplified method uses a wildcard for
6255 # the starting block type and puts all cuddled blocks together:
6256 # my $rcuddled_block_types = {
6265 # Both methods work, but the simplified method has proven to be adequate and
6268 my $cuddled_string = $rOpts->{'cuddled-block-list'};
6269 $cuddled_string = '' unless $cuddled_string;
6272 $flags .= "-ce" if ( $rOpts->{'cuddled-else'} );
6273 $flags .= " -cbl='$cuddled_string'";
6275 unless ( $rOpts->{'cuddled-else'} ) {
6276 $flags .= "\nNote: You must specify -ce to generate a cuddled hash";
6280 ------------------------------------------------------------------------
6281 Hash of cuddled block types prepared for a run with these parameters:
6283 ------------------------------------------------------------------------
6287 $fh->print( Dumper($rcuddled_block_types) );
6290 ------------------------------------------------------------------------
6295 sub make_static_block_comment_pattern {
6297 # create the pattern used to identify static block comments
6298 $static_block_comment_pattern = '^\s*##';
6300 # allow the user to change it
6301 if ( $rOpts->{'static-block-comment-prefix'} ) {
6302 my $prefix = $rOpts->{'static-block-comment-prefix'};
6303 $prefix =~ s/^\s*//;
6304 my $pattern = $prefix;
6306 # user may give leading caret to force matching left comments only
6307 if ( $prefix !~ /^\^#/ ) {
6308 if ( $prefix !~ /^#/ ) {
6310 "ERROR: the -sbcp prefix is '$prefix' but must begin with '#' or '^#'\n"
6313 $pattern = '^\s*' . $prefix;
6315 if ( bad_pattern($pattern) ) {
6317 "ERROR: the -sbc prefix '$prefix' causes the invalid regex '$pattern'\n"
6320 $static_block_comment_pattern = $pattern;
6325 sub make_format_skipping_pattern {
6326 my ( $opt_name, $default ) = @_;
6327 my $param = $rOpts->{$opt_name};
6328 unless ($param) { $param = $default }
6330 if ( $param !~ /^#/ ) {
6331 Die("ERROR: the $opt_name parameter '$param' must begin with '#'\n");
6333 my $pattern = '^' . $param . '\s';
6334 if ( bad_pattern($pattern) ) {
6336 "ERROR: the $opt_name parameter '$param' causes the invalid regex '$pattern'\n"
6342 sub make_closing_side_comment_list_pattern {
6344 # turn any input list into a regex for recognizing selected block types
6345 $closing_side_comment_list_pattern = '^\w+';
6346 if ( defined( $rOpts->{'closing-side-comment-list'} )
6347 && $rOpts->{'closing-side-comment-list'} )
6349 $closing_side_comment_list_pattern =
6350 make_block_pattern( '-cscl', $rOpts->{'closing-side-comment-list'} );
6355 sub make_sub_matching_pattern {
6357 $SUB_PATTERN = '^sub\s+(::|\w)';
6358 $ASUB_PATTERN = '^sub$';
6360 if ( $rOpts->{'sub-alias-list'} ) {
6362 # Note that any 'sub-alias-list' has been preprocessed to
6363 # be a trimmed, space-separated list which includes 'sub'
6364 # for example, it might be 'sub method fun'
6365 my $sub_alias_list = $rOpts->{'sub-alias-list'};
6366 $sub_alias_list =~ s/\s+/\|/g;
6367 $SUB_PATTERN =~ s/sub/\($sub_alias_list\)/;
6368 $ASUB_PATTERN =~ s/sub/\($sub_alias_list\)/;
6373 sub make_bli_pattern {
6375 if ( defined( $rOpts->{'brace-left-and-indent-list'} )
6376 && $rOpts->{'brace-left-and-indent-list'} )
6378 $bli_list_string = $rOpts->{'brace-left-and-indent-list'};
6381 $bli_pattern = make_block_pattern( '-blil', $bli_list_string );
6385 sub make_keyword_group_list_pattern {
6387 # turn any input list into a regex for recognizing selected block types.
6388 # Here are the defaults:
6389 $keyword_group_list_pattern = '^(our|local|my|use|require|)$';
6390 $keyword_group_list_comment_pattern = '';
6391 if ( defined( $rOpts->{'keyword-group-blanks-list'} )
6392 && $rOpts->{'keyword-group-blanks-list'} )
6394 my @words = split /\s+/, $rOpts->{'keyword-group-blanks-list'};
6397 foreach my $word (@words) {
6398 if ( $word =~ /^(BC|SBC)$/ ) {
6399 push @comment_list, $word;
6400 if ( $word eq 'SBC' ) { push @comment_list, 'SBCX' }
6403 push @keyword_list, $word;
6406 $keyword_group_list_pattern =
6407 make_block_pattern( '-kgbl', $rOpts->{'keyword-group-blanks-list'} );
6408 $keyword_group_list_comment_pattern =
6409 make_block_pattern( '-kgbl', join( ' ', @comment_list ) );
6414 sub make_block_brace_vertical_tightness_pattern {
6416 # turn any input list into a regex for recognizing selected block types
6417 $block_brace_vertical_tightness_pattern =
6418 '^((if|else|elsif|unless|while|for|foreach|do|\w+:)$|sub)';
6419 if ( defined( $rOpts->{'block-brace-vertical-tightness-list'} )
6420 && $rOpts->{'block-brace-vertical-tightness-list'} )
6422 $block_brace_vertical_tightness_pattern =
6423 make_block_pattern( '-bbvtl',
6424 $rOpts->{'block-brace-vertical-tightness-list'} );
6429 sub make_blank_line_pattern {
6431 $blank_lines_before_closing_block_pattern = $SUB_PATTERN;
6432 my $key = 'blank-lines-before-closing-block-list';
6433 if ( defined( $rOpts->{$key} ) && $rOpts->{$key} ) {
6434 $blank_lines_before_closing_block_pattern =
6435 make_block_pattern( '-blbcl', $rOpts->{$key} );
6438 $blank_lines_after_opening_block_pattern = $SUB_PATTERN;
6439 $key = 'blank-lines-after-opening-block-list';
6440 if ( defined( $rOpts->{$key} ) && $rOpts->{$key} ) {
6441 $blank_lines_after_opening_block_pattern =
6442 make_block_pattern( '-blaol', $rOpts->{$key} );
6447 sub make_block_pattern {
6449 # given a string of block-type keywords, return a regex to match them
6450 # The only tricky part is that labels are indicated with a single ':'
6451 # and the 'sub' token text may have additional text after it (name of
6456 # input string: "if else elsif unless while for foreach do : sub";
6457 # pattern: '^((if|else|elsif|unless|while|for|foreach|do|\w+:)$|sub)';
6461 # To distinguish between anonymous subs and named subs, use 'sub' to
6462 # indicate a named sub, and 'asub' to indicate an anonymous sub
6464 my ( $abbrev, $string ) = @_;
6465 my @list = split_words($string);
6469 if ( $i eq '*' ) { my $pattern = '^.*'; return $pattern }
6472 if ( $i eq 'sub' ) {
6474 elsif ( $i eq 'asub' ) {
6476 elsif ( $i eq ';' ) {
6479 elsif ( $i eq '{' ) {
6482 elsif ( $i eq ':' ) {
6483 push @words, '\w+:';
6485 elsif ( $i =~ /^\w/ ) {
6489 Warn("unrecognized block type $i after $abbrev, ignoring\n");
6492 my $pattern = '(' . join( '|', @words ) . ')$';
6493 my $sub_patterns = "";
6494 if ( $seen{'sub'} ) {
6495 $sub_patterns .= '|' . $SUB_PATTERN;
6497 if ( $seen{'asub'} ) {
6498 $sub_patterns .= '|' . $ASUB_PATTERN;
6500 if ($sub_patterns) {
6501 $pattern = '(' . $pattern . $sub_patterns . ')';
6503 $pattern = '^' . $pattern;
6507 sub make_static_side_comment_pattern {
6509 # create the pattern used to identify static side comments
6510 $static_side_comment_pattern = '^##';
6512 # allow the user to change it
6513 if ( $rOpts->{'static-side-comment-prefix'} ) {
6514 my $prefix = $rOpts->{'static-side-comment-prefix'};
6515 $prefix =~ s/^\s*//;
6516 my $pattern = '^' . $prefix;
6517 if ( bad_pattern($pattern) ) {
6519 "ERROR: the -sscp prefix '$prefix' causes the invalid regex '$pattern'\n"
6522 $static_side_comment_pattern = $pattern;
6527 sub make_closing_side_comment_prefix {
6529 # Be sure we have a valid closing side comment prefix
6530 my $csc_prefix = $rOpts->{'closing-side-comment-prefix'};
6531 my $csc_prefix_pattern;
6532 if ( !defined($csc_prefix) ) {
6533 $csc_prefix = '## end';
6534 $csc_prefix_pattern = '^##\s+end';
6537 my $test_csc_prefix = $csc_prefix;
6538 if ( $test_csc_prefix !~ /^#/ ) {
6539 $test_csc_prefix = '#' . $test_csc_prefix;
6542 # make a regex to recognize the prefix
6543 my $test_csc_prefix_pattern = $test_csc_prefix;
6545 # escape any special characters
6546 $test_csc_prefix_pattern =~ s/([^#\s\w])/\\$1/g;
6548 $test_csc_prefix_pattern = '^' . $test_csc_prefix_pattern;
6550 # allow exact number of intermediate spaces to vary
6551 $test_csc_prefix_pattern =~ s/\s+/\\s\+/g;
6553 # make sure we have a good pattern
6554 # if we fail this we probably have an error in escaping
6557 if ( bad_pattern($test_csc_prefix_pattern) ) {
6559 # shouldn't happen..must have screwed up escaping, above
6560 report_definite_bug();
6562 "Program Error: the -cscp prefix '$csc_prefix' caused the invalid regex '$csc_prefix_pattern'\n"
6565 # just warn and keep going with defaults
6566 Warn("Please consider using a simpler -cscp prefix\n");
6567 Warn("Using default -cscp instead; please check output\n");
6570 $csc_prefix = $test_csc_prefix;
6571 $csc_prefix_pattern = $test_csc_prefix_pattern;
6574 $rOpts->{'closing-side-comment-prefix'} = $csc_prefix;
6575 $closing_side_comment_prefix_pattern = $csc_prefix_pattern;
6579 sub dump_want_left_space {
6583 These values are the main control of whitespace to the left of a token type;
6584 They may be altered with the -wls parameter.
6585 For a list of token types, use perltidy --dump-token-types (-dtt)
6586 1 means the token wants a space to its left
6587 -1 means the token does not want a space to its left
6588 ------------------------------------------------------------------------
6590 foreach my $key ( sort keys %want_left_space ) {
6591 print $fh "$key\t$want_left_space{$key}\n";
6596 sub dump_want_right_space {
6600 These values are the main control of whitespace to the right of a token type;
6601 They may be altered with the -wrs parameter.
6602 For a list of token types, use perltidy --dump-token-types (-dtt)
6603 1 means the token wants a space to its right
6604 -1 means the token does not want a space to its right
6605 ------------------------------------------------------------------------
6607 foreach my $key ( sort keys %want_right_space ) {
6608 print $fh "$key\t$want_right_space{$key}\n";
6613 { # begin is_essential_whitespace
6615 my %is_sort_grep_map;
6621 @q = qw(sort grep map);
6622 @is_sort_grep_map{@q} = (1) x scalar(@q);
6624 @q = qw(for foreach);
6625 @is_for_foreach{@q} = (1) x scalar(@q);
6629 sub is_essential_whitespace {
6631 # Essential whitespace means whitespace which cannot be safely deleted
6632 # without risking the introduction of a syntax error.
6633 # We are given three tokens and their types:
6634 # ($tokenl, $typel) is the token to the left of the space in question
6635 # ($tokenr, $typer) is the token to the right of the space in question
6636 # ($tokenll, $typell) is previous nonblank token to the left of $tokenl
6638 # This is a slow routine but is not needed too often except when -mangle
6641 # Note: This routine should almost never need to be changed. It is
6642 # for avoiding syntax problems rather than for formatting.
6643 my ( $tokenll, $typell, $tokenl, $typel, $tokenr, $typer ) = @_;
6647 # never combine two bare words or numbers
6648 # examples: and ::ok(1)
6650 # for bla::bla:: abc
6651 # example is "%overload:: and" in files Dumpvalue.pm or colonbug.pl
6652 # $input eq"quit" to make $inputeq"quit"
6653 # my $size=-s::SINK if $file; <==OK but we won't do it
6654 # don't join something like: for bla::bla:: abc
6655 # example is "%overload:: and" in files Dumpvalue.pm or colonbug.pl
6656 ( ( $tokenl =~ /([\'\w]|\:\:)$/ && $typel ne 'CORE::' )
6657 && ( $tokenr =~ /^([\'\w]|\:\:)/ ) )
6659 # do not combine a number with a concatenation dot
6660 # example: pom.caputo:
6661 # $vt100_compatible ? "\e[0;0H" : ('-' x 78 . "\n");
6662 || ( ( $typel eq 'n' ) && ( $tokenr eq '.' ) )
6663 || ( ( $typer eq 'n' ) && ( $tokenl eq '.' ) )
6665 # do not join a minus with a bare word, because you might form
6666 # a file test operator. Example from Complex.pm:
6667 # if (CORE::abs($z - i) < $eps); "z-i" would be taken as a file test.
6668 || ( ( $tokenl eq '-' ) && ( $tokenr =~ /^[_A-Za-z]$/ ) )
6670 # do not join a bare word with a minus, like between 'Send' and
6671 # '-recipients' here <<snippets/space3.in>>
6672 # my $msg = new Fax::Send
6673 # -recipients => $to,
6675 # This is the safest thing to do. If we had the token to the right of
6676 # the minus we could do a better check.
6677 || ( ( $tokenr eq '-' ) && ( $typel eq 'w' ) )
6679 # and something like this could become ambiguous without space
6681 # use constant III=>1;
6685 || ( ( $tokenl eq '-' )
6686 && ( $typer =~ /^[wC]$/ && $tokenr =~ /^[_A-Za-z]/ ) )
6688 # '= -' should not become =- or you will get a warning
6690 # || ($tokenr eq '-')
6692 # keep a space between a quote and a bareword to prevent the
6693 # bareword from becoming a quote modifier.
6694 || ( ( $typel eq 'Q' ) && ( $tokenr =~ /^[a-zA-Z_]/ ) )
6696 # keep a space between a token ending in '$' and any word;
6697 # this caused trouble: "die @$ if $@"
6698 || ( ( $typel eq 'i' && $tokenl =~ /\$$/ )
6699 && ( $tokenr =~ /^[a-zA-Z_]/ ) )
6701 # perl is very fussy about spaces before <<
6702 || ( $tokenr =~ /^\<\</ )
6704 # avoid combining tokens to create new meanings. Example:
6705 # $a+ +$b must not become $a++$b
6706 || ( $is_digraph{ $tokenl . $tokenr } )
6707 || ( $is_trigraph{ $tokenl . $tokenr } )
6709 # another example: do not combine these two &'s:
6710 # allow_options & &OPT_EXECCGI
6711 || ( $is_digraph{ $tokenl . substr( $tokenr, 0, 1 ) } )
6713 # don't combine $$ or $# with any alphanumeric
6714 # (testfile mangle.t with --mangle)
6715 || ( ( $tokenl =~ /^\$[\$\#]$/ ) && ( $tokenr =~ /^\w/ ) )
6717 # retain any space after possible filehandle
6718 # (testfiles prnterr1.t with --extrude and mangle.t with --mangle)
6719 || ( $typel eq 'Z' )
6721 # Perl is sensitive to whitespace after the + here:
6722 # $b = xvals $a + 0.1 * yvals $a;
6723 || ( $typell eq 'Z' && $typel =~ /^[\/\?\+\-\*]$/ )
6725 # keep paren separate in 'use Foo::Bar ()'
6729 && $tokenll eq 'use' )
6731 # keep any space between filehandle and paren:
6732 # file mangle.t with --mangle:
6733 || ( $typel eq 'Y' && $tokenr eq '(' )
6735 # retain any space after here doc operator ( hereerr.t)
6736 || ( $typel eq 'h' )
6738 # be careful with a space around ++ and --, to avoid ambiguity as to
6739 # which token it applies
6740 || ( ( $typer =~ /^(pp|mm)$/ ) && ( $tokenl !~ /^[\;\{\(\[]/ ) )
6741 || ( ( $typel =~ /^(\+\+|\-\-)$/ ) && ( $tokenr !~ /^[\;\}\)\]]/ ) )
6743 # need space after foreach my; for example, this will fail in
6744 # older versions of Perl:
6745 # foreach my$ft(@filetypes)...
6750 && $is_for_foreach{$tokenll}
6754 # must have space between grep and left paren; "grep(" will fail
6755 || ( $tokenr eq '(' && $is_sort_grep_map{$tokenl} )
6757 # don't stick numbers next to left parens, as in:
6758 #use Mail::Internet 1.28 (); (see Entity.pm, Head.pm, Test.pm)
6759 || ( ( $typel eq 'n' ) && ( $tokenr eq '(' ) )
6761 # We must be sure that a space between a ? and a quoted string
6762 # remains if the space before the ? remains. [Loca.pm, lockarea]
6764 # $b=join $comma ? ',' : ':', @_; # ok
6765 # $b=join $comma?',' : ':', @_; # ok!
6766 # $b=join $comma ?',' : ':', @_; # error!
6767 # Not really required:
6768 ## || ( ( $typel eq '?' ) && ( $typer eq 'Q' ) )
6770 # do not remove space between an '&' and a bare word because
6771 # it may turn into a function evaluation, like here
6772 # between '&' and 'O_ACCMODE', producing a syntax error [File.pm]
6773 # $opts{rdonly} = (($opts{mode} & O_ACCMODE) == O_RDONLY);
6774 || ( ( $typel eq '&' ) && ( $tokenr =~ /^[a-zA-Z_]/ ) )
6776 # space stacked labels (TODO: check if really necessary)
6777 || ( $typel eq 'J' && $typer eq 'J' )
6779 ; # the value of this long logic sequence is the result we want
6780 ##if ($typel eq 'j') {print STDERR "typel=$typel typer=$typer result='$result'\n"}
6786 my %secret_operators;
6787 my %is_leading_secret_token;
6791 # token lists for perl secret operators as compiled by Philippe Bruhat
6792 # at: https://metacpan.org/module/perlsecret
6793 %secret_operators = (
6794 'Goatse' => [qw#= ( ) =#], #=( )=
6795 'Venus1' => [qw#0 +#], # 0+
6796 'Venus2' => [qw#+ 0#], # +0
6797 'Enterprise' => [qw#) x ! !#], # ()x!!
6798 'Kite1' => [qw#~ ~ <>#], # ~~<>
6799 'Kite2' => [qw#~~ <>#], # ~~<>
6800 'Winking Fat Comma' => [ ( ',', '=>' ) ], # ,=>
6801 'Bang bang ' => [qw#! !#], # !!
6804 # The following operators and constants are not included because they
6805 # are normally kept tight by perltidy:
6809 # Make a lookup table indexed by the first token of each operator:
6810 # first token => [list, list, ...]
6811 foreach my $value ( values(%secret_operators) ) {
6812 my $tok = $value->[0];
6813 push @{ $is_leading_secret_token{$tok} }, $value;
6817 sub new_secret_operator_whitespace {
6819 my ( $rlong_array, $rwhitespace_flags ) = @_;
6821 # Loop over all tokens in this line
6822 my ( $token, $type );
6823 my $jmax = @{$rlong_array} - 1;
6824 foreach my $j ( 0 .. $jmax ) {
6826 $token = $rlong_array->[$j]->[_TOKEN_];
6827 $type = $rlong_array->[$j]->[_TYPE_];
6829 # Skip unless this token might start a secret operator
6830 next if ( $type eq 'b' );
6831 next unless ( $is_leading_secret_token{$token} );
6833 # Loop over all secret operators with this leading token
6834 foreach my $rpattern ( @{ $is_leading_secret_token{$token} } ) {
6836 foreach my $tok ( @{$rpattern} ) {
6841 && $rlong_array->[$jend]->[_TYPE_] eq 'b' );
6843 || $tok ne $rlong_array->[$jend]->[_TOKEN_] )
6852 # set flags to prevent spaces within this operator
6853 foreach my $jj ( $j + 1 .. $jend ) {
6854 $rwhitespace_flags->[$jj] = WS_NO;
6859 } ## End Loop over all operators
6860 } ## End loop over all tokens
6865 { # begin print_line_of_tokens
6867 my $rinput_token_array; # Current working array
6868 my $rinput_K_array; # Future working array
6871 my $guessed_indentation_level;
6873 # This should be a return variable from extract_token
6874 # These local token variables are stored by store_token_to_go:
6878 my $container_environment;
6880 my $in_continued_quote;
6882 my $no_internal_newlines;
6888 # routine to pull the jth token from the line of tokens
6890 my ( $self, $j ) = @_;
6892 my $rLL = $self->{rLL};
6893 $Ktoken_vars = $rinput_K_array->[$j];
6894 if ( !defined($Ktoken_vars) ) {
6896 # Shouldn't happen: an error here would be due to a recent program change
6897 Fault("undefined index K for j=$j");
6899 my $rtoken_vars = $rLL->[$Ktoken_vars];
6901 if ( $rtoken_vars->[_TOKEN_] ne $rLL->[$Ktoken_vars]->[_TOKEN_] ) {
6903 # Shouldn't happen: an error here would be due to a recent program change
6905 j=$j, K=$Ktoken_vars, '$rtoken_vars->[_TOKEN_]' ne '$rLL->[$Ktoken_vars]'
6909 #########################################################
6910 # these are now redundant and can eventually be eliminated
6912 $token = $rtoken_vars->[_TOKEN_];
6913 $type = $rtoken_vars->[_TYPE_];
6914 $block_type = $rtoken_vars->[_BLOCK_TYPE_];
6915 $container_type = $rtoken_vars->[_CONTAINER_TYPE_];
6916 $container_environment = $rtoken_vars->[_CONTAINER_ENVIRONMENT_];
6917 $type_sequence = $rtoken_vars->[_TYPE_SEQUENCE_];
6918 $level = $rtoken_vars->[_LEVEL_];
6919 $slevel = $rtoken_vars->[_SLEVEL_];
6920 $ci_level = $rtoken_vars->[_CI_LEVEL_];
6921 #########################################################
6929 sub save_current_token {
6932 $block_type, $ci_level,
6933 $container_environment, $container_type,
6934 $in_continued_quote, $level,
6935 $no_internal_newlines, $slevel,
6937 $type_sequence, $Ktoken_vars,
6942 sub restore_current_token {
6944 $block_type, $ci_level,
6945 $container_environment, $container_type,
6946 $in_continued_quote, $level,
6947 $no_internal_newlines, $slevel,
6949 $type_sequence, $Ktoken_vars,
6957 # Returns the length of a token, given:
6958 # $token=text of the token
6960 # $not_first_token = should be TRUE if this is not the first token of
6961 # the line. It might the index of this token in an array. It is
6962 # used to test for a side comment vs a block comment.
6963 # Note: Eventually this should be the only routine determining the
6964 # length of a token in this package.
6965 my ( $token, $type, $not_first_token ) = @_;
6966 my $token_length = length($token);
6968 # We mark lengths of side comments as just 1 if we are
6969 # ignoring their lengths when setting line breaks.
6971 if ( $rOpts_ignore_side_comment_lengths
6974 return $token_length;
6979 # return length of ith token in @{$rtokens}
6981 return token_length( $rinput_token_array->[$i]->[_TOKEN_],
6982 $rinput_token_array->[$i]->[_TYPE_], $i );
6985 # Routine to place the current token into the output stream.
6986 # Called once per output token.
6987 sub store_token_to_go {
6989 my ( $self, $side_comment_follows ) = @_;
6991 my $flag = $side_comment_follows ? 1 : $no_internal_newlines;
6994 $K_to_go[$max_index_to_go] = $Ktoken_vars;
6995 $tokens_to_go[$max_index_to_go] = $token;
6996 $types_to_go[$max_index_to_go] = $type;
6997 $nobreak_to_go[$max_index_to_go] = $flag;
6998 $old_breakpoint_to_go[$max_index_to_go] = 0;
6999 $forced_breakpoint_to_go[$max_index_to_go] = 0;
7000 $block_type_to_go[$max_index_to_go] = $block_type;
7001 $type_sequence_to_go[$max_index_to_go] = $type_sequence;
7002 $container_environment_to_go[$max_index_to_go] = $container_environment;
7003 $ci_levels_to_go[$max_index_to_go] = $ci_level;
7004 $mate_index_to_go[$max_index_to_go] = -1;
7005 $bond_strength_to_go[$max_index_to_go] = 0;
7007 # Note: negative levels are currently retained as a diagnostic so that
7008 # the 'final indentation level' is correctly reported for bad scripts.
7009 # But this means that every use of $level as an index must be checked.
7010 # If this becomes too much of a problem, we might give up and just clip
7012 ## $levels_to_go[$max_index_to_go] = ( $level > 0 ) ? $level : 0;
7013 $levels_to_go[$max_index_to_go] = $level;
7014 $nesting_depth_to_go[$max_index_to_go] = ( $slevel >= 0 ) ? $slevel : 0;
7016 # link the non-blank tokens
7017 my $iprev = $max_index_to_go - 1;
7018 $iprev-- if ( $iprev >= 0 && $types_to_go[$iprev] eq 'b' );
7019 $iprev_to_go[$max_index_to_go] = $iprev;
7020 $inext_to_go[$iprev] = $max_index_to_go
7021 if ( $iprev >= 0 && $type ne 'b' );
7022 $inext_to_go[$max_index_to_go] = $max_index_to_go + 1;
7024 $token_lengths_to_go[$max_index_to_go] =
7025 token_length( $token, $type, $max_index_to_go );
7027 # We keep a running sum of token lengths from the start of this batch:
7028 # summed_lengths_to_go[$i] = total length to just before token $i
7029 # summed_lengths_to_go[$i+1] = total length to just after token $i
7030 $summed_lengths_to_go[ $max_index_to_go + 1 ] =
7031 $summed_lengths_to_go[$max_index_to_go] +
7032 $token_lengths_to_go[$max_index_to_go];
7034 # Define the indentation that this token would have if it started
7035 # a new line. We have to do this now because we need to know this
7036 # when considering one-line blocks.
7037 set_leading_whitespace( $level, $ci_level, $in_continued_quote );
7039 # remember previous nonblank tokens seen
7040 if ( $type ne 'b' ) {
7041 $last_last_nonblank_index_to_go = $last_nonblank_index_to_go;
7042 $last_last_nonblank_type_to_go = $last_nonblank_type_to_go;
7043 $last_last_nonblank_token_to_go = $last_nonblank_token_to_go;
7044 $last_nonblank_index_to_go = $max_index_to_go;
7045 $last_nonblank_type_to_go = $type;
7046 $last_nonblank_token_to_go = $token;
7047 if ( $type eq ',' ) {
7048 $comma_count_in_batch++;
7052 FORMATTER_DEBUG_FLAG_STORE && do {
7053 my ( $a, $b, $c ) = caller();
7055 "STORE: from $a $c: storing token $token type $type lev=$level slev=$slevel at $max_index_to_go\n";
7061 my ($rold_token_hash) = @_;
7062 my %new_token_hash =
7063 map { ( $_, $rold_token_hash->{$_} ) } keys %{$rold_token_hash};
7064 return \%new_token_hash;
7069 my @new = map { $_ } @{$rold};
7073 sub copy_token_as_type {
7074 my ( $rold_token, $type, $token ) = @_;
7075 if ( $type eq 'b' ) {
7076 $token = " " unless defined($token);
7078 elsif ( $type eq 'q' ) {
7079 $token = '' unless defined($token);
7081 elsif ( $type eq '->' ) {
7082 $token = '->' unless defined($token);
7084 elsif ( $type eq ';' ) {
7085 $token = ';' unless defined($token);
7089 "Programming error: copy_token_as has type $type but should be 'b' or 'q'"
7092 my $rnew_token = copy_array($rold_token);
7093 $rnew_token->[_TYPE_] = $type;
7094 $rnew_token->[_TOKEN_] = $token;
7095 $rnew_token->[_BLOCK_TYPE_] = '';
7096 $rnew_token->[_CONTAINER_TYPE_] = '';
7097 $rnew_token->[_CONTAINER_ENVIRONMENT_] = '';
7098 $rnew_token->[_TYPE_SEQUENCE_] = '';
7102 sub boolean_equals {
7103 my ( $val1, $val2 ) = @_;
7104 return ( $val1 && $val2 || !$val1 && !$val2 );
7107 sub print_line_of_tokens {
7109 my ( $self, $line_of_tokens ) = @_;
7111 # This routine is called once per input line to process all of
7112 # the tokens on that line. This is the first stage of
7115 # Full-line comments and blank lines may be processed immediately.
7117 # For normal lines of code, the tokens are stored one-by-one,
7118 # via calls to 'sub store_token_to_go', until a known line break
7119 # point is reached. Then, the batch of collected tokens is
7120 # passed along to 'sub output_line_to_go' for further
7121 # processing. This routine decides if there should be
7122 # whitespace between each pair of non-white tokens, so later
7123 # routines only need to decide on any additional line breaks.
7124 # Any whitespace is initially a single space character. Later,
7125 # the vertical aligner may expand that to be multiple space
7126 # characters if necessary for alignment.
7128 $input_line_number = $line_of_tokens->{_line_number};
7129 my $input_line = $line_of_tokens->{_line_text};
7130 my $CODE_type = $line_of_tokens->{_code_type};
7132 my $rK_range = $line_of_tokens->{_rK_range};
7133 my ( $K_first, $K_last ) = @{$rK_range};
7135 my $rLL = $self->{rLL};
7136 my $rbreak_container = $self->{rbreak_container};
7137 my $rshort_nested = $self->{rshort_nested};
7139 if ( !defined($K_first) ) {
7141 # Empty line: This can happen if tokens are deleted, for example
7142 # with the -mangle parameter
7146 $no_internal_newlines = 1 - $rOpts_add_newlines;
7148 ( $K_first == $K_last && $rLL->[$K_first]->[_TYPE_] eq '#' );
7149 my $is_static_block_comment_without_leading_space =
7150 $CODE_type eq 'SBCX';
7151 $is_static_block_comment =
7152 $CODE_type eq 'SBC' || $is_static_block_comment_without_leading_space;
7153 my $is_hanging_side_comment = $CODE_type eq 'HSC';
7154 my $is_VERSION_statement = $CODE_type eq 'VER';
7155 if ($is_VERSION_statement) {
7156 $saw_VERSION_in_this_file = 1;
7157 $no_internal_newlines = 1;
7160 # Add interline blank if any
7161 my $last_old_nonblank_type = "b";
7162 my $first_new_nonblank_type = "b";
7163 my $first_new_nonblank_token = " ";
7164 if ( $max_index_to_go >= 0 ) {
7165 $last_old_nonblank_type = $types_to_go[$max_index_to_go];
7166 $first_new_nonblank_type = $rLL->[$K_first]->[_TYPE_];
7167 $first_new_nonblank_token = $rLL->[$K_first]->[_TOKEN_];
7169 && $types_to_go[$max_index_to_go] ne 'b'
7171 && $rLL->[ $K_first - 1 ]->[_TYPE_] eq 'b' )
7177 # Copy the tokens into local arrays
7178 $rinput_token_array = [];
7179 $rinput_K_array = [];
7180 $rinput_K_array = [ ( $K_first .. $K_last ) ];
7181 $rinput_token_array = [ map { $rLL->[$_] } @{$rinput_K_array} ];
7182 my $jmax = @{$rinput_K_array} - 1;
7184 $in_continued_quote = $starting_in_quote =
7185 $line_of_tokens->{_starting_in_quote};
7186 $in_quote = $line_of_tokens->{_ending_in_quote};
7187 $ending_in_quote = $in_quote;
7188 $guessed_indentation_level =
7189 $line_of_tokens->{_guessed_indentation_level};
7192 my $next_nonblank_token;
7193 my $next_nonblank_token_type;
7196 $container_type = "";
7197 $container_environment = "";
7198 $type_sequence = "";
7200 ######################################
7201 # Handle a block (full-line) comment..
7202 ######################################
7205 if ( $rOpts->{'tee-block-comments'} ) {
7206 $file_writer_object->tee_on();
7209 destroy_one_line_block();
7210 $self->output_line_to_go();
7212 # output a blank line before block comments
7214 # unless we follow a blank or comment line
7215 $last_line_leading_type !~ /^[#b]$/
7218 && $rOpts->{'blanks-before-comments'}
7220 # if this is NOT an empty comment line
7221 && $rinput_token_array->[0]->[_TOKEN_] ne '#'
7223 # not after a short line ending in an opening token
7224 # because we already have space above this comment.
7225 # Note that the first comment in this if block, after
7226 # the 'if (', does not get a blank line because of this.
7227 && !$last_output_short_opening_token
7229 # never before static block comments
7230 && !$is_static_block_comment
7233 $self->flush(); # switching to new output stream
7234 $file_writer_object->write_blank_code_line();
7235 $last_line_leading_type = 'b';
7238 # TRIM COMMENTS -- This could be turned off as a option
7239 $rinput_token_array->[0]->[_TOKEN_] =~ s/\s*$//; # trim right end
7242 $rOpts->{'indent-block-comments'}
7243 && ( !$rOpts->{'indent-spaced-block-comments'}
7244 || $input_line =~ /^\s+/ )
7245 && !$is_static_block_comment_without_leading_space
7248 $self->extract_token(0);
7249 $self->store_token_to_go();
7250 $self->output_line_to_go();
7253 $self->flush(); # switching to new output stream
7254 $file_writer_object->write_code_line(
7255 $rinput_token_array->[0]->[_TOKEN_] . "\n" );
7256 $last_line_leading_type = '#';
7258 if ( $rOpts->{'tee-block-comments'} ) {
7259 $file_writer_object->tee_off();
7264 # compare input/output indentation except for continuation lines
7265 # (because they have an unknown amount of initial blank space)
7266 # and lines which are quotes (because they may have been outdented)
7267 my $structural_indentation_level = $rinput_token_array->[0]->[_LEVEL_];
7268 compare_indentation_levels( $guessed_indentation_level,
7269 $structural_indentation_level )
7270 unless ( $is_hanging_side_comment
7271 || $rinput_token_array->[0]->[_CI_LEVEL_] > 0
7272 || $guessed_indentation_level == 0
7273 && $rinput_token_array->[0]->[_TYPE_] eq 'Q' );
7275 ##########################
7276 # Handle indentation-only
7277 ##########################
7279 # NOTE: In previous versions we sent all qw lines out immediately here.
7280 # No longer doing this: also write a line which is entirely a 'qw' list
7281 # to allow stacking of opening and closing tokens. Note that interior
7282 # qw lines will still go out at the end of this routine.
7283 if ( $CODE_type eq 'IO' ) {
7285 my $line = $input_line;
7287 # delete side comments if requested with -io, but
7288 # we will not allow deleting of closing side comments with -io
7289 # because the coding would be more complex
7290 if ( $rOpts->{'delete-side-comments'}
7291 && $rinput_token_array->[$jmax]->[_TYPE_] eq '#' )
7295 foreach my $jj ( 0 .. $jmax - 1 ) {
7296 $line .= $rinput_token_array->[$jj]->[_TOKEN_];
7300 # Fix for rt #125506 Unexpected string formating
7301 # in which leading space of a terminal quote was removed
7303 $line =~ s/^\s+// unless ($in_continued_quote);
7305 $self->extract_token(0);
7309 $container_type = "";
7310 $container_environment = "";
7311 $type_sequence = "";
7312 $self->store_token_to_go();
7313 $self->output_line_to_go();
7317 ############################
7318 # Handle all other lines ...
7319 ############################
7321 #######################################################
7322 # FIXME: this should become unnecessary
7323 # making $j+2 valid simplifies coding
7325 copy_token_as_type( $rinput_token_array->[$jmax], 'b' );
7326 push @{$rinput_token_array}, $rnew_blank;
7327 push @{$rinput_token_array}, $rnew_blank;
7328 #######################################################
7330 # If we just saw the end of an elsif block, write nag message
7331 # if we do not see another elseif or an else.
7332 if ($looking_for_else) {
7334 unless ( $rinput_token_array->[0]->[_TOKEN_] =~ /^(elsif|else)$/ ) {
7335 write_logfile_entry("(No else block)\n");
7337 $looking_for_else = 0;
7340 # This is a good place to kill incomplete one-line blocks
7343 ( $semicolons_before_block_self_destruct == 0 )
7344 && ( $max_index_to_go >= 0 )
7345 && ( $last_old_nonblank_type eq ';' )
7346 && ( $first_new_nonblank_token ne '}' )
7349 # Patch for RT #98902. Honor request to break at old commas.
7350 || ( $rOpts_break_at_old_comma_breakpoints
7351 && $max_index_to_go >= 0
7352 && $last_old_nonblank_type eq ',' )
7355 $forced_breakpoint_to_go[$max_index_to_go] = 1
7356 if ($rOpts_break_at_old_comma_breakpoints);
7357 destroy_one_line_block();
7358 $self->output_line_to_go();
7361 # loop to process the tokens one-by-one
7365 # We do not want a leading blank if the previous batch just got output
7367 if ( $max_index_to_go < 0 && $rLL->[$K_first]->[_TYPE_] eq 'b' ) {
7371 foreach my $j ( $jmin .. $jmax ) {
7373 # pull out the local values for this token
7374 $self->extract_token($j);
7376 if ( $type eq '#' ) {
7379 $rOpts->{'delete-side-comments'}
7381 # delete closing side comments if necessary
7382 || ( $rOpts->{'delete-closing-side-comments'}
7383 && $token =~ /$closing_side_comment_prefix_pattern/o
7384 && $last_nonblank_block_type =~
7385 /$closing_side_comment_list_pattern/o )
7388 if ( $types_to_go[$max_index_to_go] eq 'b' ) {
7389 unstore_token_to_go();
7395 # If we are continuing after seeing a right curly brace, flush
7396 # buffer unless we see what we are looking for, as in
7398 if ( $rbrace_follower && $type ne 'b' ) {
7400 unless ( $rbrace_follower->{$token} ) {
7401 $self->output_line_to_go();
7403 $rbrace_follower = undef;
7407 ( $rinput_token_array->[ $j + 1 ]->[_TYPE_] eq 'b' )
7410 $next_nonblank_token = $rinput_token_array->[$j_next]->[_TOKEN_];
7411 $next_nonblank_token_type =
7412 $rinput_token_array->[$j_next]->[_TYPE_];
7414 # Do not allow breaks which would promote a side comment to a
7415 # block comment. In order to allow a break before an opening
7416 # or closing BLOCK, followed by a side comment, those sections
7417 # of code will handle this flag separately.
7418 my $side_comment_follows = ( $next_nonblank_token_type eq '#' );
7419 my $is_opening_BLOCK =
7423 && !$rshort_nested->{$type_sequence}
7424 && $block_type ne 't' );
7425 my $is_closing_BLOCK =
7429 && !$rshort_nested->{$type_sequence}
7430 && $block_type ne 't' );
7432 if ( $side_comment_follows
7433 && !$is_opening_BLOCK
7434 && !$is_closing_BLOCK )
7436 $no_internal_newlines = 1;
7439 # We're only going to handle breaking for code BLOCKS at this
7440 # (top) level. Other indentation breaks will be handled by
7441 # sub scan_list, which is better suited to dealing with them.
7442 if ($is_opening_BLOCK) {
7444 # Tentatively output this token. This is required before
7445 # calling starting_one_line_block. We may have to unstore
7446 # it, though, if we have to break before it.
7447 $self->store_token_to_go($side_comment_follows);
7449 # Look ahead to see if we might form a one-line block..
7451 $self->starting_one_line_block( $j, $jmax, $level, $slevel,
7452 $ci_level, $rinput_token_array );
7453 clear_breakpoint_undo_stack();
7455 # to simplify the logic below, set a flag to indicate if
7456 # this opening brace is far from the keyword which introduces it
7457 my $keyword_on_same_line = 1;
7458 if ( ( $max_index_to_go >= 0 )
7459 && ( $last_nonblank_type eq ')' )
7460 && ( ( $slevel < $nesting_depth_to_go[0] ) || $too_long ) )
7462 $keyword_on_same_line = 0;
7465 # decide if user requested break before '{'
7468 # use -bl flag if not a sub block of any type
7469 $block_type !~ /^sub\b/
7470 ? $rOpts->{'opening-brace-on-new-line'}
7472 # use -sbl flag for a named sub block
7473 : $block_type !~ /$ASUB_PATTERN/
7474 ? $rOpts->{'opening-sub-brace-on-new-line'}
7476 # use -asbl flag for an anonymous sub block
7477 : $rOpts->{'opening-anonymous-sub-brace-on-new-line'};
7479 # Do not break if this token is welded to the left
7480 if ( weld_len_left( $type_sequence, $token ) ) {
7484 # Break before an opening '{' ...
7490 # and we were unable to start looking for a block,
7491 && $index_start_one_line_block == UNDEFINED_INDEX
7493 # or if it will not be on same line as its keyword, so that
7494 # it will be outdented (eval.t, overload.t), and the user
7495 # has not insisted on keeping it on the right
7496 || ( !$keyword_on_same_line
7497 && !$rOpts->{'opening-brace-always-on-right'} )
7502 # but only if allowed
7503 unless ($no_internal_newlines) {
7505 # since we already stored this token, we must unstore it
7506 $self->unstore_token_to_go();
7508 # then output the line
7509 $self->output_line_to_go();
7511 # and now store this token at the start of a new line
7512 $self->store_token_to_go($side_comment_follows);
7516 # Now update for side comment
7517 if ($side_comment_follows) { $no_internal_newlines = 1 }
7519 # now output this line
7520 unless ($no_internal_newlines) {
7521 $self->output_line_to_go();
7525 elsif ($is_closing_BLOCK) {
7527 # If there is a pending one-line block ..
7528 if ( $index_start_one_line_block != UNDEFINED_INDEX ) {
7530 # we have to terminate it if..
7533 # it is too long (final length may be different from
7534 # initial estimate). note: must allow 1 space for this
7536 excess_line_length( $index_start_one_line_block,
7537 $max_index_to_go ) >= 0
7539 # or if it has too many semicolons
7540 || ( $semicolons_before_block_self_destruct == 0
7541 && $last_nonblank_type ne ';' )
7544 destroy_one_line_block();
7548 # put a break before this closing curly brace if appropriate
7549 unless ( $no_internal_newlines
7550 || $index_start_one_line_block != UNDEFINED_INDEX )
7553 # write out everything before this closing curly brace
7554 $self->output_line_to_go();
7557 # Now update for side comment
7558 if ($side_comment_follows) { $no_internal_newlines = 1 }
7560 # store the closing curly brace
7561 $self->store_token_to_go();
7563 # ok, we just stored a closing curly brace. Often, but
7564 # not always, we want to end the line immediately.
7565 # So now we have to check for special cases.
7567 # if this '}' successfully ends a one-line block..
7568 my $is_one_line_block = 0;
7570 if ( $index_start_one_line_block != UNDEFINED_INDEX ) {
7572 # Remember the type of token just before the
7573 # opening brace. It would be more general to use
7574 # a stack, but this will work for one-line blocks.
7575 $is_one_line_block =
7576 $types_to_go[$index_start_one_line_block];
7578 # we have to actually make it by removing tentative
7579 # breaks that were set within it
7580 undo_forced_breakpoint_stack(0);
7581 set_nobreaks( $index_start_one_line_block,
7582 $max_index_to_go - 1 );
7584 # then re-initialize for the next one-line block
7585 destroy_one_line_block();
7587 # then decide if we want to break after the '}' ..
7588 # We will keep going to allow certain brace followers as in:
7589 # do { $ifclosed = 1; last } unless $losing;
7591 # But make a line break if the curly ends a
7592 # significant block:
7595 $is_block_without_semicolon{$block_type}
7597 # Follow users break point for
7598 # one line block types U & G, such as a 'try' block
7599 || $is_one_line_block =~ /^[UG]$/ && $j == $jmax
7602 # if needless semicolon follows we handle it later
7603 && $next_nonblank_token ne ';'
7606 $self->output_line_to_go()
7607 unless ($no_internal_newlines);
7611 # set string indicating what we need to look for brace follower
7613 if ( $block_type eq 'do' ) {
7614 $rbrace_follower = \%is_do_follower;
7616 elsif ( $block_type =~ /^(if|elsif|unless)$/ ) {
7617 $rbrace_follower = \%is_if_brace_follower;
7619 elsif ( $block_type eq 'else' ) {
7620 $rbrace_follower = \%is_else_brace_follower;
7623 # added eval for borris.t
7624 elsif ($is_sort_map_grep_eval{$block_type}
7625 || $is_one_line_block eq 'G' )
7627 $rbrace_follower = undef;
7632 elsif ( $block_type =~ /$ASUB_PATTERN/ ) {
7634 if ($is_one_line_block) {
7635 $rbrace_follower = \%is_anon_sub_1_brace_follower;
7638 $rbrace_follower = \%is_anon_sub_brace_follower;
7642 # None of the above: specify what can follow a closing
7643 # brace of a block which is not an
7644 # if/elsif/else/do/sort/map/grep/eval
7646 # 'Toolbar.pm', 'Menubar.pm', bless.t, '3rules.pl', 'break1.t
7648 $rbrace_follower = \%is_other_brace_follower;
7651 # See if an elsif block is followed by another elsif or else;
7653 if ( $block_type eq 'elsif' ) {
7655 if ( $next_nonblank_token_type eq 'b' ) { # end of line?
7656 $looking_for_else = 1; # ok, check on next line
7660 unless ( $next_nonblank_token =~ /^(elsif|else)$/ ) {
7661 write_logfile_entry("No else block :(\n");
7666 # keep going after certain block types (map,sort,grep,eval)
7667 # added eval for borris.t
7673 # if no more tokens, postpone decision until re-entring
7674 elsif ( ( $next_nonblank_token_type eq 'b' )
7675 && $rOpts_add_newlines )
7677 unless ($rbrace_follower) {
7678 $self->output_line_to_go()
7679 unless ($no_internal_newlines);
7683 elsif ($rbrace_follower) {
7685 unless ( $rbrace_follower->{$next_nonblank_token} ) {
7686 $self->output_line_to_go()
7687 unless ($no_internal_newlines);
7689 $rbrace_follower = undef;
7693 $self->output_line_to_go() unless ($no_internal_newlines);
7696 } # end treatment of closing block token
7699 elsif ( $type eq ';' ) {
7701 # kill one-line blocks with too many semicolons
7702 $semicolons_before_block_self_destruct--;
7704 ( $semicolons_before_block_self_destruct < 0 )
7705 || ( $semicolons_before_block_self_destruct == 0
7706 && $next_nonblank_token_type !~ /^[b\}]$/ )
7709 destroy_one_line_block();
7712 $self->store_token_to_go();
7714 $self->output_line_to_go()
7715 unless ( $no_internal_newlines
7716 || ( $rOpts_keep_interior_semicolons && $j < $jmax )
7717 || ( $next_nonblank_token eq '}' ) );
7721 # handle here_doc target string
7722 elsif ( $type eq 'h' ) {
7724 # no newlines after seeing here-target
7725 $no_internal_newlines = 1;
7726 destroy_one_line_block();
7727 $self->store_token_to_go();
7730 # handle all other token types
7733 $self->store_token_to_go();
7736 # remember two previous nonblank OUTPUT tokens
7737 if ( $type ne '#' && $type ne 'b' ) {
7738 $last_last_nonblank_token = $last_nonblank_token;
7739 $last_last_nonblank_type = $last_nonblank_type;
7740 $last_nonblank_token = $token;
7741 $last_nonblank_type = $type;
7742 $last_nonblank_block_type = $block_type;
7745 # unset the continued-quote flag since it only applies to the
7746 # first token, and we want to resume normal formatting if
7747 # there are additional tokens on the line
7748 $in_continued_quote = 0;
7750 } # end of loop over all tokens in this 'line_of_tokens'
7752 # we have to flush ..
7755 # if there is a side comment
7756 ( ( $type eq '#' ) && !$rOpts->{'delete-side-comments'} )
7758 # if this line ends in a quote
7759 # NOTE: This is critically important for insuring that quoted lines
7760 # do not get processed by things like -sot and -sct
7763 # if this is a VERSION statement
7764 || $is_VERSION_statement
7766 # to keep a label at the end of a line
7769 # if we are instructed to keep all old line breaks
7770 || !$rOpts->{'delete-old-newlines'}
7773 destroy_one_line_block();
7774 $self->output_line_to_go();
7777 # mark old line breakpoints in current output stream
7778 if ( $max_index_to_go >= 0 && !$rOpts_ignore_old_breakpoints ) {
7779 my $jobp = $max_index_to_go;
7780 if ( $types_to_go[$max_index_to_go] eq 'b' && $max_index_to_go > 0 )
7784 $old_breakpoint_to_go[$jobp] = 1;
7787 } ## end sub print_line_of_tokens
7788 } ## end block print_line_of_tokens
7790 sub consecutive_nonblank_lines {
7791 return $file_writer_object->get_consecutive_nonblank_lines() +
7792 $vertical_aligner_object->get_cached_line_count();
7795 # sub output_line_to_go sends one logical line of tokens on down the
7796 # pipeline to the VerticalAligner package, breaking the line into continuation
7797 # lines as necessary. The line of tokens is ready to go in the "to_go"
7799 sub output_line_to_go {
7802 my $rLL = $self->{rLL};
7804 # debug stuff; this routine can be called from many points
7805 FORMATTER_DEBUG_FLAG_OUTPUT && do {
7806 my ( $a, $b, $c ) = caller;
7808 "OUTPUT: output_line_to_go called: $a $c $last_nonblank_type $last_nonblank_token, one_line=$index_start_one_line_block, tokens to write=$max_index_to_go\n"
7810 my $output_str = join "", @tokens_to_go[ 0 .. $max_index_to_go ];
7811 write_diagnostics("$output_str\n");
7814 # Do not end line in a weld
7815 return if ( weld_len_right_to_go($max_index_to_go) );
7817 # just set a tentative breakpoint if we might be in a one-line block
7818 if ( $index_start_one_line_block != UNDEFINED_INDEX ) {
7819 set_forced_breakpoint($max_index_to_go);
7823 my $comma_arrow_count_contained = match_opening_and_closing_tokens();
7825 # tell the -lp option we are outputting a batch so it can close
7826 # any unfinished items in its stack
7829 # If this line ends in a code block brace, set breaks at any
7830 # previous closing code block braces to breakup a chain of code
7831 # blocks on one line. This is very rare but can happen for
7832 # user-defined subs. For example we might be looking at this:
7833 # BOOL { $server_data{uptime} > 0; } NUM { $server_data{load}; } STR {
7834 my $saw_good_break = 0; # flag to force breaks even if short line
7837 # looking for opening or closing block brace
7838 $block_type_to_go[$max_index_to_go]
7840 # but not one of these which are never duplicated on a line:
7841 # until|while|for|if|elsif|else
7842 && !$is_block_without_semicolon{ $block_type_to_go[$max_index_to_go] }
7845 my $lev = $nesting_depth_to_go[$max_index_to_go];
7847 # Walk backwards from the end and
7848 # set break at any closing block braces at the same level.
7849 # But quit if we are not in a chain of blocks.
7850 for ( my $i = $max_index_to_go - 1 ; $i >= 0 ; $i-- ) {
7851 last if ( $levels_to_go[$i] < $lev ); # stop at a lower level
7852 next if ( $levels_to_go[$i] > $lev ); # skip past higher level
7854 if ( $block_type_to_go[$i] ) {
7855 if ( $tokens_to_go[$i] eq '}' ) {
7856 set_forced_breakpoint($i);
7857 $saw_good_break = 1;
7861 # quit if we see anything besides words, function, blanks
7863 elsif ( $types_to_go[$i] !~ /^[\(\)Gwib]$/ ) { last }
7868 my $imax = $max_index_to_go;
7870 # trim any blank tokens
7871 if ( $max_index_to_go >= 0 ) {
7872 if ( $types_to_go[$imin] eq 'b' ) { $imin++ }
7873 if ( $types_to_go[$imax] eq 'b' ) { $imax-- }
7876 # anything left to write?
7877 if ( $imin <= $imax ) {
7879 # add a blank line before certain key types but not after a comment
7880 if ( $last_line_leading_type !~ /^[#]/ ) {
7882 my $leading_token = $tokens_to_go[$imin];
7883 my $leading_type = $types_to_go[$imin];
7885 # blank lines before subs except declarations and one-liners
7886 if ( $leading_token =~ /^(sub\s)/ && $leading_type eq 'i' ) {
7887 $want_blank = $rOpts->{'blank-lines-before-subs'}
7888 if ( $self->terminal_type_i( $imin, $imax ) !~ /^[\;\}]$/ );
7891 # break before all package declarations
7892 elsif ($leading_token =~ /^(package\s)/
7893 && $leading_type eq 'i' )
7895 $want_blank = $rOpts->{'blank-lines-before-packages'};
7898 # break before certain key blocks except one-liners
7899 if ( $leading_token =~ /^(BEGIN|END)$/ && $leading_type eq 'k' ) {
7900 $want_blank = $rOpts->{'blank-lines-before-subs'}
7901 if ( $self->terminal_type_i( $imin, $imax ) ne '}' );
7904 # Break before certain block types if we haven't had a
7905 # break at this level for a while. This is the
7906 # difficult decision..
7907 elsif ($leading_type eq 'k'
7908 && $last_line_leading_type ne 'b'
7909 && $leading_token =~ /^(unless|if|while|until|for|foreach)$/ )
7911 my $lc = $nonblank_lines_at_depth[$last_line_leading_level];
7912 if ( !defined($lc) ) { $lc = 0 }
7914 # patch for RT #128216: no blank line inserted at a level change
7915 if ( $levels_to_go[$imin] != $last_line_leading_level ) {
7920 $rOpts->{'blanks-before-blocks'}
7921 && $lc >= $rOpts->{'long-block-line-count'}
7922 && consecutive_nonblank_lines() >=
7923 $rOpts->{'long-block-line-count'}
7924 && $self->terminal_type_i( $imin, $imax ) ne '}';
7927 # Check for blank lines wanted before a closing brace
7928 if ( $leading_token eq '}' ) {
7929 if ( $rOpts->{'blank-lines-before-closing-block'}
7930 && $block_type_to_go[$imin]
7931 && $block_type_to_go[$imin] =~
7932 /$blank_lines_before_closing_block_pattern/ )
7934 my $nblanks = $rOpts->{'blank-lines-before-closing-block'};
7935 if ( $nblanks > $want_blank ) {
7936 $want_blank = $nblanks;
7943 # future: send blank line down normal path to VerticalAligner
7944 Perl::Tidy::VerticalAligner::flush();
7945 $file_writer_object->require_blank_code_lines($want_blank);
7949 # update blank line variables and count number of consecutive
7950 # non-blank, non-comment lines at this level
7951 $last_last_line_leading_level = $last_line_leading_level;
7952 $last_line_leading_level = $levels_to_go[$imin];
7953 if ( $last_line_leading_level < 0 ) { $last_line_leading_level = 0 }
7954 $last_line_leading_type = $types_to_go[$imin];
7955 if ( $last_line_leading_level == $last_last_line_leading_level
7956 && $last_line_leading_type ne 'b'
7957 && $last_line_leading_type ne '#'
7958 && defined( $nonblank_lines_at_depth[$last_line_leading_level] ) )
7960 $nonblank_lines_at_depth[$last_line_leading_level]++;
7963 $nonblank_lines_at_depth[$last_line_leading_level] = 1;
7966 FORMATTER_DEBUG_FLAG_FLUSH && do {
7967 my ( $package, $file, $line ) = caller;
7969 "FLUSH: flushing from $package $file $line, types= $types_to_go[$imin] to $types_to_go[$imax]\n";
7972 # add a couple of extra terminal blank tokens
7975 # set all forced breakpoints for good list formatting
7976 my $is_long_line = excess_line_length( $imin, $max_index_to_go ) > 0;
7978 my $old_line_count_in_batch =
7979 $self->get_old_line_count( $K_to_go[0], $K_to_go[$max_index_to_go] );
7983 || $old_line_count_in_batch > 1
7985 # must always call scan_list() with unbalanced batches because it
7986 # is maintaining some stacks
7987 || is_unbalanced_batch()
7989 # call scan_list if we might want to break at commas
7991 $comma_count_in_batch
7992 && ( $rOpts_maximum_fields_per_table > 0
7993 || $rOpts_comma_arrow_breakpoints == 0 )
7996 # call scan_list if user may want to break open some one-line
7998 || ( $comma_arrow_count_contained
7999 && $rOpts_comma_arrow_breakpoints != 3 )
8002 ## This caused problems in one version of perl for unknown reasons:
8003 ## $saw_good_break ||= scan_list();
8004 my $sgb = scan_list();
8005 $saw_good_break ||= $sgb;
8008 # let $ri_first and $ri_last be references to lists of
8009 # first and last tokens of line fragments to output..
8010 my ( $ri_first, $ri_last );
8012 # write a single line if..
8015 # we aren't allowed to add any newlines
8016 !$rOpts_add_newlines
8018 # or, we don't already have an interior breakpoint
8019 # and we didn't see a good breakpoint
8021 !$forced_breakpoint_count
8024 # and this line is 'short'
8029 @{$ri_first} = ($imin);
8030 @{$ri_last} = ($imax);
8033 # otherwise use multiple lines
8036 ( $ri_first, $ri_last, my $colon_count ) =
8037 $self->set_continuation_breaks($saw_good_break);
8039 $self->break_all_chain_tokens( $ri_first, $ri_last );
8041 break_equals( $ri_first, $ri_last );
8043 # now we do a correction step to clean this up a bit
8044 # (The only time we would not do this is for debugging)
8045 if ( $rOpts->{'recombine'} ) {
8046 ( $ri_first, $ri_last ) =
8047 recombine_breakpoints( $ri_first, $ri_last );
8050 $self->insert_final_breaks( $ri_first, $ri_last ) if $colon_count;
8053 # do corrector step if -lp option is used
8055 if ($rOpts_line_up_parentheses) {
8056 $do_not_pad = correct_lp_indentation( $ri_first, $ri_last );
8058 $self->unmask_phantom_semicolons( $ri_first, $ri_last );
8059 if ( $rOpts_one_line_block_semicolons == 0 ) {
8060 $self->delete_one_line_semicolons( $ri_first, $ri_last );
8063 # The line breaks for this batch of code have been finalized. Now we
8064 # can to package the results for further processing. We will switch
8065 # from the local '_to_go' buffer arrays (i-index) back to the global
8066 # token arrays (K-index) at this point.
8069 for ( my $n = 0 ; $n < @{$ri_first} ; $n++ ) {
8070 my $ibeg = $ri_first->[$n];
8071 my $Kbeg = $K_to_go[$ibeg];
8072 my $iend = $ri_last->[$n];
8073 my $Kend = $K_to_go[$iend];
8074 if ( $iend - $ibeg != $Kend - $Kbeg ) {
8075 $index_error = $n unless defined($index_error);
8078 [ $Kbeg, $Kend, $forced_breakpoint_to_go[$iend] ];
8081 # Check correctness of the mapping between the i and K token indexes
8082 if ( defined($index_error) ) {
8084 # Temporary debug code - should never get here
8085 for ( my $n = 0 ; $n < @{$ri_first} ; $n++ ) {
8086 my $ibeg = $ri_first->[$n];
8087 my $Kbeg = $K_to_go[$ibeg];
8088 my $iend = $ri_last->[$n];
8089 my $Kend = $K_to_go[$iend];
8090 my $idiff = $iend - $ibeg;
8091 my $Kdiff = $Kend - $Kbeg;
8093 line $n, irange $ibeg-$iend = $idiff, Krange $Kbeg-$Kend = $Kdiff;
8096 Fault("Index error at line $index_error; i and K ranges differ");
8100 rlines_K => $rlines_K,
8101 do_not_pad => $do_not_pad,
8102 ibeg0 => $ri_first->[0],
8105 $self->send_lines_to_vertical_aligner($rbatch_hash);
8107 # Insert any requested blank lines after an opening brace. We have to
8108 # skip back before any side comment to find the terminal token
8110 for ( $iterm = $imax ; $iterm >= $imin ; $iterm-- ) {
8111 next if $types_to_go[$iterm] eq '#';
8112 next if $types_to_go[$iterm] eq 'b';
8116 # write requested number of blank lines after an opening block brace
8117 if ( $iterm >= $imin && $types_to_go[$iterm] eq '{' ) {
8118 if ( $rOpts->{'blank-lines-after-opening-block'}
8119 && $block_type_to_go[$iterm]
8120 && $block_type_to_go[$iterm] =~
8121 /$blank_lines_after_opening_block_pattern/ )
8123 my $nblanks = $rOpts->{'blank-lines-after-opening-block'};
8124 Perl::Tidy::VerticalAligner::flush();
8125 $file_writer_object->require_blank_code_lines($nblanks);
8130 prepare_for_new_input_lines();
8135 sub note_added_semicolon {
8136 my ($line_number) = @_;
8137 $last_added_semicolon_at = $line_number;
8138 if ( $added_semicolon_count == 0 ) {
8139 $first_added_semicolon_at = $last_added_semicolon_at;
8141 $added_semicolon_count++;
8142 write_logfile_entry("Added ';' here\n");
8146 sub note_deleted_semicolon {
8147 $last_deleted_semicolon_at = $input_line_number;
8148 if ( $deleted_semicolon_count == 0 ) {
8149 $first_deleted_semicolon_at = $last_deleted_semicolon_at;
8151 $deleted_semicolon_count++;
8152 write_logfile_entry("Deleted unnecessary ';' at line $input_line_number\n");
8156 sub note_embedded_tab {
8157 $embedded_tab_count++;
8158 $last_embedded_tab_at = $input_line_number;
8159 if ( !$first_embedded_tab_at ) {
8160 $first_embedded_tab_at = $last_embedded_tab_at;
8163 if ( $embedded_tab_count <= MAX_NAG_MESSAGES ) {
8164 write_logfile_entry("Embedded tabs in quote or pattern\n");
8169 sub starting_one_line_block {
8171 # after seeing an opening curly brace, look for the closing brace
8172 # and see if the entire block will fit on a line. This routine is
8173 # not always right because it uses the old whitespace, so a check
8174 # is made later (at the closing brace) to make sure we really
8175 # have a one-line block. We have to do this preliminary check,
8176 # though, because otherwise we would always break at a semicolon
8177 # within a one-line block if the block contains multiple statements.
8179 my ( $self, $j, $jmax, $level, $slevel, $ci_level, $rtoken_array ) = @_;
8180 my $rbreak_container = $self->{rbreak_container};
8181 my $rshort_nested = $self->{rshort_nested};
8183 my $jmax_check = @{$rtoken_array};
8184 if ( $jmax_check < $jmax ) {
8185 Fault("jmax=$jmax > $jmax_check");
8188 # kill any current block - we can only go 1 deep
8189 destroy_one_line_block();
8192 # 1=distance from start of block to opening brace exceeds line length
8197 # shouldn't happen: there must have been a prior call to
8198 # store_token_to_go to put the opening brace in the output stream
8199 if ( $max_index_to_go < 0 ) {
8200 Fault("program bug: store_token_to_go called incorrectly\n");
8203 # return if block should be broken
8204 my $type_sequence = $rtoken_array->[$j]->[_TYPE_SEQUENCE_];
8205 if ( $rbreak_container->{$type_sequence} ) {
8209 my $block_type = $rtoken_array->[$j]->[_BLOCK_TYPE_];
8211 # find the starting keyword for this block (such as 'if', 'else', ...)
8213 if ( $block_type =~ /^[\{\}\;\:]$/ || $block_type =~ /^package/ ) {
8214 $i_start = $max_index_to_go;
8217 # the previous nonblank token should start these block types
8218 elsif (( $last_last_nonblank_token_to_go eq $block_type )
8219 || ( $block_type =~ /^sub\b/ )
8220 || $block_type =~ /\(\)/ )
8222 $i_start = $last_last_nonblank_index_to_go;
8224 # For signatures and extended syntax ...
8225 # If this brace follows a parenthesized list, we should look back to
8226 # find the keyword before the opening paren because otherwise we might
8227 # form a one line block which stays intack, and cause the parenthesized
8228 # expression to break open. That looks bad. However, actually
8229 # searching for the opening paren is slow and tedius.
8230 # The actual keyword is often at the start of a line, but might not be.
8231 # For example, we might have an anonymous sub with signature list
8232 # following a =>. It is safe to mark the start anywhere before the
8233 # opening paren, so we just go back to the prevoious break (or start of
8234 # the line) if that is before the opening paren. The minor downside is
8235 # that we may very occasionally break open a block unnecessarily.
8236 if ( $tokens_to_go[$i_start] eq ')' ) {
8237 $i_start = $index_max_forced_break + 1;
8238 if ( $types_to_go[$i_start] eq 'b' ) { $i_start++; }
8239 my $lev = $levels_to_go[$i_start];
8240 if ( $lev > $level ) { return 0 }
8244 elsif ( $last_last_nonblank_token_to_go eq ')' ) {
8246 # For something like "if (xxx) {", the keyword "if" will be
8247 # just after the most recent break. This will be 0 unless
8248 # we have just killed a one-line block and are starting another.
8250 # Note: cannot use inext_index_to_go[] here because that array
8251 # is still being constructed.
8252 $i_start = $index_max_forced_break + 1;
8253 if ( $types_to_go[$i_start] eq 'b' ) {
8257 # Patch to avoid breaking short blocks defined with extended_syntax:
8258 # Strip off any trailing () which was added in the parser to mark
8259 # the opening keyword. For example, in the following
8260 # create( TypeFoo $e) {$bubba}
8261 # the blocktype would be marked as create()
8262 my $stripped_block_type = $block_type;
8263 $stripped_block_type =~ s/\(\)$//;
8265 unless ( $tokens_to_go[$i_start] eq $stripped_block_type ) {
8270 # patch for SWITCH/CASE to retain one-line case/when blocks
8271 elsif ( $block_type eq 'case' || $block_type eq 'when' ) {
8273 # Note: cannot use inext_index_to_go[] here because that array
8274 # is still being constructed.
8275 $i_start = $index_max_forced_break + 1;
8276 if ( $types_to_go[$i_start] eq 'b' ) {
8279 unless ( $tokens_to_go[$i_start] eq $block_type ) {
8288 my $pos = total_line_length( $i_start, $max_index_to_go ) - 1;
8290 # see if length is too long to even start
8291 if ( $pos > maximum_line_length($i_start) ) {
8295 foreach my $i ( $j + 1 .. $jmax ) {
8297 # old whitespace could be arbitrarily large, so don't use it
8298 if ( $rtoken_array->[$i]->[_TYPE_] eq 'b' ) { $pos += 1 }
8299 else { $pos += rtoken_length($i) }
8301 # ignore some small blocks
8302 my $type_sequence = $rtoken_array->[$i]->[_TYPE_SEQUENCE_];
8303 my $nobreak = $rshort_nested->{$type_sequence};
8305 # Return false result if we exceed the maximum line length,
8306 if ( $pos > maximum_line_length($i_start) ) {
8310 # keep going for non-containers
8311 elsif ( !$type_sequence ) {
8315 # return if we encounter another opening brace before finding the
8317 elsif ($rtoken_array->[$i]->[_TOKEN_] eq '{'
8318 && $rtoken_array->[$i]->[_TYPE_] eq '{'
8319 && $rtoken_array->[$i]->[_BLOCK_TYPE_]
8325 # if we find our closing brace..
8326 elsif ($rtoken_array->[$i]->[_TOKEN_] eq '}'
8327 && $rtoken_array->[$i]->[_TYPE_] eq '}'
8328 && $rtoken_array->[$i]->[_BLOCK_TYPE_]
8332 # be sure any trailing comment also fits on the line
8334 ( $rtoken_array->[ $i + 1 ]->[_TYPE_] eq 'b' ) ? $i + 2 : $i + 1;
8336 # Patch for one-line sort/map/grep/eval blocks with side comments:
8337 # We will ignore the side comment length for sort/map/grep/eval
8338 # because this can lead to statements which change every time
8339 # perltidy is run. Here is an example from Denis Moskowitz which
8340 # oscillates between these two states without this patch:
8343 ## grep { $_->foo ne 'bar' } # asdfa asdf asdf asdf asdf asdf asdf asdf asdf asdf asdf
8348 ## } # asdfa asdf asdf asdf asdf asdf asdf asdf asdf asdf asdf
8352 # When the first line is input it gets broken apart by the main
8353 # line break logic in sub print_line_of_tokens.
8354 # When the second line is input it gets recombined by
8355 # print_line_of_tokens and passed to the output routines. The
8356 # output routines (set_continuation_breaks) do not break it apart
8357 # because the bond strengths are set to the highest possible value
8358 # for grep/map/eval/sort blocks, so the first version gets output.
8359 # It would be possible to fix this by changing bond strengths,
8360 # but they are high to prevent errors in older versions of perl.
8362 if ( $rtoken_array->[$i_nonblank]->[_TYPE_] eq '#'
8363 && !$is_sort_map_grep{$block_type} )
8366 $pos += rtoken_length($i_nonblank);
8368 if ( $i_nonblank > $i + 1 ) {
8370 # source whitespace could be anything, assume
8371 # at least one space before the hash on output
8372 if ( $rtoken_array->[ $i + 1 ]->[_TYPE_] eq 'b' ) {
8375 else { $pos += rtoken_length( $i + 1 ) }
8378 if ( $pos >= maximum_line_length($i_start) ) {
8383 # ok, it's a one-line block
8384 create_one_line_block( $i_start, 20 );
8388 # just keep going for other characters
8393 # Allow certain types of new one-line blocks to form by joining
8394 # input lines. These can be safely done, but for other block types,
8395 # we keep old one-line blocks but do not form new ones. It is not
8396 # always a good idea to make as many one-line blocks as possible,
8397 # so other types are not done. The user can always use -mangle.
8398 if ( $want_one_line_block{$block_type} ) {
8399 create_one_line_block( $i_start, 1 );
8404 sub unstore_token_to_go {
8406 # remove most recent token from output stream
8408 if ( $max_index_to_go > 0 ) {
8412 $max_index_to_go = UNDEFINED_INDEX;
8417 sub want_blank_line {
8420 $file_writer_object->want_blank_line();
8424 sub write_unindented_line {
8425 my ( $self, $line ) = @_;
8427 $file_writer_object->write_line($line);
8433 # Undo continuation indentation in certain sequences
8434 # For example, we can undo continuation indentation in sort/map/grep chains
8435 # my $dat1 = pack( "n*",
8436 # map { $_, $lookup->{$_} }
8437 # sort { $a <=> $b }
8438 # grep { $lookup->{$_} ne $default } keys %$lookup );
8439 # To align the map/sort/grep keywords like this:
8440 # my $dat1 = pack( "n*",
8441 # map { $_, $lookup->{$_} }
8442 # sort { $a <=> $b }
8443 # grep { $lookup->{$_} ne $default } keys %$lookup );
8444 my ( $self, $ri_first, $ri_last ) = @_;
8445 my ( $line_1, $line_2, $lev_last );
8446 my $this_line_is_semicolon_terminated;
8447 my $max_line = @{$ri_first} - 1;
8449 # looking at each line of this batch..
8450 # We are looking at leading tokens and looking for a sequence
8451 # all at the same level and higher level than enclosing lines.
8452 foreach my $line ( 0 .. $max_line ) {
8454 my $ibeg = $ri_first->[$line];
8455 my $lev = $levels_to_go[$ibeg];
8458 # if we have started a chain..
8461 # see if it continues..
8462 if ( $lev == $lev_last ) {
8463 if ( $types_to_go[$ibeg] eq 'k'
8464 && $is_sort_map_grep{ $tokens_to_go[$ibeg] } )
8467 # chain continues...
8468 # check for chain ending at end of a statement
8469 if ( $line == $max_line ) {
8471 # see of this line ends a statement
8472 my $iend = $ri_last->[$line];
8473 $this_line_is_semicolon_terminated =
8474 $types_to_go[$iend] eq ';'
8476 # with possible side comment
8477 || ( $types_to_go[$iend] eq '#'
8478 && $iend - $ibeg >= 2
8479 && $types_to_go[ $iend - 2 ] eq ';'
8480 && $types_to_go[ $iend - 1 ] eq 'b' );
8482 $line_2 = $line if ($this_line_is_semicolon_terminated);
8490 elsif ( $lev < $lev_last ) {
8492 # chain ends with previous line
8493 $line_2 = $line - 1;
8495 elsif ( $lev > $lev_last ) {
8501 # undo the continuation indentation if a chain ends
8502 if ( defined($line_2) && defined($line_1) ) {
8503 my $continuation_line_count = $line_2 - $line_1 + 1;
8504 @ci_levels_to_go[ @{$ri_first}[ $line_1 .. $line_2 ] ] =
8505 (0) x ($continuation_line_count)
8506 if ( $continuation_line_count >= 0 );
8507 @leading_spaces_to_go[ @{$ri_first}[ $line_1 .. $line_2 ] ]
8508 = @reduced_spaces_to_go[ @{$ri_first}
8509 [ $line_1 .. $line_2 ] ];
8514 # not in a chain yet..
8517 # look for start of a new sort/map/grep chain
8518 if ( $lev > $lev_last ) {
8519 if ( $types_to_go[$ibeg] eq 'k'
8520 && $is_sort_map_grep{ $tokens_to_go[$ibeg] } )
8534 # If there is a single, long parameter within parens, like this:
8536 # $self->command( "/msg "
8538 # . " You said $1, but did you know that it's square was "
8539 # . $1 * $1 . " ?" );
8541 # we can remove the continuation indentation of the 2nd and higher lines
8542 # to achieve this effect, which is more pleasing:
8544 # $self->command("/msg "
8546 # . " You said $1, but did you know that it's square was "
8547 # . $1 * $1 . " ?");
8549 my ( $line_open, $i_start, $closing_index, $ri_first, $ri_last ) = @_;
8550 my $max_line = @{$ri_first} - 1;
8552 # must be multiple lines
8553 return unless $max_line > $line_open;
8555 my $lev_start = $levels_to_go[$i_start];
8556 my $ci_start_plus = 1 + $ci_levels_to_go[$i_start];
8558 # see if all additional lines in this container have continuation
8561 my $line_1 = 1 + $line_open;
8562 for ( $n = $line_1 ; $n <= $max_line ; ++$n ) {
8563 my $ibeg = $ri_first->[$n];
8564 my $iend = $ri_last->[$n];
8565 if ( $ibeg eq $closing_index ) { $n--; last }
8566 return if ( $lev_start != $levels_to_go[$ibeg] );
8567 return if ( $ci_start_plus != $ci_levels_to_go[$ibeg] );
8568 last if ( $closing_index <= $iend );
8571 # we can reduce the indentation of all continuation lines
8572 my $continuation_line_count = $n - $line_open;
8573 @ci_levels_to_go[ @{$ri_first}[ $line_1 .. $n ] ] =
8574 (0) x ($continuation_line_count);
8575 @leading_spaces_to_go[ @{$ri_first}[ $line_1 .. $n ] ] =
8576 @reduced_spaces_to_go[ @{$ri_first}[ $line_1 .. $n ] ];
8582 # insert $pad_spaces before token number $ipad
8583 my ( $self, $ipad, $pad_spaces ) = @_;
8584 my $rLL = $self->{rLL};
8585 if ( $pad_spaces > 0 ) {
8586 $tokens_to_go[$ipad] = ' ' x $pad_spaces . $tokens_to_go[$ipad];
8588 elsif ( $pad_spaces == -1 && $tokens_to_go[$ipad] eq ' ' ) {
8589 $tokens_to_go[$ipad] = "";
8597 # Keep token arrays in sync
8598 $self->sync_token_K($ipad);
8600 $token_lengths_to_go[$ipad] += $pad_spaces;
8601 foreach my $i ( $ipad .. $max_index_to_go ) {
8602 $summed_lengths_to_go[ $i + 1 ] += $pad_spaces;
8612 my @q = qw( + - * / );
8613 @is_math_op{@q} = (1) x scalar(@q);
8616 sub set_logical_padding {
8618 # Look at a batch of lines and see if extra padding can improve the
8619 # alignment when there are certain leading operators. Here is an
8620 # example, in which some extra space is introduced before
8621 # '( $year' to make it line up with the subsequent lines:
8623 # if ( ( $Year < 1601 )
8624 # || ( $Year > 2899 )
8625 # || ( $EndYear < 1601 )
8626 # || ( $EndYear > 2899 ) )
8628 # &Error_OutOfRange;
8631 my ( $self, $ri_first, $ri_last ) = @_;
8632 my $max_line = @{$ri_first} - 1;
8634 # FIXME: move these declarations below
8635 my ( $ibeg, $ibeg_next, $ibegm, $iend, $iendm, $ipad, $pad_spaces,
8636 $tok_next, $type_next, $has_leading_op_next, $has_leading_op );
8638 # looking at each line of this batch..
8639 foreach my $line ( 0 .. $max_line - 1 ) {
8641 # see if the next line begins with a logical operator
8642 $ibeg = $ri_first->[$line];
8643 $iend = $ri_last->[$line];
8644 $ibeg_next = $ri_first->[ $line + 1 ];
8645 $tok_next = $tokens_to_go[$ibeg_next];
8646 $type_next = $types_to_go[$ibeg_next];
8648 $has_leading_op_next = ( $tok_next =~ /^\w/ )
8649 ? $is_chain_operator{$tok_next} # + - * / : ? && ||
8650 : $is_chain_operator{$type_next}; # and, or
8652 next unless ($has_leading_op_next);
8654 # next line must not be at lesser depth
8656 if ( $nesting_depth_to_go[$ibeg] >
8657 $nesting_depth_to_go[$ibeg_next] );
8659 # identify the token in this line to be padded on the left
8662 # handle lines at same depth...
8663 if ( $nesting_depth_to_go[$ibeg] ==
8664 $nesting_depth_to_go[$ibeg_next] )
8667 # if this is not first line of the batch ...
8670 # and we have leading operator..
8671 next if $has_leading_op;
8673 # Introduce padding if..
8674 # 1. the previous line is at lesser depth, or
8675 # 2. the previous line ends in an assignment
8676 # 3. the previous line ends in a 'return'
8677 # 4. the previous line ends in a comma
8678 # Example 1: previous line at lesser depth
8679 # if ( ( $Year < 1601 ) # <- we are here but
8680 # || ( $Year > 2899 ) # list has not yet
8681 # || ( $EndYear < 1601 ) # collapsed vertically
8682 # || ( $EndYear > 2899 ) )
8685 # Example 2: previous line ending in assignment:
8687 # $year % 4 ? 0 # <- We are here
8692 # Example 3: previous line ending in comma:
8699 # be sure levels agree (do not indent after an indented 'if')
8701 if ( $levels_to_go[$ibeg] ne $levels_to_go[$ibeg_next] );
8703 # allow padding on first line after a comma but only if:
8704 # (1) this is line 2 and
8705 # (2) there are at more than three lines and
8706 # (3) lines 3 and 4 have the same leading operator
8707 # These rules try to prevent padding within a long
8708 # comma-separated list.
8710 if ( $types_to_go[$iendm] eq ','
8714 my $ibeg_next_next = $ri_first->[ $line + 2 ];
8715 my $tok_next_next = $tokens_to_go[$ibeg_next_next];
8716 $ok_comma = $tok_next_next eq $tok_next;
8721 $is_assignment{ $types_to_go[$iendm] }
8723 || ( $nesting_depth_to_go[$ibegm] <
8724 $nesting_depth_to_go[$ibeg] )
8725 || ( $types_to_go[$iendm] eq 'k'
8726 && $tokens_to_go[$iendm] eq 'return' )
8729 # we will add padding before the first token
8733 # for first line of the batch..
8736 # WARNING: Never indent if first line is starting in a
8737 # continued quote, which would change the quote.
8738 next if $starting_in_quote;
8740 # if this is text after closing '}'
8741 # then look for an interior token to pad
8742 if ( $types_to_go[$ibeg] eq '}' ) {
8746 # otherwise, we might pad if it looks really good
8749 # we might pad token $ibeg, so be sure that it
8750 # is at the same depth as the next line.
8752 if ( $nesting_depth_to_go[$ibeg] !=
8753 $nesting_depth_to_go[$ibeg_next] );
8755 # We can pad on line 1 of a statement if at least 3
8756 # lines will be aligned. Otherwise, it
8757 # can look very confusing.
8759 # We have to be careful not to pad if there are too few
8760 # lines. The current rule is:
8761 # (1) in general we require at least 3 consecutive lines
8762 # with the same leading chain operator token,
8763 # (2) but an exception is that we only require two lines
8764 # with leading colons if there are no more lines. For example,
8765 # the first $i in the following snippet would get padding
8766 # by the second rule:
8768 # $i == 1 ? ( "First", "Color" )
8769 # : $i == 2 ? ( "Then", "Rarity" )
8770 # : ( "Then", "Name" );
8772 if ( $max_line > 1 ) {
8773 my $leading_token = $tokens_to_go[$ibeg_next];
8776 # never indent line 1 of a '.' series because
8777 # previous line is most likely at same level.
8778 # TODO: we should also look at the leasing_spaces
8779 # of the last output line and skip if it is same
8781 next if ( $leading_token eq '.' );
8784 foreach my $l ( 2 .. 3 ) {
8785 last if ( $line + $l > $max_line );
8786 my $ibeg_next_next = $ri_first->[ $line + $l ];
8787 if ( $tokens_to_go[$ibeg_next_next] ne
8795 next if ($tokens_differ);
8796 next if ( $count < 3 && $leading_token ne ':' );
8806 # find interior token to pad if necessary
8807 if ( !defined($ipad) ) {
8809 for ( my $i = $ibeg ; ( $i < $iend ) && !$ipad ; $i++ ) {
8811 # find any unclosed container
8813 unless ( $type_sequence_to_go[$i]
8814 && $self->mate_index_to_go($i) > $iend );
8816 # find next nonblank token to pad
8817 $ipad = $inext_to_go[$i];
8818 last if ( $ipad > $iend );
8823 # We cannot pad the first leading token of a file because
8824 # it could cause a bug in which the starting indentation
8825 # level is guessed incorrectly each time the code is run
8826 # though perltidy, thus causing the code to march off to
8827 # the right. For example, the following snippet would have
8830 ## ov_method mycan( $package, '(""' ), $package
8831 ## or ov_method mycan( $package, '(0+' ), $package
8832 ## or ov_method mycan( $package, '(bool' ), $package
8833 ## or ov_method mycan( $package, '(nomethod' ), $package;
8835 # If this snippet is within a block this won't happen
8836 # unless the user just processes the snippet alone within
8837 # an editor. In that case either the user will see and
8838 # fix the problem or it will be corrected next time the
8839 # entire file is processed with perltidy.
8840 next if ( $ipad == 0 && $peak_batch_size <= 1 );
8842 ## THIS PATCH REMOVES THE FOLLOWING POOR PADDING (math.t) with -pbp, BUT
8843 ## IT DID MORE HARM THAN GOOD
8845 ## $font->{'loca'}->{'glyphs'}[$x]->read->{'xMin'} * 1000
8848 ##? # do not put leading padding for just 2 lines of math
8849 ##? if ( $ipad == $ibeg
8851 ##? && $levels_to_go[$ipad] > $levels_to_go[ $ipad - 1 ]
8852 ##? && $is_math_op{$type_next}
8853 ##? && $line + 2 <= $max_line )
8855 ##? my $ibeg_next_next = $ri_first->[ $line + 2 ];
8856 ##? my $type_next_next = $types_to_go[$ibeg_next_next];
8857 ##? next if !$is_math_op{$type_next_next};
8860 # next line must not be at greater depth
8861 my $iend_next = $ri_last->[ $line + 1 ];
8863 if ( $nesting_depth_to_go[ $iend_next + 1 ] >
8864 $nesting_depth_to_go[$ipad] );
8866 # lines must be somewhat similar to be padded..
8867 my $inext_next = $inext_to_go[$ibeg_next];
8868 my $type = $types_to_go[$ipad];
8869 my $type_next = $types_to_go[ $ipad + 1 ];
8871 # see if there are multiple continuation lines
8872 my $logical_continuation_lines = 1;
8873 if ( $line + 2 <= $max_line ) {
8874 my $leading_token = $tokens_to_go[$ibeg_next];
8875 my $ibeg_next_next = $ri_first->[ $line + 2 ];
8876 if ( $tokens_to_go[$ibeg_next_next] eq $leading_token
8877 && $nesting_depth_to_go[$ibeg_next] eq
8878 $nesting_depth_to_go[$ibeg_next_next] )
8880 $logical_continuation_lines++;
8884 # see if leading types match
8885 my $types_match = $types_to_go[$inext_next] eq $type;
8886 my $matches_without_bang;
8888 # if first line has leading ! then compare the following token
8889 if ( !$types_match && $type eq '!' ) {
8890 $types_match = $matches_without_bang =
8891 $types_to_go[$inext_next] eq $types_to_go[ $ipad + 1 ];
8896 # either we have multiple continuation lines to follow
8897 # and we are not padding the first token
8898 ( $logical_continuation_lines > 1 && $ipad > 0 )
8906 # and keywords must match if keyword
8909 && $tokens_to_go[$ipad] ne $tokens_to_go[$inext_next]
8915 #----------------------begin special checks--------------
8918 # A check is needed before we can make the pad.
8919 # If we are in a list with some long items, we want each
8920 # item to stand out. So in the following example, the
8921 # first line beginning with '$casefold->' would look good
8922 # padded to align with the next line, but then it
8923 # would be indented more than the last line, so we
8927 # $casefold->{code} eq '0041'
8928 # && $casefold->{status} eq 'C'
8929 # && $casefold->{mapping} eq '0061',
8934 # It would be faster, and almost as good, to use a comma
8935 # count, and not pad if comma_count > 1 and the previous
8936 # line did not end with a comma.
8940 my $ibg = $ri_first->[ $line + 1 ];
8941 my $depth = $nesting_depth_to_go[ $ibg + 1 ];
8943 # just use simplified formula for leading spaces to avoid
8944 # needless sub calls
8945 my $lsp = $levels_to_go[$ibg] + $ci_levels_to_go[$ibg];
8947 # look at each line beyond the next ..
8949 foreach my $ltest ( $line + 2 .. $max_line ) {
8951 my $ibg = $ri_first->[$l];
8953 # quit looking at the end of this container
8955 if ( $nesting_depth_to_go[ $ibg + 1 ] < $depth )
8956 || ( $nesting_depth_to_go[$ibg] < $depth );
8958 # cannot do the pad if a later line would be
8960 if ( $levels_to_go[$ibg] + $ci_levels_to_go[$ibg] < $lsp ) {
8966 # don't pad if we end in a broken list
8967 if ( $l == $max_line ) {
8968 my $i2 = $ri_last->[$l];
8969 if ( $types_to_go[$i2] eq '#' ) {
8970 my $i1 = $ri_first->[$l];
8971 next if $self->terminal_type_i( $i1, $i2 ) eq ',';
8976 # a minus may introduce a quoted variable, and we will
8977 # add the pad only if this line begins with a bare word,
8978 # such as for the word 'Button' here:
8980 # Button => "Print letter \"~$_\"",
8981 # -command => [ sub { print "$_[0]\n" }, $_ ],
8982 # -accelerator => "Meta+$_"
8985 # On the other hand, if 'Button' is quoted, it looks best
8988 # 'Button' => "Print letter \"~$_\"",
8989 # -command => [ sub { print "$_[0]\n" }, $_ ],
8990 # -accelerator => "Meta+$_"
8992 if ( $types_to_go[$ibeg_next] eq 'm' ) {
8993 $ok_to_pad = 0 if $types_to_go[$ibeg] eq 'Q';
8996 next unless $ok_to_pad;
8998 #----------------------end special check---------------
9000 my $length_1 = total_line_length( $ibeg, $ipad - 1 );
9001 my $length_2 = total_line_length( $ibeg_next, $inext_next - 1 );
9002 $pad_spaces = $length_2 - $length_1;
9004 # If the first line has a leading ! and the second does
9005 # not, then remove one space to try to align the next
9006 # leading characters, which are often the same. For example:
9008 # || $ts == $self->Holder
9009 # || $self->Holder->Type eq "Arena" )
9011 # This usually helps readability, but if there are subsequent
9012 # ! operators things will still get messed up. For example:
9014 # if ( !exists $Net::DNS::typesbyname{$qtype}
9015 # && exists $Net::DNS::classesbyname{$qtype}
9016 # && !exists $Net::DNS::classesbyname{$qclass}
9017 # && exists $Net::DNS::typesbyname{$qclass} )
9018 # We can't fix that.
9019 if ($matches_without_bang) { $pad_spaces-- }
9021 # make sure this won't change if -lp is used
9022 my $indentation_1 = $leading_spaces_to_go[$ibeg];
9023 if ( ref($indentation_1) ) {
9024 if ( $indentation_1->get_recoverable_spaces() == 0 ) {
9025 my $indentation_2 = $leading_spaces_to_go[$ibeg_next];
9026 unless ( $indentation_2->get_recoverable_spaces() == 0 )
9033 # we might be able to handle a pad of -1 by removing a blank
9035 if ( $pad_spaces < 0 ) {
9037 if ( $pad_spaces == -1 ) {
9038 if ( $ipad > $ibeg && $types_to_go[ $ipad - 1 ] eq 'b' )
9040 $self->pad_token( $ipad - 1, $pad_spaces );
9046 # now apply any padding for alignment
9047 if ( $ipad >= 0 && $pad_spaces ) {
9049 my $length_t = total_line_length( $ibeg, $iend );
9050 if ( $pad_spaces + $length_t <= maximum_line_length($ibeg) )
9052 $self->pad_token( $ipad, $pad_spaces );
9060 $has_leading_op = $has_leading_op_next;
9061 } # end of loop over lines
9066 sub correct_lp_indentation {
9068 # When the -lp option is used, we need to make a last pass through
9069 # each line to correct the indentation positions in case they differ
9070 # from the predictions. This is necessary because perltidy uses a
9071 # predictor/corrector method for aligning with opening parens. The
9072 # predictor is usually good, but sometimes stumbles. The corrector
9073 # tries to patch things up once the actual opening paren locations
9075 my ( $ri_first, $ri_last ) = @_;
9078 # Note on flag '$do_not_pad':
9079 # We want to avoid a situation like this, where the aligner inserts
9080 # whitespace before the '=' to align it with a previous '=', because
9081 # otherwise the parens might become mis-aligned in a situation like
9082 # this, where the '=' has become aligned with the previous line,
9083 # pushing the opening '(' forward beyond where we want it.
9085 # $mkFloor::currentRoom = '';
9086 # $mkFloor::c_entry = $c->Entry(
9088 # -relief => 'sunken',
9092 # We leave it to the aligner to decide how to do this.
9094 # first remove continuation indentation if appropriate
9095 my $max_line = @{$ri_first} - 1;
9097 # looking at each line of this batch..
9098 my ( $ibeg, $iend );
9099 foreach my $line ( 0 .. $max_line ) {
9100 $ibeg = $ri_first->[$line];
9101 $iend = $ri_last->[$line];
9103 # looking at each token in this output line..
9104 foreach my $i ( $ibeg .. $iend ) {
9106 # How many space characters to place before this token
9107 # for special alignment. Actual padding is done in the
9110 # looking for next unvisited indentation item
9111 my $indentation = $leading_spaces_to_go[$i];
9112 if ( !$indentation->get_marked() ) {
9113 $indentation->set_marked(1);
9115 # looking for indentation item for which we are aligning
9116 # with parens, braces, and brackets
9117 next unless ( $indentation->get_align_paren() );
9119 # skip closed container on this line
9121 my $im = max( $ibeg, $iprev_to_go[$i] );
9122 if ( $type_sequence_to_go[$im]
9123 && $mate_index_to_go[$im] <= $iend )
9129 if ( $line == 1 && $i == $ibeg ) {
9133 # Ok, let's see what the error is and try to fix it
9135 my $predicted_pos = $indentation->get_spaces();
9138 # token is mid-line - use length to previous token
9139 $actual_pos = total_line_length( $ibeg, $i - 1 );
9141 # for mid-line token, we must check to see if all
9142 # additional lines have continuation indentation,
9143 # and remove it if so. Otherwise, we do not get
9145 my $closing_index = $indentation->get_closed();
9146 if ( $closing_index > $iend ) {
9147 my $ibeg_next = $ri_first->[ $line + 1 ];
9148 if ( $ci_levels_to_go[$ibeg_next] > 0 ) {
9149 undo_lp_ci( $line, $i, $closing_index, $ri_first,
9154 elsif ( $line > 0 ) {
9156 # handle case where token starts a new line;
9157 # use length of previous line
9158 my $ibegm = $ri_first->[ $line - 1 ];
9159 my $iendm = $ri_last->[ $line - 1 ];
9160 $actual_pos = total_line_length( $ibegm, $iendm );
9164 if ( $types_to_go[ $iendm + 1 ] eq 'b' );
9168 # token is first character of first line of batch
9169 $actual_pos = $predicted_pos;
9172 my $move_right = $actual_pos - $predicted_pos;
9174 # done if no error to correct (gnu2.t)
9175 if ( $move_right == 0 ) {
9176 $indentation->set_recoverable_spaces($move_right);
9180 # if we have not seen closure for this indentation in
9181 # this batch, we can only pass on a request to the
9183 my $closing_index = $indentation->get_closed();
9185 if ( $closing_index < 0 ) {
9186 $indentation->set_recoverable_spaces($move_right);
9190 # If necessary, look ahead to see if there is really any
9191 # leading whitespace dependent on this whitespace, and
9192 # also find the longest line using this whitespace.
9193 # Since it is always safe to move left if there are no
9194 # dependents, we only need to do this if we may have
9195 # dependent nodes or need to move right.
9197 my $right_margin = 0;
9198 my $have_child = $indentation->get_have_child();
9200 my %saw_indentation;
9202 $saw_indentation{$indentation} = $indentation;
9204 if ( $have_child || $move_right > 0 ) {
9207 if ( $i == $ibeg ) {
9208 $max_length = total_line_length( $ibeg, $iend );
9211 # look ahead at the rest of the lines of this batch..
9212 foreach my $line_t ( $line + 1 .. $max_line ) {
9213 my $ibeg_t = $ri_first->[$line_t];
9214 my $iend_t = $ri_last->[$line_t];
9215 last if ( $closing_index <= $ibeg_t );
9217 # remember all different indentation objects
9218 my $indentation_t = $leading_spaces_to_go[$ibeg_t];
9219 $saw_indentation{$indentation_t} = $indentation_t;
9222 # remember longest line in the group
9223 my $length_t = total_line_length( $ibeg_t, $iend_t );
9224 if ( $length_t > $max_length ) {
9225 $max_length = $length_t;
9228 $right_margin = maximum_line_length($ibeg) - $max_length;
9229 if ( $right_margin < 0 ) { $right_margin = 0 }
9232 my $first_line_comma_count =
9233 grep { $_ eq ',' } @types_to_go[ $ibeg .. $iend ];
9234 my $comma_count = $indentation->get_comma_count();
9235 my $arrow_count = $indentation->get_arrow_count();
9237 # This is a simple approximate test for vertical alignment:
9238 # if we broke just after an opening paren, brace, bracket,
9239 # and there are 2 or more commas in the first line,
9240 # and there are no '=>'s,
9241 # then we are probably vertically aligned. We could set
9242 # an exact flag in sub scan_list, but this is good
9244 my $indentation_count = keys %saw_indentation;
9245 my $is_vertically_aligned =
9247 && $first_line_comma_count > 1
9248 && $indentation_count == 1
9249 && ( $arrow_count == 0 || $arrow_count == $line_count ) );
9251 # Make the move if possible ..
9254 # we can always move left
9257 # but we should only move right if we are sure it will
9258 # not spoil vertical alignment
9259 || ( $comma_count == 0 )
9260 || ( $comma_count > 0 && !$is_vertically_aligned )
9264 ( $move_right <= $right_margin )
9268 foreach ( keys %saw_indentation ) {
9269 $saw_indentation{$_}
9270 ->permanently_decrease_available_spaces( -$move );
9274 # Otherwise, record what we want and the vertical aligner
9275 # will try to recover it.
9277 $indentation->set_recoverable_spaces($move_right);
9285 # flush is called to output any tokens in the pipeline, so that
9286 # an alternate source of lines can be written in the correct order
9290 destroy_one_line_block();
9291 $self->output_line_to_go();
9292 Perl::Tidy::VerticalAligner::flush();
9296 sub reset_block_text_accumulator {
9298 # save text after 'if' and 'elsif' to append after 'else'
9299 if ($accumulating_text_for_block) {
9301 if ( $accumulating_text_for_block =~ /^(if|elsif)$/ ) {
9302 push @{$rleading_block_if_elsif_text}, $leading_block_text;
9305 $accumulating_text_for_block = "";
9306 $leading_block_text = "";
9307 $leading_block_text_level = 0;
9308 $leading_block_text_length_exceeded = 0;
9309 $leading_block_text_line_number = 0;
9310 $leading_block_text_line_length = 0;
9314 sub set_block_text_accumulator {
9316 $accumulating_text_for_block = $tokens_to_go[$i];
9317 if ( $accumulating_text_for_block !~ /^els/ ) {
9318 $rleading_block_if_elsif_text = [];
9320 $leading_block_text = "";
9321 $leading_block_text_level = $levels_to_go[$i];
9322 $leading_block_text_line_number = get_output_line_number();
9323 $leading_block_text_length_exceeded = 0;
9325 # this will contain the column number of the last character
9326 # of the closing side comment
9327 $leading_block_text_line_length =
9328 length($csc_last_label) +
9329 length($accumulating_text_for_block) +
9330 length( $rOpts->{'closing-side-comment-prefix'} ) +
9331 $leading_block_text_level * $rOpts_indent_columns + 3;
9335 sub accumulate_block_text {
9338 # accumulate leading text for -csc, ignoring any side comments
9339 if ( $accumulating_text_for_block
9340 && !$leading_block_text_length_exceeded
9341 && $types_to_go[$i] ne '#' )
9344 my $added_length = $token_lengths_to_go[$i];
9345 $added_length += 1 if $i == 0;
9346 my $new_line_length = $leading_block_text_line_length + $added_length;
9348 # we can add this text if we don't exceed some limits..
9351 # we must not have already exceeded the text length limit
9352 length($leading_block_text) <
9353 $rOpts_closing_side_comment_maximum_text
9356 # the new total line length must be below the line length limit
9357 # or the new length must be below the text length limit
9358 # (ie, we may allow one token to exceed the text length limit)
9361 maximum_line_length_for_level($leading_block_text_level)
9363 || length($leading_block_text) + $added_length <
9364 $rOpts_closing_side_comment_maximum_text
9367 # UNLESS: we are adding a closing paren before the brace we seek.
9368 # This is an attempt to avoid situations where the ... to be
9369 # added are longer than the omitted right paren, as in:
9371 # foreach my $item (@a_rather_long_variable_name_here) {
9373 # } ## end foreach my $item (@a_rather_long_variable_name_here...
9376 $tokens_to_go[$i] eq ')'
9379 $i + 1 <= $max_index_to_go
9380 && $block_type_to_go[ $i + 1 ] eq
9381 $accumulating_text_for_block
9383 || ( $i + 2 <= $max_index_to_go
9384 && $block_type_to_go[ $i + 2 ] eq
9385 $accumulating_text_for_block )
9391 # add an extra space at each newline
9392 if ( $i == 0 ) { $leading_block_text .= ' ' }
9394 # add the token text
9395 $leading_block_text .= $tokens_to_go[$i];
9396 $leading_block_text_line_length = $new_line_length;
9399 # show that text was truncated if necessary
9400 elsif ( $types_to_go[$i] ne 'b' ) {
9401 $leading_block_text_length_exceeded = 1;
9402 $leading_block_text .= '...';
9409 my %is_if_elsif_else_unless_while_until_for_foreach;
9413 # These block types may have text between the keyword and opening
9414 # curly. Note: 'else' does not, but must be included to allow trailing
9415 # if/elsif text to be appended.
9416 # patch for SWITCH/CASE: added 'case' and 'when'
9418 qw(if elsif else unless while until for foreach case when catch);
9419 @is_if_elsif_else_unless_while_until_for_foreach{@q} =
9423 sub accumulate_csc_text {
9425 # called once per output buffer when -csc is used. Accumulates
9426 # the text placed after certain closing block braces.
9427 # Defines and returns the following for this buffer:
9429 my $block_leading_text = ""; # the leading text of the last '}'
9430 my $rblock_leading_if_elsif_text;
9431 my $i_block_leading_text =
9432 -1; # index of token owning block_leading_text
9433 my $block_line_count = 100; # how many lines the block spans
9434 my $terminal_type = 'b'; # type of last nonblank token
9435 my $i_terminal = 0; # index of last nonblank token
9436 my $terminal_block_type = "";
9438 # update most recent statement label
9439 $csc_last_label = "" unless ($csc_last_label);
9440 if ( $types_to_go[0] eq 'J' ) { $csc_last_label = $tokens_to_go[0] }
9441 my $block_label = $csc_last_label;
9443 # Loop over all tokens of this batch
9444 for my $i ( 0 .. $max_index_to_go ) {
9445 my $type = $types_to_go[$i];
9446 my $block_type = $block_type_to_go[$i];
9447 my $token = $tokens_to_go[$i];
9449 # remember last nonblank token type
9450 if ( $type ne '#' && $type ne 'b' ) {
9451 $terminal_type = $type;
9452 $terminal_block_type = $block_type;
9456 my $type_sequence = $type_sequence_to_go[$i];
9457 if ( $block_type && $type_sequence ) {
9459 if ( $token eq '}' ) {
9461 # restore any leading text saved when we entered this block
9462 if ( defined( $block_leading_text{$type_sequence} ) ) {
9463 ( $block_leading_text, $rblock_leading_if_elsif_text )
9464 = @{ $block_leading_text{$type_sequence} };
9465 $i_block_leading_text = $i;
9466 delete $block_leading_text{$type_sequence};
9467 $rleading_block_if_elsif_text =
9468 $rblock_leading_if_elsif_text;
9471 if ( defined( $csc_block_label{$type_sequence} ) ) {
9472 $block_label = $csc_block_label{$type_sequence};
9473 delete $csc_block_label{$type_sequence};
9476 # if we run into a '}' then we probably started accumulating
9477 # at something like a trailing 'if' clause..no harm done.
9478 if ( $accumulating_text_for_block
9479 && $levels_to_go[$i] <= $leading_block_text_level )
9481 my $lev = $levels_to_go[$i];
9482 reset_block_text_accumulator();
9485 if ( defined( $block_opening_line_number{$type_sequence} ) )
9487 my $output_line_number = get_output_line_number();
9489 $output_line_number -
9490 $block_opening_line_number{$type_sequence} + 1;
9491 delete $block_opening_line_number{$type_sequence};
9495 # Error: block opening line undefined for this line..
9496 # This shouldn't be possible, but it is not a
9497 # significant problem.
9501 elsif ( $token eq '{' ) {
9503 my $line_number = get_output_line_number();
9504 $block_opening_line_number{$type_sequence} = $line_number;
9506 # set a label for this block, except for
9507 # a bare block which already has the label
9508 # A label can only be used on the next {
9509 if ( $block_type =~ /:$/ ) { $csc_last_label = "" }
9510 $csc_block_label{$type_sequence} = $csc_last_label;
9511 $csc_last_label = "";
9513 if ( $accumulating_text_for_block
9514 && $levels_to_go[$i] == $leading_block_text_level )
9517 if ( $accumulating_text_for_block eq $block_type ) {
9519 # save any leading text before we enter this block
9520 $block_leading_text{$type_sequence} = [
9521 $leading_block_text,
9522 $rleading_block_if_elsif_text
9524 $block_opening_line_number{$type_sequence} =
9525 $leading_block_text_line_number;
9526 reset_block_text_accumulator();
9530 # shouldn't happen, but not a serious error.
9531 # We were accumulating -csc text for block type
9532 # $accumulating_text_for_block and unexpectedly
9533 # encountered a '{' for block type $block_type.
9540 && $csc_new_statement_ok
9541 && $is_if_elsif_else_unless_while_until_for_foreach{$token}
9542 && $token =~ /$closing_side_comment_list_pattern/o )
9544 set_block_text_accumulator($i);
9548 # note: ignoring type 'q' because of tricks being played
9549 # with 'q' for hanging side comments
9550 if ( $type ne 'b' && $type ne '#' && $type ne 'q' ) {
9551 $csc_new_statement_ok =
9552 ( $block_type || $type eq 'J' || $type eq ';' );
9555 && $accumulating_text_for_block
9556 && $levels_to_go[$i] == $leading_block_text_level )
9558 reset_block_text_accumulator();
9561 accumulate_block_text($i);
9566 # Treat an 'else' block specially by adding preceding 'if' and
9567 # 'elsif' text. Otherwise, the 'end else' is not helpful,
9568 # especially for cuddled-else formatting.
9569 if ( $terminal_block_type =~ /^els/ && $rblock_leading_if_elsif_text ) {
9570 $block_leading_text =
9571 make_else_csc_text( $i_terminal, $terminal_block_type,
9572 $block_leading_text, $rblock_leading_if_elsif_text );
9575 # if this line ends in a label then remember it for the next pass
9576 $csc_last_label = "";
9577 if ( $terminal_type eq 'J' ) {
9578 $csc_last_label = $tokens_to_go[$i_terminal];
9581 return ( $terminal_type, $i_terminal, $i_block_leading_text,
9582 $block_leading_text, $block_line_count, $block_label );
9586 sub make_else_csc_text {
9588 # create additional -csc text for an 'else' and optionally 'elsif',
9589 # depending on the value of switch
9590 # $rOpts_closing_side_comment_else_flag:
9592 # = 0 add 'if' text to trailing else
9593 # = 1 same as 0 plus:
9594 # add 'if' to 'elsif's if can fit in line length
9595 # add last 'elsif' to trailing else if can fit in one line
9596 # = 2 same as 1 but do not check if exceed line length
9598 # $rif_elsif_text = a reference to a list of all previous closing
9599 # side comments created for this if block
9601 my ( $i_terminal, $block_type, $block_leading_text, $rif_elsif_text ) = @_;
9602 my $csc_text = $block_leading_text;
9604 if ( $block_type eq 'elsif'
9605 && $rOpts_closing_side_comment_else_flag == 0 )
9610 my $count = @{$rif_elsif_text};
9611 return $csc_text unless ($count);
9613 my $if_text = '[ if' . $rif_elsif_text->[0];
9615 # always show the leading 'if' text on 'else'
9616 if ( $block_type eq 'else' ) {
9617 $csc_text .= $if_text;
9621 if ( $rOpts_closing_side_comment_else_flag == 0 ) {
9625 my $last_elsif_text = "";
9627 $last_elsif_text = ' [elsif' . $rif_elsif_text->[ $count - 1 ];
9628 if ( $count > 2 ) { $last_elsif_text = ' [...' . $last_elsif_text; }
9631 # tentatively append one more item
9632 my $saved_text = $csc_text;
9633 if ( $block_type eq 'else' ) {
9634 $csc_text .= $last_elsif_text;
9637 $csc_text .= ' ' . $if_text;
9640 # all done if no length checks requested
9641 if ( $rOpts_closing_side_comment_else_flag == 2 ) {
9645 # undo it if line length exceeded
9648 length($block_type) +
9649 length( $rOpts->{'closing-side-comment-prefix'} ) +
9650 $levels_to_go[$i_terminal] * $rOpts_indent_columns + 3;
9651 if ( $length > maximum_line_length_for_level($leading_block_text_level) ) {
9652 $csc_text = $saved_text;
9657 { # sub balance_csc_text
9672 sub balance_csc_text {
9674 # Append characters to balance a closing side comment so that editors
9675 # such as vim can correctly jump through code.
9677 # input = ## end foreach my $foo ( sort { $b ...
9678 # output = ## end foreach my $foo ( sort { $b ...})
9680 # NOTE: This routine does not currently filter out structures within
9681 # quoted text because the bounce algorithms in text editors do not
9682 # necessarily do this either (a version of vim was checked and
9685 # Some complex examples which will cause trouble for some editors:
9686 # while ( $mask_string =~ /\{[^{]*?\}/g ) {
9687 # if ( $mask_str =~ /\}\s*els[^\{\}]+\{$/ ) {
9688 # if ( $1 eq '{' ) {
9689 # test file test1/braces.pl has many such examples.
9693 # loop to examine characters one-by-one, RIGHT to LEFT and
9694 # build a balancing ending, LEFT to RIGHT.
9695 for ( my $pos = length($csc) - 1 ; $pos >= 0 ; $pos-- ) {
9697 my $char = substr( $csc, $pos, 1 );
9699 # ignore everything except structural characters
9700 next unless ( $matching_char{$char} );
9702 # pop most recently appended character
9703 my $top = chop($csc);
9705 # push it back plus the mate to the newest character
9706 # unless they balance each other.
9707 $csc = $csc . $top . $matching_char{$char} unless $top eq $char;
9710 # return the balanced string
9715 sub add_closing_side_comment {
9719 # add closing side comments after closing block braces if -csc used
9720 my ( $closing_side_comment, $cscw_block_comment );
9722 #---------------------------------------------------------------
9723 # Step 1: loop through all tokens of this line to accumulate
9724 # the text needed to create the closing side comments. Also see
9725 # how the line ends.
9726 #---------------------------------------------------------------
9728 my ( $terminal_type, $i_terminal, $i_block_leading_text,
9729 $block_leading_text, $block_line_count, $block_label )
9730 = accumulate_csc_text();
9732 #---------------------------------------------------------------
9733 # Step 2: make the closing side comment if this ends a block
9734 #---------------------------------------------------------------
9735 my $have_side_comment = $types_to_go[$max_index_to_go] eq '#';
9737 # if this line might end in a block closure..
9739 $terminal_type eq '}'
9744 # the block is long enough
9745 ( $block_line_count >= $rOpts->{'closing-side-comment-interval'} )
9747 # or there is an existing comment to check
9748 || ( $have_side_comment
9749 && $rOpts->{'closing-side-comment-warnings'} )
9752 # .. and if this is one of the types of interest
9753 && $block_type_to_go[$i_terminal] =~
9754 /$closing_side_comment_list_pattern/o
9756 # .. but not an anonymous sub
9757 # These are not normally of interest, and their closing braces are
9758 # often followed by commas or semicolons anyway. This also avoids
9759 # possible erratic output due to line numbering inconsistencies
9760 # in the cases where their closing braces terminate a line.
9761 && $block_type_to_go[$i_terminal] ne 'sub'
9763 # ..and the corresponding opening brace must is not in this batch
9764 # (because we do not need to tag one-line blocks, although this
9765 # should also be caught with a positive -csci value)
9766 && $self->mate_index_to_go($i_terminal) < 0
9771 # this is the last token (line doesn't have a side comment)
9774 # or the old side comment is a closing side comment
9775 || $tokens_to_go[$max_index_to_go] =~
9776 /$closing_side_comment_prefix_pattern/o
9781 # then make the closing side comment text
9782 if ($block_label) { $block_label .= " " }
9784 "$rOpts->{'closing-side-comment-prefix'} $block_label$block_type_to_go[$i_terminal]";
9786 # append any extra descriptive text collected above
9787 if ( $i_block_leading_text == $i_terminal ) {
9788 $token .= $block_leading_text;
9791 $token = balance_csc_text($token)
9792 if $rOpts->{'closing-side-comments-balanced'};
9794 $token =~ s/\s*$//; # trim any trailing whitespace
9796 # handle case of existing closing side comment
9797 if ($have_side_comment) {
9799 # warn if requested and tokens differ significantly
9800 if ( $rOpts->{'closing-side-comment-warnings'} ) {
9801 my $old_csc = $tokens_to_go[$max_index_to_go];
9802 my $new_csc = $token;
9803 $new_csc =~ s/\s+//g; # trim all whitespace
9804 $old_csc =~ s/\s+//g; # trim all whitespace
9805 $new_csc =~ s/[\]\)\}\s]*$//; # trim trailing structures
9806 $old_csc =~ s/[\]\)\}\s]*$//; # trim trailing structures
9807 $new_csc =~ s/(\.\.\.)$//; # trim trailing '...'
9808 my $new_trailing_dots = $1;
9809 $old_csc =~ s/(\.\.\.)\s*$//; # trim trailing '...'
9811 # Patch to handle multiple closing side comments at
9812 # else and elsif's. These have become too complicated
9813 # to check, so if we see an indication of
9814 # '[ if' or '[ # elsif', then assume they were made
9816 if ( $block_type_to_go[$i_terminal] eq 'else' ) {
9817 if ( $old_csc =~ /\[\s*elsif/ ) { $old_csc = $new_csc }
9819 elsif ( $block_type_to_go[$i_terminal] eq 'elsif' ) {
9820 if ( $old_csc =~ /\[\s*if/ ) { $old_csc = $new_csc }
9823 # if old comment is contained in new comment,
9824 # only compare the common part.
9825 if ( length($new_csc) > length($old_csc) ) {
9826 $new_csc = substr( $new_csc, 0, length($old_csc) );
9829 # if the new comment is shorter and has been limited,
9830 # only compare the common part.
9831 if ( length($new_csc) < length($old_csc)
9832 && $new_trailing_dots )
9834 $old_csc = substr( $old_csc, 0, length($new_csc) );
9837 # any remaining difference?
9838 if ( $new_csc ne $old_csc ) {
9840 # just leave the old comment if we are below the threshold
9841 # for creating side comments
9842 if ( $block_line_count <
9843 $rOpts->{'closing-side-comment-interval'} )
9848 # otherwise we'll make a note of it
9852 "perltidy -cscw replaced: $tokens_to_go[$max_index_to_go]\n"
9855 # save the old side comment in a new trailing block
9858 if ( $rOpts->{'timestamp'} ) {
9859 my ( $day, $month, $year ) = (localtime)[ 3, 4, 5 ];
9862 $timestamp = "$year-$month-$day";
9864 $cscw_block_comment =
9865 "## perltidy -cscw $timestamp: $tokens_to_go[$max_index_to_go]";
9866 ## "## perltidy -cscw $year-$month-$day: $tokens_to_go[$max_index_to_go]";
9871 # No differences.. we can safely delete old comment if we
9872 # are below the threshold
9873 if ( $block_line_count <
9874 $rOpts->{'closing-side-comment-interval'} )
9877 $self->unstore_token_to_go()
9878 if ( $types_to_go[$max_index_to_go] eq '#' );
9879 $self->unstore_token_to_go()
9880 if ( $types_to_go[$max_index_to_go] eq 'b' );
9885 # switch to the new csc (unless we deleted it!)
9887 $tokens_to_go[$max_index_to_go] = $token;
9888 $self->sync_token_K($max_index_to_go);
9892 # handle case of NO existing closing side comment
9895 # To avoid inserting a new token in the token arrays, we
9896 # will just return the new side comment so that it can be
9897 # inserted just before it is needed in the call to the
9899 $closing_side_comment = $token;
9902 return ( $closing_side_comment, $cscw_block_comment );
9905 sub previous_nonblank_token {
9909 return "" if ( $im < 0 );
9910 if ( $types_to_go[$im] eq 'b' ) { $im--; }
9911 return "" if ( $im < 0 );
9912 $name = $tokens_to_go[$im];
9914 # prepend any sub name to an isolated -> to avoid unwanted alignments
9915 # [test case is test8/penco.pl]
9916 if ( $name eq '->' ) {
9918 if ( $im >= 0 && $types_to_go[$im] ne 'b' ) {
9919 $name = $tokens_to_go[$im] . $name;
9925 sub send_lines_to_vertical_aligner {
9927 my ( $self, $rbatch_hash ) = @_;
9929 # This routine receives a batch of code for which the final line breaks
9930 # have been defined. Here we prepare the lines for passing to the vertical
9931 # aligner. We do the following tasks:
9932 # - mark certain vertical alignment tokens tokens, such as '=', in each line.
9933 # - make minor indentation adjustments
9934 # - insert extra blank spaces to help display certain logical constructions
9936 my $rlines_K = $rbatch_hash->{rlines_K};
9937 if ( !@{$rlines_K} ) {
9938 Fault("Unexpected call with no lines");
9941 my $n_last_line = @{$rlines_K} - 1;
9942 my $do_not_pad = $rbatch_hash->{do_not_pad};
9944 my $rLL = $self->{rLL};
9945 my $Klimit = $self->{Klimit};
9947 my ( $Kbeg_next, $Kend_next ) = @{ $rlines_K->[0] };
9948 my $type_beg_next = $rLL->[$Kbeg_next]->[_TYPE_];
9949 my $token_beg_next = $rLL->[$Kbeg_next]->[_TOKEN_];
9950 my $type_end_next = $rLL->[$Kend_next]->[_TYPE_];
9952 # Construct indexes to the global_to_go arrays so that called routines can
9953 # still access those arrays. This might eventually be removed
9954 # when all called routines have been converted to access token values
9955 # in the rLL array instead.
9956 my $ibeg0 = $rbatch_hash->{ibeg0};
9957 my $Kbeg0 = $Kbeg_next;
9958 my ( $ri_first, $ri_last );
9959 foreach my $rline ( @{$rlines_K} ) {
9960 my ( $Kbeg, $Kend ) = @{$rline};
9961 my $ibeg = $ibeg0 + $Kbeg - $Kbeg0;
9962 my $iend = $ibeg0 + $Kend - $Kbeg0;
9963 push @{$ri_first}, $ibeg;
9964 push @{$ri_last}, $iend;
9966 #####################################################################
9968 my $valign_batch_number = $self->increment_valign_batch_count();
9970 my ( $cscw_block_comment, $closing_side_comment );
9971 if ( $rOpts->{'closing-side-comments'} ) {
9972 ( $closing_side_comment, $cscw_block_comment ) =
9973 $self->add_closing_side_comment();
9976 my $rindentation_list = [0]; # ref to indentations for each line
9978 # define the array @{$ralignment_type_to_go} for the output tokens
9979 # which will be non-blank for each special token (such as =>)
9980 # for which alignment is required.
9981 my $ralignment_type_to_go =
9982 $self->set_vertical_alignment_markers( $ri_first, $ri_last );
9984 # flush before a long if statement to avoid unwanted alignment
9985 if ( $n_last_line > 0
9986 && $type_beg_next eq 'k'
9987 && $token_beg_next =~ /^(if|unless)$/ )
9989 Perl::Tidy::VerticalAligner::flush();
9992 $self->undo_ci( $ri_first, $ri_last );
9994 $self->set_logical_padding( $ri_first, $ri_last );
9996 # loop to prepare each line for shipment
9998 my ( $Kbeg, $type_beg, $token_beg );
9999 my ( $Kend, $type_end );
10000 for my $n ( 0 .. $n_last_line ) {
10002 my $ibeg = $ri_first->[$n];
10003 my $iend = $ri_last->[$n];
10004 my $rline = $rlines_K->[$n];
10005 my $forced_breakpoint = $rline->[2];
10007 # we may need to look at variables on three consecutive lines ...
10009 # Some vars on line [n-1], if any:
10010 my $Kbeg_last = $Kbeg;
10011 my $type_beg_last = $type_beg;
10012 my $token_beg_last = $token_beg;
10013 my $Kend_last = $Kend;
10014 my $type_end_last = $type_end;
10016 # Some vars on line [n]:
10017 $Kbeg = $Kbeg_next;
10018 $type_beg = $type_beg_next;
10019 $token_beg = $token_beg_next;
10020 $Kend = $Kend_next;
10021 $type_end = $type_end_next;
10023 # We use two slightly different definitions of level jump at the end
10025 # $ljump is the level jump needed by 'sub set_adjusted_indentation'
10026 # $level_jump is the level jump needed by the vertical aligner.
10027 my $ljump = 0; # level jump at end of line
10029 # Get some vars on line [n+1], if any:
10030 if ( $n < $n_last_line ) {
10031 ( $Kbeg_next, $Kend_next ) =
10032 @{ $rlines_K->[ $n + 1 ] };
10033 $type_beg_next = $rLL->[$Kbeg_next]->[_TYPE_];
10034 $token_beg_next = $rLL->[$Kbeg_next]->[_TOKEN_];
10035 $type_end_next = $rLL->[$Kend_next]->[_TYPE_];
10036 $ljump = $rLL->[$Kbeg_next]->[_LEVEL_] - $rLL->[$Kend]->[_LEVEL_];
10039 # level jump at end of line for the vertical aligner:
10043 : $rLL->[ $Kend + 1 ]->[_SLEVEL_] - $rLL->[$Kbeg]->[_SLEVEL_];
10045 $self->delete_needless_alignments( $ibeg, $iend,
10046 $ralignment_type_to_go );
10048 my ( $rtokens, $rfields, $rpatterns ) =
10049 $self->make_alignment_patterns( $ibeg, $iend,
10050 $ralignment_type_to_go );
10052 my ( $indentation, $lev, $level_end, $terminal_type,
10053 $is_semicolon_terminated, $is_outdented_line )
10054 = $self->set_adjusted_indentation( $ibeg, $iend, $rfields, $rpatterns,
10055 $ri_first, $ri_last, $rindentation_list, $ljump );
10057 # we will allow outdenting of long lines..
10058 my $outdent_long_lines = (
10060 # which are long quotes, if allowed
10061 ( $type_beg eq 'Q' && $rOpts->{'outdent-long-quotes'} )
10063 # which are long block comments, if allowed
10066 && $rOpts->{'outdent-long-comments'}
10068 # but not if this is a static block comment
10069 && !$is_static_block_comment
10073 my $rvertical_tightness_flags =
10074 $self->set_vertical_tightness_flags( $n, $n_last_line, $ibeg, $iend,
10075 $ri_first, $ri_last );
10077 # flush an outdented line to avoid any unwanted vertical alignment
10078 Perl::Tidy::VerticalAligner::flush() if ($is_outdented_line);
10080 # Set a flag at the final ':' of a ternary chain to request
10081 # vertical alignment of the final term. Here is a
10082 # slightly complex example:
10084 # $self->{_text} = (
10086 # : $type eq 'item' ? "the $section entry"
10087 # : "the section on $section"
10091 # ? ( $section ? ' in ' : '' ) . "the $page$page_ext manpage"
10092 # : ' elsewhere in this document'
10095 my $is_terminal_ternary = 0;
10097 if ( $type_beg eq ':' || $n > 0 && $type_end_last eq ':' ) {
10098 my $last_leading_type = $n > 0 ? $type_beg_last : ':';
10099 if ( $terminal_type ne ';'
10100 && $n_last_line > $n
10101 && $level_end == $lev )
10103 $level_end = $rLL->[$Kbeg_next]->[_LEVEL_];
10104 $terminal_type = $rLL->[$Kbeg_next]->[_TYPE_];
10107 $last_leading_type eq ':'
10108 && ( ( $terminal_type eq ';' && $level_end <= $lev )
10109 || ( $terminal_type ne ':' && $level_end < $lev ) )
10113 # the terminal term must not contain any ternary terms, as in
10115 # $Is_MSWin32 ? ".\\echo$$"
10116 # : $Is_MacOS ? ":echo$$"
10117 # : ( $Is_NetWare ? "echo$$" : "./echo$$" )
10119 $is_terminal_ternary = 1;
10121 my $KP = $rLL->[$Kbeg]->[_KNEXT_SEQ_ITEM_];
10122 while ( defined($KP) && $KP <= $Kend ) {
10123 my $type_KP = $rLL->[$KP]->[_TYPE_];
10124 if ( $type_KP eq '?' || $type_KP eq ':' ) {
10125 $is_terminal_ternary = 0;
10128 $KP = $rLL->[$KP]->[_KNEXT_SEQ_ITEM_];
10133 # add any new closing side comment to the last line
10134 if ( $closing_side_comment && $n == $n_last_line && @{$rfields} ) {
10135 $rfields->[-1] .= " $closing_side_comment";
10138 # send this new line down the pipe
10139 my $rvalign_hash = {};
10140 $rvalign_hash->{level} = $lev;
10141 $rvalign_hash->{level_end} = $level_end;
10142 $rvalign_hash->{indentation} = $indentation;
10143 $rvalign_hash->{is_forced_break} = $forced_breakpoint || $in_comma_list;
10144 $rvalign_hash->{outdent_long_lines} = $outdent_long_lines;
10145 $rvalign_hash->{is_terminal_ternary} = $is_terminal_ternary;
10146 $rvalign_hash->{is_terminal_statement} = $is_semicolon_terminated;
10147 $rvalign_hash->{do_not_pad} = $do_not_pad;
10148 $rvalign_hash->{rvertical_tightness_flags} = $rvertical_tightness_flags;
10149 $rvalign_hash->{level_jump} = $level_jump;
10151 $rvalign_hash->{valign_batch_number} = $valign_batch_number;
10153 Perl::Tidy::VerticalAligner::valign_input( $rvalign_hash, $rfields,
10154 $rtokens, $rpatterns );
10156 $in_comma_list = $type_end eq ',' && $forced_breakpoint;
10158 # flush an outdented line to avoid any unwanted vertical alignment
10159 Perl::Tidy::VerticalAligner::flush() if ($is_outdented_line);
10163 # Set flag indicating if this line ends in an opening
10164 # token and is very short, so that a blank line is not
10165 # needed if the subsequent line is a comment.
10166 # Examples of what we are looking for:
10172 $last_output_short_opening_token
10174 # line ends in opening token
10175 = $type_end =~ /^[\{\(\[L]$/
10179 # line has either single opening token
10182 # or is a single token followed by opening token.
10183 # Note that sub identifiers have blanks like 'sub doit'
10184 || ( $Kend - $Kbeg <= 2 && $token_beg !~ /\s+/ )
10187 # and limit total to 10 character widths
10188 && token_sequence_length( $ibeg, $iend ) <= 10;
10190 } # end of loop to output each line
10192 # remember indentation of lines containing opening containers for
10193 # later use by sub set_adjusted_indentation
10194 $self->save_opening_indentation( $ri_first, $ri_last, $rindentation_list );
10196 # output any new -cscw block comment
10197 if ($cscw_block_comment) {
10198 Perl::Tidy::VerticalAligner::flush();
10199 $file_writer_object->write_code_line( $cscw_block_comment . "\n" );
10204 { # begin make_alignment_patterns
10206 my %block_type_map;
10212 # map related block names into a common name to
10214 %block_type_map = (
10225 # map certain keywords to the same 'if' class to align
10226 # long if/elsif sequences. [elsif.pl]
10232 'default' => 'given',
10233 'case' => 'switch',
10235 # treat an 'undef' similar to numbers and quotes
10239 # map certain operators to the same class for pattern matching
10249 sub delete_needless_alignments {
10250 my ( $self, $ibeg, $iend, $ralignment_type_to_go ) = @_;
10252 # Remove unwanted alignments. This routine is a place to remove
10253 # alignments which might cause problems at later stages. There are
10254 # currently two types of fixes:
10256 # 1. Remove excess parens
10257 # 2. Remove alignments within 'elsif' conditions
10259 # Patch #1: Excess alignment of parens can prevent other good
10260 # alignments. For example, note the parens in the first two rows of
10261 # the following snippet. They would normally get marked for alignment
10262 # and aligned as follows:
10264 # my $w = $columns * $cell_w + ( $columns + 1 ) * $border;
10265 # my $h = $rows * $cell_h + ( $rows + 1 ) * $border;
10266 # my $img = new Gimp::Image( $w, $h, RGB );
10268 # This causes unnecessary paren alignment and prevents the third equals
10269 # from aligning. If we remove the unwanted alignments we get:
10271 # my $w = $columns * $cell_w + ( $columns + 1 ) * $border;
10272 # my $h = $rows * $cell_h + ( $rows + 1 ) * $border;
10273 # my $img = new Gimp::Image( $w, $h, RGB );
10275 # A rule for doing this which works well is to remove alignment of
10276 # parens whose containers do not contain other aligning tokens, with
10277 # the exception that we always keep alignment of the first opening
10278 # paren on a line (for things like 'if' and 'elsif' statements).
10280 # Setup needed constants
10281 my $i_good_paren = -1;
10282 my $imin_match = $iend + 1;
10283 my $i_elsif_close = $ibeg - 1;
10284 my $i_elsif_open = $iend + 1;
10285 if ( $iend > $ibeg ) {
10286 if ( $types_to_go[$ibeg] eq 'k' ) {
10288 # Paren patch: mark a location of a paren we should keep, such
10289 # as one following something like a leading 'if', 'elsif',..
10290 $i_good_paren = $ibeg + 1;
10291 if ( $types_to_go[$i_good_paren] eq 'b' ) {
10295 # 'elsif' patch: remember the range of the parens of an elsif,
10296 # and do not make alignments within them because this can cause
10297 # loss of padding and overall brace alignment in the vertical
10299 if ( $tokens_to_go[$ibeg] eq 'elsif'
10300 && $i_good_paren < $iend
10301 && $tokens_to_go[$i_good_paren] eq '(' )
10303 $i_elsif_open = $i_good_paren;
10304 $i_elsif_close = $self->mate_index_to_go($i_good_paren);
10309 # Loop to make the fixes on this line
10311 for my $i ( $ibeg .. $iend ) {
10313 if ( $ralignment_type_to_go->[$i] ne '' ) {
10315 # Patch #2: undo alignment within elsif parens
10316 if ( $i > $i_elsif_open && $i < $i_elsif_close ) {
10317 $ralignment_type_to_go->[$i] = '';
10320 push @imatch_list, $i;
10323 if ( $tokens_to_go[$i] eq ')' ) {
10325 # Patch #1: undo the corresponding opening paren if:
10326 # - it is at the top of the stack
10327 # - and not the first overall opening paren
10328 # - does not follow a leading keyword on this line
10329 my $imate = $self->mate_index_to_go($i);
10331 && $imatch_list[-1] eq $imate
10332 && ( $ibeg > 1 || @imatch_list > 1 )
10333 && $imate > $i_good_paren )
10335 $ralignment_type_to_go->[$imate] = '';
10343 sub make_alignment_patterns {
10345 # Here we do some important preliminary work for the
10346 # vertical aligner. We create three arrays for one
10347 # output line. These arrays contain strings that can
10348 # be tested by the vertical aligner to see if
10349 # consecutive lines can be aligned vertically.
10351 # The three arrays are indexed on the vertical
10352 # alignment fields and are:
10353 # @tokens - a list of any vertical alignment tokens for this line.
10354 # These are tokens, such as '=' '&&' '#' etc which
10355 # we want to might align vertically. These are
10356 # decorated with various information such as
10357 # nesting depth to prevent unwanted vertical
10358 # alignment matches.
10359 # @fields - the actual text of the line between the vertical alignment
10361 # @patterns - a modified list of token types, one for each alignment
10362 # field. These should normally each match before alignment is
10363 # allowed, even when the alignment tokens match.
10364 my ( $self, $ibeg, $iend, $ralignment_type_to_go ) = @_;
10368 my $i_start = $ibeg;
10371 my @container_name = ("");
10372 my @multiple_comma_arrows = (undef);
10374 my $j = 0; # field index
10378 for my $i ( $ibeg .. $iend ) {
10380 # Keep track of containers balanced on this line only.
10381 # These are used below to prevent unwanted cross-line alignments.
10382 # Unbalanced containers already avoid aligning across
10383 # container boundaries.
10384 my $tok = $tokens_to_go[$i];
10385 if ( $tok =~ /^[\(\{\[]/ ) { #'(' ) {
10387 # if container is balanced on this line...
10388 my $i_mate = $self->mate_index_to_go($i);
10389 if ( $i_mate > $i && $i_mate <= $iend ) {
10391 my $seqno = $type_sequence_to_go[$i];
10392 my $count = comma_arrow_count($seqno);
10393 $multiple_comma_arrows[$depth] = $count && $count > 1;
10395 # Append the previous token name to make the container name
10396 # more unique. This name will also be given to any commas
10397 # within this container, and it helps avoid undesirable
10398 # alignments of different types of containers.
10400 # Containers beginning with { and [ are given those names
10401 # for uniqueness. That way commas in different containers
10402 # will not match. Here is an example of what this prevents:
10403 # a => [ 1, 2, 3 ],
10404 # b => { b1 => 4, b2 => 5 },
10405 # Here is another example of what we avoid by labeling the
10407 # is_d( [ $a, $a ], [ $b, $c ] );
10408 # is_d( { foo => $a, bar => $a }, { foo => $b, bar => $c } );
10409 # is_d( [ \$a, \$a ], [ \$b, \$c ] );
10412 if ( $tok eq '(' ) {
10413 $name = previous_nonblank_token($i);
10416 $container_name[$depth] = "+" . $name;
10418 # Make the container name even more unique if necessary.
10419 # If we are not vertically aligning this opening paren,
10420 # append a character count to avoid bad alignment because
10421 # it usually looks bad to align commas within containers
10422 # for which the opening parens do not align. Here
10423 # is an example very BAD alignment of commas (because
10424 # the atan2 functions are not all aligned):
10426 # $X * $RTYSQP1 * atan2( $X, $RTYSQP1 ) +
10427 # $Y * $RTXSQP1 * atan2( $Y, $RTXSQP1 ) -
10428 # $X * atan2( $X, 1 ) -
10429 # $Y * atan2( $Y, 1 );
10431 # On the other hand, it is usually okay to align commas if
10432 # opening parens align, such as:
10433 # glVertex3d( $cx + $s * $xs, $cy, $z );
10434 # glVertex3d( $cx, $cy + $s * $ys, $z );
10435 # glVertex3d( $cx - $s * $xs, $cy, $z );
10436 # glVertex3d( $cx, $cy - $s * $ys, $z );
10438 # To distinguish between these situations, we will
10439 # append the length of the line from the previous matching
10440 # token, or beginning of line, to the function name. This
10441 # will allow the vertical aligner to reject undesirable
10444 # if we are not aligning on this paren...
10445 if ( $ralignment_type_to_go->[$i] eq '' ) {
10447 # Sum length from previous alignment
10448 my $len = token_sequence_length( $i_start, $i - 1 );
10449 if ( $i_start == $ibeg ) {
10451 # For first token, use distance from start of line
10452 # but subtract off the indentation due to level.
10453 # Otherwise, results could vary with indentation.
10454 $len += leading_spaces_to_go($ibeg) -
10455 $levels_to_go[$i_start] * $rOpts_indent_columns;
10456 if ( $len < 0 ) { $len = 0 }
10459 # tack this length onto the container name to try
10460 # to make a unique token name
10461 $container_name[$depth] .= "-" . $len;
10465 elsif ( $tokens_to_go[$i] =~ /^[\)\}\]]/ ) {
10466 $depth-- if $depth > 0;
10469 # if we find a new synchronization token, we are done with
10471 if ( $i > $i_start && $ralignment_type_to_go->[$i] ne '' ) {
10473 my $tok = my $raw_tok = $ralignment_type_to_go->[$i];
10475 # map similar items
10476 my $tok_map = $operator_map{$tok};
10477 $tok = $tok_map if ($tok_map);
10479 # make separators in different nesting depths unique
10480 # by appending the nesting depth digit.
10481 if ( $raw_tok ne '#' ) {
10482 $tok .= "$nesting_depth_to_go[$i]";
10485 # also decorate commas with any container name to avoid
10486 # unwanted cross-line alignments.
10487 if ( $raw_tok eq ',' || $raw_tok eq '=>' ) {
10488 if ( $container_name[$depth] ) {
10489 $tok .= $container_name[$depth];
10493 # Patch to avoid aligning leading and trailing if, unless.
10494 # Mark trailing if, unless statements with container names.
10495 # This makes them different from leading if, unless which
10496 # are not so marked at present. If we ever need to name
10497 # them too, we could use ci to distinguish them.
10498 # Example problem to avoid:
10499 # return ( 2, "DBERROR" )
10500 # if ( $retval == 2 );
10501 # if ( scalar @_ ) {
10502 # my ( $a, $b, $c, $d, $e, $f ) = @_;
10504 if ( $raw_tok eq '(' ) {
10505 my $ci = $ci_levels_to_go[$ibeg];
10506 if ( $container_name[$depth] =~ /^\+(if|unless)/
10509 $tok .= $container_name[$depth];
10513 # Decorate block braces with block types to avoid
10514 # unwanted alignments such as the following:
10515 # foreach ( @{$routput_array} ) { $fh->print($_) }
10516 # eval { $fh->close() };
10517 if ( $raw_tok eq '{' && $block_type_to_go[$i] ) {
10518 my $block_type = $block_type_to_go[$i];
10520 # map certain related block types to allow
10521 # else blocks to align
10522 $block_type = $block_type_map{$block_type}
10523 if ( defined( $block_type_map{$block_type} ) );
10525 # remove sub names to allow one-line sub braces to align
10526 # regardless of name
10527 #if ( $block_type =~ /^sub / ) { $block_type = 'sub' }
10528 if ( $block_type =~ /$SUB_PATTERN/ ) { $block_type = 'sub' }
10530 # allow all control-type blocks to align
10531 if ( $block_type =~ /^[A-Z]+$/ ) { $block_type = 'BEGIN' }
10533 $tok .= $block_type;
10536 # Mark multiple copies of certain tokens with the copy number
10537 # This will allow the aligner to decide if they are matched.
10538 # For now, only do this for equals. For example, the two
10539 # equals on the next line will be labeled '=0' and '=0.2'.
10540 # Later, the '=0.2' will be ignored in alignment because it
10543 # $| = $debug = 1 if $opt_d;
10544 # $full_index = 1 if $opt_i;
10546 if ( $raw_tok eq '=' || $raw_tok eq '=>' ) {
10547 $token_count{$tok}++;
10548 if ( $token_count{$tok} > 1 ) {
10549 $tok .= '.' . $token_count{$tok};
10553 # concatenate the text of the consecutive tokens to form
10556 join( '', @tokens_to_go[ $i_start .. $i - 1 ] ) );
10558 # store the alignment token for this field
10559 push( @tokens, $tok );
10561 # get ready for the next batch
10564 $patterns[$j] = "";
10567 # continue accumulating tokens
10568 # handle non-keywords..
10569 if ( $types_to_go[$i] ne 'k' ) {
10570 my $type = $types_to_go[$i];
10572 # Mark most things before arrows as a quote to
10573 # get them to line up. Testfile: mixed.pl.
10574 if ( ( $i < $iend - 1 ) && ( $type =~ /^[wnC]$/ ) ) {
10575 my $next_type = $types_to_go[ $i + 1 ];
10576 my $i_next_nonblank =
10577 ( ( $next_type eq 'b' ) ? $i + 2 : $i + 1 );
10579 if ( $types_to_go[$i_next_nonblank] eq '=>' ) {
10582 # Patch to ignore leading minus before words,
10583 # by changing pattern 'mQ' into just 'Q',
10584 # so that we can align things like this:
10585 # Button => "Print letter \"~$_\"",
10586 # -command => [ sub { print "$_[0]\n" }, $_ ],
10587 if ( $patterns[$j] eq 'm' ) { $patterns[$j] = "" }
10591 # Convert a bareword within braces into a quote for matching. This will
10592 # allow alignment of expressions like this:
10593 # local ( $SIG{'INT'} ) = IGNORE;
10594 # local ( $SIG{ALRM} ) = 'POSTMAN';
10598 && $types_to_go[ $i - 1 ] eq 'L'
10599 && $types_to_go[ $i + 1 ] eq 'R' )
10604 # patch to make numbers and quotes align
10605 if ( $type eq 'n' ) { $type = 'Q' }
10607 # patch to ignore any ! in patterns
10608 if ( $type eq '!' ) { $type = '' }
10610 $patterns[$j] .= $type;
10613 # for keywords we have to use the actual text
10616 my $tok = $tokens_to_go[$i];
10618 # but map certain keywords to a common string to allow
10620 $tok = $keyword_map{$tok}
10621 if ( defined( $keyword_map{$tok} ) );
10622 $patterns[$j] .= $tok;
10626 # done with this line .. join text of tokens to make the last field
10627 push( @fields, join( '', @tokens_to_go[ $i_start .. $iend ] ) );
10628 return ( \@tokens, \@fields, \@patterns );
10631 } # end make_alignment_patterns
10633 { # begin unmatched_indexes
10635 # closure to keep track of unbalanced containers.
10636 # arrays shared by the routines in this block:
10637 my @unmatched_opening_indexes_in_this_batch;
10638 my @unmatched_closing_indexes_in_this_batch;
10639 my %comma_arrow_count;
10641 sub is_unbalanced_batch {
10642 return @unmatched_opening_indexes_in_this_batch +
10643 @unmatched_closing_indexes_in_this_batch;
10646 sub comma_arrow_count {
10648 return $comma_arrow_count{$seqno};
10651 sub match_opening_and_closing_tokens {
10653 # Match up indexes of opening and closing braces, etc, in this batch.
10654 # This has to be done after all tokens are stored because unstoring
10655 # of tokens would otherwise cause trouble.
10657 @unmatched_opening_indexes_in_this_batch = ();
10658 @unmatched_closing_indexes_in_this_batch = ();
10659 %comma_arrow_count = ();
10660 my $comma_arrow_count_contained = 0;
10662 foreach my $i ( 0 .. $max_index_to_go ) {
10663 if ( $type_sequence_to_go[$i] ) {
10664 my $token = $tokens_to_go[$i];
10665 if ( $token =~ /^[\(\[\{\?]$/ ) {
10666 push @unmatched_opening_indexes_in_this_batch, $i;
10668 elsif ( $token =~ /^[\)\]\}\:]$/ ) {
10670 my $i_mate = pop @unmatched_opening_indexes_in_this_batch;
10671 if ( defined($i_mate) && $i_mate >= 0 ) {
10672 if ( $type_sequence_to_go[$i_mate] ==
10673 $type_sequence_to_go[$i] )
10675 $mate_index_to_go[$i] = $i_mate;
10676 $mate_index_to_go[$i_mate] = $i;
10677 my $seqno = $type_sequence_to_go[$i];
10678 if ( $comma_arrow_count{$seqno} ) {
10679 $comma_arrow_count_contained +=
10680 $comma_arrow_count{$seqno};
10684 push @unmatched_opening_indexes_in_this_batch,
10686 push @unmatched_closing_indexes_in_this_batch, $i;
10690 push @unmatched_closing_indexes_in_this_batch, $i;
10694 elsif ( $tokens_to_go[$i] eq '=>' ) {
10695 if (@unmatched_opening_indexes_in_this_batch) {
10696 my $j = $unmatched_opening_indexes_in_this_batch[-1];
10697 my $seqno = $type_sequence_to_go[$j];
10698 $comma_arrow_count{$seqno}++;
10702 return $comma_arrow_count_contained;
10705 sub save_opening_indentation {
10707 # This should be called after each batch of tokens is output. It
10708 # saves indentations of lines of all unmatched opening tokens.
10709 # These will be used by sub get_opening_indentation.
10711 my ( $self, $ri_first, $ri_last, $rindentation_list ) = @_;
10713 # we no longer need indentations of any saved indentations which
10714 # are unmatched closing tokens in this batch, because we will
10715 # never encounter them again. So we can delete them to keep
10716 # the hash size down.
10717 foreach (@unmatched_closing_indexes_in_this_batch) {
10718 my $seqno = $type_sequence_to_go[$_];
10719 delete $saved_opening_indentation{$seqno};
10722 # we need to save indentations of any unmatched opening tokens
10723 # in this batch because we may need them in a subsequent batch.
10724 foreach (@unmatched_opening_indexes_in_this_batch) {
10725 my $seqno = $type_sequence_to_go[$_];
10726 $saved_opening_indentation{$seqno} = [
10727 lookup_opening_indentation(
10728 $_, $ri_first, $ri_last, $rindentation_list
10734 } # end unmatched_indexes
10736 sub get_opening_indentation {
10738 # get the indentation of the line which output the opening token
10739 # corresponding to a given closing token in the current output batch.
10742 # $i_closing - index in this line of a closing token ')' '}' or ']'
10744 # $ri_first - reference to list of the first index $i for each output
10745 # line in this batch
10746 # $ri_last - reference to list of the last index $i for each output line
10748 # $rindentation_list - reference to a list containing the indentation
10749 # used for each line.
10752 # -the indentation of the line which contained the opening token
10753 # which matches the token at index $i_opening
10754 # -and its offset (number of columns) from the start of the line
10756 my ( $self, $i_closing, $ri_first, $ri_last, $rindentation_list ) = @_;
10758 # first, see if the opening token is in the current batch
10759 my $i_opening = $mate_index_to_go[$i_closing];
10760 my ( $indent, $offset, $is_leading, $exists );
10762 if ( $i_opening >= 0 ) {
10764 # it is..look up the indentation
10765 ( $indent, $offset, $is_leading ) =
10766 lookup_opening_indentation( $i_opening, $ri_first, $ri_last,
10767 $rindentation_list );
10770 # if not, it should have been stored in the hash by a previous batch
10772 my $seqno = $type_sequence_to_go[$i_closing];
10774 if ( $saved_opening_indentation{$seqno} ) {
10775 ( $indent, $offset, $is_leading ) =
10776 @{ $saved_opening_indentation{$seqno} };
10779 # some kind of serious error
10780 # (example is badfile.t)
10789 # if no sequence number it must be an unbalanced container
10797 return ( $indent, $offset, $is_leading, $exists );
10800 sub lookup_opening_indentation {
10802 # get the indentation of the line in the current output batch
10803 # which output a selected opening token
10806 # $i_opening - index of an opening token in the current output batch
10807 # whose line indentation we need
10808 # $ri_first - reference to list of the first index $i for each output
10809 # line in this batch
10810 # $ri_last - reference to list of the last index $i for each output line
10812 # $rindentation_list - reference to a list containing the indentation
10813 # used for each line. (NOTE: the first slot in
10814 # this list is the last returned line number, and this is
10815 # followed by the list of indentations).
10818 # -the indentation of the line which contained token $i_opening
10819 # -and its offset (number of columns) from the start of the line
10821 my ( $i_opening, $ri_start, $ri_last, $rindentation_list ) = @_;
10823 if ( !@{$ri_last} ) {
10824 warning("Error in opening_indentation: no lines");
10828 my $nline = $rindentation_list->[0]; # line number of previous lookup
10830 # reset line location if necessary
10831 $nline = 0 if ( $i_opening < $ri_start->[$nline] );
10833 # find the correct line
10834 unless ( $i_opening > $ri_last->[-1] ) {
10835 while ( $i_opening > $ri_last->[$nline] ) { $nline++; }
10838 # error - token index is out of bounds - shouldn't happen
10841 "non-fatal program bug in lookup_opening_indentation - index out of range\n"
10843 report_definite_bug();
10844 $nline = $#{$ri_last};
10847 $rindentation_list->[0] =
10848 $nline; # save line number to start looking next call
10849 my $ibeg = $ri_start->[$nline];
10850 my $offset = token_sequence_length( $ibeg, $i_opening ) - 1;
10851 my $is_leading = ( $ibeg == $i_opening );
10852 return ( $rindentation_list->[ $nline + 1 ], $offset, $is_leading );
10856 my %is_if_elsif_else_unless_while_until_for_foreach;
10860 # These block types may have text between the keyword and opening
10861 # curly. Note: 'else' does not, but must be included to allow trailing
10862 # if/elsif text to be appended.
10863 # patch for SWITCH/CASE: added 'case' and 'when'
10864 my @q = qw(if elsif else unless while until for foreach case when);
10865 @is_if_elsif_else_unless_while_until_for_foreach{@q} =
10869 sub set_adjusted_indentation {
10871 # This routine has the final say regarding the actual indentation of
10872 # a line. It starts with the basic indentation which has been
10873 # defined for the leading token, and then takes into account any
10874 # options that the user has set regarding special indenting and
10878 $self, $ibeg, $iend,
10879 $rfields, $rpatterns, $ri_first,
10880 $ri_last, $rindentation_list, $level_jump
10883 my $rLL = $self->{rLL};
10885 # we need to know the last token of this line
10886 my ( $terminal_type, $i_terminal ) =
10887 $self->terminal_type_i( $ibeg, $iend );
10889 my $is_outdented_line = 0;
10891 my $is_semicolon_terminated = $terminal_type eq ';'
10892 && $nesting_depth_to_go[$iend] < $nesting_depth_to_go[$ibeg];
10894 # NOTE: A future improvement would be to make it semicolon terminated
10895 # even if it does not have a semicolon but is followed by a closing
10896 # block brace. This would undo ci even for something like the
10897 # following, in which the final paren does not have a semicolon because
10898 # it is a possible weld location:
10900 # if ($BOLD_MATH) {
10902 # $labels, $comment,
10903 # join( '', '<B>', &make_math( $mode, '', '', $_ ), '</B>' )
10908 # MOJO: Set a flag if this lines begins with ')->'
10909 my $leading_paren_arrow = (
10910 $types_to_go[$ibeg] eq '}'
10911 && $tokens_to_go[$ibeg] eq ')'
10913 ( $ibeg < $i_terminal && $types_to_go[ $ibeg + 1 ] eq '->' )
10914 || ( $ibeg < $i_terminal - 1
10915 && $types_to_go[ $ibeg + 1 ] eq 'b'
10916 && $types_to_go[ $ibeg + 2 ] eq '->' )
10920 ##########################################################
10921 # Section 1: set a flag and a default indentation
10923 # Most lines are indented according to the initial token.
10924 # But it is common to outdent to the level just after the
10925 # terminal token in certain cases...
10926 # adjust_indentation flag:
10927 # 0 - do not adjust
10929 # 2 - vertically align with opening token
10931 ##########################################################
10932 my $adjust_indentation = 0;
10933 my $default_adjust_indentation = $adjust_indentation;
10936 $opening_indentation, $opening_offset,
10937 $is_leading, $opening_exists
10940 my $type_beg = $types_to_go[$ibeg];
10941 my $token_beg = $tokens_to_go[$ibeg];
10942 my $K_beg = $K_to_go[$ibeg];
10943 my $ibeg_weld_fix = $ibeg;
10945 # QW PATCH 2 (Testing)
10946 # At an isolated closing token of a qw quote which is welded to
10947 # a following closing token, we will locally change its type to
10948 # be the same as its token. This will allow formatting to be the
10949 # same as for an ordinary closing token.
10951 # For -lp formatting se use $ibeg_weld_fix to get around the problem
10952 # that with -lp type formatting the opening and closing tokens to not
10953 # have sequence numbers.
10954 if ( $type_beg eq 'q' && $token_beg =~ /^[\)\}\]\>]/ ) {
10955 my $K_next_nonblank = $self->K_next_code($K_beg);
10956 if ( defined($K_next_nonblank) ) {
10957 my $type_sequence = $rLL->[$K_next_nonblank]->[_TYPE_SEQUENCE_];
10958 my $token = $rLL->[$K_next_nonblank]->[_TOKEN_];
10959 my $welded = weld_len_left( $type_sequence, $token );
10961 $ibeg_weld_fix = $ibeg + ( $K_next_nonblank - $K_beg );
10962 $type_beg = ')'; ##$token_beg;
10967 # if we are at a closing token of some type..
10968 if ( $type_beg =~ /^[\)\}\]R]$/ ) {
10970 # get the indentation of the line containing the corresponding
10973 $opening_indentation, $opening_offset,
10974 $is_leading, $opening_exists
10976 = $self->get_opening_indentation( $ibeg_weld_fix, $ri_first,
10977 $ri_last, $rindentation_list );
10979 # First set the default behavior:
10982 # default behavior is to outdent closing lines
10983 # of the form: "); }; ]; )->xxx;"
10984 $is_semicolon_terminated
10986 # and 'cuddled parens' of the form: ")->pack("
10987 # Bug fix for RT #123749]: the types here were
10988 # incorrectly '(' and ')'. Corrected to be '{' and '}'
10990 $terminal_type eq '{'
10991 && $type_beg eq '}'
10992 && ( $nesting_depth_to_go[$iend] + 1 ==
10993 $nesting_depth_to_go[$ibeg] )
10996 # remove continuation indentation for any line like
10998 # or without ending '{' and unbalanced, such as
10999 # such as '}->{$operator}'
11003 && ( $types_to_go[$iend] eq '{'
11004 || $levels_to_go[$iend] < $levels_to_go[$ibeg] )
11007 # and when the next line is at a lower indentation level
11008 # PATCH: and only if the style allows undoing continuation
11009 # for all closing token types. We should really wait until
11010 # the indentation of the next line is known and then make
11011 # a decision, but that would require another pass.
11012 || ( $level_jump < 0 && !$some_closing_token_indentation )
11014 # Patch for -wn=2, multiple welded closing tokens
11015 || ( $i_terminal > $ibeg
11016 && $types_to_go[$iend] =~ /^[\)\}\]R]$/ )
11020 $adjust_indentation = 1;
11023 # outdent something like '),'
11025 $terminal_type eq ','
11027 # Removed this constraint for -wn
11028 # OLD: allow just one character before the comma
11029 # && $i_terminal == $ibeg + 1
11031 # require LIST environment; otherwise, we may outdent too much -
11032 # this can happen in calls without parentheses (overload.t);
11033 && $container_environment_to_go[$i_terminal] eq 'LIST'
11036 $adjust_indentation = 1;
11039 # undo continuation indentation of a terminal closing token if
11040 # it is the last token before a level decrease. This will allow
11041 # a closing token to line up with its opening counterpart, and
11042 # avoids an indentation jump larger than 1 level.
11043 if ( $types_to_go[$i_terminal] =~ /^[\}\]\)R]$/
11044 && $i_terminal == $ibeg
11045 && defined($K_beg) )
11047 my $K_next_nonblank = $self->K_next_code($K_beg);
11049 # Patch for RT#131115: honor -bli flag at closing brace
11051 $rOpts_brace_left_and_indent
11052 && $block_type_to_go[$i_terminal]
11053 && $block_type_to_go[$i_terminal] =~ /$bli_pattern/o;
11055 if ( !$is_bli && defined($K_next_nonblank) ) {
11056 my $lev = $rLL->[$K_beg]->[_LEVEL_];
11057 my $level_next = $rLL->[$K_next_nonblank]->[_LEVEL_];
11058 $adjust_indentation = 1 if ( $level_next < $lev );
11061 # Patch for RT #96101, in which closing brace of anonymous subs
11062 # was not outdented. We should look ahead and see if there is
11063 # a level decrease at the next token (i.e., a closing token),
11064 # but right now we do not have that information. For now
11065 # we see if we are in a list, and this works well.
11066 # See test files 'sub*.t' for good test cases.
11067 if ( $block_type_to_go[$ibeg] =~ /$ASUB_PATTERN/
11068 && $container_environment_to_go[$i_terminal] eq 'LIST'
11069 && !$rOpts->{'indent-closing-brace'} )
11072 $opening_indentation, $opening_offset,
11073 $is_leading, $opening_exists
11075 = $self->get_opening_indentation( $ibeg, $ri_first,
11076 $ri_last, $rindentation_list );
11077 my $indentation = $leading_spaces_to_go[$ibeg];
11078 if ( defined($opening_indentation)
11079 && get_spaces($indentation) >
11080 get_spaces($opening_indentation) )
11082 $adjust_indentation = 1;
11087 # YVES patch 1 of 2:
11088 # Undo ci of line with leading closing eval brace,
11089 # but not beyond the indention of the line with
11090 # the opening brace.
11091 if ( $block_type_to_go[$ibeg] eq 'eval'
11092 && !$rOpts->{'line-up-parentheses'}
11093 && !$rOpts->{'indent-closing-brace'} )
11096 $opening_indentation, $opening_offset,
11097 $is_leading, $opening_exists
11099 = $self->get_opening_indentation( $ibeg, $ri_first, $ri_last,
11100 $rindentation_list );
11101 my $indentation = $leading_spaces_to_go[$ibeg];
11102 if ( defined($opening_indentation)
11103 && get_spaces($indentation) >
11104 get_spaces($opening_indentation) )
11106 $adjust_indentation = 1;
11110 $default_adjust_indentation = $adjust_indentation;
11112 # Now modify default behavior according to user request:
11113 # handle option to indent non-blocks of the form ); }; ];
11114 # But don't do special indentation to something like ')->pack('
11115 if ( !$block_type_to_go[$ibeg] ) {
11116 my $cti = $closing_token_indentation{ $tokens_to_go[$ibeg] };
11118 if ( $i_terminal <= $ibeg + 1
11119 || $is_semicolon_terminated )
11121 $adjust_indentation = 2;
11124 $adjust_indentation = 0;
11127 elsif ( $cti == 2 ) {
11128 if ($is_semicolon_terminated) {
11129 $adjust_indentation = 3;
11132 $adjust_indentation = 0;
11135 elsif ( $cti == 3 ) {
11136 $adjust_indentation = 3;
11140 # handle option to indent blocks
11143 $rOpts->{'indent-closing-brace'}
11145 $i_terminal == $ibeg # isolated terminal '}'
11146 || $is_semicolon_terminated
11150 $adjust_indentation = 3;
11155 # if at ');', '};', '>;', and '];' of a terminal qw quote
11156 elsif ($rpatterns->[0] =~ /^qb*;$/
11157 && $rfields->[0] =~ /^([\)\}\]\>]);$/ )
11159 if ( $closing_token_indentation{$1} == 0 ) {
11160 $adjust_indentation = 1;
11163 $adjust_indentation = 3;
11167 # if line begins with a ':', align it with any
11168 # previous line leading with corresponding ?
11169 elsif ( $types_to_go[$ibeg] eq ':' ) {
11171 $opening_indentation, $opening_offset,
11172 $is_leading, $opening_exists
11174 = $self->get_opening_indentation( $ibeg, $ri_first, $ri_last,
11175 $rindentation_list );
11176 if ($is_leading) { $adjust_indentation = 2; }
11179 ##########################################################
11180 # Section 2: set indentation according to flag set above
11182 # Select the indentation object to define leading
11183 # whitespace. If we are outdenting something like '} } );'
11184 # then we want to use one level below the last token
11185 # ($i_terminal) in order to get it to fully outdent through
11187 ##########################################################
11190 my $level_end = $levels_to_go[$iend];
11192 if ( $adjust_indentation == 0 ) {
11193 $indentation = $leading_spaces_to_go[$ibeg];
11194 $lev = $levels_to_go[$ibeg];
11196 elsif ( $adjust_indentation == 1 ) {
11198 # Change the indentation to be that of a different token on the line
11199 # Previously, the indentation of the terminal token was used:
11201 # $indentation = $reduced_spaces_to_go[$i_terminal];
11202 # $lev = $levels_to_go[$i_terminal];
11204 # Generalization for MOJO:
11205 # Use the lowest level indentation of the tokens on the line.
11206 # For example, here we can use the indentation of the ending ';':
11207 # } until ($selection > 0 and $selection < 10); # ok to use ';'
11208 # But this will not outdent if we use the terminal indentation:
11209 # )->then( sub { # use indentation of the ->, not the {
11210 # Warning: reduced_spaces_to_go[] may be a reference, do not
11211 # do numerical checks with it
11214 $indentation = $reduced_spaces_to_go[$i_ind];
11215 $lev = $levels_to_go[$i_ind];
11216 while ( $i_ind < $i_terminal ) {
11218 if ( $levels_to_go[$i_ind] < $lev ) {
11219 $indentation = $reduced_spaces_to_go[$i_ind];
11220 $lev = $levels_to_go[$i_ind];
11225 # handle indented closing token which aligns with opening token
11226 elsif ( $adjust_indentation == 2 ) {
11228 # handle option to align closing token with opening token
11229 $lev = $levels_to_go[$ibeg];
11231 # calculate spaces needed to align with opening token
11233 get_spaces($opening_indentation) + $opening_offset;
11235 # Indent less than the previous line.
11237 # Problem: For -lp we don't exactly know what it was if there
11238 # were recoverable spaces sent to the aligner. A good solution
11239 # would be to force a flush of the vertical alignment buffer, so
11240 # that we would know. For now, this rule is used for -lp:
11242 # When the last line did not start with a closing token we will
11243 # be optimistic that the aligner will recover everything wanted.
11245 # This rule will prevent us from breaking a hierarchy of closing
11246 # tokens, and in a worst case will leave a closing paren too far
11247 # indented, but this is better than frequently leaving it not
11249 my $last_spaces = get_spaces($last_indentation_written);
11250 if ( $last_leading_token !~ /^[\}\]\)]$/ ) {
11252 get_recoverable_spaces($last_indentation_written);
11255 # reset the indentation to the new space count if it works
11256 # only options are all or none: nothing in-between looks good
11257 $lev = $levels_to_go[$ibeg];
11258 if ( $space_count < $last_spaces ) {
11259 if ($rOpts_line_up_parentheses) {
11260 my $lev = $levels_to_go[$ibeg];
11262 new_lp_indentation_item( $space_count, $lev, 0, 0, 0 );
11265 $indentation = $space_count;
11269 # revert to default if it doesn't work
11271 $space_count = leading_spaces_to_go($ibeg);
11272 if ( $default_adjust_indentation == 0 ) {
11273 $indentation = $leading_spaces_to_go[$ibeg];
11275 elsif ( $default_adjust_indentation == 1 ) {
11276 $indentation = $reduced_spaces_to_go[$i_terminal];
11277 $lev = $levels_to_go[$i_terminal];
11282 # Full indentaion of closing tokens (-icb and -icp or -cti=2)
11285 # handle -icb (indented closing code block braces)
11286 # Updated method for indented block braces: indent one full level if
11287 # there is no continuation indentation. This will occur for major
11288 # structures such as sub, if, else, but not for things like map
11291 # Note: only code blocks without continuation indentation are
11292 # handled here (if, else, unless, ..). In the following snippet,
11293 # the terminal brace of the sort block will have continuation
11294 # indentation as shown so it will not be handled by the coding
11295 # here. We would have to undo the continuation indentation to do
11296 # this, but it probably looks ok as is. This is a possible future
11297 # update for semicolon terminated lines.
11299 # if ($sortby eq 'date' or $sortby eq 'size') {
11301 # $file_data{$a}{$sortby} <=> $file_data{$b}{$sortby}
11306 if ( $block_type_to_go[$ibeg]
11307 && $ci_levels_to_go[$i_terminal] == 0 )
11309 my $spaces = get_spaces( $leading_spaces_to_go[$i_terminal] );
11310 $indentation = $spaces + $rOpts_indent_columns;
11312 # NOTE: for -lp we could create a new indentation object, but
11313 # there is probably no need to do it
11316 # handle -icp and any -icb block braces which fall through above
11317 # test such as the 'sort' block mentioned above.
11320 # There are currently two ways to handle -icp...
11321 # One way is to use the indentation of the previous line:
11322 # $indentation = $last_indentation_written;
11324 # The other way is to use the indentation that the previous line
11325 # would have had if it hadn't been adjusted:
11326 $indentation = $last_unadjusted_indentation;
11328 # Current method: use the minimum of the two. This avoids
11329 # inconsistent indentation.
11330 if ( get_spaces($last_indentation_written) <
11331 get_spaces($indentation) )
11333 $indentation = $last_indentation_written;
11337 # use previous indentation but use own level
11338 # to cause list to be flushed properly
11339 $lev = $levels_to_go[$ibeg];
11342 # remember indentation except for multi-line quotes, which get
11344 unless ( $ibeg == 0 && $starting_in_quote ) {
11345 $last_indentation_written = $indentation;
11346 $last_unadjusted_indentation = $leading_spaces_to_go[$ibeg];
11347 $last_leading_token = $tokens_to_go[$ibeg];
11350 # be sure lines with leading closing tokens are not outdented more
11351 # than the line which contained the corresponding opening token.
11353 #############################################################
11354 # updated per bug report in alex_bug.pl: we must not
11355 # mess with the indentation of closing logical braces so
11356 # we must treat something like '} else {' as if it were
11357 # an isolated brace
11358 #############################################################
11359 my $is_isolated_block_brace = $block_type_to_go[$ibeg]
11360 && ( $i_terminal == $ibeg
11361 || $is_if_elsif_else_unless_while_until_for_foreach{
11362 $block_type_to_go[$ibeg]
11365 # only do this for a ':; which is aligned with its leading '?'
11366 my $is_unaligned_colon = $types_to_go[$ibeg] eq ':' && !$is_leading;
11369 defined($opening_indentation)
11370 && !$leading_paren_arrow # MOJO
11371 && !$is_isolated_block_brace
11372 && !$is_unaligned_colon
11375 if ( get_spaces($opening_indentation) > get_spaces($indentation) ) {
11376 $indentation = $opening_indentation;
11380 # remember the indentation of each line of this batch
11381 push @{$rindentation_list}, $indentation;
11383 # outdent lines with certain leading tokens...
11386 # must be first word of this batch
11392 # certain leading keywords if requested
11394 $rOpts->{'outdent-keywords'}
11395 && $types_to_go[$ibeg] eq 'k'
11396 && $outdent_keyword{ $tokens_to_go[$ibeg] }
11399 # or labels if requested
11400 || ( $rOpts->{'outdent-labels'} && $types_to_go[$ibeg] eq 'J' )
11402 # or static block comments if requested
11403 || ( $types_to_go[$ibeg] eq '#'
11404 && $rOpts->{'outdent-static-block-comments'}
11405 && $is_static_block_comment )
11410 my $space_count = leading_spaces_to_go($ibeg);
11411 if ( $space_count > 0 ) {
11412 $space_count -= $rOpts_continuation_indentation;
11413 $is_outdented_line = 1;
11414 if ( $space_count < 0 ) { $space_count = 0 }
11416 # do not promote a spaced static block comment to non-spaced;
11417 # this is not normally necessary but could be for some
11418 # unusual user inputs (such as -ci = -i)
11419 if ( $types_to_go[$ibeg] eq '#' && $space_count == 0 ) {
11423 if ($rOpts_line_up_parentheses) {
11425 new_lp_indentation_item( $space_count, $lev, 0, 0, 0 );
11428 $indentation = $space_count;
11433 return ( $indentation, $lev, $level_end, $terminal_type,
11434 $is_semicolon_terminated, $is_outdented_line );
11438 sub mate_index_to_go {
11439 my ( $self, $i ) = @_;
11441 # Return the matching index of a container or ternary pair
11442 # This is equivalent to the array @mate_index_to_go
11443 my $K = $K_to_go[$i];
11444 my $K_mate = $self->K_mate_index($K);
11446 if ( defined($K_mate) ) {
11447 $i_mate = $i + ( $K_mate - $K );
11448 if ( $i_mate < 0 || $i_mate > $max_index_to_go ) {
11452 my $i_mate_alt = $mate_index_to_go[$i];
11454 # Debug code to eventually be removed
11455 if ( 0 && $i_mate_alt != $i_mate ) {
11456 my $tok = $tokens_to_go[$i];
11457 my $type = $types_to_go[$i];
11458 my $tok_mate = '*';
11459 my $type_mate = '*';
11460 if ( $i_mate >= 0 && $i_mate <= $max_index_to_go ) {
11461 $tok_mate = $tokens_to_go[$i_mate];
11462 $type_mate = $types_to_go[$i_mate];
11464 my $seq = $type_sequence_to_go[$i];
11465 my $file = $logger_object->get_input_stream_name();
11468 "mate_index: file '$file': i=$i, imate=$i_mate, should be $i_mate_alt, K=$K, K_mate=$K_mate\ntype=$type, tok=$tok, seq=$seq, max=$max_index_to_go, tok_mate=$tok_mate, type_mate=$type_mate"
11476 # Given the index K of an opening or closing container, or ?/: ternary pair,
11477 # return the index K of the other member of the pair.
11478 my ( $self, $K ) = @_;
11479 return unless defined($K);
11480 my $rLL = $self->{rLL};
11481 my $seqno = $rLL->[$K]->[_TYPE_SEQUENCE_];
11482 return unless ($seqno);
11484 my $K_opening = $self->{K_opening_container}->{$seqno};
11485 if ( defined($K_opening) ) {
11486 if ( $K != $K_opening ) { return $K_opening }
11487 return $self->{K_closing_container}->{$seqno};
11490 $K_opening = $self->{K_opening_ternary}->{$seqno};
11491 if ( defined($K_opening) ) {
11492 if ( $K != $K_opening ) { return $K_opening }
11493 return $self->{K_closing_ternary}->{$seqno};
11498 sub set_vertical_tightness_flags {
11500 my ( $self, $n, $n_last_line, $ibeg, $iend, $ri_first, $ri_last ) = @_;
11502 # Define vertical tightness controls for the nth line of a batch.
11503 # We create an array of parameters which tell the vertical aligner
11504 # if we should combine this line with the next line to achieve the
11505 # desired vertical tightness. The array of parameters contains:
11507 # [0] type: 1=opening non-block 2=closing non-block
11508 # 3=opening block brace 4=closing block brace
11510 # [1] flag: if opening: 1=no multiple steps, 2=multiple steps ok
11511 # if closing: spaces of padding to use
11512 # [2] sequence number of container
11513 # [3] valid flag: do not append if this flag is false. Will be
11514 # true if appropriate -vt flag is set. Otherwise, Will be
11515 # made true only for 2 line container in parens with -lp
11517 # These flags are used by sub set_leading_whitespace in
11518 # the vertical aligner
11520 my $rvertical_tightness_flags = [ 0, 0, 0, 0, 0, 0 ];
11522 #--------------------------------------------------------------
11523 # Vertical Tightness Flags Section 1:
11524 # Handle Lines 1 .. n-1 but not the last line
11525 # For non-BLOCK tokens, we will need to examine the next line
11526 # too, so we won't consider the last line.
11527 #--------------------------------------------------------------
11528 if ( $n < $n_last_line ) {
11530 #--------------------------------------------------------------
11531 # Vertical Tightness Flags Section 1a:
11532 # Look for Type 1, last token of this line is a non-block opening token
11533 #--------------------------------------------------------------
11534 my $ibeg_next = $ri_first->[ $n + 1 ];
11535 my $token_end = $tokens_to_go[$iend];
11536 my $iend_next = $ri_last->[ $n + 1 ];
11538 $type_sequence_to_go[$iend]
11539 && !$block_type_to_go[$iend]
11540 && $is_opening_token{$token_end}
11542 $opening_vertical_tightness{$token_end} > 0
11544 # allow 2-line method call to be closed up
11545 || ( $rOpts_line_up_parentheses
11546 && $token_end eq '('
11548 && $types_to_go[ $iend - 1 ] ne 'b' )
11553 # avoid multiple jumps in nesting depth in one line if
11555 my $ovt = $opening_vertical_tightness{$token_end};
11556 my $iend_next = $ri_last->[ $n + 1 ];
11559 && ( $nesting_depth_to_go[ $iend_next + 1 ] !=
11560 $nesting_depth_to_go[$ibeg_next] )
11564 # If -vt flag has not been set, mark this as invalid
11565 # and aligner will validate it if it sees the closing paren
11567 my $valid_flag = $ovt;
11568 @{$rvertical_tightness_flags} =
11569 ( 1, $ovt, $type_sequence_to_go[$iend], $valid_flag );
11573 #--------------------------------------------------------------
11574 # Vertical Tightness Flags Section 1b:
11575 # Look for Type 2, first token of next line is a non-block closing
11576 # token .. and be sure this line does not have a side comment
11577 #--------------------------------------------------------------
11578 my $token_next = $tokens_to_go[$ibeg_next];
11579 if ( $type_sequence_to_go[$ibeg_next]
11580 && !$block_type_to_go[$ibeg_next]
11581 && $is_closing_token{$token_next}
11582 && $types_to_go[$iend] !~ '#' ) # for safety, shouldn't happen!
11584 my $ovt = $opening_vertical_tightness{$token_next};
11585 my $cvt = $closing_vertical_tightness{$token_next};
11588 # never append a trailing line like )->pack(
11589 # because it will throw off later alignment
11591 $nesting_depth_to_go[$ibeg_next] ==
11592 $nesting_depth_to_go[ $iend_next + 1 ] + 1
11597 $container_environment_to_go[$ibeg_next] ne 'LIST'
11601 # allow closing up 2-line method calls
11602 || ( $rOpts_line_up_parentheses
11603 && $token_next eq ')' )
11610 # decide which trailing closing tokens to append..
11612 if ( $cvt == 2 || $iend_next == $ibeg_next ) { $ok = 1 }
11614 my $str = join( '',
11615 @types_to_go[ $ibeg_next + 1 .. $ibeg_next + 2 ] );
11617 # append closing token if followed by comment or ';'
11618 if ( $str =~ /^b?[#;]/ ) { $ok = 1 }
11622 my $valid_flag = $cvt;
11623 @{$rvertical_tightness_flags} = (
11625 $tightness{$token_next} == 2 ? 0 : 1,
11626 $type_sequence_to_go[$ibeg_next], $valid_flag,
11632 #--------------------------------------------------------------
11633 # Vertical Tightness Flags Section 1c:
11634 # Implement the Opening Token Right flag (Type 2)..
11635 # If requested, move an isolated trailing opening token to the end of
11636 # the previous line which ended in a comma. We could do this
11637 # in sub recombine_breakpoints but that would cause problems
11638 # with -lp formatting. The problem is that indentation will
11639 # quickly move far to the right in nested expressions. By
11640 # doing it after indentation has been set, we avoid changes
11641 # to the indentation. Actual movement of the token takes place
11642 # in sub valign_output_step_B.
11643 #--------------------------------------------------------------
11645 $opening_token_right{ $tokens_to_go[$ibeg_next] }
11647 # previous line is not opening
11648 # (use -sot to combine with it)
11649 && !$is_opening_token{$token_end}
11651 # previous line ended in one of these
11652 # (add other cases if necessary; '=>' and '.' are not necessary
11653 && !$block_type_to_go[$ibeg_next]
11655 # this is a line with just an opening token
11656 && ( $iend_next == $ibeg_next
11657 || $iend_next == $ibeg_next + 2
11658 && $types_to_go[$iend_next] eq '#' )
11660 # looks bad if we align vertically with the wrong container
11661 && $tokens_to_go[$ibeg] ne $tokens_to_go[$ibeg_next]
11664 my $valid_flag = 1;
11665 my $spaces = ( $types_to_go[ $ibeg_next - 1 ] eq 'b' ) ? 1 : 0;
11666 @{$rvertical_tightness_flags} =
11667 ( 2, $spaces, $type_sequence_to_go[$ibeg_next], $valid_flag, );
11670 #--------------------------------------------------------------
11671 # Vertical Tightness Flags Section 1d:
11672 # Stacking of opening and closing tokens (Type 2)
11673 #--------------------------------------------------------------
11675 my $token_beg_next = $tokens_to_go[$ibeg_next];
11677 # patch to make something like 'qw(' behave like an opening paren
11679 if ( $types_to_go[$ibeg_next] eq 'q' ) {
11680 if ( $token_beg_next =~ /^qw\s*([\[\(\{])$/ ) {
11681 $token_beg_next = $1;
11685 if ( $is_closing_token{$token_end}
11686 && $is_closing_token{$token_beg_next} )
11688 $stackable = $stack_closing_token{$token_beg_next}
11689 unless ( $block_type_to_go[$ibeg_next] )
11690 ; # shouldn't happen; just checking
11692 elsif ($is_opening_token{$token_end}
11693 && $is_opening_token{$token_beg_next} )
11695 $stackable = $stack_opening_token{$token_beg_next}
11696 unless ( $block_type_to_go[$ibeg_next] )
11697 ; # shouldn't happen; just checking
11702 my $is_semicolon_terminated;
11703 if ( $n + 1 == $n_last_line ) {
11704 my ( $terminal_type, $i_terminal ) =
11705 $self->terminal_type_i( $ibeg_next, $iend_next );
11706 $is_semicolon_terminated = $terminal_type eq ';'
11707 && $nesting_depth_to_go[$iend_next] <
11708 $nesting_depth_to_go[$ibeg_next];
11711 # this must be a line with just an opening token
11712 # or end in a semicolon
11714 $is_semicolon_terminated
11715 || ( $iend_next == $ibeg_next
11716 || $iend_next == $ibeg_next + 2
11717 && $types_to_go[$iend_next] eq '#' )
11720 my $valid_flag = 1;
11721 my $spaces = ( $types_to_go[ $ibeg_next - 1 ] eq 'b' ) ? 1 : 0;
11722 @{$rvertical_tightness_flags} =
11723 ( 2, $spaces, $type_sequence_to_go[$ibeg_next], $valid_flag,
11729 #--------------------------------------------------------------
11730 # Vertical Tightness Flags Section 2:
11731 # Handle type 3, opening block braces on last line of the batch
11732 # Check for a last line with isolated opening BLOCK curly
11733 #--------------------------------------------------------------
11734 elsif ($rOpts_block_brace_vertical_tightness
11736 && $types_to_go[$iend] eq '{'
11737 && $block_type_to_go[$iend] =~
11738 /$block_brace_vertical_tightness_pattern/o )
11740 @{$rvertical_tightness_flags} =
11741 ( 3, $rOpts_block_brace_vertical_tightness, 0, 1 );
11744 #--------------------------------------------------------------
11745 # Vertical Tightness Flags Section 3:
11746 # Handle type 4, a closing block brace on the last line of the batch Check
11747 # for a last line with isolated closing BLOCK curly
11748 #--------------------------------------------------------------
11749 elsif ($rOpts_stack_closing_block_brace
11751 && $block_type_to_go[$iend]
11752 && $types_to_go[$iend] eq '}' )
11754 my $spaces = $rOpts_block_brace_tightness == 2 ? 0 : 1;
11755 @{$rvertical_tightness_flags} =
11756 ( 4, $spaces, $type_sequence_to_go[$iend], 1 );
11759 # pack in the sequence numbers of the ends of this line
11760 $rvertical_tightness_flags->[4] = get_seqno($ibeg);
11761 $rvertical_tightness_flags->[5] = get_seqno($iend);
11762 return $rvertical_tightness_flags;
11767 # get opening and closing sequence numbers of a token for the vertical
11768 # aligner. Assign qw quotes a value to allow qw opening and closing tokens
11769 # to be treated somewhat like opening and closing tokens for stacking
11770 # tokens by the vertical aligner.
11772 my $seqno = $type_sequence_to_go[$ii];
11773 if ( $types_to_go[$ii] eq 'q' ) {
11776 $seqno = $SEQ_QW if ( $tokens_to_go[$ii] =~ /^qw\s*[\(\{\[]/ );
11779 if ( !$ending_in_quote ) {
11780 $seqno = $SEQ_QW if ( $tokens_to_go[$ii] =~ /[\)\}\]]$/ );
11788 my %is_vertical_alignment_type;
11789 my %is_not_vertical_alignment_token;
11790 my %is_vertical_alignment_keyword;
11791 my %is_terminal_alignment_type;
11792 my %is_low_level_alignment_token;
11798 # Replaced =~ and // in the list. // had been removed in RT 119588
11800 = **= += *= &= <<= &&= -= /= |= >>= ||= //= .= %= ^= x=
11801 { ? : => && || ~~ !~~ =~ !~ //
11803 @is_vertical_alignment_type{@q} = (1) x scalar(@q);
11805 # These 'tokens' are not aligned. We need this to remove [
11806 # from the above list because it has type ='{'
11808 @is_not_vertical_alignment_token{@q} = (1) x scalar(@q);
11810 # these are the only types aligned at a line end
11812 @is_terminal_alignment_type{@q} = (1) x scalar(@q);
11814 # these tokens only align at line level
11816 @is_low_level_alignment_token{@q} = (1) x scalar(@q);
11818 # eq and ne were removed from this list to improve alignment chances
11819 @q = qw(if unless and or err for foreach while until);
11820 @is_vertical_alignment_keyword{@q} = (1) x scalar(@q);
11823 sub set_vertical_alignment_markers {
11825 # This routine takes the first step toward vertical alignment of the
11826 # lines of output text. It looks for certain tokens which can serve as
11827 # vertical alignment markers (such as an '=').
11829 # Method: We look at each token $i in this output batch and set
11830 # $ralignment_type_to_go->[$i] equal to those tokens at which we would
11831 # accept vertical alignment.
11833 my ( $self, $ri_first, $ri_last ) = @_;
11835 my $ralignment_type_to_go;
11836 for my $i ( 0 .. $max_index_to_go ) {
11837 $ralignment_type_to_go->[$i] = '';
11840 # nothing to do if we aren't allowed to change whitespace
11841 if ( !$rOpts_add_whitespace ) {
11842 return $ralignment_type_to_go;
11845 # remember the index of last nonblank token before any sidecomment
11846 my $i_terminal = $max_index_to_go;
11847 if ( $types_to_go[$i_terminal] eq '#' ) {
11848 if ( $i_terminal > 0 && $types_to_go[ --$i_terminal ] eq 'b' ) {
11849 if ( $i_terminal > 0 ) { --$i_terminal }
11853 # look at each line of this batch..
11854 my $last_vertical_alignment_before_index;
11855 my $vert_last_nonblank_type;
11856 my $vert_last_nonblank_token;
11857 my $vert_last_nonblank_block_type;
11858 my $max_line = @{$ri_first} - 1;
11860 foreach my $line ( 0 .. $max_line ) {
11861 my $ibeg = $ri_first->[$line];
11862 my $iend = $ri_last->[$line];
11863 $last_vertical_alignment_before_index = -1;
11864 $vert_last_nonblank_type = '';
11865 $vert_last_nonblank_token = '';
11866 $vert_last_nonblank_block_type = '';
11868 # look at each token in this output line..
11869 my $level_beg = $levels_to_go[$ibeg];
11870 foreach my $i ( $ibeg .. $iend ) {
11871 my $alignment_type = '';
11872 my $type = $types_to_go[$i];
11873 my $block_type = $block_type_to_go[$i];
11874 my $token = $tokens_to_go[$i];
11876 # do not align tokens at lower level then start of line
11877 # except for side comments
11878 if ( $levels_to_go[$i] < $levels_to_go[$ibeg]
11879 && $types_to_go[$i] ne '#' )
11881 $ralignment_type_to_go->[$i] = '';
11885 #--------------------------------------------------------
11886 # First see if we want to align BEFORE this token
11887 #--------------------------------------------------------
11889 # The first possible token that we can align before
11890 # is index 2 because: 1) it doesn't normally make sense to
11891 # align before the first token and 2) the second
11892 # token must be a blank if we are to align before
11894 if ( $i < $ibeg + 2 ) { }
11896 # must follow a blank token
11897 elsif ( $types_to_go[ $i - 1 ] ne 'b' ) { }
11899 # align a side comment --
11900 elsif ( $type eq '#' ) {
11904 # it is a static side comment
11906 $rOpts->{'static-side-comments'}
11907 && $token =~ /$static_side_comment_pattern/o
11910 # or a closing side comment
11911 || ( $vert_last_nonblank_block_type
11913 /$closing_side_comment_prefix_pattern/o )
11916 $alignment_type = $type;
11917 } ## Example of a static side comment
11920 # otherwise, do not align two in a row to create a
11922 elsif ( $last_vertical_alignment_before_index == $i - 2 ) { }
11924 # align before one of these keywords
11925 # (within a line, since $i>1)
11926 elsif ( $type eq 'k' ) {
11928 # /^(if|unless|and|or|eq|ne)$/
11929 if ( $is_vertical_alignment_keyword{$token} ) {
11930 $alignment_type = $token;
11934 # align before one of these types..
11935 # Note: add '.' after new vertical aligner is operational
11936 elsif ( $is_vertical_alignment_type{$type}
11937 && !$is_not_vertical_alignment_token{$token} )
11939 $alignment_type = $token;
11941 # Do not align a terminal token. Although it might
11942 # occasionally look ok to do this, this has been found to be
11943 # a good general rule. The main problems are:
11944 # (1) that the terminal token (such as an = or :) might get
11945 # moved far to the right where it is hard to see because
11946 # nothing follows it, and
11947 # (2) doing so may prevent other good alignments.
11948 # Current exceptions are && and ||
11949 if ( $i == $iend || $i >= $i_terminal ) {
11950 $alignment_type = ""
11951 unless ( $is_terminal_alignment_type{$type} );
11954 # Do not align leading ': (' or '. ('. This would prevent
11955 # alignment in something like the following:
11957 # ( $input_line_number < 10 ) ? " "
11958 # : ( $input_line_number < 100 ) ? " "
11962 # ( $case_matters ? $accessor : " lc($accessor) " )
11963 # . ( $yesno ? " eq " : " ne " )
11965 # Also, do not align a ( following a leading ? so we can
11966 # align something like this:
11967 # $converter{$_}->{ushortok} =
11968 # $PDL::IO::Pic::biggrays
11969 # ? ( m/GIF/ ? 0 : 1 )
11970 # : ( m/GIF|RAST|IFF/ ? 0 : 1 );
11971 if ( $i == $ibeg + 2
11972 && $types_to_go[$ibeg] =~ /^[\.\:\?]$/
11973 && $types_to_go[ $i - 1 ] eq 'b' )
11975 $alignment_type = "";
11978 # Certain tokens only align at the same level as the
11979 # initial line level
11980 if ( $is_low_level_alignment_token{$token}
11981 && $levels_to_go[$i] != $level_beg )
11983 $alignment_type = "";
11986 # For a paren after keyword, only align something like this:
11988 # elsif ( $b ) { &b }
11989 if ( $token eq '(' ) {
11991 if ( $vert_last_nonblank_type eq 'k' ) {
11992 $alignment_type = ""
11993 unless $vert_last_nonblank_token =~
11994 /^(if|unless|elsif)$/;
11998 # be sure the alignment tokens are unique
11999 # This didn't work well: reason not determined
12000 # if ($token ne $type) {$alignment_type .= $type}
12003 # NOTE: This is deactivated because it causes the previous
12004 # if/elsif alignment to fail
12005 #elsif ( $type eq '}' && $token eq '}' && $block_type_to_go[$i])
12006 #{ $alignment_type = $type; }
12008 if ($alignment_type) {
12009 $last_vertical_alignment_before_index = $i;
12012 #--------------------------------------------------------
12013 # Next see if we want to align AFTER the previous nonblank
12014 #--------------------------------------------------------
12016 # We want to line up ',' and interior ';' tokens, with the added
12017 # space AFTER these tokens. (Note: interior ';' is included
12018 # because it may occur in short blocks).
12021 # we haven't already set it
12024 # and its not the first token of the line
12027 # and it follows a blank
12028 && $types_to_go[ $i - 1 ] eq 'b'
12030 # and previous token IS one of these:
12031 && ( $vert_last_nonblank_type =~ /^[\,\;]$/ )
12033 # and it's NOT one of these
12034 && ( $type !~ /^[b\#\)\]\}]$/ )
12036 # then go ahead and align
12040 $alignment_type = $vert_last_nonblank_type;
12043 #--------------------------------------------------------
12044 # then store the value
12045 #--------------------------------------------------------
12046 $ralignment_type_to_go->[$i] = $alignment_type;
12047 if ( $type ne 'b' ) {
12048 $vert_last_nonblank_type = $type;
12049 $vert_last_nonblank_token = $token;
12050 $vert_last_nonblank_block_type = $block_type;
12054 return $ralignment_type_to_go;
12058 sub terminal_type_i {
12060 # returns type of last token on this line (terminal token), as follows:
12061 # returns # for a full-line comment
12062 # returns ' ' for a blank line
12063 # otherwise returns final token type
12065 my ( $self, $ibeg, $iend ) = @_;
12067 # Start at the end and work backwards
12069 my $type_i = $types_to_go[$i];
12071 # Check for side comment
12072 if ( $type_i eq '#' ) {
12074 if ( $i < $ibeg ) {
12075 return wantarray ? ( $type_i, $ibeg ) : $type_i;
12077 $type_i = $types_to_go[$i];
12080 # Skip past a blank
12081 if ( $type_i eq 'b' ) {
12083 if ( $i < $ibeg ) {
12084 return wantarray ? ( $type_i, $ibeg ) : $type_i;
12086 $type_i = $types_to_go[$i];
12089 # Found it..make sure it is a BLOCK termination,
12090 # but hide a terminal } after sort/grep/map because it is not
12091 # necessarily the end of the line. (terminal.t)
12092 my $block_type = $block_type_to_go[$i];
12096 || ( $is_sort_map_grep_eval_do{$block_type} ) )
12101 return wantarray ? ( $type_i, $i ) : $type_i;
12104 sub terminal_type_K {
12106 # returns type of last token on this line (terminal token), as follows:
12107 # returns # for a full-line comment
12108 # returns ' ' for a blank line
12109 # otherwise returns final token type
12111 my ( $self, $Kbeg, $Kend ) = @_;
12112 my $rLL = $self->{rLL};
12114 if ( !defined($Kend) ) {
12115 Fault("Error in terminal_type_K: Kbeg=$Kbeg > $Kend=Kend");
12118 # Start at the end and work backwards
12120 my $type_K = $rLL->[$K]->[_TYPE_];
12122 # Check for side comment
12123 if ( $type_K eq '#' ) {
12125 if ( $K < $Kbeg ) {
12126 return wantarray ? ( $type_K, $Kbeg ) : $type_K;
12128 $type_K = $rLL->[$K]->[_TYPE_];
12131 # Skip past a blank
12132 if ( $type_K eq 'b' ) {
12134 if ( $K < $Kbeg ) {
12135 return wantarray ? ( $type_K, $Kbeg ) : $type_K;
12137 $type_K = $rLL->[$K]->[_TYPE_];
12140 # found it..make sure it is a BLOCK termination,
12141 # but hide a terminal } after sort/grep/map because it is not
12142 # necessarily the end of the line. (terminal.t)
12143 my $block_type = $rLL->[$K]->[_BLOCK_TYPE_];
12147 || ( $is_sort_map_grep_eval_do{$block_type} ) )
12152 return wantarray ? ( $type_K, $K ) : $type_K;
12156 { # set_bond_strengths
12158 my %is_good_keyword_breakpoint;
12159 my %is_lt_gt_le_ge;
12161 my %binary_bond_strength;
12168 sub bias_table_key {
12169 my ( $type, $token ) = @_;
12170 my $bias_table_key = $type;
12171 if ( $type eq 'k' ) {
12172 $bias_table_key = $token;
12173 if ( $token eq 'err' ) { $bias_table_key = 'or' }
12175 return $bias_table_key;
12178 sub initialize_bond_strength_hashes {
12181 @q = qw(if unless while until for foreach);
12182 @is_good_keyword_breakpoint{@q} = (1) x scalar(@q);
12184 @q = qw(lt gt le ge);
12185 @is_lt_gt_le_ge{@q} = (1) x scalar(@q);
12187 # The decision about where to break a line depends upon a "bond
12188 # strength" between tokens. The LOWER the bond strength, the MORE
12189 # likely a break. A bond strength may be any value but to simplify
12190 # things there are several pre-defined strength levels:
12192 # NO_BREAK => 10000;
12193 # VERY_STRONG => 100;
12197 # VERY_WEAK => 0.55;
12199 # The strength values are based on trial-and-error, and need to be
12200 # tweaked occasionally to get desired results. Some comments:
12202 # 1. Only relative strengths are important. small differences
12203 # in strengths can make big formatting differences.
12204 # 2. Each indentation level adds one unit of bond strength.
12205 # 3. A value of NO_BREAK makes an unbreakable bond
12206 # 4. A value of VERY_WEAK is the strength of a ','
12207 # 5. Values below NOMINAL are considered ok break points.
12208 # 6. Values above NOMINAL are considered poor break points.
12210 # The bond strengths should roughly follow precedence order where
12211 # possible. If you make changes, please check the results very
12212 # carefully on a variety of scripts. Testing with the -extrude
12213 # options is particularly helpful in exercising all of the rules.
12215 # Wherever possible, bond strengths are defined in the following
12216 # tables. There are two main stages to setting bond strengths and
12217 # two types of tables:
12219 # The first stage involves looking at each token individually and
12220 # defining left and right bond strengths, according to if we want
12221 # to break to the left or right side, and how good a break point it
12222 # is. For example tokens like =, ||, && make good break points and
12223 # will have low strengths, but one might want to break on either
12224 # side to put them at the end of one line or beginning of the next.
12226 # The second stage involves looking at certain pairs of tokens and
12227 # defining a bond strength for that particular pair. This second
12228 # stage has priority.
12230 #---------------------------------------------------------------
12231 # Bond Strength BEGIN Section 1.
12232 # Set left and right bond strengths of individual tokens.
12233 #---------------------------------------------------------------
12235 # NOTE: NO_BREAK's set in this section first are HINTS which will
12236 # probably not be honored. Essential NO_BREAKS's should be set in
12237 # BEGIN Section 2 or hardwired in the NO_BREAK coding near the end
12238 # of this subroutine.
12240 # Note that we are setting defaults in this section. The user
12241 # cannot change bond strengths but can cause the left and right
12242 # bond strengths of any token type to be swapped through the use of
12243 # the -wba and -wbb flags. In this way the user can determine if a
12244 # breakpoint token should appear at the end of one line or the
12245 # beginning of the next line.
12247 # The hash keys in this section are token types, plus the text of
12248 # certain keywords like 'or', 'and'.
12250 # no break around possible filehandle
12251 $left_bond_strength{'Z'} = NO_BREAK;
12252 $right_bond_strength{'Z'} = NO_BREAK;
12254 # never put a bare word on a new line:
12255 # example print (STDERR, "bla"); will fail with break after (
12256 $left_bond_strength{'w'} = NO_BREAK;
12258 # blanks always have infinite strength to force breaks after
12260 $right_bond_strength{'b'} = NO_BREAK;
12262 # try not to break on exponentation
12263 @q = qw# ** .. ... <=> #;
12264 @left_bond_strength{@q} = (STRONG) x scalar(@q);
12265 @right_bond_strength{@q} = (STRONG) x scalar(@q);
12267 # The comma-arrow has very low precedence but not a good break point
12268 $left_bond_strength{'=>'} = NO_BREAK;
12269 $right_bond_strength{'=>'} = NOMINAL;
12271 # ok to break after label
12272 $left_bond_strength{'J'} = NO_BREAK;
12273 $right_bond_strength{'J'} = NOMINAL;
12274 $left_bond_strength{'j'} = STRONG;
12275 $right_bond_strength{'j'} = STRONG;
12276 $left_bond_strength{'A'} = STRONG;
12277 $right_bond_strength{'A'} = STRONG;
12279 $left_bond_strength{'->'} = STRONG;
12280 $right_bond_strength{'->'} = VERY_STRONG;
12282 $left_bond_strength{'CORE::'} = NOMINAL;
12283 $right_bond_strength{'CORE::'} = NO_BREAK;
12285 # breaking AFTER modulus operator is ok:
12287 @left_bond_strength{@q} = (STRONG) x scalar(@q);
12288 @right_bond_strength{@q} =
12289 ( 0.1 * NOMINAL + 0.9 * STRONG ) x scalar(@q);
12291 # Break AFTER math operators * and /
12293 @left_bond_strength{@q} = (STRONG) x scalar(@q);
12294 @right_bond_strength{@q} = (NOMINAL) x scalar(@q);
12296 # Break AFTER weakest math operators + and -
12297 # Make them weaker than * but a bit stronger than '.'
12299 @left_bond_strength{@q} = (STRONG) x scalar(@q);
12300 @right_bond_strength{@q} =
12301 ( 0.91 * NOMINAL + 0.09 * WEAK ) x scalar(@q);
12303 # breaking BEFORE these is just ok:
12305 @right_bond_strength{@q} = (STRONG) x scalar(@q);
12306 @left_bond_strength{@q} = (NOMINAL) x scalar(@q);
12308 # breaking before the string concatenation operator seems best
12309 # because it can be hard to see at the end of a line
12310 $right_bond_strength{'.'} = STRONG;
12311 $left_bond_strength{'.'} = 0.9 * NOMINAL + 0.1 * WEAK;
12313 @q = qw< } ] ) R >;
12314 @left_bond_strength{@q} = (STRONG) x scalar(@q);
12315 @right_bond_strength{@q} = (NOMINAL) x scalar(@q);
12317 # make these a little weaker than nominal so that they get
12318 # favored for end-of-line characters
12319 @q = qw< != == =~ !~ ~~ !~~ >;
12320 @left_bond_strength{@q} = (STRONG) x scalar(@q);
12321 @right_bond_strength{@q} =
12322 ( 0.9 * NOMINAL + 0.1 * WEAK ) x scalar(@q);
12324 # break AFTER these
12325 @q = qw# < > | & >= <= #;
12326 @left_bond_strength{@q} = (VERY_STRONG) x scalar(@q);
12327 @right_bond_strength{@q} =
12328 ( 0.8 * NOMINAL + 0.2 * WEAK ) x scalar(@q);
12330 # breaking either before or after a quote is ok
12331 # but bias for breaking before a quote
12332 $left_bond_strength{'Q'} = NOMINAL;
12333 $right_bond_strength{'Q'} = NOMINAL + 0.02;
12334 $left_bond_strength{'q'} = NOMINAL;
12335 $right_bond_strength{'q'} = NOMINAL;
12337 # starting a line with a keyword is usually ok
12338 $left_bond_strength{'k'} = NOMINAL;
12340 # we usually want to bond a keyword strongly to what immediately
12341 # follows, rather than leaving it stranded at the end of a line
12342 $right_bond_strength{'k'} = STRONG;
12344 $left_bond_strength{'G'} = NOMINAL;
12345 $right_bond_strength{'G'} = STRONG;
12347 # assignment operators
12349 = **= += *= &= <<= &&=
12350 -= /= |= >>= ||= //=
12355 # Default is to break AFTER various assignment operators
12356 @left_bond_strength{@q} = (STRONG) x scalar(@q);
12357 @right_bond_strength{@q} =
12358 ( 0.4 * WEAK + 0.6 * VERY_WEAK ) x scalar(@q);
12360 # Default is to break BEFORE '&&' and '||' and '//'
12361 # set strength of '||' to same as '=' so that chains like
12362 # $a = $b || $c || $d will break before the first '||'
12363 $right_bond_strength{'||'} = NOMINAL;
12364 $left_bond_strength{'||'} = $right_bond_strength{'='};
12366 # same thing for '//'
12367 $right_bond_strength{'//'} = NOMINAL;
12368 $left_bond_strength{'//'} = $right_bond_strength{'='};
12370 # set strength of && a little higher than ||
12371 $right_bond_strength{'&&'} = NOMINAL;
12372 $left_bond_strength{'&&'} = $left_bond_strength{'||'} + 0.1;
12374 $left_bond_strength{';'} = VERY_STRONG;
12375 $right_bond_strength{';'} = VERY_WEAK;
12376 $left_bond_strength{'f'} = VERY_STRONG;
12378 # make right strength of for ';' a little less than '='
12379 # to make for contents break after the ';' to avoid this:
12380 # for ( $j = $number_of_fields - 1 ; $j < $item_count ; $j +=
12381 # $number_of_fields )
12382 # and make it weaker than ',' and 'and' too
12383 $right_bond_strength{'f'} = VERY_WEAK - 0.03;
12385 # The strengths of ?/: should be somewhere between
12386 # an '=' and a quote (NOMINAL),
12387 # make strength of ':' slightly less than '?' to help
12388 # break long chains of ? : after the colons
12389 $left_bond_strength{':'} = 0.4 * WEAK + 0.6 * NOMINAL;
12390 $right_bond_strength{':'} = NO_BREAK;
12391 $left_bond_strength{'?'} = $left_bond_strength{':'} + 0.01;
12392 $right_bond_strength{'?'} = NO_BREAK;
12394 $left_bond_strength{','} = VERY_STRONG;
12395 $right_bond_strength{','} = VERY_WEAK;
12397 # remaining digraphs and trigraphs not defined above
12398 @q = qw( :: <> ++ --);
12399 @left_bond_strength{@q} = (WEAK) x scalar(@q);
12400 @right_bond_strength{@q} = (STRONG) x scalar(@q);
12402 # Set bond strengths of certain keywords
12403 # make 'or', 'err', 'and' slightly weaker than a ','
12404 $left_bond_strength{'and'} = VERY_WEAK - 0.01;
12405 $left_bond_strength{'or'} = VERY_WEAK - 0.02;
12406 $left_bond_strength{'err'} = VERY_WEAK - 0.02;
12407 $left_bond_strength{'xor'} = NOMINAL;
12408 $right_bond_strength{'and'} = NOMINAL;
12409 $right_bond_strength{'or'} = NOMINAL;
12410 $right_bond_strength{'err'} = NOMINAL;
12411 $right_bond_strength{'xor'} = STRONG;
12413 #---------------------------------------------------------------
12414 # Bond Strength BEGIN Section 2.
12415 # Set binary rules for bond strengths between certain token types.
12416 #---------------------------------------------------------------
12418 # We have a little problem making tables which apply to the
12419 # container tokens. Here is a list of container tokens and
12422 # type tokens // meaning
12423 # { {, [, ( // indent
12424 # } }, ], ) // outdent
12425 # [ [ // left non-structural [ (enclosing an array index)
12426 # ] ] // right non-structural square bracket
12427 # ( ( // left non-structural paren
12428 # ) ) // right non-structural paren
12429 # L { // left non-structural curly brace (enclosing a key)
12430 # R } // right non-structural curly brace
12432 # Some rules apply to token types and some to just the token
12433 # itself. We solve the problem by combining type and token into a
12434 # new hash key for the container types.
12436 # If a rule applies to a token 'type' then we need to make rules
12437 # for each of these 'type.token' combinations:
12448 # If a rule applies to a token then we need to make rules for
12449 # these 'type.token' combinations:
12458 # allow long lines before final { in an if statement, as in:
12463 # Otherwise, the line before the { tends to be too short.
12465 $binary_bond_strength{'))'}{'{{'} = VERY_WEAK + 0.03;
12466 $binary_bond_strength{'(('}{'{{'} = NOMINAL;
12468 # break on something like '} (', but keep this stronger than a ','
12469 # example is in 'howe.pl'
12470 $binary_bond_strength{'R}'}{'(('} = 0.8 * VERY_WEAK + 0.2 * WEAK;
12471 $binary_bond_strength{'}}'}{'(('} = 0.8 * VERY_WEAK + 0.2 * WEAK;
12473 # keep matrix and hash indices together
12474 # but make them a little below STRONG to allow breaking open
12475 # something like {'some-word'}{'some-very-long-word'} at the }{
12477 $binary_bond_strength{']]'}{'[['} = 0.9 * STRONG + 0.1 * NOMINAL;
12478 $binary_bond_strength{']]'}{'L{'} = 0.9 * STRONG + 0.1 * NOMINAL;
12479 $binary_bond_strength{'R}'}{'[['} = 0.9 * STRONG + 0.1 * NOMINAL;
12480 $binary_bond_strength{'R}'}{'L{'} = 0.9 * STRONG + 0.1 * NOMINAL;
12482 # increase strength to the point where a break in the following
12483 # will be after the opening paren rather than at the arrow:
12485 $binary_bond_strength{'i'}{'->'} = 1.45 * STRONG;
12487 $binary_bond_strength{'))'}{'->'} = 0.1 * STRONG + 0.9 * NOMINAL;
12488 $binary_bond_strength{']]'}{'->'} = 0.1 * STRONG + 0.9 * NOMINAL;
12489 $binary_bond_strength{'})'}{'->'} = 0.1 * STRONG + 0.9 * NOMINAL;
12490 $binary_bond_strength{'}]'}{'->'} = 0.1 * STRONG + 0.9 * NOMINAL;
12491 $binary_bond_strength{'}}'}{'->'} = 0.1 * STRONG + 0.9 * NOMINAL;
12492 $binary_bond_strength{'R}'}{'->'} = 0.1 * STRONG + 0.9 * NOMINAL;
12494 $binary_bond_strength{'))'}{'[['} = 0.2 * STRONG + 0.8 * NOMINAL;
12495 $binary_bond_strength{'})'}{'[['} = 0.2 * STRONG + 0.8 * NOMINAL;
12496 $binary_bond_strength{'))'}{'{['} = 0.2 * STRONG + 0.8 * NOMINAL;
12497 $binary_bond_strength{'})'}{'{['} = 0.2 * STRONG + 0.8 * NOMINAL;
12499 #---------------------------------------------------------------
12500 # Binary NO_BREAK rules
12501 #---------------------------------------------------------------
12503 # use strict requires that bare word and => not be separated
12504 $binary_bond_strength{'C'}{'=>'} = NO_BREAK;
12505 $binary_bond_strength{'U'}{'=>'} = NO_BREAK;
12507 # Never break between a bareword and a following paren because
12508 # perl may give an error. For example, if a break is placed
12509 # between 'to_filehandle' and its '(' the following line will
12510 # give a syntax error [Carp.pm]: my( $no) =fileno(
12511 # to_filehandle( $in)) ;
12512 $binary_bond_strength{'C'}{'(('} = NO_BREAK;
12513 $binary_bond_strength{'C'}{'{('} = NO_BREAK;
12514 $binary_bond_strength{'U'}{'(('} = NO_BREAK;
12515 $binary_bond_strength{'U'}{'{('} = NO_BREAK;
12517 # use strict requires that bare word within braces not start new
12519 $binary_bond_strength{'L{'}{'w'} = NO_BREAK;
12521 $binary_bond_strength{'w'}{'R}'} = NO_BREAK;
12523 # use strict requires that bare word and => not be separated
12524 $binary_bond_strength{'w'}{'=>'} = NO_BREAK;
12526 # use strict does not allow separating type info from trailing { }
12527 # testfile is readmail.pl
12528 $binary_bond_strength{'t'}{'L{'} = NO_BREAK;
12529 $binary_bond_strength{'i'}{'L{'} = NO_BREAK;
12531 # As a defensive measure, do not break between a '(' and a
12532 # filehandle. In some cases, this can cause an error. For
12533 # example, the following program works:
12540 # But this program fails:
12548 # This is normally only a problem with the 'extrude' option
12549 $binary_bond_strength{'(('}{'Y'} = NO_BREAK;
12550 $binary_bond_strength{'{('}{'Y'} = NO_BREAK;
12552 # never break between sub name and opening paren
12553 $binary_bond_strength{'w'}{'(('} = NO_BREAK;
12554 $binary_bond_strength{'w'}{'{('} = NO_BREAK;
12556 # keep '}' together with ';'
12557 $binary_bond_strength{'}}'}{';'} = NO_BREAK;
12559 # Breaking before a ++ can cause perl to guess wrong. For
12560 # example the following line will cause a syntax error
12561 # with -extrude if we break between '$i' and '++' [fixstyle2]
12562 # print( ( $i++ & 1 ) ? $_ : ( $change{$_} || $_ ) );
12563 $nobreak_lhs{'++'} = NO_BREAK;
12565 # Do not break before a possible file handle
12566 $nobreak_lhs{'Z'} = NO_BREAK;
12568 # use strict hates bare words on any new line. For
12569 # example, a break before the underscore here provokes the
12570 # wrath of use strict:
12571 # if ( -r $fn && ( -s _ || $AllowZeroFilesize)) {
12572 $nobreak_rhs{'F'} = NO_BREAK;
12573 $nobreak_rhs{'CORE::'} = NO_BREAK;
12575 #---------------------------------------------------------------
12576 # Bond Strength BEGIN Section 3.
12577 # Define tables and values for applying a small bias to the above
12579 #---------------------------------------------------------------
12580 # Adding a small 'bias' to strengths is a simple way to make a line
12581 # break at the first of a sequence of identical terms. For
12582 # example, to force long string of conditional operators to break
12583 # with each line ending in a ':', we can add a small number to the
12584 # bond strength of each ':' (colon.t)
12585 @bias_tokens = qw( : && || f and or . ); # tokens which get bias
12586 $delta_bias = 0.0001; # a very small strength level
12589 } ## end sub initialize_bond_strength_hashes
12591 sub set_bond_strengths {
12593 # patch-its always ok to break at end of line
12594 $nobreak_to_go[$max_index_to_go] = 0;
12596 # we start a new set of bias values for each line
12598 @bias{@bias_tokens} = (0) x scalar(@bias_tokens);
12599 my $code_bias = -.01; # bias for closing block braces
12604 my $last_nonblank_type = $type;
12605 my $last_nonblank_token = $token;
12606 my $list_str = $left_bond_strength{'?'};
12608 my ( $block_type, $i_next, $i_next_nonblank, $next_nonblank_token,
12609 $next_nonblank_type, $next_token, $next_type, $total_nesting_depth,
12612 # main loop to compute bond strengths between each pair of tokens
12613 foreach my $i ( 0 .. $max_index_to_go ) {
12614 $last_type = $type;
12615 if ( $type ne 'b' ) {
12616 $last_nonblank_type = $type;
12617 $last_nonblank_token = $token;
12619 $type = $types_to_go[$i];
12621 # strength on both sides of a blank is the same
12622 if ( $type eq 'b' && $last_type ne 'b' ) {
12623 $bond_strength_to_go[$i] = $bond_strength_to_go[ $i - 1 ];
12627 $token = $tokens_to_go[$i];
12628 $block_type = $block_type_to_go[$i];
12630 $next_type = $types_to_go[$i_next];
12631 $next_token = $tokens_to_go[$i_next];
12632 $total_nesting_depth = $nesting_depth_to_go[$i_next];
12633 $i_next_nonblank = ( ( $next_type eq 'b' ) ? $i + 2 : $i + 1 );
12634 $next_nonblank_type = $types_to_go[$i_next_nonblank];
12635 $next_nonblank_token = $tokens_to_go[$i_next_nonblank];
12637 # We are computing the strength of the bond between the current
12638 # token and the NEXT token.
12640 #---------------------------------------------------------------
12641 # Bond Strength Section 1:
12642 # First Approximation.
12643 # Use minimum of individual left and right tabulated bond
12645 #---------------------------------------------------------------
12646 my $bsr = $right_bond_strength{$type};
12647 my $bsl = $left_bond_strength{$next_nonblank_type};
12649 # define right bond strengths of certain keywords
12650 if ( $type eq 'k' && defined( $right_bond_strength{$token} ) ) {
12651 $bsr = $right_bond_strength{$token};
12653 elsif ( $token eq 'ne' or $token eq 'eq' ) {
12657 # set terminal bond strength to the nominal value
12658 # this will cause good preceding breaks to be retained
12659 if ( $i_next_nonblank > $max_index_to_go ) {
12663 # define right bond strengths of certain keywords
12664 if ( $next_nonblank_type eq 'k'
12665 && defined( $left_bond_strength{$next_nonblank_token} ) )
12667 $bsl = $left_bond_strength{$next_nonblank_token};
12669 elsif ($next_nonblank_token eq 'ne'
12670 or $next_nonblank_token eq 'eq' )
12674 elsif ( $is_lt_gt_le_ge{$next_nonblank_token} ) {
12675 $bsl = 0.9 * NOMINAL + 0.1 * STRONG;
12678 # Use the minimum of the left and right strengths. Note: it might
12679 # seem that we would want to keep a NO_BREAK if either token has
12680 # this value. This didn't work, for example because in an arrow
12681 # list, it prevents the comma from separating from the following
12682 # bare word (which is probably quoted by its arrow). So necessary
12683 # NO_BREAK's have to be handled as special cases in the final
12685 if ( !defined($bsr) ) { $bsr = VERY_STRONG }
12686 if ( !defined($bsl) ) { $bsl = VERY_STRONG }
12687 my $bond_str = ( $bsr < $bsl ) ? $bsr : $bsl;
12688 my $bond_str_1 = $bond_str;
12690 #---------------------------------------------------------------
12691 # Bond Strength Section 2:
12692 # Apply hardwired rules..
12693 #---------------------------------------------------------------
12695 # Patch to put terminal or clauses on a new line: Weaken the bond
12696 # at an || followed by die or similar keyword to make the terminal
12697 # or clause fall on a new line, like this:
12699 # my $class = shift
12700 # || die "Cannot add broadcast: No class identifier found";
12702 # Otherwise the break will be at the previous '=' since the || and
12703 # = have the same starting strength and the or is biased, like
12707 # shift || die "Cannot add broadcast: No class identifier found";
12709 # In any case if the user places a break at either the = or the ||
12710 # it should remain there.
12711 if ( $type eq '||' || $type eq 'k' && $token eq 'or' ) {
12712 if ( $next_nonblank_token =~ /^(die|confess|croak|warn)$/ ) {
12713 if ( $want_break_before{$token} && $i > 0 ) {
12714 $bond_strength_to_go[ $i - 1 ] -= $delta_bias;
12717 $bond_str -= $delta_bias;
12722 # good to break after end of code blocks
12723 if ( $type eq '}' && $block_type && $next_nonblank_type ne ';' ) {
12725 $bond_str = 0.5 * WEAK + 0.5 * VERY_WEAK + $code_bias;
12726 $code_bias += $delta_bias;
12729 if ( $type eq 'k' ) {
12731 # allow certain control keywords to stand out
12732 if ( $next_nonblank_type eq 'k'
12733 && $is_last_next_redo_return{$token} )
12735 $bond_str = 0.45 * WEAK + 0.55 * VERY_WEAK;
12738 # Don't break after keyword my. This is a quick fix for a
12739 # rare problem with perl. An example is this line from file
12742 # foreach my $question( Debian::DebConf::ConfigDb::gettree(
12743 # $this->{'question'} ) )
12745 if ( $token eq 'my' ) {
12746 $bond_str = NO_BREAK;
12751 # good to break before 'if', 'unless', etc
12752 if ( $is_if_brace_follower{$next_nonblank_token} ) {
12753 $bond_str = VERY_WEAK;
12756 if ( $next_nonblank_type eq 'k' && $type ne 'CORE::' ) {
12758 # FIXME: needs more testing
12759 if ( $is_keyword_returning_list{$next_nonblank_token} ) {
12760 $bond_str = $list_str if ( $bond_str > $list_str );
12763 # keywords like 'unless', 'if', etc, within statements
12765 if ( $is_good_keyword_breakpoint{$next_nonblank_token} ) {
12766 $bond_str = VERY_WEAK / 1.05;
12770 # try not to break before a comma-arrow
12771 elsif ( $next_nonblank_type eq '=>' ) {
12772 if ( $bond_str < STRONG ) { $bond_str = STRONG }
12775 #---------------------------------------------------------------
12776 # Additional hardwired NOBREAK rules
12777 #---------------------------------------------------------------
12779 # map1.t -- correct for a quirk in perl
12781 && $next_nonblank_type eq 'i'
12782 && $last_nonblank_type eq 'k'
12783 && $is_sort_map_grep{$last_nonblank_token} )
12785 # /^(sort|map|grep)$/ )
12787 $bond_str = NO_BREAK;
12790 # extrude.t: do not break before paren at:
12792 if ( $last_nonblank_type eq 'F' && $next_nonblank_token eq '(' ) {
12793 $bond_str = NO_BREAK;
12796 # in older version of perl, use strict can cause problems with
12797 # breaks before bare words following opening parens. For example,
12798 # this will fail under older versions if a break is made between
12799 # '(' and 'MAIL': use strict; open( MAIL, "a long filename or
12800 # command"); close MAIL;
12801 if ( $type eq '{' ) {
12803 if ( $token eq '(' && $next_nonblank_type eq 'w' ) {
12805 # but it's fine to break if the word is followed by a '=>'
12806 # or if it is obviously a sub call
12807 my $i_next_next_nonblank = $i_next_nonblank + 1;
12808 my $next_next_type = $types_to_go[$i_next_next_nonblank];
12809 if ( $next_next_type eq 'b'
12810 && $i_next_nonblank < $max_index_to_go )
12812 $i_next_next_nonblank++;
12813 $next_next_type = $types_to_go[$i_next_next_nonblank];
12816 # We'll check for an old breakpoint and keep a leading
12817 # bareword if it was that way in the input file.
12818 # Presumably it was ok that way. For example, the
12819 # following would remain unchanged:
12822 # January, February, March, April,
12823 # May, June, July, August,
12824 # September, October, November, December,
12827 # This should be sufficient:
12829 !$old_breakpoint_to_go[$i]
12830 && ( $next_next_type eq ','
12831 || $next_next_type eq '}' )
12834 $bond_str = NO_BREAK;
12839 # Do not break between a possible filehandle and a ? or / and do
12840 # not introduce a break after it if there is no blank
12842 elsif ( $type eq 'Z' ) {
12847 # if there is no blank and we do not want one. Examples:
12848 # print $x++ # do not break after $x
12849 # print HTML"HELLO" # break ok after HTML
12852 && defined( $want_left_space{$next_type} )
12853 && $want_left_space{$next_type} == WS_NO
12856 # or we might be followed by the start of a quote
12857 || $next_nonblank_type =~ /^[\/\?]$/
12860 $bond_str = NO_BREAK;
12864 # Breaking before a ? before a quote can cause trouble if
12865 # they are not separated by a blank.
12866 # Example: a syntax error occurs if you break before the ? here
12867 # my$logic=join$all?' && ':' || ',@regexps;
12868 # From: Professional_Perl_Programming_Code/multifind.pl
12869 if ( $next_nonblank_type eq '?' ) {
12870 $bond_str = NO_BREAK
12871 if ( $types_to_go[ $i_next_nonblank + 1 ] eq 'Q' );
12874 # Breaking before a . followed by a number
12875 # can cause trouble if there is no intervening space
12876 # Example: a syntax error occurs if you break before the .2 here
12877 # $str .= pack($endian.2, ensurrogate($ord));
12878 # From: perl58/Unicode.pm
12879 elsif ( $next_nonblank_type eq '.' ) {
12880 $bond_str = NO_BREAK
12881 if ( $types_to_go[ $i_next_nonblank + 1 ] eq 'n' );
12884 my $bond_str_2 = $bond_str;
12886 #---------------------------------------------------------------
12887 # End of hardwired rules
12888 #---------------------------------------------------------------
12890 #---------------------------------------------------------------
12891 # Bond Strength Section 3:
12892 # Apply table rules. These have priority over the above
12894 #---------------------------------------------------------------
12896 my $tabulated_bond_str;
12898 my $rtype = $next_nonblank_type;
12899 if ( $token =~ /^[\(\[\{\)\]\}]/ ) { $ltype = $type . $token }
12900 if ( $next_nonblank_token =~ /^[\(\[\{\)\]\}]/ ) {
12901 $rtype = $next_nonblank_type . $next_nonblank_token;
12904 if ( $binary_bond_strength{$ltype}{$rtype} ) {
12905 $bond_str = $binary_bond_strength{$ltype}{$rtype};
12906 $tabulated_bond_str = $bond_str;
12909 if ( $nobreak_rhs{$ltype} || $nobreak_lhs{$rtype} ) {
12910 $bond_str = NO_BREAK;
12911 $tabulated_bond_str = $bond_str;
12913 my $bond_str_3 = $bond_str;
12915 # If the hardwired rules conflict with the tabulated bond
12916 # strength then there is an inconsistency that should be fixed
12917 FORMATTER_DEBUG_FLAG_BOND_TABLES
12918 && $tabulated_bond_str
12920 && $bond_str_1 != $bond_str_2
12921 && $bond_str_2 != $tabulated_bond_str
12924 "BOND_TABLES: ltype=$ltype rtype=$rtype $bond_str_1->$bond_str_2->$bond_str_3\n";
12927 #-----------------------------------------------------------------
12928 # Bond Strength Section 4:
12929 # Modify strengths of certain tokens which often occur in sequence
12930 # by adding a small bias to each one in turn so that the breaks
12931 # occur from left to right.
12933 # Note that we only changing strengths by small amounts here,
12934 # and usually increasing, so we should not be altering any NO_BREAKs.
12935 # Other routines which check for NO_BREAKs will use a tolerance
12936 # of one to avoid any problem.
12937 #-----------------------------------------------------------------
12939 # The bias tables use special keys
12940 my $left_key = bias_table_key( $type, $token );
12942 bias_table_key( $next_nonblank_type, $next_nonblank_token );
12944 # add any bias set by sub scan_list at old comma break points.
12945 if ( $type eq ',' ) { $bond_str += $bond_strength_to_go[$i] }
12948 elsif ( defined( $bias{$left_key} ) ) {
12949 if ( !$want_break_before{$left_key} ) {
12950 $bias{$left_key} += $delta_bias;
12951 $bond_str += $bias{$left_key};
12956 if ( defined( $bias{$right_key} ) ) {
12957 if ( $want_break_before{$right_key} ) {
12959 # for leading '.' align all but 'short' quotes; the idea
12960 # is to not place something like "\n" on a single line.
12961 if ( $right_key eq '.' ) {
12963 $last_nonblank_type eq '.'
12966 $rOpts_short_concatenation_item_length )
12967 && ( !$is_closing_token{$token} )
12970 $bias{$right_key} += $delta_bias;
12974 $bias{$right_key} += $delta_bias;
12976 $bond_str += $bias{$right_key};
12979 my $bond_str_4 = $bond_str;
12981 #---------------------------------------------------------------
12982 # Bond Strength Section 5:
12983 # Fifth Approximation.
12984 # Take nesting depth into account by adding the nesting depth
12985 # to the bond strength.
12986 #---------------------------------------------------------------
12989 if ( defined($bond_str) && !$nobreak_to_go[$i] ) {
12990 if ( $total_nesting_depth > 0 ) {
12991 $strength = $bond_str + $total_nesting_depth;
12994 $strength = $bond_str;
12998 $strength = NO_BREAK;
13001 #---------------------------------------------------------------
13002 # Bond Strength Section 6:
13003 # Sixth Approximation. Welds.
13004 #---------------------------------------------------------------
13006 # Do not allow a break within welds,
13007 if ( weld_len_right_to_go($i) ) { $strength = NO_BREAK }
13009 # But encourage breaking after opening welded tokens
13010 elsif ( weld_len_left_to_go($i) && $is_opening_token{$token} ) {
13014 # always break after side comment
13015 if ( $type eq '#' ) { $strength = 0 }
13017 $bond_strength_to_go[$i] = $strength;
13019 FORMATTER_DEBUG_FLAG_BOND && do {
13020 my $str = substr( $token, 0, 15 );
13021 $str .= ' ' x ( 16 - length($str) );
13023 "BOND: i=$i $str $type $next_nonblank_type depth=$total_nesting_depth strength=$bond_str_1 -> $bond_str_2 -> $bond_str_3 -> $bond_str_4 $bond_str -> $strength \n";
13027 } ## end sub set_bond_strengths
13030 sub pad_array_to_go {
13032 # to simplify coding in scan_list and set_bond_strengths, it helps
13033 # to create some extra blank tokens at the end of the arrays
13034 $tokens_to_go[ $max_index_to_go + 1 ] = '';
13035 $tokens_to_go[ $max_index_to_go + 2 ] = '';
13036 $types_to_go[ $max_index_to_go + 1 ] = 'b';
13037 $types_to_go[ $max_index_to_go + 2 ] = 'b';
13038 $nesting_depth_to_go[ $max_index_to_go + 1 ] =
13039 $nesting_depth_to_go[$max_index_to_go];
13042 if ( $is_closing_type{ $types_to_go[$max_index_to_go] } ) {
13043 if ( $nesting_depth_to_go[$max_index_to_go] <= 0 ) {
13045 # shouldn't happen:
13046 unless ( get_saw_brace_error() ) {
13048 "Program bug in scan_list: hit nesting error which should have been caught\n"
13050 report_definite_bug();
13054 $nesting_depth_to_go[ $max_index_to_go + 1 ] -= 1;
13059 elsif ( $is_opening_type{ $types_to_go[$max_index_to_go] } ) {
13060 $nesting_depth_to_go[ $max_index_to_go + 1 ] += 1;
13065 { # begin scan_list
13068 $block_type, $current_depth,
13070 $i_last_nonblank_token, $last_colon_sequence_number,
13071 $last_nonblank_token, $last_nonblank_type,
13072 $last_nonblank_block_type, $last_old_breakpoint_count,
13073 $minimum_depth, $next_nonblank_block_type,
13074 $next_nonblank_token, $next_nonblank_type,
13075 $old_breakpoint_count, $starting_breakpoint_count,
13076 $starting_depth, $token,
13077 $type, $type_sequence,
13081 @breakpoint_stack, @breakpoint_undo_stack,
13082 @comma_index, @container_type,
13083 @identifier_count_stack, @index_before_arrow,
13084 @interrupted_list, @item_count_stack,
13085 @last_comma_index, @last_dot_index,
13086 @last_nonblank_type, @old_breakpoint_count_stack,
13087 @opening_structure_index_stack, @rfor_semicolon_list,
13088 @has_old_logical_breakpoints, @rand_or_list,
13092 # routine to define essential variables when we go 'up' to
13094 sub check_for_new_minimum_depth {
13096 if ( $depth < $minimum_depth ) {
13098 $minimum_depth = $depth;
13100 # these arrays need not retain values between calls
13101 $breakpoint_stack[$depth] = $starting_breakpoint_count;
13102 $container_type[$depth] = "";
13103 $identifier_count_stack[$depth] = 0;
13104 $index_before_arrow[$depth] = -1;
13105 $interrupted_list[$depth] = 1;
13106 $item_count_stack[$depth] = 0;
13107 $last_nonblank_type[$depth] = "";
13108 $opening_structure_index_stack[$depth] = -1;
13110 $breakpoint_undo_stack[$depth] = undef;
13111 $comma_index[$depth] = undef;
13112 $last_comma_index[$depth] = undef;
13113 $last_dot_index[$depth] = undef;
13114 $old_breakpoint_count_stack[$depth] = undef;
13115 $has_old_logical_breakpoints[$depth] = 0;
13116 $rand_or_list[$depth] = [];
13117 $rfor_semicolon_list[$depth] = [];
13118 $i_equals[$depth] = -1;
13120 # these arrays must retain values between calls
13121 if ( !defined( $has_broken_sublist[$depth] ) ) {
13122 $dont_align[$depth] = 0;
13123 $has_broken_sublist[$depth] = 0;
13124 $want_comma_break[$depth] = 0;
13130 # routine to decide which commas to break at within a container;
13132 # $bp_count = number of comma breakpoints set
13133 # $do_not_break_apart = a flag indicating if container need not
13135 sub set_comma_breakpoints {
13139 my $do_not_break_apart = 0;
13142 if ( $item_count_stack[$dd] ) {
13144 # handle commas not in containers...
13145 if ( $dont_align[$dd] ) {
13146 do_uncontained_comma_breaks($dd);
13149 # handle commas within containers...
13151 my $fbc = $forced_breakpoint_count;
13153 # always open comma lists not preceded by keywords,
13154 # barewords, identifiers (that is, anything that doesn't
13155 # look like a function call)
13156 my $must_break_open = $last_nonblank_type[$dd] !~ /^[kwiU]$/;
13158 set_comma_breakpoints_do(
13160 $opening_structure_index_stack[$dd],
13162 $item_count_stack[$dd],
13163 $identifier_count_stack[$dd],
13165 $next_nonblank_type,
13166 $container_type[$dd],
13167 $interrupted_list[$dd],
13168 \$do_not_break_apart,
13171 $bp_count = $forced_breakpoint_count - $fbc;
13172 $do_not_break_apart = 0 if $must_break_open;
13175 return ( $bp_count, $do_not_break_apart );
13178 sub do_uncontained_comma_breaks {
13180 # Handle commas not in containers...
13181 # This is a catch-all routine for commas that we
13182 # don't know what to do with because the don't fall
13183 # within containers. We will bias the bond strength
13184 # to break at commas which ended lines in the input
13185 # file. This usually works better than just trying
13186 # to put as many items on a line as possible. A
13187 # downside is that if the input file is garbage it
13188 # won't work very well. However, the user can always
13189 # prevent following the old breakpoints with the
13193 my $old_comma_break_count = 0;
13194 foreach my $ii ( @{ $comma_index[$dd] } ) {
13195 if ( $old_breakpoint_to_go[$ii] ) {
13196 $old_comma_break_count++;
13197 $bond_strength_to_go[$ii] = $bias;
13199 # reduce bias magnitude to force breaks in order
13204 # Also put a break before the first comma if
13205 # (1) there was a break there in the input, and
13206 # (2) there was exactly one old break before the first comma break
13207 # (3) OLD: there are multiple old comma breaks
13208 # (3) NEW: there are one or more old comma breaks (see return example)
13210 # For example, we will follow the user and break after
13211 # 'print' in this snippet:
13213 # "conformability (Not the same dimension)\n",
13214 # "\t", $have, " is ", text_unit($hu), "\n",
13215 # "\t", $want, " is ", text_unit($wu), "\n",
13218 # Another example, just one comma, where we will break after
13221 # $x * cos($a) - $y * sin($a),
13222 # $x * sin($a) + $y * cos($a);
13224 # Breaking a print statement:
13226 # ( $? & 127 ) ? " (SIG#" . ( $? & 127 ) . ")" : "",
13227 # ( $? & 128 ) ? " -- core dumped" : "", "\n";
13229 # But we will not force a break after the opening paren here
13230 # (causes a blinker):
13231 # $heap->{stream}->set_output_filter(
13232 # poe::filter::reference->new('myotherfreezer') ),
13235 my $i_first_comma = $comma_index[$dd]->[0];
13236 if ( $old_breakpoint_to_go[$i_first_comma] ) {
13237 my $level_comma = $levels_to_go[$i_first_comma];
13240 for ( my $ii = $i_first_comma - 1 ; $ii >= 0 ; $ii -= 1 ) {
13241 if ( $old_breakpoint_to_go[$ii] ) {
13243 last if ( $obp_count > 1 );
13245 if ( $levels_to_go[$ii] == $level_comma );
13249 # Changed rule from multiple old commas to just one here:
13250 if ( $ibreak >= 0 && $obp_count == 1 && $old_comma_break_count > 0 )
13252 # Do not to break before an opening token because
13253 # it can lead to "blinkers".
13254 my $ibreakm = $ibreak;
13255 $ibreakm-- if ( $types_to_go[$ibreakm] eq 'b' );
13256 if ( $ibreakm >= 0 && $types_to_go[$ibreakm] !~ /^[\(\{\[L]$/ )
13258 set_forced_breakpoint($ibreak);
13265 my %is_logical_container;
13268 my @q = qw# if elsif unless while and or err not && | || ? : ! #;
13269 @is_logical_container{@q} = (1) x scalar(@q);
13272 sub set_for_semicolon_breakpoints {
13274 foreach ( @{ $rfor_semicolon_list[$dd] } ) {
13275 set_forced_breakpoint($_);
13280 sub set_logical_breakpoints {
13283 $item_count_stack[$dd] == 0
13284 && $is_logical_container{ $container_type[$dd] }
13286 || $has_old_logical_breakpoints[$dd]
13290 # Look for breaks in this order:
13293 foreach my $i ( 0 .. 3 ) {
13294 if ( $rand_or_list[$dd][$i] ) {
13295 foreach ( @{ $rand_or_list[$dd][$i] } ) {
13296 set_forced_breakpoint($_);
13299 # break at any 'if' and 'unless' too
13300 foreach ( @{ $rand_or_list[$dd][4] } ) {
13301 set_forced_breakpoint($_);
13303 $rand_or_list[$dd] = [];
13311 sub is_unbreakable_container {
13313 # never break a container of one of these types
13314 # because bad things can happen (map1.t)
13316 return $is_sort_map_grep{ $container_type[$dd] };
13321 # This routine is responsible for setting line breaks for all lists,
13322 # so that hierarchical structure can be displayed and so that list
13323 # items can be vertically aligned. The output of this routine is
13324 # stored in the array @forced_breakpoint_to_go, which is used to set
13325 # final breakpoints.
13327 $starting_depth = $nesting_depth_to_go[0];
13330 $current_depth = $starting_depth;
13332 $last_colon_sequence_number = -1;
13333 $last_nonblank_token = ';';
13334 $last_nonblank_type = ';';
13335 $last_nonblank_block_type = ' ';
13336 $last_old_breakpoint_count = 0;
13337 $minimum_depth = $current_depth + 1; # forces update in check below
13338 $old_breakpoint_count = 0;
13339 $starting_breakpoint_count = $forced_breakpoint_count;
13342 $type_sequence = '';
13344 my $total_depth_variation = 0;
13345 my $i_old_assignment_break;
13346 my $depth_last = $starting_depth;
13348 check_for_new_minimum_depth($current_depth);
13350 my $is_long_line = excess_line_length( 0, $max_index_to_go ) > 0;
13351 my $want_previous_breakpoint = -1;
13353 my $saw_good_breakpoint;
13354 my $i_line_end = -1;
13355 my $i_line_start = -1;
13357 # loop over all tokens in this batch
13358 while ( ++$i <= $max_index_to_go ) {
13359 if ( $type ne 'b' ) {
13360 $i_last_nonblank_token = $i - 1;
13361 $last_nonblank_type = $type;
13362 $last_nonblank_token = $token;
13363 $last_nonblank_block_type = $block_type;
13364 } ## end if ( $type ne 'b' )
13365 $type = $types_to_go[$i];
13366 $block_type = $block_type_to_go[$i];
13367 $token = $tokens_to_go[$i];
13368 $type_sequence = $type_sequence_to_go[$i];
13369 my $next_type = $types_to_go[ $i + 1 ];
13370 my $next_token = $tokens_to_go[ $i + 1 ];
13371 my $i_next_nonblank = ( ( $next_type eq 'b' ) ? $i + 2 : $i + 1 );
13372 $next_nonblank_type = $types_to_go[$i_next_nonblank];
13373 $next_nonblank_token = $tokens_to_go[$i_next_nonblank];
13374 $next_nonblank_block_type = $block_type_to_go[$i_next_nonblank];
13376 # set break if flag was set
13377 if ( $want_previous_breakpoint >= 0 ) {
13378 set_forced_breakpoint($want_previous_breakpoint);
13379 $want_previous_breakpoint = -1;
13382 $last_old_breakpoint_count = $old_breakpoint_count;
13383 if ( $old_breakpoint_to_go[$i] ) {
13385 $i_line_start = $i_next_nonblank;
13387 $old_breakpoint_count++;
13389 # Break before certain keywords if user broke there and
13390 # this is a 'safe' break point. The idea is to retain
13391 # any preferred breaks for sequential list operations,
13392 # like a schwartzian transform.
13393 if ($rOpts_break_at_old_keyword_breakpoints) {
13395 $next_nonblank_type eq 'k'
13396 && $is_keyword_returning_list{$next_nonblank_token}
13397 && ( $type =~ /^[=\)\]\}Riw]$/
13399 && $is_keyword_returning_list{$token} )
13403 # we actually have to set this break next time through
13404 # the loop because if we are at a closing token (such
13405 # as '}') which forms a one-line block, this break might
13407 $want_previous_breakpoint = $i;
13408 } ## end if ( $next_nonblank_type...)
13409 } ## end if ($rOpts_break_at_old_keyword_breakpoints)
13411 # Break before attributes if user broke there
13412 if ($rOpts_break_at_old_attribute_breakpoints) {
13413 if ( $next_nonblank_type eq 'A' ) {
13414 $want_previous_breakpoint = $i;
13418 # remember an = break as possible good break point
13419 if ( $is_assignment{$type} ) {
13420 $i_old_assignment_break = $i;
13422 elsif ( $is_assignment{$next_nonblank_type} ) {
13423 $i_old_assignment_break = $i_next_nonblank;
13425 } ## end if ( $old_breakpoint_to_go...)
13427 next if ( $type eq 'b' );
13428 $depth = $nesting_depth_to_go[ $i + 1 ];
13430 $total_depth_variation += abs( $depth - $depth_last );
13431 $depth_last = $depth;
13433 # safety check - be sure we always break after a comment
13434 # Shouldn't happen .. an error here probably means that the
13435 # nobreak flag did not get turned off correctly during
13437 if ( $type eq '#' ) {
13438 if ( $i != $max_index_to_go ) {
13440 "Non-fatal program bug: backup logic needed to break after a comment\n"
13442 report_definite_bug();
13443 $nobreak_to_go[$i] = 0;
13444 set_forced_breakpoint($i);
13445 } ## end if ( $i != $max_index_to_go)
13446 } ## end if ( $type eq '#' )
13448 # Force breakpoints at certain tokens in long lines.
13449 # Note that such breakpoints will be undone later if these tokens
13450 # are fully contained within parens on a line.
13453 # break before a keyword within a line
13457 # if one of these keywords:
13458 && $token =~ /^(if|unless|while|until|for)$/
13460 # but do not break at something like '1 while'
13461 && ( $last_nonblank_type ne 'n' || $i > 2 )
13463 # and let keywords follow a closing 'do' brace
13464 && $last_nonblank_block_type ne 'do'
13469 # or container is broken (by side-comment, etc)
13470 || ( $next_nonblank_token eq '('
13471 && $mate_index_to_go[$i_next_nonblank] < $i )
13475 set_forced_breakpoint( $i - 1 );
13476 } ## end if ( $type eq 'k' && $i...)
13478 # remember locations of -> if this is a pre-broken method chain
13479 if ( $type eq '->' ) {
13480 if ($rOpts_break_at_old_method_breakpoints) {
13482 # Case 1: look for lines with leading pointers
13483 if ( $i == $i_line_start ) {
13484 set_forced_breakpoint( $i - 1 );
13487 # Case 2: look for cuddled pointer calls
13490 # look for old lines with leading ')->' or ') ->'
13491 # and, when found, force a break before the
13492 # opening paren and after the previous closing paren.
13494 $types_to_go[$i_line_start] eq '}'
13495 && ( $i == $i_line_start + 1
13496 || $i == $i_line_start + 2
13497 && $types_to_go[ $i - 1 ] eq 'b' )
13500 set_forced_breakpoint( $i_line_start - 1 );
13501 set_forced_breakpoint(
13502 $mate_index_to_go[$i_line_start] );
13506 } ## end if ( $type eq '->' )
13508 # remember locations of '||' and '&&' for possible breaks if we
13509 # decide this is a long logical expression.
13510 elsif ( $type eq '||' ) {
13511 push @{ $rand_or_list[$depth][2] }, $i;
13512 ++$has_old_logical_breakpoints[$depth]
13513 if ( ( $i == $i_line_start || $i == $i_line_end )
13514 && $rOpts_break_at_old_logical_breakpoints );
13515 } ## end elsif ( $type eq '||' )
13516 elsif ( $type eq '&&' ) {
13517 push @{ $rand_or_list[$depth][3] }, $i;
13518 ++$has_old_logical_breakpoints[$depth]
13519 if ( ( $i == $i_line_start || $i == $i_line_end )
13520 && $rOpts_break_at_old_logical_breakpoints );
13521 } ## end elsif ( $type eq '&&' )
13522 elsif ( $type eq 'f' ) {
13523 push @{ $rfor_semicolon_list[$depth] }, $i;
13525 elsif ( $type eq 'k' ) {
13526 if ( $token eq 'and' ) {
13527 push @{ $rand_or_list[$depth][1] }, $i;
13528 ++$has_old_logical_breakpoints[$depth]
13529 if ( ( $i == $i_line_start || $i == $i_line_end )
13530 && $rOpts_break_at_old_logical_breakpoints );
13531 } ## end if ( $token eq 'and' )
13533 # break immediately at 'or's which are probably not in a logical
13534 # block -- but we will break in logical breaks below so that
13535 # they do not add to the forced_breakpoint_count
13536 elsif ( $token eq 'or' ) {
13537 push @{ $rand_or_list[$depth][0] }, $i;
13538 ++$has_old_logical_breakpoints[$depth]
13539 if ( ( $i == $i_line_start || $i == $i_line_end )
13540 && $rOpts_break_at_old_logical_breakpoints );
13541 if ( $is_logical_container{ $container_type[$depth] } ) {
13544 if ($is_long_line) { set_forced_breakpoint($i) }
13545 elsif ( ( $i == $i_line_start || $i == $i_line_end )
13546 && $rOpts_break_at_old_logical_breakpoints )
13548 $saw_good_breakpoint = 1;
13550 } ## end else [ if ( $is_logical_container...)]
13551 } ## end elsif ( $token eq 'or' )
13552 elsif ( $token eq 'if' || $token eq 'unless' ) {
13553 push @{ $rand_or_list[$depth][4] }, $i;
13554 if ( ( $i == $i_line_start || $i == $i_line_end )
13555 && $rOpts_break_at_old_logical_breakpoints )
13557 set_forced_breakpoint($i);
13559 } ## end elsif ( $token eq 'if' ||...)
13560 } ## end elsif ( $type eq 'k' )
13561 elsif ( $is_assignment{$type} ) {
13562 $i_equals[$depth] = $i;
13565 if ($type_sequence) {
13567 # handle any postponed closing breakpoints
13568 if ( $token =~ /^[\)\]\}\:]$/ ) {
13569 if ( $type eq ':' ) {
13570 $last_colon_sequence_number = $type_sequence;
13572 # retain break at a ':' line break
13573 if ( ( $i == $i_line_start || $i == $i_line_end )
13574 && $rOpts_break_at_old_ternary_breakpoints )
13577 set_forced_breakpoint($i);
13579 # break at previous '='
13580 if ( $i_equals[$depth] > 0 ) {
13581 set_forced_breakpoint( $i_equals[$depth] );
13582 $i_equals[$depth] = -1;
13584 } ## end if ( ( $i == $i_line_start...))
13585 } ## end if ( $type eq ':' )
13586 if ( defined( $postponed_breakpoint{$type_sequence} ) ) {
13587 my $inc = ( $type eq ':' ) ? 0 : 1;
13588 set_forced_breakpoint( $i - $inc );
13589 delete $postponed_breakpoint{$type_sequence};
13591 } ## end if ( $token =~ /^[\)\]\}\:]$/[{[(])
13593 # set breaks at ?/: if they will get separated (and are
13594 # not a ?/: chain), or if the '?' is at the end of the
13596 elsif ( $token eq '?' ) {
13597 my $i_colon = $mate_index_to_go[$i];
13599 $i_colon <= 0 # the ':' is not in this batch
13600 || $i == 0 # this '?' is the first token of the line
13602 $max_index_to_go # or this '?' is the last token
13606 # don't break at a '?' if preceded by ':' on
13607 # this line of previous ?/: pair on this line.
13608 # This is an attempt to preserve a chain of ?/:
13609 # expressions (elsif2.t). And don't break if
13610 # this has a side comment.
13611 set_forced_breakpoint($i)
13613 $type_sequence == (
13614 $last_colon_sequence_number +
13615 TYPE_SEQUENCE_INCREMENT
13617 || $tokens_to_go[$max_index_to_go] eq '#'
13619 set_closing_breakpoint($i);
13620 } ## end if ( $i_colon <= 0 ||...)
13621 } ## end elsif ( $token eq '?' )
13622 } ## end if ($type_sequence)
13624 #print "LISTX sees: i=$i type=$type tok=$token block=$block_type depth=$depth\n";
13626 #------------------------------------------------------------
13627 # Handle Increasing Depth..
13629 # prepare for a new list when depth increases
13630 # token $i is a '(','{', or '['
13631 #------------------------------------------------------------
13632 if ( $depth > $current_depth ) {
13634 $breakpoint_stack[$depth] = $forced_breakpoint_count;
13635 $breakpoint_undo_stack[$depth] = $forced_breakpoint_undo_count;
13636 $has_broken_sublist[$depth] = 0;
13637 $identifier_count_stack[$depth] = 0;
13638 $index_before_arrow[$depth] = -1;
13639 $interrupted_list[$depth] = 0;
13640 $item_count_stack[$depth] = 0;
13641 $last_comma_index[$depth] = undef;
13642 $last_dot_index[$depth] = undef;
13643 $last_nonblank_type[$depth] = $last_nonblank_type;
13644 $old_breakpoint_count_stack[$depth] = $old_breakpoint_count;
13645 $opening_structure_index_stack[$depth] = $i;
13646 $rand_or_list[$depth] = [];
13647 $rfor_semicolon_list[$depth] = [];
13648 $i_equals[$depth] = -1;
13649 $want_comma_break[$depth] = 0;
13650 $container_type[$depth] =
13651 ( $last_nonblank_type =~ /^(k|=>|&&|\|\||\?|\:|\.)$/ )
13652 ? $last_nonblank_token
13654 $has_old_logical_breakpoints[$depth] = 0;
13656 # if line ends here then signal closing token to break
13657 if ( $next_nonblank_type eq 'b' || $next_nonblank_type eq '#' )
13659 set_closing_breakpoint($i);
13662 # Not all lists of values should be vertically aligned..
13663 $dont_align[$depth] =
13665 # code BLOCKS are handled at a higher level
13666 ( $block_type ne "" )
13668 # certain paren lists
13669 || ( $type eq '(' ) && (
13671 # it does not usually look good to align a list of
13672 # identifiers in a parameter list, as in:
13673 # my($var1, $var2, ...)
13674 # (This test should probably be refined, for now I'm just
13675 # testing for any keyword)
13676 ( $last_nonblank_type eq 'k' )
13678 # a trailing '(' usually indicates a non-list
13679 || ( $next_nonblank_type eq '(' )
13682 # patch to outdent opening brace of long if/for/..
13683 # statements (like this one). See similar coding in
13684 # set_continuation breaks. We have also catch it here for
13685 # short line fragments which otherwise will not go through
13686 # set_continuation_breaks.
13690 # if we have the ')' but not its '(' in this batch..
13691 && ( $last_nonblank_token eq ')' )
13692 && $mate_index_to_go[$i_last_nonblank_token] < 0
13694 # and user wants brace to left
13695 && !$rOpts->{'opening-brace-always-on-right'}
13697 && ( $type eq '{' ) # should be true
13698 && ( $token eq '{' ) # should be true
13701 set_forced_breakpoint( $i - 1 );
13702 } ## end if ( $block_type && ( ...))
13703 } ## end if ( $depth > $current_depth)
13705 #------------------------------------------------------------
13706 # Handle Decreasing Depth..
13708 # finish off any old list when depth decreases
13709 # token $i is a ')','}', or ']'
13710 #------------------------------------------------------------
13711 elsif ( $depth < $current_depth ) {
13713 check_for_new_minimum_depth($depth);
13715 # force all outer logical containers to break after we see on
13717 $has_old_logical_breakpoints[$depth] ||=
13718 $has_old_logical_breakpoints[$current_depth];
13720 # Patch to break between ') {' if the paren list is broken.
13721 # There is similar logic in set_continuation_breaks for
13722 # non-broken lists.
13724 && $next_nonblank_block_type
13725 && $interrupted_list[$current_depth]
13726 && $next_nonblank_type eq '{'
13727 && !$rOpts->{'opening-brace-always-on-right'} )
13729 set_forced_breakpoint($i);
13730 } ## end if ( $token eq ')' && ...
13732 #print "LISTY sees: i=$i type=$type tok=$token block=$block_type depth=$depth next=$next_nonblank_type next_block=$next_nonblank_block_type inter=$interrupted_list[$current_depth]\n";
13734 # set breaks at commas if necessary
13735 my ( $bp_count, $do_not_break_apart ) =
13736 set_comma_breakpoints($current_depth);
13738 my $i_opening = $opening_structure_index_stack[$current_depth];
13739 my $saw_opening_structure = ( $i_opening >= 0 );
13741 # this term is long if we had to break at interior commas..
13742 my $is_long_term = $bp_count > 0;
13744 # If this is a short container with one or more comma arrows,
13745 # then we will mark it as a long term to open it if requested.
13746 # $rOpts_comma_arrow_breakpoints =
13747 # 0 - open only if comma precedes closing brace
13748 # 1 - stable: except for one line blocks
13749 # 2 - try to form 1 line blocks
13751 # 4 - always open up if vt=0
13752 # 5 - stable: even for one line blocks if vt=0
13753 if ( !$is_long_term
13754 && $tokens_to_go[$i_opening] =~ /^[\(\{\[]$/
13755 && $index_before_arrow[ $depth + 1 ] > 0
13756 && !$opening_vertical_tightness{ $tokens_to_go[$i_opening] }
13759 $is_long_term = $rOpts_comma_arrow_breakpoints == 4
13760 || ( $rOpts_comma_arrow_breakpoints == 0
13761 && $last_nonblank_token eq ',' )
13762 || ( $rOpts_comma_arrow_breakpoints == 5
13763 && $old_breakpoint_to_go[$i_opening] );
13764 } ## end if ( !$is_long_term &&...)
13766 # mark term as long if the length between opening and closing
13767 # parens exceeds allowed line length
13768 if ( !$is_long_term && $saw_opening_structure ) {
13769 my $i_opening_minus = find_token_starting_list($i_opening);
13771 # Note: we have to allow for one extra space after a
13772 # closing token so that we do not strand a comma or
13773 # semicolon, hence the '>=' here (oneline.t)
13774 # Note: we ignore left weld lengths here for best results
13776 excess_line_length( $i_opening_minus, $i, 1 ) >= 0;
13777 } ## end if ( !$is_long_term &&...)
13779 # We've set breaks after all comma-arrows. Now we have to
13780 # undo them if this can be a one-line block
13781 # (the only breakpoints set will be due to comma-arrows)
13784 # user doesn't require breaking after all comma-arrows
13785 ( $rOpts_comma_arrow_breakpoints != 0 )
13786 && ( $rOpts_comma_arrow_breakpoints != 4 )
13788 # and if the opening structure is in this batch
13789 && $saw_opening_structure
13791 # and either on the same old line
13793 $old_breakpoint_count_stack[$current_depth] ==
13794 $last_old_breakpoint_count
13796 # or user wants to form long blocks with arrows
13797 || $rOpts_comma_arrow_breakpoints == 2
13800 # and we made some breakpoints between the opening and closing
13801 && ( $breakpoint_undo_stack[$current_depth] <
13802 $forced_breakpoint_undo_count )
13804 # and this block is short enough to fit on one line
13805 # Note: use < because need 1 more space for possible comma
13810 undo_forced_breakpoint_stack(
13811 $breakpoint_undo_stack[$current_depth] );
13812 } ## end if ( ( $rOpts_comma_arrow_breakpoints...))
13814 # now see if we have any comma breakpoints left
13815 my $has_comma_breakpoints =
13816 ( $breakpoint_stack[$current_depth] !=
13817 $forced_breakpoint_count );
13819 # update broken-sublist flag of the outer container
13820 $has_broken_sublist[$depth] =
13821 $has_broken_sublist[$depth]
13822 || $has_broken_sublist[$current_depth]
13824 || $has_comma_breakpoints;
13826 # Having come to the closing ')', '}', or ']', now we have to decide if we
13827 # should 'open up' the structure by placing breaks at the opening and
13828 # closing containers. This is a tricky decision. Here are some of the
13829 # basic considerations:
13831 # -If this is a BLOCK container, then any breakpoints will have already
13832 # been set (and according to user preferences), so we need do nothing here.
13834 # -If we have a comma-separated list for which we can align the list items,
13835 # then we need to do so because otherwise the vertical aligner cannot
13836 # currently do the alignment.
13838 # -If this container does itself contain a container which has been broken
13839 # open, then it should be broken open to properly show the structure.
13841 # -If there is nothing to align, and no other reason to break apart,
13842 # then do not do it.
13844 # We will not break open the parens of a long but 'simple' logical expression.
13847 # This is an example of a simple logical expression and its formatting:
13849 # if ( $bigwasteofspace1 && $bigwasteofspace2
13850 # || $bigwasteofspace3 && $bigwasteofspace4 )
13852 # Most people would prefer this than the 'spacey' version:
13855 # $bigwasteofspace1 && $bigwasteofspace2
13856 # || $bigwasteofspace3 && $bigwasteofspace4
13859 # To illustrate the rules for breaking logical expressions, consider:
13863 # and ( exists $ids_excl_uc{$id_uc}
13864 # or grep $id_uc =~ /$_/, @ids_excl_uc ))
13866 # This is on the verge of being difficult to read. The current default is to
13867 # open it up like this:
13872 # and ( exists $ids_excl_uc{$id_uc}
13873 # or grep $id_uc =~ /$_/, @ids_excl_uc )
13876 # This is a compromise which tries to avoid being too dense and to spacey.
13877 # A more spaced version would be:
13883 # exists $ids_excl_uc{$id_uc}
13884 # or grep $id_uc =~ /$_/, @ids_excl_uc
13888 # Some people might prefer the spacey version -- an option could be added. The
13889 # innermost expression contains a long block '( exists $ids_... ')'.
13891 # Here is how the logic goes: We will force a break at the 'or' that the
13892 # innermost expression contains, but we will not break apart its opening and
13893 # closing containers because (1) it contains no multi-line sub-containers itself,
13894 # and (2) there is no alignment to be gained by breaking it open like this
13897 # exists $ids_excl_uc{$id_uc}
13898 # or grep $id_uc =~ /$_/, @ids_excl_uc
13901 # (although this looks perfectly ok and might be good for long expressions). The
13902 # outer 'if' container, though, contains a broken sub-container, so it will be
13903 # broken open to avoid too much density. Also, since it contains no 'or's, there
13904 # will be a forced break at its 'and'.
13906 # set some flags telling something about this container..
13907 my $is_simple_logical_expression = 0;
13908 if ( $item_count_stack[$current_depth] == 0
13909 && $saw_opening_structure
13910 && $tokens_to_go[$i_opening] eq '('
13911 && $is_logical_container{ $container_type[$current_depth] }
13915 # This seems to be a simple logical expression with
13916 # no existing breakpoints. Set a flag to prevent
13918 if ( !$has_comma_breakpoints ) {
13919 $is_simple_logical_expression = 1;
13922 # This seems to be a simple logical expression with
13923 # breakpoints (broken sublists, for example). Break
13924 # at all 'or's and '||'s.
13926 set_logical_breakpoints($current_depth);
13928 } ## end if ( $item_count_stack...)
13931 && @{ $rfor_semicolon_list[$current_depth] } )
13933 set_for_semicolon_breakpoints($current_depth);
13935 # open up a long 'for' or 'foreach' container to allow
13936 # leading term alignment unless -lp is used.
13937 $has_comma_breakpoints = 1
13938 unless $rOpts_line_up_parentheses;
13939 } ## end if ( $is_long_term && ...)
13943 # breaks for code BLOCKS are handled at a higher level
13946 # we do not need to break at the top level of an 'if'
13948 && !$is_simple_logical_expression
13950 ## modification to keep ': (' containers vertically tight;
13951 ## but probably better to let user set -vt=1 to avoid
13952 ## inconsistency with other paren types
13953 ## && ($container_type[$current_depth] ne ':')
13955 # otherwise, we require one of these reasons for breaking:
13958 # - this term has forced line breaks
13959 $has_comma_breakpoints
13961 # - the opening container is separated from this batch
13962 # for some reason (comment, blank line, code block)
13963 # - this is a non-paren container spanning multiple lines
13964 || !$saw_opening_structure
13966 # - this is a long block contained in another breakable
13969 && $container_environment_to_go[$i_opening] ne
13975 # For -lp option, we must put a breakpoint before
13976 # the token which has been identified as starting
13977 # this indentation level. This is necessary for
13978 # proper alignment.
13979 if ( $rOpts_line_up_parentheses && $saw_opening_structure )
13981 my $item = $leading_spaces_to_go[ $i_opening + 1 ];
13982 if ( $i_opening + 1 < $max_index_to_go
13983 && $types_to_go[ $i_opening + 1 ] eq 'b' )
13985 $item = $leading_spaces_to_go[ $i_opening + 2 ];
13987 if ( defined($item) ) {
13988 my $i_start_2 = $item->get_starting_index();
13990 defined($i_start_2)
13992 # we are breaking after an opening brace, paren,
13993 # so don't break before it too
13994 && $i_start_2 ne $i_opening
13998 # Only break for breakpoints at the same
13999 # indentation level as the opening paren
14000 my $test1 = $nesting_depth_to_go[$i_opening];
14001 my $test2 = $nesting_depth_to_go[$i_start_2];
14002 if ( $test2 == $test1 ) {
14003 set_forced_breakpoint( $i_start_2 - 1 );
14005 } ## end if ( defined($i_start_2...))
14006 } ## end if ( defined($item) )
14007 } ## end if ( $rOpts_line_up_parentheses...)
14009 # break after opening structure.
14010 # note: break before closing structure will be automatic
14011 if ( $minimum_depth <= $current_depth ) {
14013 set_forced_breakpoint($i_opening)
14014 unless ( $do_not_break_apart
14015 || is_unbreakable_container($current_depth) );
14017 # break at ',' of lower depth level before opening token
14018 if ( $last_comma_index[$depth] ) {
14019 set_forced_breakpoint( $last_comma_index[$depth] );
14022 # break at '.' of lower depth level before opening token
14023 if ( $last_dot_index[$depth] ) {
14024 set_forced_breakpoint( $last_dot_index[$depth] );
14027 # break before opening structure if preceded by another
14028 # closing structure and a comma. This is normally
14029 # done by the previous closing brace, but not
14030 # if it was a one-line block.
14031 if ( $i_opening > 2 ) {
14033 ( $types_to_go[ $i_opening - 1 ] eq 'b' )
14037 if ( $types_to_go[$i_prev] eq ','
14038 && $types_to_go[ $i_prev - 1 ] =~ /^[\)\}]$/ )
14040 set_forced_breakpoint($i_prev);
14043 # also break before something like ':(' or '?('
14046 $types_to_go[$i_prev] =~ /^([k\:\?]|&&|\|\|)$/ )
14048 my $token_prev = $tokens_to_go[$i_prev];
14049 if ( $want_break_before{$token_prev} ) {
14050 set_forced_breakpoint($i_prev);
14052 } ## end elsif ( $types_to_go[$i_prev...])
14053 } ## end if ( $i_opening > 2 )
14054 } ## end if ( $minimum_depth <=...)
14056 # break after comma following closing structure
14057 if ( $next_type eq ',' ) {
14058 set_forced_breakpoint( $i + 1 );
14061 # break before an '=' following closing structure
14063 $is_assignment{$next_nonblank_type}
14064 && ( $breakpoint_stack[$current_depth] !=
14065 $forced_breakpoint_count )
14068 set_forced_breakpoint($i);
14069 } ## end if ( $is_assignment{$next_nonblank_type...})
14071 # break at any comma before the opening structure Added
14072 # for -lp, but seems to be good in general. It isn't
14073 # obvious how far back to look; the '5' below seems to
14074 # work well and will catch the comma in something like
14075 # push @list, myfunc( $param, $param, ..
14077 my $icomma = $last_comma_index[$depth];
14078 if ( defined($icomma) && ( $i_opening - $icomma ) < 5 ) {
14079 unless ( $forced_breakpoint_to_go[$icomma] ) {
14080 set_forced_breakpoint($icomma);
14083 } # end logic to open up a container
14085 # Break open a logical container open if it was already open
14086 elsif ($is_simple_logical_expression
14087 && $has_old_logical_breakpoints[$current_depth] )
14089 set_logical_breakpoints($current_depth);
14092 # Handle long container which does not get opened up
14093 elsif ($is_long_term) {
14095 # must set fake breakpoint to alert outer containers that
14097 set_fake_breakpoint();
14098 } ## end elsif ($is_long_term)
14100 } ## end elsif ( $depth < $current_depth)
14102 #------------------------------------------------------------
14103 # Handle this token
14104 #------------------------------------------------------------
14106 $current_depth = $depth;
14108 # handle comma-arrow
14109 if ( $type eq '=>' ) {
14110 next if ( $last_nonblank_type eq '=>' );
14111 next if $rOpts_break_at_old_comma_breakpoints;
14112 next if $rOpts_comma_arrow_breakpoints == 3;
14113 $want_comma_break[$depth] = 1;
14114 $index_before_arrow[$depth] = $i_last_nonblank_token;
14116 } ## end if ( $type eq '=>' )
14118 elsif ( $type eq '.' ) {
14119 $last_dot_index[$depth] = $i;
14122 # Turn off alignment if we are sure that this is not a list
14123 # environment. To be safe, we will do this if we see certain
14124 # non-list tokens, such as ';', and also the environment is
14125 # not a list. Note that '=' could be in any of the = operators
14126 # (lextest.t). We can't just use the reported environment
14127 # because it can be incorrect in some cases.
14128 elsif ( ( $type =~ /^[\;\<\>\~]$/ || $is_assignment{$type} )
14129 && $container_environment_to_go[$i] ne 'LIST' )
14131 $dont_align[$depth] = 1;
14132 $want_comma_break[$depth] = 0;
14133 $index_before_arrow[$depth] = -1;
14134 } ## end elsif ( ( $type =~ /^[\;\<\>\~]$/...))
14136 # now just handle any commas
14137 next unless ( $type eq ',' );
14139 $last_dot_index[$depth] = undef;
14140 $last_comma_index[$depth] = $i;
14142 # break here if this comma follows a '=>'
14143 # but not if there is a side comment after the comma
14144 if ( $want_comma_break[$depth] ) {
14146 if ( $next_nonblank_type =~ /^[\)\}\]R]$/ ) {
14147 if ($rOpts_comma_arrow_breakpoints) {
14148 $want_comma_break[$depth] = 0;
14153 set_forced_breakpoint($i) unless ( $next_nonblank_type eq '#' );
14155 # break before the previous token if it looks safe
14156 # Example of something that we will not try to break before:
14157 # DBI::SQL_SMALLINT() => $ado_consts->{adSmallInt},
14158 # Also we don't want to break at a binary operator (like +):
14162 # $y - $R, -fill => 'black',
14164 my $ibreak = $index_before_arrow[$depth] - 1;
14166 && $tokens_to_go[ $ibreak + 1 ] !~ /^[\)\}\]]$/ )
14168 if ( $tokens_to_go[$ibreak] eq '-' ) { $ibreak-- }
14169 if ( $types_to_go[$ibreak] eq 'b' ) { $ibreak-- }
14170 if ( $types_to_go[$ibreak] =~ /^[,wiZCUG\(\{\[]$/ ) {
14172 # don't break pointer calls, such as the following:
14173 # File::Spec->curdir => 1,
14174 # (This is tokenized as adjacent 'w' tokens)
14175 ##if ( $tokens_to_go[ $ibreak + 1 ] !~ /^->/ ) {
14177 # And don't break before a comma, as in the following:
14178 # ( LONGER_THAN,=> 1,
14179 # EIGHTY_CHARACTERS,=> 2,
14180 # CAUSES_FORMATTING,=> 3,
14183 # This example is for -tso but should be general rule
14184 if ( $tokens_to_go[ $ibreak + 1 ] ne '->'
14185 && $tokens_to_go[ $ibreak + 1 ] ne ',' )
14187 set_forced_breakpoint($ibreak);
14189 } ## end if ( $types_to_go[$ibreak...])
14190 } ## end if ( $ibreak > 0 && $tokens_to_go...)
14192 $want_comma_break[$depth] = 0;
14193 $index_before_arrow[$depth] = -1;
14195 # handle list which mixes '=>'s and ','s:
14196 # treat any list items so far as an interrupted list
14197 $interrupted_list[$depth] = 1;
14199 } ## end if ( $want_comma_break...)
14201 # break after all commas above starting depth
14202 if ( $depth < $starting_depth && !$dont_align[$depth] ) {
14203 set_forced_breakpoint($i) unless ( $next_nonblank_type eq '#' );
14207 # add this comma to the list..
14208 my $item_count = $item_count_stack[$depth];
14209 if ( $item_count == 0 ) {
14211 # but do not form a list with no opening structure
14214 # open INFILE_COPY, ">$input_file_copy"
14215 # or die ("very long message");
14217 if ( ( $opening_structure_index_stack[$depth] < 0 )
14218 && $container_environment_to_go[$i] eq 'BLOCK' )
14220 $dont_align[$depth] = 1;
14222 } ## end if ( $item_count == 0 )
14224 $comma_index[$depth][$item_count] = $i;
14225 ++$item_count_stack[$depth];
14226 if ( $last_nonblank_type =~ /^[iR\]]$/ ) {
14227 $identifier_count_stack[$depth]++;
14229 } ## end while ( ++$i <= $max_index_to_go)
14231 #-------------------------------------------
14232 # end of loop over all tokens in this batch
14233 #-------------------------------------------
14235 # set breaks for any unfinished lists ..
14236 for ( my $dd = $current_depth ; $dd >= $minimum_depth ; $dd-- ) {
14238 $interrupted_list[$dd] = 1;
14239 $has_broken_sublist[$dd] = 1 if ( $dd < $current_depth );
14240 set_comma_breakpoints($dd);
14241 set_logical_breakpoints($dd)
14242 if ( $has_old_logical_breakpoints[$dd] );
14243 set_for_semicolon_breakpoints($dd);
14245 # break open container...
14246 my $i_opening = $opening_structure_index_stack[$dd];
14247 set_forced_breakpoint($i_opening)
14249 is_unbreakable_container($dd)
14251 # Avoid a break which would place an isolated ' or "
14254 && $i_opening >= $max_index_to_go - 2
14255 && $token =~ /^['"]$/ )
14257 } ## end for ( my $dd = $current_depth...)
14259 # Return a flag indicating if the input file had some good breakpoints.
14260 # This flag will be used to force a break in a line shorter than the
14261 # allowed line length.
14262 if ( $has_old_logical_breakpoints[$current_depth] ) {
14263 $saw_good_breakpoint = 1;
14266 # A complex line with one break at an = has a good breakpoint.
14267 # This is not complex ($total_depth_variation=0):
14271 # This is complex ($total_depth_variation=6):
14273 # (is_boundp("a", 'self-insert') && is_boundp("b", 'self-insert'));
14274 elsif ($i_old_assignment_break
14275 && $total_depth_variation > 4
14276 && $old_breakpoint_count == 1 )
14278 $saw_good_breakpoint = 1;
14279 } ## end elsif ( $i_old_assignment_break...)
14281 return $saw_good_breakpoint;
14282 } ## end sub scan_list
14285 sub find_token_starting_list {
14287 # When testing to see if a block will fit on one line, some
14288 # previous token(s) may also need to be on the line; particularly
14289 # if this is a sub call. So we will look back at least one
14290 # token. NOTE: This isn't perfect, but not critical, because
14291 # if we mis-identify a block, it will be wrapped and therefore
14292 # fixed the next time it is formatted.
14293 my $i_opening_paren = shift;
14294 my $i_opening_minus = $i_opening_paren;
14295 my $im1 = $i_opening_paren - 1;
14296 my $im2 = $i_opening_paren - 2;
14297 my $im3 = $i_opening_paren - 3;
14298 my $typem1 = $types_to_go[$im1];
14299 my $typem2 = $im2 >= 0 ? $types_to_go[$im2] : 'b';
14301 if ( $typem1 eq ',' || ( $typem1 eq 'b' && $typem2 eq ',' ) ) {
14302 $i_opening_minus = $i_opening_paren;
14304 elsif ( $tokens_to_go[$i_opening_paren] eq '(' ) {
14305 $i_opening_minus = $im1 if $im1 >= 0;
14307 # walk back to improve length estimate
14308 for ( my $j = $im1 ; $j >= 0 ; $j-- ) {
14309 last if ( $types_to_go[$j] =~ /^[\(\[\{L\}\]\)Rb,]$/ );
14310 $i_opening_minus = $j;
14312 if ( $types_to_go[$i_opening_minus] eq 'b' ) { $i_opening_minus++ }
14314 elsif ( $typem1 eq 'k' ) { $i_opening_minus = $im1 }
14315 elsif ( $typem1 eq 'b' && $im2 >= 0 && $types_to_go[$im2] eq 'k' ) {
14316 $i_opening_minus = $im2;
14318 return $i_opening_minus;
14321 { # begin set_comma_breakpoints_do
14323 my %is_keyword_with_special_leading_term;
14327 # These keywords have prototypes which allow a special leading item
14328 # followed by a list
14330 qw(formline grep kill map printf sprintf push chmod join pack unshift);
14331 @is_keyword_with_special_leading_term{@q} = (1) x scalar(@q);
14334 sub set_comma_breakpoints_do {
14336 # Given a list with some commas, set breakpoints at some of the
14337 # commas, if necessary, to make it easy to read. This list is
14340 $depth, $i_opening_paren, $i_closing_paren,
14341 $item_count, $identifier_count, $rcomma_index,
14342 $next_nonblank_type, $list_type, $interrupted,
14343 $rdo_not_break_apart, $must_break_open,
14346 # nothing to do if no commas seen
14347 return if ( $item_count < 1 );
14348 my $i_first_comma = $rcomma_index->[0];
14349 my $i_true_last_comma = $rcomma_index->[ $item_count - 1 ];
14350 my $i_last_comma = $i_true_last_comma;
14351 if ( $i_last_comma >= $max_index_to_go ) {
14352 $i_last_comma = $rcomma_index->[ --$item_count - 1 ];
14353 return if ( $item_count < 1 );
14356 #---------------------------------------------------------------
14357 # find lengths of all items in the list to calculate page layout
14358 #---------------------------------------------------------------
14359 my $comma_count = $item_count;
14365 my @max_length = ( 0, 0 );
14366 my $first_term_length;
14367 my $i = $i_opening_paren;
14370 foreach my $j ( 0 .. $comma_count - 1 ) {
14371 $is_odd = 1 - $is_odd;
14372 $i_prev_plus = $i + 1;
14373 $i = $rcomma_index->[$j];
14376 ( $types_to_go[ $i - 1 ] eq 'b' ) ? $i - 2 : $i - 1;
14378 ( $types_to_go[$i_prev_plus] eq 'b' )
14381 push @i_term_begin, $i_term_begin;
14382 push @i_term_end, $i_term_end;
14383 push @i_term_comma, $i;
14385 # note: currently adding 2 to all lengths (for comma and space)
14387 2 + token_sequence_length( $i_term_begin, $i_term_end );
14388 push @item_lengths, $length;
14391 $first_term_length = $length;
14395 if ( $length > $max_length[$is_odd] ) {
14396 $max_length[$is_odd] = $length;
14401 # now we have to make a distinction between the comma count and item
14402 # count, because the item count will be one greater than the comma
14403 # count if the last item is not terminated with a comma
14405 ( $types_to_go[ $i_last_comma + 1 ] eq 'b' )
14406 ? $i_last_comma + 1
14409 ( $types_to_go[ $i_closing_paren - 1 ] eq 'b' )
14410 ? $i_closing_paren - 2
14411 : $i_closing_paren - 1;
14412 my $i_effective_last_comma = $i_last_comma;
14414 my $last_item_length = token_sequence_length( $i_b + 1, $i_e );
14416 if ( $last_item_length > 0 ) {
14418 # add 2 to length because other lengths include a comma and a blank
14419 $last_item_length += 2;
14420 push @item_lengths, $last_item_length;
14421 push @i_term_begin, $i_b + 1;
14422 push @i_term_end, $i_e;
14423 push @i_term_comma, undef;
14425 my $i_odd = $item_count % 2;
14427 if ( $last_item_length > $max_length[$i_odd] ) {
14428 $max_length[$i_odd] = $last_item_length;
14432 $i_effective_last_comma = $i_e + 1;
14434 if ( $types_to_go[ $i_b + 1 ] =~ /^[iR\]]$/ ) {
14435 $identifier_count++;
14439 #---------------------------------------------------------------
14440 # End of length calculations
14441 #---------------------------------------------------------------
14443 #---------------------------------------------------------------
14444 # Compound List Rule 1:
14445 # Break at (almost) every comma for a list containing a broken
14446 # sublist. This has higher priority than the Interrupted List
14448 #---------------------------------------------------------------
14449 if ( $has_broken_sublist[$depth] ) {
14451 # Break at every comma except for a comma between two
14452 # simple, small terms. This prevents long vertical
14453 # columns of, say, just 0's.
14454 my $small_length = 10; # 2 + actual maximum length wanted
14456 # We'll insert a break in long runs of small terms to
14457 # allow alignment in uniform tables.
14458 my $skipped_count = 0;
14459 my $columns = table_columns_available($i_first_comma);
14460 my $fields = int( $columns / $small_length );
14461 if ( $rOpts_maximum_fields_per_table
14462 && $fields > $rOpts_maximum_fields_per_table )
14464 $fields = $rOpts_maximum_fields_per_table;
14466 my $max_skipped_count = $fields - 1;
14468 my $is_simple_last_term = 0;
14469 my $is_simple_next_term = 0;
14470 foreach my $j ( 0 .. $item_count ) {
14471 $is_simple_last_term = $is_simple_next_term;
14472 $is_simple_next_term = 0;
14473 if ( $j < $item_count
14474 && $i_term_end[$j] == $i_term_begin[$j]
14475 && $item_lengths[$j] <= $small_length )
14477 $is_simple_next_term = 1;
14480 if ( $is_simple_last_term
14481 && $is_simple_next_term
14482 && $skipped_count < $max_skipped_count )
14487 $skipped_count = 0;
14488 my $i = $i_term_comma[ $j - 1 ];
14489 last unless defined $i;
14490 set_forced_breakpoint($i);
14494 # always break at the last comma if this list is
14495 # interrupted; we wouldn't want to leave a terminal '{', for
14497 if ($interrupted) { set_forced_breakpoint($i_true_last_comma) }
14501 #my ( $a, $b, $c ) = caller();
14502 #print "LISTX: in set_list $a $c interrupt=$interrupted count=$item_count
14503 #i_first = $i_first_comma i_last=$i_last_comma max=$max_index_to_go\n";
14504 #print "depth=$depth has_broken=$has_broken_sublist[$depth] is_multi=$is_multiline opening_paren=($i_opening_paren) \n";
14506 #---------------------------------------------------------------
14507 # Interrupted List Rule:
14508 # A list is forced to use old breakpoints if it was interrupted
14509 # by side comments or blank lines, or requested by user.
14510 #---------------------------------------------------------------
14511 if ( $rOpts_break_at_old_comma_breakpoints
14513 || $i_opening_paren < 0 )
14515 copy_old_breakpoints( $i_first_comma, $i_true_last_comma );
14519 #---------------------------------------------------------------
14520 # Looks like a list of items. We have to look at it and size it up.
14521 #---------------------------------------------------------------
14523 my $opening_token = $tokens_to_go[$i_opening_paren];
14524 my $opening_environment =
14525 $container_environment_to_go[$i_opening_paren];
14527 #-------------------------------------------------------------------
14528 # Return if this will fit on one line
14529 #-------------------------------------------------------------------
14531 my $i_opening_minus = find_token_starting_list($i_opening_paren);
14533 unless excess_line_length( $i_opening_minus, $i_closing_paren ) > 0;
14535 #-------------------------------------------------------------------
14536 # Now we know that this block spans multiple lines; we have to set
14537 # at least one breakpoint -- real or fake -- as a signal to break
14538 # open any outer containers.
14539 #-------------------------------------------------------------------
14540 set_fake_breakpoint();
14542 # be sure we do not extend beyond the current list length
14543 if ( $i_effective_last_comma >= $max_index_to_go ) {
14544 $i_effective_last_comma = $max_index_to_go - 1;
14547 # Set a flag indicating if we need to break open to keep -lp
14548 # items aligned. This is necessary if any of the list terms
14549 # exceeds the available space after the '('.
14550 my $need_lp_break_open = $must_break_open;
14551 if ( $rOpts_line_up_parentheses && !$must_break_open ) {
14552 my $columns_if_unbroken =
14553 maximum_line_length($i_opening_minus) -
14554 total_line_length( $i_opening_minus, $i_opening_paren );
14555 $need_lp_break_open =
14556 ( $max_length[0] > $columns_if_unbroken )
14557 || ( $max_length[1] > $columns_if_unbroken )
14558 || ( $first_term_length > $columns_if_unbroken );
14561 # Specify if the list must have an even number of fields or not.
14562 # It is generally safest to assume an even number, because the
14563 # list items might be a hash list. But if we can be sure that
14564 # it is not a hash, then we can allow an odd number for more
14566 my $odd_or_even = 2; # 1 = odd field count ok, 2 = want even count
14568 if ( $identifier_count >= $item_count - 1
14569 || $is_assignment{$next_nonblank_type}
14570 || ( $list_type && $list_type ne '=>' && $list_type !~ /^[\:\?]$/ )
14576 # do we have a long first term which should be
14577 # left on a line by itself?
14578 my $use_separate_first_term = (
14579 $odd_or_even == 1 # only if we can use 1 field/line
14580 && $item_count > 3 # need several items
14581 && $first_term_length >
14582 2 * $max_length[0] - 2 # need long first term
14583 && $first_term_length >
14584 2 * $max_length[1] - 2 # need long first term
14587 # or do we know from the type of list that the first term should
14589 if ( !$use_separate_first_term ) {
14590 if ( $is_keyword_with_special_leading_term{$list_type} ) {
14591 $use_separate_first_term = 1;
14593 # should the container be broken open?
14594 if ( $item_count < 3 ) {
14595 if ( $i_first_comma - $i_opening_paren < 4 ) {
14596 ${$rdo_not_break_apart} = 1;
14599 elsif ($first_term_length < 20
14600 && $i_first_comma - $i_opening_paren < 4 )
14602 my $columns = table_columns_available($i_first_comma);
14603 if ( $first_term_length < $columns ) {
14604 ${$rdo_not_break_apart} = 1;
14611 if ($use_separate_first_term) {
14613 # ..set a break and update starting values
14614 $use_separate_first_term = 1;
14615 set_forced_breakpoint($i_first_comma);
14616 $i_opening_paren = $i_first_comma;
14617 $i_first_comma = $rcomma_index->[1];
14619 return if $comma_count == 1;
14620 shift @item_lengths;
14621 shift @i_term_begin;
14623 shift @i_term_comma;
14626 # if not, update the metrics to include the first term
14628 if ( $first_term_length > $max_length[0] ) {
14629 $max_length[0] = $first_term_length;
14633 # Field width parameters
14634 my $pair_width = ( $max_length[0] + $max_length[1] );
14636 ( $max_length[0] > $max_length[1] ) ? $max_length[0] : $max_length[1];
14638 # Number of free columns across the page width for laying out tables
14639 my $columns = table_columns_available($i_first_comma);
14641 # Estimated maximum number of fields which fit this space
14642 # This will be our first guess
14643 my $number_of_fields_max =
14644 maximum_number_of_fields( $columns, $odd_or_even, $max_width,
14646 my $number_of_fields = $number_of_fields_max;
14648 # Find the best-looking number of fields
14649 # and make this our second guess if possible
14650 my ( $number_of_fields_best, $ri_ragged_break_list,
14651 $new_identifier_count )
14652 = study_list_complexity( \@i_term_begin, \@i_term_end, \@item_lengths,
14655 if ( $number_of_fields_best != 0
14656 && $number_of_fields_best < $number_of_fields_max )
14658 $number_of_fields = $number_of_fields_best;
14661 # ----------------------------------------------------------------------
14662 # If we are crowded and the -lp option is being used, try to
14663 # undo some indentation
14664 # ----------------------------------------------------------------------
14666 $rOpts_line_up_parentheses
14668 $number_of_fields == 0
14669 || ( $number_of_fields == 1
14670 && $number_of_fields != $number_of_fields_best )
14674 my $available_spaces = get_available_spaces_to_go($i_first_comma);
14675 if ( $available_spaces > 0 ) {
14677 my $spaces_wanted = $max_width - $columns; # for 1 field
14679 if ( $number_of_fields_best == 0 ) {
14680 $number_of_fields_best =
14681 get_maximum_fields_wanted( \@item_lengths );
14684 if ( $number_of_fields_best != 1 ) {
14685 my $spaces_wanted_2 =
14686 1 + $pair_width - $columns; # for 2 fields
14687 if ( $available_spaces > $spaces_wanted_2 ) {
14688 $spaces_wanted = $spaces_wanted_2;
14692 if ( $spaces_wanted > 0 ) {
14693 my $deleted_spaces =
14694 reduce_lp_indentation( $i_first_comma, $spaces_wanted );
14697 if ( $deleted_spaces > 0 ) {
14698 $columns = table_columns_available($i_first_comma);
14699 $number_of_fields_max =
14700 maximum_number_of_fields( $columns, $odd_or_even,
14701 $max_width, $pair_width );
14702 $number_of_fields = $number_of_fields_max;
14704 if ( $number_of_fields_best == 1
14705 && $number_of_fields >= 1 )
14707 $number_of_fields = $number_of_fields_best;
14714 # try for one column if two won't work
14715 if ( $number_of_fields <= 0 ) {
14716 $number_of_fields = int( $columns / $max_width );
14719 # The user can place an upper bound on the number of fields,
14720 # which can be useful for doing maintenance on tables
14721 if ( $rOpts_maximum_fields_per_table
14722 && $number_of_fields > $rOpts_maximum_fields_per_table )
14724 $number_of_fields = $rOpts_maximum_fields_per_table;
14727 # How many columns (characters) and lines would this container take
14728 # if no additional whitespace were added?
14729 my $packed_columns = token_sequence_length( $i_opening_paren + 1,
14730 $i_effective_last_comma + 1 );
14731 if ( $columns <= 0 ) { $columns = 1 } # avoid divide by zero
14732 my $packed_lines = 1 + int( $packed_columns / $columns );
14734 # are we an item contained in an outer list?
14735 my $in_hierarchical_list = $next_nonblank_type =~ /^[\}\,]$/;
14737 if ( $number_of_fields <= 0 ) {
14739 # #---------------------------------------------------------------
14740 # # We're in trouble. We can't find a single field width that works.
14741 # # There is no simple answer here; we may have a single long list
14743 # #---------------------------------------------------------------
14745 # In many cases, it may be best to not force a break if there is just one
14746 # comma, because the standard continuation break logic will do a better
14749 # In the common case that all but one of the terms can fit
14750 # on a single line, it may look better not to break open the
14751 # containing parens. Consider, for example
14755 # sort { $color_value{$::a} <=> $color_value{$::b}; }
14758 # which will look like this with the container broken:
14762 # sort { $color_value{$::a} <=> $color_value{$::b}; } keys %colors
14765 # Here is an example of this rule for a long last term:
14767 # log_message( 0, 256, 128,
14768 # "Number of routes in adj-RIB-in to be considered: $peercount" );
14770 # And here is an example with a long first term:
14773 # "%2d wallclock secs (%$f usr %$f sys + %$f cusr %$f csys = %$f CPU)",
14774 # $r, $pu, $ps, $cu, $cs, $tt
14776 # if $style eq 'all';
14778 my $i_last_comma = $rcomma_index->[ $comma_count - 1 ];
14779 my $long_last_term = excess_line_length( 0, $i_last_comma ) <= 0;
14780 my $long_first_term =
14781 excess_line_length( $i_first_comma + 1, $max_index_to_go ) <= 0;
14783 # break at every comma ...
14786 # if requested by user or is best looking
14787 $number_of_fields_best == 1
14789 # or if this is a sublist of a larger list
14790 || $in_hierarchical_list
14792 # or if multiple commas and we don't have a long first or last
14794 || ( $comma_count > 1
14795 && !( $long_last_term || $long_first_term ) )
14798 foreach ( 0 .. $comma_count - 1 ) {
14799 set_forced_breakpoint( $rcomma_index->[$_] );
14802 elsif ($long_last_term) {
14804 set_forced_breakpoint($i_last_comma);
14805 ${$rdo_not_break_apart} = 1 unless $must_break_open;
14807 elsif ($long_first_term) {
14809 set_forced_breakpoint($i_first_comma);
14813 # let breaks be defined by default bond strength logic
14818 # --------------------------------------------------------
14819 # We have a tentative field count that seems to work.
14820 # How many lines will this require?
14821 # --------------------------------------------------------
14822 my $formatted_lines = $item_count / ($number_of_fields);
14823 if ( $formatted_lines != int $formatted_lines ) {
14824 $formatted_lines = 1 + int $formatted_lines;
14827 # So far we've been trying to fill out to the right margin. But
14828 # compact tables are easier to read, so let's see if we can use fewer
14829 # fields without increasing the number of lines.
14830 $number_of_fields =
14831 compactify_table( $item_count, $number_of_fields, $formatted_lines,
14834 # How many spaces across the page will we fill?
14835 my $columns_per_line =
14836 ( int $number_of_fields / 2 ) * $pair_width +
14837 ( $number_of_fields % 2 ) * $max_width;
14839 my $formatted_columns;
14841 if ( $number_of_fields > 1 ) {
14842 $formatted_columns =
14843 ( $pair_width * ( int( $item_count / 2 ) ) +
14844 ( $item_count % 2 ) * $max_width );
14847 $formatted_columns = $max_width * $item_count;
14849 if ( $formatted_columns < $packed_columns ) {
14850 $formatted_columns = $packed_columns;
14853 my $unused_columns = $formatted_columns - $packed_columns;
14855 # set some empirical parameters to help decide if we should try to
14856 # align; high sparsity does not look good, especially with few lines
14857 my $sparsity = ($unused_columns) / ($formatted_columns);
14858 my $max_allowed_sparsity =
14859 ( $item_count < 3 ) ? 0.1
14860 : ( $packed_lines == 1 ) ? 0.15
14861 : ( $packed_lines == 2 ) ? 0.4
14864 # Begin check for shortcut methods, which avoid treating a list
14865 # as a table for relatively small parenthesized lists. These
14866 # are usually easier to read if not formatted as tables.
14868 $packed_lines <= 2 # probably can fit in 2 lines
14869 && $item_count < 9 # doesn't have too many items
14870 && $opening_environment eq 'BLOCK' # not a sub-container
14871 && $opening_token eq '(' # is paren list
14875 # Shortcut method 1: for -lp and just one comma:
14876 # This is a no-brainer, just break at the comma.
14878 $rOpts_line_up_parentheses # -lp
14879 && $item_count == 2 # two items, one comma
14880 && !$must_break_open
14883 my $i_break = $rcomma_index->[0];
14884 set_forced_breakpoint($i_break);
14885 ${$rdo_not_break_apart} = 1;
14890 # method 2 is for most small ragged lists which might look
14891 # best if not displayed as a table.
14893 ( $number_of_fields == 2 && $item_count == 3 )
14895 $new_identifier_count > 0 # isn't all quotes
14896 && $sparsity > 0.15
14897 ) # would be fairly spaced gaps if aligned
14901 my $break_count = set_ragged_breakpoints( \@i_term_comma,
14902 $ri_ragged_break_list );
14903 ++$break_count if ($use_separate_first_term);
14905 # NOTE: we should really use the true break count here,
14906 # which can be greater if there are large terms and
14907 # little space, but usually this will work well enough.
14908 unless ($must_break_open) {
14910 if ( $break_count <= 1 ) {
14911 ${$rdo_not_break_apart} = 1;
14913 elsif ( $rOpts_line_up_parentheses && !$need_lp_break_open )
14915 ${$rdo_not_break_apart} = 1;
14921 } # end shortcut methods
14925 FORMATTER_DEBUG_FLAG_SPARSE && do {
14927 "SPARSE:cols=$columns commas=$comma_count items:$item_count ids=$identifier_count pairwidth=$pair_width fields=$number_of_fields lines packed: $packed_lines packed_cols=$packed_columns fmtd:$formatted_lines cols /line:$columns_per_line unused:$unused_columns fmtd:$formatted_columns sparsity=$sparsity allow=$max_allowed_sparsity\n";
14931 #---------------------------------------------------------------
14932 # Compound List Rule 2:
14933 # If this list is too long for one line, and it is an item of a
14934 # larger list, then we must format it, regardless of sparsity
14935 # (ian.t). One reason that we have to do this is to trigger
14936 # Compound List Rule 1, above, which causes breaks at all commas of
14937 # all outer lists. In this way, the structure will be properly
14939 #---------------------------------------------------------------
14941 # Decide if this list is too long for one line unless broken
14942 my $total_columns = table_columns_available($i_opening_paren);
14943 my $too_long = $packed_columns > $total_columns;
14945 # For a paren list, include the length of the token just before the
14946 # '(' because this is likely a sub call, and we would have to
14947 # include the sub name on the same line as the list. This is still
14948 # imprecise, but not too bad. (steve.t)
14949 if ( !$too_long && $i_opening_paren > 0 && $opening_token eq '(' ) {
14951 $too_long = excess_line_length( $i_opening_minus,
14952 $i_effective_last_comma + 1 ) > 0;
14955 # FIXME: For an item after a '=>', try to include the length of the
14956 # thing before the '=>'. This is crude and should be improved by
14957 # actually looking back token by token.
14958 if ( !$too_long && $i_opening_paren > 0 && $list_type eq '=>' ) {
14959 my $i_opening_minus = $i_opening_paren - 4;
14960 if ( $i_opening_minus >= 0 ) {
14961 $too_long = excess_line_length( $i_opening_minus,
14962 $i_effective_last_comma + 1 ) > 0;
14966 # Always break lists contained in '[' and '{' if too long for 1 line,
14967 # and always break lists which are too long and part of a more complex
14969 my $must_break_open_container = $must_break_open
14971 && ( $in_hierarchical_list || $opening_token ne '(' ) );
14973 #print "LISTX: next=$next_nonblank_type avail cols=$columns packed=$packed_columns must format = $must_break_open_container too-long=$too_long opening=$opening_token list_type=$list_type formatted_lines=$formatted_lines packed=$packed_lines max_sparsity= $max_allowed_sparsity sparsity=$sparsity \n";
14975 #---------------------------------------------------------------
14976 # The main decision:
14977 # Now decide if we will align the data into aligned columns. Do not
14978 # attempt to align columns if this is a tiny table or it would be
14979 # too spaced. It seems that the more packed lines we have, the
14980 # sparser the list that can be allowed and still look ok.
14981 #---------------------------------------------------------------
14983 if ( ( $formatted_lines < 3 && $packed_lines < $formatted_lines )
14984 || ( $formatted_lines < 2 )
14985 || ( $unused_columns > $max_allowed_sparsity * $formatted_columns )
14989 #---------------------------------------------------------------
14990 # too sparse: would look ugly if aligned in a table;
14991 #---------------------------------------------------------------
14993 # use old breakpoints if this is a 'big' list
14994 # FIXME: goal is to improve set_ragged_breakpoints so that
14995 # this is not necessary.
14996 if ( $packed_lines > 2 && $item_count > 10 ) {
14997 write_logfile_entry("List sparse: using old breakpoints\n");
14998 copy_old_breakpoints( $i_first_comma, $i_last_comma );
15001 # let the continuation logic handle it if 2 lines
15004 my $break_count = set_ragged_breakpoints( \@i_term_comma,
15005 $ri_ragged_break_list );
15006 ++$break_count if ($use_separate_first_term);
15008 unless ($must_break_open_container) {
15009 if ( $break_count <= 1 ) {
15010 ${$rdo_not_break_apart} = 1;
15012 elsif ( $rOpts_line_up_parentheses && !$need_lp_break_open )
15014 ${$rdo_not_break_apart} = 1;
15021 #---------------------------------------------------------------
15022 # go ahead and format as a table
15023 #---------------------------------------------------------------
15024 write_logfile_entry(
15025 "List: auto formatting with $number_of_fields fields/row\n");
15027 my $j_first_break =
15028 $use_separate_first_term ? $number_of_fields : $number_of_fields - 1;
15031 my $j = $j_first_break ;
15032 $j < $comma_count ;
15033 $j += $number_of_fields
15036 my $i = $rcomma_index->[$j];
15037 set_forced_breakpoint($i);
15043 sub study_list_complexity {
15045 # Look for complex tables which should be formatted with one term per line.
15046 # Returns the following:
15048 # \@i_ragged_break_list = list of good breakpoints to avoid lines
15049 # which are hard to read
15050 # $number_of_fields_best = suggested number of fields based on
15051 # complexity; = 0 if any number may be used.
15053 my ( $ri_term_begin, $ri_term_end, $ritem_lengths, $max_width ) = @_;
15054 my $item_count = @{$ri_term_begin};
15055 my $complex_item_count = 0;
15056 my $number_of_fields_best = $rOpts_maximum_fields_per_table;
15057 my $i_max = @{$ritem_lengths} - 1;
15058 ##my @item_complexity;
15060 my $i_last_last_break = -3;
15061 my $i_last_break = -2;
15062 my @i_ragged_break_list;
15064 my $definitely_complex = 30;
15065 my $definitely_simple = 12;
15066 my $quote_count = 0;
15068 for my $i ( 0 .. $i_max ) {
15069 my $ib = $ri_term_begin->[$i];
15070 my $ie = $ri_term_end->[$i];
15072 # define complexity: start with the actual term length
15073 my $weighted_length = ( $ritem_lengths->[$i] - 2 );
15075 ##TBD: join types here and check for variations
15076 ##my $str=join "", @tokens_to_go[$ib..$ie];
15079 if ( $types_to_go[$ib] =~ /^[qQ]$/ ) {
15083 elsif ( $types_to_go[$ib] =~ /^[w\-]$/ ) {
15087 if ( $ib eq $ie ) {
15088 if ( $is_quote && $tokens_to_go[$ib] =~ /\s/ ) {
15089 $complex_item_count++;
15090 $weighted_length *= 2;
15096 if ( grep { $_ eq 'b' } @types_to_go[ $ib .. $ie ] ) {
15097 $complex_item_count++;
15098 $weighted_length *= 2;
15100 if ( grep { $_ eq '..' } @types_to_go[ $ib .. $ie ] ) {
15101 $weighted_length += 4;
15105 # add weight for extra tokens.
15106 $weighted_length += 2 * ( $ie - $ib );
15108 ## my $BUB = join '', @tokens_to_go[$ib..$ie];
15109 ## print "# COMPLEXITY:$weighted_length $BUB\n";
15111 ##push @item_complexity, $weighted_length;
15113 # now mark a ragged break after this item it if it is 'long and
15115 if ( $weighted_length >= $definitely_complex ) {
15117 # if we broke after the previous term
15118 # then break before it too
15119 if ( $i_last_break == $i - 1
15121 && $i_last_last_break != $i - 2 )
15124 ## FIXME: don't strand a small term
15125 pop @i_ragged_break_list;
15126 push @i_ragged_break_list, $i - 2;
15127 push @i_ragged_break_list, $i - 1;
15130 push @i_ragged_break_list, $i;
15131 $i_last_last_break = $i_last_break;
15132 $i_last_break = $i;
15135 # don't break before a small last term -- it will
15136 # not look good on a line by itself.
15137 elsif ($i == $i_max
15138 && $i_last_break == $i - 1
15139 && $weighted_length <= $definitely_simple )
15141 pop @i_ragged_break_list;
15145 my $identifier_count = $i_max + 1 - $quote_count;
15147 # Need more tuning here..
15148 if ( $max_width > 12
15149 && $complex_item_count > $item_count / 2
15150 && $number_of_fields_best != 2 )
15152 $number_of_fields_best = 1;
15155 return ( $number_of_fields_best, \@i_ragged_break_list, $identifier_count );
15158 sub get_maximum_fields_wanted {
15160 # Not all tables look good with more than one field of items.
15161 # This routine looks at a table and decides if it should be
15162 # formatted with just one field or not.
15163 # This coding is still under development.
15164 my ($ritem_lengths) = @_;
15166 my $number_of_fields_best = 0;
15168 # For just a few items, we tentatively assume just 1 field.
15169 my $item_count = @{$ritem_lengths};
15170 if ( $item_count <= 5 ) {
15171 $number_of_fields_best = 1;
15174 # For larger tables, look at it both ways and see what looks best
15178 my @max_length = ( 0, 0 );
15179 my @last_length_2 = ( undef, undef );
15180 my @first_length_2 = ( undef, undef );
15181 my $last_length = undef;
15182 my $total_variation_1 = 0;
15183 my $total_variation_2 = 0;
15184 my @total_variation_2 = ( 0, 0 );
15186 foreach my $j ( 0 .. $item_count - 1 ) {
15188 $is_odd = 1 - $is_odd;
15189 my $length = $ritem_lengths->[$j];
15190 if ( $length > $max_length[$is_odd] ) {
15191 $max_length[$is_odd] = $length;
15194 if ( defined($last_length) ) {
15195 my $dl = abs( $length - $last_length );
15196 $total_variation_1 += $dl;
15198 $last_length = $length;
15200 my $ll = $last_length_2[$is_odd];
15201 if ( defined($ll) ) {
15202 my $dl = abs( $length - $ll );
15203 $total_variation_2[$is_odd] += $dl;
15206 $first_length_2[$is_odd] = $length;
15208 $last_length_2[$is_odd] = $length;
15210 $total_variation_2 = $total_variation_2[0] + $total_variation_2[1];
15212 my $factor = ( $item_count > 10 ) ? 1 : ( $item_count > 5 ) ? 0.75 : 0;
15213 unless ( $total_variation_2 < $factor * $total_variation_1 ) {
15214 $number_of_fields_best = 1;
15217 return ($number_of_fields_best);
15220 sub table_columns_available {
15221 my $i_first_comma = shift;
15223 maximum_line_length($i_first_comma) -
15224 leading_spaces_to_go($i_first_comma);
15226 # Patch: the vertical formatter does not line up lines whose lengths
15227 # exactly equal the available line length because of allowances
15228 # that must be made for side comments. Therefore, the number of
15229 # available columns is reduced by 1 character.
15234 sub maximum_number_of_fields {
15236 # how many fields will fit in the available space?
15237 my ( $columns, $odd_or_even, $max_width, $pair_width ) = @_;
15238 my $max_pairs = int( $columns / $pair_width );
15239 my $number_of_fields = $max_pairs * 2;
15240 if ( $odd_or_even == 1
15241 && $max_pairs * $pair_width + $max_width <= $columns )
15243 $number_of_fields++;
15245 return $number_of_fields;
15248 sub compactify_table {
15250 # given a table with a certain number of fields and a certain number
15251 # of lines, see if reducing the number of fields will make it look
15253 my ( $item_count, $number_of_fields, $formatted_lines, $odd_or_even ) = @_;
15254 if ( $number_of_fields >= $odd_or_even * 2 && $formatted_lines > 0 ) {
15258 $min_fields = $number_of_fields ;
15259 $min_fields >= $odd_or_even
15260 && $min_fields * $formatted_lines >= $item_count ;
15261 $min_fields -= $odd_or_even
15264 $number_of_fields = $min_fields;
15267 return $number_of_fields;
15270 sub set_ragged_breakpoints {
15272 # Set breakpoints in a list that cannot be formatted nicely as a
15274 my ( $ri_term_comma, $ri_ragged_break_list ) = @_;
15276 my $break_count = 0;
15277 foreach ( @{$ri_ragged_break_list} ) {
15278 my $j = $ri_term_comma->[$_];
15280 set_forced_breakpoint($j);
15284 return $break_count;
15287 sub copy_old_breakpoints {
15288 my ( $i_first_comma, $i_last_comma ) = @_;
15289 for my $i ( $i_first_comma .. $i_last_comma ) {
15290 if ( $old_breakpoint_to_go[$i] ) {
15291 set_forced_breakpoint($i);
15298 my ( $i, $j ) = @_;
15299 if ( $i >= 0 && $i <= $j && $j <= $max_index_to_go ) {
15301 FORMATTER_DEBUG_FLAG_NOBREAK && do {
15302 my ( $a, $b, $c ) = caller();
15304 "NOBREAK: forced_breakpoint $forced_breakpoint_count from $a $c with i=$i max=$max_index_to_go type=$types_to_go[$i]\n";
15307 @nobreak_to_go[ $i .. $j ] = (1) x ( $j - $i + 1 );
15310 # shouldn't happen; non-critical error
15312 FORMATTER_DEBUG_FLAG_NOBREAK && do {
15313 my ( $a, $b, $c ) = caller();
15315 "NOBREAK ERROR: from $a $c with i=$i j=$j max=$max_index_to_go\n";
15321 sub set_fake_breakpoint {
15323 # Just bump up the breakpoint count as a signal that there are breaks.
15324 # This is useful if we have breaks but may want to postpone deciding where
15326 $forced_breakpoint_count++;
15330 sub set_forced_breakpoint {
15333 return unless defined $i && $i >= 0;
15335 # no breaks between welded tokens
15336 return if ( weld_len_right_to_go($i) );
15338 # when called with certain tokens, use bond strengths to decide
15339 # if we break before or after it
15340 my $token = $tokens_to_go[$i];
15342 if ( $token =~ /^([\=\.\,\:\?]|and|or|xor|&&|\|\|)$/ ) {
15343 if ( $want_break_before{$token} && $i >= 0 ) { $i-- }
15346 # breaks are forced before 'if' and 'unless'
15347 elsif ( $is_if_unless{$token} ) { $i-- }
15349 if ( $i >= 0 && $i <= $max_index_to_go ) {
15350 my $i_nonblank = ( $types_to_go[$i] ne 'b' ) ? $i : $i - 1;
15352 FORMATTER_DEBUG_FLAG_FORCE && do {
15353 my ( $a, $b, $c ) = caller();
15355 "FORCE $forced_breakpoint_count from $a $c with i=$i_nonblank max=$max_index_to_go tok=$tokens_to_go[$i_nonblank] type=$types_to_go[$i_nonblank] nobr=$nobreak_to_go[$i_nonblank]\n";
15358 if ( $i_nonblank >= 0 && $nobreak_to_go[$i_nonblank] == 0 ) {
15359 $forced_breakpoint_to_go[$i_nonblank] = 1;
15361 if ( $i_nonblank > $index_max_forced_break ) {
15362 $index_max_forced_break = $i_nonblank;
15364 $forced_breakpoint_count++;
15365 $forced_breakpoint_undo_stack[ $forced_breakpoint_undo_count++ ] =
15368 # if we break at an opening container..break at the closing
15369 if ( $tokens_to_go[$i_nonblank] =~ /^[\{\[\(\?]$/ ) {
15370 set_closing_breakpoint($i_nonblank);
15377 sub clear_breakpoint_undo_stack {
15378 $forced_breakpoint_undo_count = 0;
15382 sub undo_forced_breakpoint_stack {
15384 my $i_start = shift;
15385 if ( $i_start < 0 ) {
15387 my ( $a, $b, $c ) = caller();
15389 "Program Bug: undo_forced_breakpoint_stack from $a $c has i=$i_start "
15393 while ( $forced_breakpoint_undo_count > $i_start ) {
15395 $forced_breakpoint_undo_stack[ --$forced_breakpoint_undo_count ];
15396 if ( $i >= 0 && $i <= $max_index_to_go ) {
15397 $forced_breakpoint_to_go[$i] = 0;
15398 $forced_breakpoint_count--;
15400 FORMATTER_DEBUG_FLAG_UNDOBP && do {
15401 my ( $a, $b, $c ) = caller();
15403 "UNDOBP: undo forced_breakpoint i=$i $forced_breakpoint_undo_count from $a $c max=$max_index_to_go\n";
15407 # shouldn't happen, but not a critical error
15409 FORMATTER_DEBUG_FLAG_UNDOBP && do {
15410 my ( $a, $b, $c ) = caller();
15412 "Program Bug: undo_forced_breakpoint from $a $c has i=$i but max=$max_index_to_go";
15420 my ( $self, $i ) = @_;
15422 # Keep tokens in the rLL array in sync with the _to_go array
15423 my $rLL = $self->{rLL};
15424 my $K = $K_to_go[$i];
15425 if ( defined($K) ) {
15426 $rLL->[$K]->[_TOKEN_] = $tokens_to_go[$i];
15434 { # begin recombine_breakpoints
15446 @is_amp_amp{@q} = (1) x scalar(@q);
15449 @is_ternary{@q} = (1) x scalar(@q);
15451 @q = qw( + - * / );
15452 @is_math_op{@q} = (1) x scalar(@q);
15455 @is_plus_minus{@q} = (1) x scalar(@q);
15458 @is_mult_div{@q} = (1) x scalar(@q);
15461 sub DUMP_BREAKPOINTS {
15463 # Debug routine to dump current breakpoints...not normally called
15464 # We are given indexes to the current lines:
15465 # $ri_beg = ref to array of BEGinning indexes of each line
15466 # $ri_end = ref to array of ENDing indexes of each line
15467 my ( $ri_beg, $ri_end, $msg ) = @_;
15468 print STDERR "----Dumping breakpoints from: $msg----\n";
15469 for my $n ( 0 .. @{$ri_end} - 1 ) {
15470 my $ibeg = $ri_beg->[$n];
15471 my $iend = $ri_end->[$n];
15473 foreach my $i ( $ibeg .. $iend ) {
15474 $text .= $tokens_to_go[$i];
15476 print STDERR "$n ($ibeg:$iend) $text\n";
15478 print STDERR "----\n";
15482 sub delete_one_line_semicolons {
15484 my ( $self, $ri_beg, $ri_end ) = @_;
15485 my $rLL = $self->{rLL};
15486 my $K_opening_container = $self->{K_opening_container};
15488 # Walk down the lines of this batch and delete any semicolons
15489 # terminating one-line blocks;
15490 my $nmax = @{$ri_end} - 1;
15492 foreach my $n ( 0 .. $nmax ) {
15493 my $i_beg = $ri_beg->[$n];
15494 my $i_e = $ri_end->[$n];
15495 my $K_beg = $K_to_go[$i_beg];
15496 my $K_e = $K_to_go[$i_e];
15498 my $type_end = $rLL->[$K_end]->[_TYPE_];
15499 if ( $type_end eq '#' ) {
15500 $K_end = $self->K_previous_nonblank($K_end);
15501 if ( defined($K_end) ) { $type_end = $rLL->[$K_end]->[_TYPE_]; }
15504 # we are looking for a line ending in closing brace
15506 unless ( $type_end eq '}' && $rLL->[$K_end]->[_TOKEN_] eq '}' );
15508 # ...and preceded by a semicolon on the same line
15509 my $K_semicolon = $self->K_previous_nonblank($K_end);
15510 my $i_semicolon = $i_beg + ( $K_semicolon - $K_beg );
15511 next if ( $i_semicolon <= $i_beg );
15512 next unless ( $rLL->[$K_semicolon]->[_TYPE_] eq ';' );
15514 # safety check - shouldn't happen
15515 if ( $types_to_go[$i_semicolon] ne ';' ) {
15516 Fault("unexpected type looking for semicolon, ignoring");
15520 # ... with the corresponding opening brace on the same line
15521 my $type_sequence = $rLL->[$K_end]->[_TYPE_SEQUENCE_];
15522 my $K_opening = $K_opening_container->{$type_sequence};
15523 my $i_opening = $i_beg + ( $K_opening - $K_beg );
15524 next if ( $i_opening < $i_beg );
15526 # ... and only one semicolon between these braces
15527 my $semicolon_count = 0;
15528 foreach my $K ( $K_opening + 1 .. $K_semicolon - 1 ) {
15529 if ( $rLL->[$K]->[_TYPE_] eq ';' ) {
15530 $semicolon_count++;
15534 next if ($semicolon_count);
15536 # ...ok, then make the semicolon invisible
15537 $tokens_to_go[$i_semicolon] = "";
15542 sub unmask_phantom_semicolons {
15544 my ( $self, $ri_beg, $ri_end ) = @_;
15546 # Walk down the lines of this batch and unmask any invisible line-ending
15547 # semicolons. They were placed by sub respace_tokens but we only now
15548 # know if we actually need them.
15550 my $nmax = @{$ri_end} - 1;
15551 foreach my $n ( 0 .. $nmax ) {
15553 my $i = $ri_end->[$n];
15554 if ( $types_to_go[$i] eq ';' && $tokens_to_go[$i] eq '' ) {
15556 $tokens_to_go[$i] = $want_left_space{';'} == WS_NO ? ';' : ' ;';
15557 $self->sync_token_K($i);
15559 my $line_number = 1 + $self->get_old_line_index( $K_to_go[$i] );
15560 note_added_semicolon($line_number);
15566 sub recombine_breakpoints {
15568 # sub set_continuation_breaks is very liberal in setting line breaks
15569 # for long lines, always setting breaks at good breakpoints, even
15570 # when that creates small lines. Sometimes small line fragments
15571 # are produced which would look better if they were combined.
15572 # That's the task of this routine.
15574 # We are given indexes to the current lines:
15575 # $ri_beg = ref to array of BEGinning indexes of each line
15576 # $ri_end = ref to array of ENDing indexes of each line
15577 my ( $ri_beg, $ri_end ) = @_;
15579 # Make a list of all good joining tokens between the lines
15582 my $nmax = @{$ri_end} - 1;
15583 for my $n ( 1 .. $nmax ) {
15584 my $ibeg_1 = $ri_beg->[ $n - 1 ];
15585 my $iend_1 = $ri_end->[ $n - 1 ];
15586 my $iend_2 = $ri_end->[$n];
15587 my $ibeg_2 = $ri_beg->[$n];
15589 my ( $itok, $itokp, $itokm );
15591 foreach my $itest ( $iend_1, $ibeg_2 ) {
15592 my $type = $types_to_go[$itest];
15593 if ( $is_math_op{$type}
15594 || $is_amp_amp{$type}
15595 || $is_assignment{$type}
15601 $joint[$n] = [$itok];
15604 my $more_to_do = 1;
15606 # We keep looping over all of the lines of this batch
15607 # until there are no more possible recombinations
15608 my $nmax_last = @{$ri_end};
15610 while ($more_to_do) {
15613 my $nmax = @{$ri_end} - 1;
15615 # Safety check for infinite loop
15616 unless ( $nmax < $nmax_last ) {
15618 # Shouldn't happen because splice below decreases nmax on each
15620 Fault("Program bug-infinite loop in recombine breakpoints\n");
15622 $nmax_last = $nmax;
15624 my $skip_Section_3;
15625 my $leading_amp_count = 0;
15626 my $this_line_is_semicolon_terminated;
15628 # loop over all remaining lines in this batch
15629 for my $iter ( 1 .. $nmax ) {
15631 # alternating sweep direction gives symmetric results
15632 # for recombining lines which exceed the line length
15633 # such as eval {{{{.... }}}}
15635 if ($reverse) { $n = 1 + $nmax - $iter; }
15636 else { $n = $iter }
15638 #----------------------------------------------------------
15639 # If we join the current pair of lines,
15640 # line $n-1 will become the left part of the joined line
15641 # line $n will become the right part of the joined line
15643 # Here are Indexes of the endpoint tokens of the two lines:
15645 # -----line $n-1--- | -----line $n-----
15646 # $ibeg_1 $iend_1 | $ibeg_2 $iend_2
15649 # We want to decide if we should remove the line break
15650 # between the tokens at $iend_1 and $ibeg_2
15652 # We will apply a number of ad-hoc tests to see if joining
15653 # here will look ok. The code will just issue a 'next'
15654 # command if the join doesn't look good. If we get through
15655 # the gauntlet of tests, the lines will be recombined.
15656 #----------------------------------------------------------
15658 # beginning and ending tokens of the lines we are working on
15659 my $ibeg_1 = $ri_beg->[ $n - 1 ];
15660 my $iend_1 = $ri_end->[ $n - 1 ];
15661 my $iend_2 = $ri_end->[$n];
15662 my $ibeg_2 = $ri_beg->[$n];
15663 my $ibeg_nmax = $ri_beg->[$nmax];
15665 # combined line cannot be too long
15666 my $excess = excess_line_length( $ibeg_1, $iend_2, 1, 1 );
15667 next if ( $excess > 0 );
15669 my $type_iend_1 = $types_to_go[$iend_1];
15670 my $type_iend_2 = $types_to_go[$iend_2];
15671 my $type_ibeg_1 = $types_to_go[$ibeg_1];
15672 my $type_ibeg_2 = $types_to_go[$ibeg_2];
15674 # terminal token of line 2 if any side comment is ignored:
15675 my $iend_2t = $iend_2;
15676 my $type_iend_2t = $type_iend_2;
15678 # some beginning indexes of other lines, which may not exist
15679 my $ibeg_0 = $n > 1 ? $ri_beg->[ $n - 2 ] : -1;
15680 my $ibeg_3 = $n < $nmax ? $ri_beg->[ $n + 1 ] : -1;
15681 my $ibeg_4 = $n + 2 <= $nmax ? $ri_beg->[ $n + 2 ] : -1;
15685 #my $depth_increase=( $nesting_depth_to_go[$ibeg_2] -
15686 # $nesting_depth_to_go[$ibeg_1] );
15688 FORMATTER_DEBUG_FLAG_RECOMBINE && do {
15690 "RECOMBINE: n=$n imid=$iend_1 if=$ibeg_1 type=$type_ibeg_1 =$tokens_to_go[$ibeg_1] next_type=$type_ibeg_2 next_tok=$tokens_to_go[$ibeg_2]\n";
15693 # If line $n is the last line, we set some flags and
15694 # do any special checks for it
15695 if ( $n == $nmax ) {
15697 # a terminal '{' should stay where it is
15698 # unless preceded by a fat comma
15699 next if ( $type_ibeg_2 eq '{' && $type_iend_1 ne '=>' );
15701 if ( $type_iend_2 eq '#'
15702 && $iend_2 - $ibeg_2 >= 2
15703 && $types_to_go[ $iend_2 - 1 ] eq 'b' )
15705 $iend_2t = $iend_2 - 2;
15706 $type_iend_2t = $types_to_go[$iend_2t];
15709 $this_line_is_semicolon_terminated = $type_iend_2t eq ';';
15712 #----------------------------------------------------------
15713 # Recombine Section 0:
15714 # Examine the special token joining this line pair, if any.
15715 # Put as many tests in this section to avoid duplicate code and
15716 # to make formatting independent of whether breaks are to the
15717 # left or right of an operator.
15718 #----------------------------------------------------------
15720 my ($itok) = @{ $joint[$n] };
15723 # FIXME: Patch - may not be necessary
15725 $type_iend_1 eq 'b'
15730 $type_iend_2 eq 'b'
15735 my $type = $types_to_go[$itok];
15737 if ( $type eq ':' ) {
15739 # do not join at a colon unless it disobeys the break request
15740 if ( $itok eq $iend_1 ) {
15741 next unless $want_break_before{$type};
15744 $leading_amp_count++;
15745 next if $want_break_before{$type};
15749 # handle math operators + - * /
15750 elsif ( $is_math_op{$type} ) {
15752 # Combine these lines if this line is a single
15753 # number, or if it is a short term with same
15754 # operator as the previous line. For example, in
15755 # the following code we will combine all of the
15756 # short terms $A, $B, $C, $D, $E, $F, together
15757 # instead of leaving them one per line:
15759 # $A * $B * $C * $D * $E * $F *
15760 # ( 2. * $eps * $sigma * $area ) *
15761 # ( 1. / $tcold**3 - 1. / $thot**3 );
15763 # This can be important in math-intensive code.
15767 my $itokp = min( $inext_to_go[$itok], $iend_2 );
15768 my $itokpp = min( $inext_to_go[$itokp], $iend_2 );
15769 my $itokm = max( $iprev_to_go[$itok], $ibeg_1 );
15770 my $itokmm = max( $iprev_to_go[$itokm], $ibeg_1 );
15772 # check for a number on the right
15773 if ( $types_to_go[$itokp] eq 'n' ) {
15775 # ok if nothing else on right
15776 if ( $itokp == $iend_2 ) {
15781 # look one more token to right..
15782 # okay if math operator or some termination
15784 ( ( $itokpp == $iend_2 )
15785 && $is_math_op{ $types_to_go[$itokpp] } )
15786 || $types_to_go[$itokpp] =~ /^[#,;]$/;
15790 # check for a number on the left
15791 if ( !$good_combo && $types_to_go[$itokm] eq 'n' ) {
15793 # okay if nothing else to left
15794 if ( $itokm == $ibeg_1 ) {
15798 # otherwise look one more token to left
15801 # okay if math operator, comma, or assignment
15802 $good_combo = ( $itokmm == $ibeg_1 )
15803 && ( $is_math_op{ $types_to_go[$itokmm] }
15804 || $types_to_go[$itokmm] =~ /^[,]$/
15805 || $is_assignment{ $types_to_go[$itokmm] }
15810 # look for a single short token either side of the
15812 if ( !$good_combo ) {
15814 # Slight adjustment factor to make results
15815 # independent of break before or after operator in
15816 # long summed lists. (An operator and a space make
15818 my $two = ( $itok eq $iend_1 ) ? 2 : 0;
15822 # numbers or id's on both sides of this joint
15823 $types_to_go[$itokp] =~ /^[in]$/
15824 && $types_to_go[$itokm] =~ /^[in]$/
15826 # one of the two lines must be short:
15829 # no more than 2 nonblank tokens right of
15834 && token_sequence_length( $itokp, $iend_2 )
15836 $rOpts_short_concatenation_item_length
15839 # no more than 2 nonblank tokens left of
15844 && token_sequence_length( $ibeg_1, $itokm )
15846 $rOpts_short_concatenation_item_length
15851 # keep pure terms; don't mix +- with */
15853 $is_plus_minus{$type}
15854 && ( $is_mult_div{ $types_to_go[$itokmm] }
15855 || $is_mult_div{ $types_to_go[$itokpp] } )
15858 $is_mult_div{$type}
15859 && ( $is_plus_minus{ $types_to_go[$itokmm] }
15860 || $is_plus_minus{ $types_to_go[$itokpp] } )
15866 # it is also good to combine if we can reduce to 2 lines
15867 if ( !$good_combo ) {
15869 # index on other line where same token would be in a
15872 ( $itok == $iend_1 ) ? $iend_2 : $ibeg_1;
15877 && $types_to_go[$iother] ne $type;
15880 next unless ($good_combo);
15884 elsif ( $is_amp_amp{$type} ) {
15888 elsif ( $is_assignment{$type} ) {
15890 } ## end assignment
15893 #----------------------------------------------------------
15894 # Recombine Section 1:
15895 # Join welded nested containers immediately
15896 #----------------------------------------------------------
15897 if ( weld_len_right_to_go($iend_1)
15898 || weld_len_left_to_go($ibeg_2) )
15902 # Old coding alternated sweep direction: no longer needed
15903 # $reverse = 1 - $reverse;
15908 #----------------------------------------------------------
15909 # Recombine Section 2:
15910 # Examine token at $iend_1 (right end of first line of pair)
15911 #----------------------------------------------------------
15913 # an isolated '}' may join with a ';' terminated segment
15914 if ( $type_iend_1 eq '}' ) {
15916 # Check for cases where combining a semicolon terminated
15917 # statement with a previous isolated closing paren will
15918 # allow the combined line to be outdented. This is
15919 # generally a good move. For example, we can join up
15920 # the last two lines here:
15922 # $dev, $ino, $mode, $nlink, $uid, $gid, $rdev,
15923 # $size, $atime, $mtime, $ctime, $blksize, $blocks
15929 # $dev, $ino, $mode, $nlink, $uid, $gid, $rdev,
15930 # $size, $atime, $mtime, $ctime, $blksize, $blocks
15933 # which makes the parens line up.
15935 # Another example, from Joe Matarazzo, probably looks best
15936 # with the 'or' clause appended to the trailing paren:
15937 # $self->some_method(
15940 # ) or die "Some_method didn't work";
15942 # But we do not want to do this for something like the -lp
15943 # option where the paren is not outdentable because the
15944 # trailing clause will be far to the right.
15946 # The logic here is synchronized with the logic in sub
15947 # sub set_adjusted_indentation, which actually does
15950 $skip_Section_3 ||= $this_line_is_semicolon_terminated
15952 # only one token on last line
15953 && $ibeg_1 == $iend_1
15955 # must be structural paren
15956 && $tokens_to_go[$iend_1] eq ')'
15958 # style must allow outdenting,
15959 && !$closing_token_indentation{')'}
15961 # only leading '&&', '||', and ':' if no others seen
15962 # (but note: our count made below could be wrong
15963 # due to intervening comments)
15964 && ( $leading_amp_count == 0
15965 || $type_ibeg_2 !~ /^(:|\&\&|\|\|)$/ )
15967 # but leading colons probably line up with a
15968 # previous colon or question (count could be wrong).
15969 && $type_ibeg_2 ne ':'
15971 # only one step in depth allowed. this line must not
15972 # begin with a ')' itself.
15973 && ( $nesting_depth_to_go[$iend_1] ==
15974 $nesting_depth_to_go[$iend_2] + 1 );
15976 # YVES patch 2 of 2:
15977 # Allow cuddled eval chains, like this:
15984 # This patch works together with a patch in
15985 # setting adjusted indentation (where the closing eval
15986 # brace is outdented if possible).
15987 # The problem is that an 'eval' block has continuation
15988 # indentation and it looks better to undo it in some
15989 # cases. If we do not use this patch we would get:
15997 # The alternative, for uncuddled style, is to create
15998 # a patch in set_adjusted_indentation which undoes
15999 # the indentation of a leading line like 'or do {'.
16000 # This doesn't work well with -icb through
16002 $block_type_to_go[$iend_1] eq 'eval'
16003 && !$rOpts->{'line-up-parentheses'}
16004 && !$rOpts->{'indent-closing-brace'}
16005 && $tokens_to_go[$iend_2] eq '{'
16007 ( $type_ibeg_2 =~ /^(|\&\&|\|\|)$/ )
16008 || ( $type_ibeg_2 eq 'k'
16009 && $is_and_or{ $tokens_to_go[$ibeg_2] } )
16010 || $is_if_unless{ $tokens_to_go[$ibeg_2] }
16014 $skip_Section_3 ||= 1;
16021 # handle '.' and '?' specially below
16022 || ( $type_ibeg_2 =~ /^[\.\?]$/ )
16026 elsif ( $type_iend_1 eq '{' ) {
16029 # honor breaks at opening brace
16030 # Added to prevent recombining something like this:
16031 # } || eval { package main;
16032 next if $forced_breakpoint_to_go[$iend_1];
16035 # do not recombine lines with ending &&, ||,
16036 elsif ( $is_amp_amp{$type_iend_1} ) {
16037 next unless $want_break_before{$type_iend_1};
16040 # Identify and recombine a broken ?/: chain
16041 elsif ( $type_iend_1 eq '?' ) {
16043 # Do not recombine different levels
16045 if ( $levels_to_go[$ibeg_1] ne $levels_to_go[$ibeg_2] );
16047 # do not recombine unless next line ends in :
16048 next unless $type_iend_2 eq ':';
16051 # for lines ending in a comma...
16052 elsif ( $type_iend_1 eq ',' ) {
16054 # Do not recombine at comma which is following the
16056 # TODO: might be best to make a special flag
16057 next if ( $old_breakpoint_to_go[$iend_1] );
16059 # an isolated '},' may join with an identifier + ';'
16060 # this is useful for the class of a 'bless' statement (bless.t)
16061 if ( $type_ibeg_1 eq '}'
16062 && $type_ibeg_2 eq 'i' )
16065 unless ( ( $ibeg_1 == ( $iend_1 - 1 ) )
16066 && ( $iend_2 == ( $ibeg_2 + 1 ) )
16067 && $this_line_is_semicolon_terminated );
16069 # override breakpoint
16070 $forced_breakpoint_to_go[$iend_1] = 0;
16076 # do not recombine after a comma unless this will leave
16078 next unless ( $n + 1 >= $nmax );
16080 # do not recombine if there is a change in indentation depth
16083 $levels_to_go[$iend_1] != $levels_to_go[$iend_2] );
16085 # do not recombine a "complex expression" after a
16086 # comma. "complex" means no parens.
16088 foreach my $ii ( $ibeg_2 .. $iend_2 ) {
16089 if ( $tokens_to_go[$ii] eq '(' ) {
16094 next if $saw_paren;
16099 elsif ( $type_iend_1 eq '(' ) {
16101 # No longer doing this
16104 elsif ( $type_iend_1 eq ')' ) {
16106 # No longer doing this
16109 # keep a terminal for-semicolon
16110 elsif ( $type_iend_1 eq 'f' ) {
16114 # if '=' at end of line ...
16115 elsif ( $is_assignment{$type_iend_1} ) {
16117 # keep break after = if it was in input stream
16118 # this helps prevent 'blinkers'
16119 next if $old_breakpoint_to_go[$iend_1]
16121 # don't strand an isolated '='
16122 && $iend_1 != $ibeg_1;
16124 my $is_short_quote =
16125 ( $type_ibeg_2 eq 'Q'
16126 && $ibeg_2 == $iend_2
16127 && token_sequence_length( $ibeg_2, $ibeg_2 ) <
16128 $rOpts_short_concatenation_item_length );
16130 ( $type_ibeg_1 eq '?'
16131 && ( $ibeg_3 >= 0 && $types_to_go[$ibeg_3] eq ':' ) );
16133 # always join an isolated '=', a short quote, or if this
16134 # will put ?/: at start of adjacent lines
16135 if ( $ibeg_1 != $iend_1
16136 && !$is_short_quote
16143 # unless we can reduce this to two lines
16146 # or three lines, the last with a leading semicolon
16147 || ( $nmax == $n + 2
16148 && $types_to_go[$ibeg_nmax] eq ';' )
16150 # or the next line ends with a here doc
16151 || $type_iend_2 eq 'h'
16153 # or the next line ends in an open paren or brace
16154 # and the break hasn't been forced [dima.t]
16155 || ( !$forced_breakpoint_to_go[$iend_1]
16156 && $type_iend_2 eq '{' )
16159 # do not recombine if the two lines might align well
16160 # this is a very approximate test for this
16163 # RT#127633 - the leading tokens are not operators
16164 ( $type_ibeg_2 ne $tokens_to_go[$ibeg_2] )
16166 # or they are different
16168 && $type_ibeg_2 ne $types_to_go[$ibeg_3] )
16174 # Recombine if we can make two lines
16177 # -lp users often prefer this:
16178 # my $title = function($env, $env, $sysarea,
16179 # "bubba Borrower Entry");
16180 # so we will recombine if -lp is used we have
16182 && ( !$rOpts_line_up_parentheses
16183 || $type_iend_2 ne ',' )
16187 # otherwise, scan the rhs line up to last token for
16188 # complexity. Note that we are not counting the last
16189 # token in case it is an opening paren.
16191 my $depth = $nesting_depth_to_go[$ibeg_2];
16192 foreach my $i ( $ibeg_2 + 1 .. $iend_2 - 1 ) {
16193 if ( $nesting_depth_to_go[$i] != $depth ) {
16195 last if ( $tv > 1 );
16197 $depth = $nesting_depth_to_go[$i];
16200 # ok to recombine if no level changes before last token
16203 # otherwise, do not recombine if more than two
16205 next if ( $tv > 1 );
16207 # check total complexity of the two adjacent lines
16208 # that will occur if we do this join
16211 ? $ri_end->[ $n + 1 ]
16213 foreach my $i ( $iend_2 .. $istop ) {
16214 if ( $nesting_depth_to_go[$i] != $depth ) {
16216 last if ( $tv > 2 );
16218 $depth = $nesting_depth_to_go[$i];
16221 # do not recombine if total is more than 2 level changes
16222 next if ( $tv > 2 );
16227 unless ( $tokens_to_go[$ibeg_2] =~ /^[\{\(\[]$/ ) {
16228 $forced_breakpoint_to_go[$iend_1] = 0;
16233 elsif ( $type_iend_1 eq 'k' ) {
16235 # make major control keywords stand out
16240 #/^(last|next|redo|return)$/
16241 $is_last_next_redo_return{ $tokens_to_go[$iend_1] }
16243 # but only if followed by multiple lines
16247 if ( $is_and_or{ $tokens_to_go[$iend_1] } ) {
16249 unless $want_break_before{ $tokens_to_go[$iend_1] };
16253 #----------------------------------------------------------
16254 # Recombine Section 3:
16255 # Examine token at $ibeg_2 (left end of second line of pair)
16256 #----------------------------------------------------------
16258 # join lines identified above as capable of
16259 # causing an outdented line with leading closing paren
16260 # Note that we are skipping the rest of this section
16261 # and the rest of the loop to do the join
16262 if ($skip_Section_3) {
16263 $forced_breakpoint_to_go[$iend_1] = 0;
16268 # handle lines with leading &&, ||
16269 elsif ( $is_amp_amp{$type_ibeg_2} ) {
16271 $leading_amp_count++;
16273 # ok to recombine if it follows a ? or :
16274 # and is followed by an open paren..
16276 ( $is_ternary{$type_ibeg_1}
16277 && $tokens_to_go[$iend_2] eq '(' )
16279 # or is followed by a ? or : at same depth
16281 # We are looking for something like this. We can
16282 # recombine the && line with the line above to make the
16283 # structure more clear:
16285 # exists $G->{Attr}->{V}
16286 # && exists $G->{Attr}->{V}->{$u}
16287 # ? %{ $G->{Attr}->{V}->{$u} }
16290 # We should probably leave something like this alone:
16292 # exists $G->{Attr}->{E}
16293 # && exists $G->{Attr}->{E}->{$u}
16294 # && exists $G->{Attr}->{E}->{$u}->{$v}
16295 # ? %{ $G->{Attr}->{E}->{$u}->{$v} }
16297 # so that we either have all of the &&'s (or ||'s)
16298 # on one line, as in the first example, or break at
16299 # each one as in the second example. However, it
16300 # sometimes makes things worse to check for this because
16301 # it prevents multiple recombinations. So this is not done.
16303 && $is_ternary{ $types_to_go[$ibeg_3] }
16304 && $nesting_depth_to_go[$ibeg_3] ==
16305 $nesting_depth_to_go[$ibeg_2] );
16307 next if !$ok && $want_break_before{$type_ibeg_2};
16308 $forced_breakpoint_to_go[$iend_1] = 0;
16310 # tweak the bond strength to give this joint priority
16315 # Identify and recombine a broken ?/: chain
16316 elsif ( $type_ibeg_2 eq '?' ) {
16318 # Do not recombine different levels
16319 my $lev = $levels_to_go[$ibeg_2];
16320 next if ( $lev ne $levels_to_go[$ibeg_1] );
16322 # Do not recombine a '?' if either next line or
16323 # previous line does not start with a ':'. The reasons
16324 # are that (1) no alignment of the ? will be possible
16325 # and (2) the expression is somewhat complex, so the
16326 # '?' is harder to see in the interior of the line.
16327 my $follows_colon = $ibeg_1 >= 0 && $type_ibeg_1 eq ':';
16328 my $precedes_colon =
16329 $ibeg_3 >= 0 && $types_to_go[$ibeg_3] eq ':';
16330 next unless ( $follows_colon || $precedes_colon );
16332 # we will always combining a ? line following a : line
16333 if ( !$follows_colon ) {
16335 # ...otherwise recombine only if it looks like a chain.
16336 # we will just look at a few nearby lines to see if
16337 # this looks like a chain.
16338 my $local_count = 0;
16339 foreach my $ii ( $ibeg_0, $ibeg_1, $ibeg_3, $ibeg_4 ) {
16342 && $types_to_go[$ii] eq ':'
16343 && $levels_to_go[$ii] == $lev;
16345 next unless ( $local_count > 1 );
16347 $forced_breakpoint_to_go[$iend_1] = 0;
16350 # do not recombine lines with leading '.'
16351 elsif ( $type_ibeg_2 eq '.' ) {
16352 my $i_next_nonblank = min( $inext_to_go[$ibeg_2], $iend_2 );
16356 # ... unless there is just one and we can reduce
16357 # this to two lines if we do. For example, this
16361 # '($dummy, $pat) = &get_next_tex_cmd;' . '$args .= $pat;'
16363 # looks better than this:
16364 # $bodyA .= '($dummy, $pat) = &get_next_tex_cmd;'
16365 # . '$args .= $pat;'
16370 && $type_ibeg_1 ne $type_ibeg_2
16373 # ... or this would strand a short quote , like this
16374 # . "some long quote"
16377 || ( $types_to_go[$i_next_nonblank] eq 'Q'
16378 && $i_next_nonblank >= $iend_2 - 1
16379 && $token_lengths_to_go[$i_next_nonblank] <
16380 $rOpts_short_concatenation_item_length )
16384 # handle leading keyword..
16385 elsif ( $type_ibeg_2 eq 'k' ) {
16387 # handle leading "or"
16388 if ( $tokens_to_go[$ibeg_2] eq 'or' ) {
16391 $this_line_is_semicolon_terminated
16393 $type_ibeg_1 eq '}'
16396 # following 'if' or 'unless' or 'or'
16397 $type_ibeg_1 eq 'k'
16398 && $is_if_unless{ $tokens_to_go[$ibeg_1] }
16400 # important: only combine a very simple or
16401 # statement because the step below may have
16402 # combined a trailing 'and' with this or,
16403 # and we do not want to then combine
16404 # everything together
16405 && ( $iend_2 - $ibeg_2 <= 7 )
16411 $forced_breakpoint_to_go[$iend_1] = 0
16412 unless $old_breakpoint_to_go[$iend_1];
16415 # handle leading 'and'
16416 elsif ( $tokens_to_go[$ibeg_2] eq 'and' ) {
16418 # Decide if we will combine a single terminal 'and'
16419 # after an 'if' or 'unless'.
16421 # This looks best with the 'and' on the same
16422 # line as the 'if':
16425 # if $seconds and $nu < 2;
16427 # But this looks better as shown:
16430 # if !$this->{Parents}{$_}
16431 # or $this->{Parents}{$_} eq $_;
16435 $this_line_is_semicolon_terminated
16438 # following 'if' or 'unless' or 'or'
16439 $type_ibeg_1 eq 'k'
16440 && ( $is_if_unless{ $tokens_to_go[$ibeg_1] }
16441 || $tokens_to_go[$ibeg_1] eq 'or' )
16446 # handle leading "if" and "unless"
16447 elsif ( $is_if_unless{ $tokens_to_go[$ibeg_2] } ) {
16449 # FIXME: This is still experimental..may not be too useful
16452 $this_line_is_semicolon_terminated
16454 # previous line begins with 'and' or 'or'
16455 && $type_ibeg_1 eq 'k'
16456 && $is_and_or{ $tokens_to_go[$ibeg_1] }
16461 # handle all other leading keywords
16464 # keywords look best at start of lines,
16465 # but combine things like "1 while"
16466 unless ( $is_assignment{$type_iend_1} ) {
16468 if ( ( $type_iend_1 ne 'k' )
16469 && ( $tokens_to_go[$ibeg_2] ne 'while' ) );
16474 # similar treatment of && and || as above for 'and' and 'or':
16475 # NOTE: This block of code is currently bypassed because
16476 # of a previous block but is retained for possible future use.
16477 elsif ( $is_amp_amp{$type_ibeg_2} ) {
16479 # maybe looking at something like:
16480 # unless $TEXTONLY || $item =~ m%</?(hr>|p>|a|img)%i;
16484 $this_line_is_semicolon_terminated
16486 # previous line begins with an 'if' or 'unless' keyword
16487 && $type_ibeg_1 eq 'k'
16488 && $is_if_unless{ $tokens_to_go[$ibeg_1] }
16493 # handle line with leading = or similar
16494 elsif ( $is_assignment{$type_ibeg_2} ) {
16495 next unless ( $n == 1 || $n == $nmax );
16496 next if $old_breakpoint_to_go[$iend_1];
16500 # unless we can reduce this to two lines
16503 # or three lines, the last with a leading semicolon
16504 || ( $nmax == 3 && $types_to_go[$ibeg_nmax] eq ';' )
16506 # or the next line ends with a here doc
16507 || $type_iend_2 eq 'h'
16509 # or this is a short line ending in ;
16510 || ( $n == $nmax && $this_line_is_semicolon_terminated )
16512 $forced_breakpoint_to_go[$iend_1] = 0;
16515 #----------------------------------------------------------
16516 # Recombine Section 4:
16517 # Combine the lines if we arrive here and it is possible
16518 #----------------------------------------------------------
16520 # honor hard breakpoints
16521 next if ( $forced_breakpoint_to_go[$iend_1] > 0 );
16523 my $bs = $bond_strength_to_go[$iend_1] + $bs_tweak;
16525 # Require a few extra spaces before recombining lines if we are
16526 # at an old breakpoint unless this is a simple list or terminal
16527 # line. The goal is to avoid oscillating between two
16528 # quasi-stable end states. For example this snippet caused
16532 ## TText => "[" . ( join ',', map { "\"$_\"" } split "\n", $_ ) . "]"
16536 if ( $old_breakpoint_to_go[$iend_1]
16537 && !$this_line_is_semicolon_terminated
16540 && $type_iend_2 ne ',' );
16542 # do not recombine if we would skip in indentation levels
16543 if ( $n < $nmax ) {
16544 my $if_next = $ri_beg->[ $n + 1 ];
16547 $levels_to_go[$ibeg_1] < $levels_to_go[$ibeg_2]
16548 && $levels_to_go[$ibeg_2] < $levels_to_go[$if_next]
16550 # but an isolated 'if (' is undesirable
16553 && $iend_1 - $ibeg_1 <= 2
16554 && $type_ibeg_1 eq 'k'
16555 && $tokens_to_go[$ibeg_1] eq 'if'
16556 && $tokens_to_go[$iend_1] ne '('
16562 next if ( $bs >= NO_BREAK - 1 );
16564 # remember the pair with the greatest bond strength
16571 if ( $bs > $bs_best ) {
16578 # recombine the pair with the greatest bond strength
16580 splice @{$ri_beg}, $n_best, 1;
16581 splice @{$ri_end}, $n_best - 1, 1;
16582 splice @joint, $n_best, 1;
16584 # keep going if we are still making progress
16588 return ( $ri_beg, $ri_end );
16590 } # end recombine_breakpoints
16592 sub break_all_chain_tokens {
16594 # scan the current breakpoints looking for breaks at certain "chain
16595 # operators" (. : && || + etc) which often occur repeatedly in a long
16596 # statement. If we see a break at any one, break at all similar tokens
16597 # within the same container.
16599 my ( $self, $ri_left, $ri_right ) = @_;
16601 my %saw_chain_type;
16602 my %left_chain_type;
16603 my %right_chain_type;
16604 my %interior_chain_type;
16605 my $nmax = @{$ri_right} - 1;
16607 # scan the left and right end tokens of all lines
16609 for my $n ( 0 .. $nmax ) {
16610 my $il = $ri_left->[$n];
16611 my $ir = $ri_right->[$n];
16612 my $typel = $types_to_go[$il];
16613 my $typer = $types_to_go[$ir];
16614 $typel = '+' if ( $typel eq '-' ); # treat + and - the same
16615 $typer = '+' if ( $typer eq '-' );
16616 $typel = '*' if ( $typel eq '/' ); # treat * and / the same
16617 $typer = '*' if ( $typer eq '/' );
16618 my $tokenl = $tokens_to_go[$il];
16619 my $tokenr = $tokens_to_go[$ir];
16621 if ( $is_chain_operator{$tokenl} && $want_break_before{$typel} ) {
16622 next if ( $typel eq '?' );
16623 push @{ $left_chain_type{$typel} }, $il;
16624 $saw_chain_type{$typel} = 1;
16627 if ( $is_chain_operator{$tokenr} && !$want_break_before{$typer} ) {
16628 next if ( $typer eq '?' );
16629 push @{ $right_chain_type{$typer} }, $ir;
16630 $saw_chain_type{$typer} = 1;
16634 return unless $count;
16636 # now look for any interior tokens of the same types
16638 for my $n ( 0 .. $nmax ) {
16639 my $il = $ri_left->[$n];
16640 my $ir = $ri_right->[$n];
16641 foreach my $i ( $il + 1 .. $ir - 1 ) {
16642 my $type = $types_to_go[$i];
16643 $type = '+' if ( $type eq '-' );
16644 $type = '*' if ( $type eq '/' );
16645 if ( $saw_chain_type{$type} ) {
16646 push @{ $interior_chain_type{$type} }, $i;
16651 return unless $count;
16653 # now make a list of all new break points
16656 # loop over all chain types
16657 foreach my $type ( keys %saw_chain_type ) {
16659 # quit if just ONE continuation line with leading . For example--
16660 # print LATEXFILE '\framebox{\parbox[c][' . $h . '][t]{' . $w . '}{'
16662 last if ( $nmax == 1 && $type =~ /^[\.\+]$/ );
16664 # loop over all interior chain tokens
16665 foreach my $itest ( @{ $interior_chain_type{$type} } ) {
16667 # loop over all left end tokens of same type
16668 if ( $left_chain_type{$type} ) {
16669 next if $nobreak_to_go[ $itest - 1 ];
16670 foreach my $i ( @{ $left_chain_type{$type} } ) {
16671 next unless $self->in_same_container_i( $i, $itest );
16672 push @insert_list, $itest - 1;
16674 # Break at matching ? if this : is at a different level.
16675 # For example, the ? before $THRf_DEAD in the following
16676 # should get a break if its : gets a break.
16679 # ( $_ & 1 ) ? ( $_ & 4 ) ? $THRf_DEAD : $THRf_ZOMBIE
16680 # : ( $_ & 4 ) ? $THRf_R_DETACHED
16681 # : $THRf_R_JOINABLE;
16683 && $levels_to_go[$i] != $levels_to_go[$itest] )
16685 my $i_question = $mate_index_to_go[$itest];
16686 if ( $i_question > 0 ) {
16687 push @insert_list, $i_question - 1;
16694 # loop over all right end tokens of same type
16695 if ( $right_chain_type{$type} ) {
16696 next if $nobreak_to_go[$itest];
16697 foreach my $i ( @{ $right_chain_type{$type} } ) {
16698 next unless $self->in_same_container_i( $i, $itest );
16699 push @insert_list, $itest;
16701 # break at matching ? if this : is at a different level
16703 && $levels_to_go[$i] != $levels_to_go[$itest] )
16705 my $i_question = $mate_index_to_go[$itest];
16706 if ( $i_question >= 0 ) {
16707 push @insert_list, $i_question;
16716 # insert any new break points
16717 if (@insert_list) {
16718 insert_additional_breaks( \@insert_list, $ri_left, $ri_right );
16725 # Look for assignment operators that could use a breakpoint.
16726 # For example, in the following snippet
16728 # $HOME = $ENV{HOME}
16731 # || die "no home directory for user $<";
16733 # we could break at the = to get this, which is a little nicer:
16738 # || die "no home directory for user $<";
16740 # The logic here follows the logic in set_logical_padding, which
16741 # will add the padding in the second line to improve alignment.
16743 my ( $ri_left, $ri_right ) = @_;
16744 my $nmax = @{$ri_right} - 1;
16745 return unless ( $nmax >= 2 );
16747 # scan the left ends of first two lines
16750 for my $n ( 1 .. 2 ) {
16751 my $il = $ri_left->[$n];
16752 my $typel = $types_to_go[$il];
16753 my $tokenl = $tokens_to_go[$il];
16755 my $has_leading_op = ( $tokenl =~ /^\w/ )
16756 ? $is_chain_operator{$tokenl} # + - * / : ? && ||
16757 : $is_chain_operator{$typel}; # and, or
16758 return unless ($has_leading_op);
16761 unless ( $tokenl eq $tokbeg
16762 && $nesting_depth_to_go[$il] eq $depth_beg );
16765 $depth_beg = $nesting_depth_to_go[$il];
16768 # now look for any interior tokens of the same types
16769 my $il = $ri_left->[0];
16770 my $ir = $ri_right->[0];
16772 # now make a list of all new break points
16774 for ( my $i = $ir - 1 ; $i > $il ; $i-- ) {
16775 my $type = $types_to_go[$i];
16776 if ( $is_assignment{$type}
16777 && $nesting_depth_to_go[$i] eq $depth_beg )
16779 if ( $want_break_before{$type} ) {
16780 push @insert_list, $i - 1;
16783 push @insert_list, $i;
16788 # Break after a 'return' followed by a chain of operators
16789 # return ( $^O !~ /win32|dos/i )
16790 # && ( $^O ne 'VMS' )
16791 # && ( $^O ne 'OS2' )
16792 # && ( $^O ne 'MacOS' );
16795 # ( $^O !~ /win32|dos/i )
16796 # && ( $^O ne 'VMS' )
16797 # && ( $^O ne 'OS2' )
16798 # && ( $^O ne 'MacOS' );
16800 if ( $types_to_go[$i] eq 'k'
16801 && $tokens_to_go[$i] eq 'return'
16803 && $nesting_depth_to_go[$i] eq $depth_beg )
16805 push @insert_list, $i;
16808 return unless (@insert_list);
16810 # One final check...
16811 # scan second and third lines and be sure there are no assignments
16812 # we want to avoid breaking at an = to make something like this:
16814 # $html_icons{"$type-$state"}
16815 # or $icon = $html_icons{$type}
16816 # or $icon = $html_icons{$state} )
16817 for my $n ( 1 .. 2 ) {
16818 my $il = $ri_left->[$n];
16819 my $ir = $ri_right->[$n];
16820 foreach my $i ( $il + 1 .. $ir ) {
16821 my $type = $types_to_go[$i];
16823 if ( $is_assignment{$type}
16824 && $nesting_depth_to_go[$i] eq $depth_beg );
16828 # ok, insert any new break point
16829 if (@insert_list) {
16830 insert_additional_breaks( \@insert_list, $ri_left, $ri_right );
16835 sub insert_final_breaks {
16837 my ( $self, $ri_left, $ri_right ) = @_;
16839 my $nmax = @{$ri_right} - 1;
16841 # scan the left and right end tokens of all lines
16843 my $i_first_colon = -1;
16844 for my $n ( 0 .. $nmax ) {
16845 my $il = $ri_left->[$n];
16846 my $ir = $ri_right->[$n];
16847 my $typel = $types_to_go[$il];
16848 my $typer = $types_to_go[$ir];
16849 return if ( $typel eq '?' );
16850 return if ( $typer eq '?' );
16851 if ( $typel eq ':' ) { $i_first_colon = $il; last; }
16852 elsif ( $typer eq ':' ) { $i_first_colon = $ir; last; }
16855 # For long ternary chains,
16856 # if the first : we see has its ? is in the interior
16857 # of a preceding line, then see if there are any good
16858 # breakpoints before the ?.
16859 if ( $i_first_colon > 0 ) {
16860 my $i_question = $mate_index_to_go[$i_first_colon];
16861 if ( $i_question > 0 ) {
16863 for ( my $ii = $i_question - 1 ; $ii >= 0 ; $ii -= 1 ) {
16864 my $token = $tokens_to_go[$ii];
16865 my $type = $types_to_go[$ii];
16867 # For now, a good break is either a comma or,
16868 # in a long chain, a 'return'.
16869 # Patch for RT #126633: added the $nmax>1 check to avoid
16870 # breaking after a return for a simple ternary. For longer
16871 # chains the break after return allows vertical alignment, so
16872 # it is still done. So perltidy -wba='?' will not break
16873 # immediately after the return in the following statement:
16875 # return 0 ? 'aaaaaaaaaaaaaaaaaaaaa' :
16876 # 'bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb';
16881 || $type eq 'k' && ( $nmax > 1 && $token eq 'return' )
16883 && $self->in_same_container_i( $ii, $i_question )
16886 push @insert_list, $ii;
16891 # insert any new break points
16892 if (@insert_list) {
16893 insert_additional_breaks( \@insert_list, $ri_left, $ri_right );
16900 sub in_same_container_i {
16902 # check to see if tokens at i1 and i2 are in the
16903 # same container, and not separated by a comma, ? or :
16904 # This is an interface between the _to_go arrays to the rLL array
16905 my ( $self, $i1, $i2 ) = @_;
16906 return $self->in_same_container_K( $K_to_go[$i1], $K_to_go[$i2] );
16909 { # sub in_same_container_K
16910 my $ris_break_token;
16911 my $ris_comma_token;
16915 # all cases break on seeing commas at same level
16918 @{$ris_comma_token}{@q} = (1) x scalar(@q);
16920 # Non-ternary text also breaks on seeing any of qw(? : || or )
16921 # Example: we would not want to break at any of these .'s
16922 # : "<A HREF=\"#item_" . htmlify( 0, $s2 ) . "\">$str</A>"
16923 push @q, qw( or || ? : );
16924 @{$ris_break_token}{@q} = (1) x scalar(@q);
16927 sub in_same_container_K {
16929 # Check to see if tokens at K1 and K2 are in the same container,
16930 # and not separated by certain characters: => , ? : || or
16931 # This version uses the newer $rLL data structure
16933 my ( $self, $K1, $K2 ) = @_;
16934 if ( $K2 < $K1 ) { ( $K1, $K2 ) = ( $K2, $K1 ) }
16935 my $rLL = $self->{rLL};
16936 my $depth_1 = $rLL->[$K1]->[_SLEVEL_];
16937 return if ( $depth_1 < 0 );
16938 return unless ( $rLL->[$K2]->[_SLEVEL_] == $depth_1 );
16940 # Select character set to scan for
16941 my $type_1 = $rLL->[$K1]->[_TYPE_];
16942 my $rbreak = ( $type_1 ne ':' ) ? $ris_break_token : $ris_comma_token;
16944 # Fast preliminary loop to verify that tokens are in the same container
16947 $KK = $rLL->[$KK]->[_KNEXT_SEQ_ITEM_];
16948 last if !defined($KK);
16949 last if ( $KK >= $K2 );
16950 my $depth_K = $rLL->[$KK]->[_SLEVEL_];
16951 return if ( $depth_K < $depth_1 );
16952 next if ( $depth_K > $depth_1 );
16953 if ( $type_1 ne ':' ) {
16954 my $tok_K = $rLL->[$KK]->[_TOKEN_];
16955 return if ( $tok_K eq '?' || $tok_K eq ':' );
16959 # Slow loop checking for certain characters
16961 ###########################################################
16962 # This is potentially a slow routine and not critical.
16963 # For safety just give up for large differences.
16964 # See test file 'infinite_loop.txt'
16965 ###########################################################
16966 return if ( $K2 - $K1 > 200 );
16968 foreach my $K ( $K1 + 1 .. $K2 - 1 ) {
16970 my $depth_K = $rLL->[$K]->[_SLEVEL_];
16971 next if ( $depth_K > $depth_1 );
16972 return if ( $depth_K < $depth_1 ); # redundant, checked above
16973 my $tok = $rLL->[$K]->[_TOKEN_];
16974 return if ( $rbreak->{$tok} );
16980 sub set_continuation_breaks {
16982 # Define an array of indexes for inserting newline characters to
16983 # keep the line lengths below the maximum desired length. There is
16984 # an implied break after the last token, so it need not be included.
16987 # This routine is part of series of routines which adjust line
16988 # lengths. It is only called if a statement is longer than the
16989 # maximum line length, or if a preliminary scanning located
16990 # desirable break points. Sub scan_list has already looked at
16991 # these tokens and set breakpoints (in array
16992 # $forced_breakpoint_to_go[$i]) where it wants breaks (for example
16993 # after commas, after opening parens, and before closing parens).
16994 # This routine will honor these breakpoints and also add additional
16995 # breakpoints as necessary to keep the line length below the maximum
16996 # requested. It bases its decision on where the 'bond strength' is
16999 # Output: returns references to the arrays:
17002 # which contain the indexes $i of the first and last tokens on each
17005 # In addition, the array:
17006 # $forced_breakpoint_to_go[$i]
17007 # may be updated to be =1 for any index $i after which there must be
17008 # a break. This signals later routines not to undo the breakpoint.
17010 my ( $self, $saw_good_break ) = @_;
17011 my @i_first = (); # the first index to output
17012 my @i_last = (); # the last index to output
17013 my @i_colon_breaks = (); # needed to decide if we have to break at ?'s
17014 if ( $types_to_go[0] eq ':' ) { push @i_colon_breaks, 0 }
17016 set_bond_strengths();
17019 my $imax = $max_index_to_go;
17020 if ( $types_to_go[$imin] eq 'b' ) { $imin++ }
17021 if ( $types_to_go[$imax] eq 'b' ) { $imax-- }
17022 my $i_begin = $imin; # index for starting next iteration
17024 my $leading_spaces = leading_spaces_to_go($imin);
17025 my $line_count = 0;
17026 my $last_break_strength = NO_BREAK;
17027 my $i_last_break = -1;
17028 my $max_bias = 0.001;
17029 my $tiny_bias = 0.0001;
17030 my $leading_alignment_token = "";
17031 my $leading_alignment_type = "";
17033 # see if any ?/:'s are in order
17034 my $colons_in_order = 1;
17036 my @colon_list = grep { /^[\?\:]$/ } @types_to_go[ 0 .. $max_index_to_go ];
17037 my $colon_count = @colon_list;
17038 foreach (@colon_list) {
17039 if ( $_ eq $last_tok ) { $colons_in_order = 0; last }
17043 # This is a sufficient but not necessary condition for colon chain
17044 my $is_colon_chain = ( $colons_in_order && @colon_list > 2 );
17046 #-------------------------------------------------------
17047 # BEGINNING of main loop to set continuation breakpoints
17048 # Keep iterating until we reach the end
17049 #-------------------------------------------------------
17050 while ( $i_begin <= $imax ) {
17051 my $lowest_strength = NO_BREAK;
17052 my $starting_sum = $summed_lengths_to_go[$i_begin];
17055 my $lowest_next_token = '';
17056 my $lowest_next_type = 'b';
17057 my $i_lowest_next_nonblank = -1;
17059 #-------------------------------------------------------
17060 # BEGINNING of inner loop to find the best next breakpoint
17061 #-------------------------------------------------------
17062 for ( $i_test = $i_begin ; $i_test <= $imax ; $i_test++ ) {
17063 my $type = $types_to_go[$i_test];
17064 my $token = $tokens_to_go[$i_test];
17065 my $next_type = $types_to_go[ $i_test + 1 ];
17066 my $next_token = $tokens_to_go[ $i_test + 1 ];
17067 my $i_next_nonblank = $inext_to_go[$i_test];
17068 my $next_nonblank_type = $types_to_go[$i_next_nonblank];
17069 my $next_nonblank_token = $tokens_to_go[$i_next_nonblank];
17070 my $next_nonblank_block_type = $block_type_to_go[$i_next_nonblank];
17071 my $strength = $bond_strength_to_go[$i_test];
17072 my $maximum_line_length = maximum_line_length($i_begin);
17074 # use old breaks as a tie-breaker. For example to
17075 # prevent blinkers with -pbp in this code:
17078 ## qw/ARG OUTPUT PROTO CONSTRUCTOR RETURNS DESC PARAMS SEEALSO EXAMPLE/}
17081 # At the same time try to prevent a leading * in this code
17082 # with the default formatting:
17085 ## factorial( $a + $b - 1 ) / factorial( $a - 1 ) / factorial( $b - 1 )
17086 ## * ( $x**( $a - 1 ) )
17087 ## * ( ( 1 - $x )**( $b - 1 ) );
17089 # reduce strength a bit to break ties at an old breakpoint ...
17091 $old_breakpoint_to_go[$i_test]
17093 # which is a 'good' breakpoint, meaning ...
17094 # we don't want to break before it
17095 && !$want_break_before{$type}
17097 # and either we want to break before the next token
17098 # or the next token is not short (i.e. not a '*', '/' etc.)
17099 && $i_next_nonblank <= $imax
17100 && ( $want_break_before{$next_nonblank_type}
17101 || $token_lengths_to_go[$i_next_nonblank] > 2
17102 || $next_nonblank_type =~ /^[\,\(\[\{L]$/ )
17105 $strength -= $tiny_bias;
17108 # otherwise increase strength a bit if this token would be at the
17109 # maximum line length. This is necessary to avoid blinking
17110 # in the above example when the -iob flag is added.
17114 $summed_lengths_to_go[ $i_test + 1 ] -
17116 if ( $len >= $maximum_line_length ) {
17117 $strength += $tiny_bias;
17121 my $must_break = 0;
17123 # Force an immediate break at certain operators
17124 # with lower level than the start of the line,
17125 # unless we've already seen a better break.
17127 ##############################################
17128 # Note on an issue with a preceding ?
17129 ##############################################
17130 # We don't include a ? in the above list, but there may
17131 # be a break at a previous ? if the line is long.
17132 # Because of this we do not want to force a break if
17133 # there is a previous ? on this line. For now the best way
17134 # to do this is to not break if we have seen a lower strength
17135 # point, which is probably a ?.
17137 # Example of unwanted breaks we are avoiding at a '.' following a ?
17138 # from pod2html using perltidy -gnu:
17140 # ? "\n<A NAME=\""
17142 # . "\">\n$text</A>\n"
17143 # : "\n$type$pod2.html\#" . $value . "\">$text<\/A>\n";
17146 $next_nonblank_type =~ /^(\.|\&\&|\|\|)$/
17147 || ( $next_nonblank_type eq 'k'
17148 && $next_nonblank_token =~ /^(and|or)$/ )
17150 && ( $nesting_depth_to_go[$i_begin] >
17151 $nesting_depth_to_go[$i_next_nonblank] )
17152 && ( $strength <= $lowest_strength )
17155 set_forced_breakpoint($i_next_nonblank);
17160 # Try to put a break where requested by scan_list
17161 $forced_breakpoint_to_go[$i_test]
17163 # break between ) { in a continued line so that the '{' can
17165 # See similar logic in scan_list which catches instances
17166 # where a line is just something like ') {'. We have to
17167 # be careful because the corresponding block keyword might
17168 # not be on the first line, such as 'for' here:
17172 # for $x ( 1, 2 ) { local $_ = "b"; s/(.*)/+$1/ }
17178 && ( $token eq ')' )
17179 && ( $next_nonblank_type eq '{' )
17180 && ($next_nonblank_block_type)
17181 && ( $next_nonblank_block_type ne $tokens_to_go[$i_begin] )
17183 # RT #104427: Dont break before opening sub brace because
17184 # sub block breaks handled at higher level, unless
17185 # it looks like the preceding list is long and broken
17187 $next_nonblank_block_type =~ /^sub\b/
17188 && ( $nesting_depth_to_go[$i_begin] ==
17189 $nesting_depth_to_go[$i_next_nonblank] )
17192 && !$rOpts->{'opening-brace-always-on-right'}
17195 # There is an implied forced break at a terminal opening brace
17196 || ( ( $type eq '{' ) && ( $i_test == $imax ) )
17200 # Forced breakpoints must sometimes be overridden, for example
17201 # because of a side comment causing a NO_BREAK. It is easier
17202 # to catch this here than when they are set.
17203 if ( $strength < NO_BREAK - 1 ) {
17204 $strength = $lowest_strength - $tiny_bias;
17209 # quit if a break here would put a good terminal token on
17210 # the next line and we already have a possible break
17213 && ( $next_nonblank_type =~ /^[\;\,]$/ )
17217 $summed_lengths_to_go[ $i_next_nonblank + 1 ] -
17219 ) > $maximum_line_length
17223 last if ( $i_lowest >= 0 );
17226 # Avoid a break which would strand a single punctuation
17227 # token. For example, we do not want to strand a leading
17228 # '.' which is followed by a long quoted string.
17229 # But note that we do want to do this with -extrude (l=1)
17230 # so please test any changes to this code on -extrude.
17233 && ( $i_test == $i_begin )
17234 && ( $i_test < $imax )
17235 && ( $token eq $type )
17239 $summed_lengths_to_go[ $i_test + 1 ] -
17241 ) < $maximum_line_length
17245 $i_test = min( $imax, $inext_to_go[$i_test] );
17249 if ( ( $strength <= $lowest_strength ) && ( $strength < NO_BREAK ) )
17252 # break at previous best break if it would have produced
17253 # a leading alignment of certain common tokens, and it
17254 # is different from the latest candidate break
17256 if ($leading_alignment_type);
17258 # Force at least one breakpoint if old code had good
17259 # break It is only called if a breakpoint is required or
17260 # desired. This will probably need some adjustments
17261 # over time. A goal is to try to be sure that, if a new
17262 # side comment is introduced into formatted text, then
17263 # the same breakpoints will occur. scbreak.t
17266 $i_test == $imax # we are at the end
17267 && !$forced_breakpoint_count #
17268 && $saw_good_break # old line had good break
17269 && $type =~ /^[#;\{]$/ # and this line ends in
17270 # ';' or side comment
17271 && $i_last_break < 0 # and we haven't made a break
17272 && $i_lowest >= 0 # and we saw a possible break
17273 && $i_lowest < $imax - 1 # (but not just before this ;)
17274 && $strength - $lowest_strength < 0.5 * WEAK # and it's good
17277 # Do not skip past an important break point in a short final
17278 # segment. For example, without this check we would miss the
17279 # break at the final / in the following code:
17282 # ( $tau * $mass_pellet * $q_0 *
17283 # ( 1. - exp( -$t_stop / $tau ) ) -
17284 # 4. * $pi * $factor * $k_ice *
17285 # ( $t_melt - $t_ice ) *
17288 # ( $rho_ice * $Qs * $pi * $r_pellet**2 );
17290 if ( $line_count > 2
17291 && $i_lowest < $i_test
17292 && $i_test > $imax - 2
17293 && $nesting_depth_to_go[$i_begin] >
17294 $nesting_depth_to_go[$i_lowest]
17295 && $lowest_strength < $last_break_strength - .5 * WEAK )
17297 # Make this break for math operators for now
17298 my $ir = $inext_to_go[$i_lowest];
17299 my $il = $iprev_to_go[$ir];
17301 if ( $types_to_go[$il] =~ /^[\/\*\+\-\%]$/
17302 || $types_to_go[$ir] =~ /^[\/\*\+\-\%]$/ );
17305 # Update the minimum bond strength location
17306 $lowest_strength = $strength;
17307 $i_lowest = $i_test;
17308 $lowest_next_token = $next_nonblank_token;
17309 $lowest_next_type = $next_nonblank_type;
17310 $i_lowest_next_nonblank = $i_next_nonblank;
17311 last if $must_break;
17313 # set flags to remember if a break here will produce a
17314 # leading alignment of certain common tokens
17315 if ( $line_count > 0
17317 && ( $lowest_strength - $last_break_strength <= $max_bias )
17320 my $i_last_end = $iprev_to_go[$i_begin];
17321 my $tok_beg = $tokens_to_go[$i_begin];
17322 my $type_beg = $types_to_go[$i_begin];
17325 # check for leading alignment of certain tokens
17327 $tok_beg eq $next_nonblank_token
17328 && $is_chain_operator{$tok_beg}
17329 && ( $type_beg eq 'k'
17330 || $type_beg eq $tok_beg )
17331 && $nesting_depth_to_go[$i_begin] >=
17332 $nesting_depth_to_go[$i_next_nonblank]
17335 || ( $tokens_to_go[$i_last_end] eq $token
17336 && $is_chain_operator{$token}
17337 && ( $type eq 'k' || $type eq $token )
17338 && $nesting_depth_to_go[$i_last_end] >=
17339 $nesting_depth_to_go[$i_test] )
17342 $leading_alignment_token = $next_nonblank_token;
17343 $leading_alignment_type = $next_nonblank_type;
17348 my $too_long = ( $i_test >= $imax );
17349 if ( !$too_long ) {
17352 $summed_lengths_to_go[ $i_test + 2 ] -
17354 $too_long = $next_length > $maximum_line_length;
17356 # To prevent blinkers we will avoid leaving a token exactly at
17357 # the line length limit unless it is the last token or one of
17358 # several "good" types.
17360 # The following code was a blinker with -pbp before this
17362 ## $last_nonblank_token eq '('
17363 ## && $is_indirect_object_taker{ $paren_type
17364 ## [$paren_depth] }
17365 # The issue causing the problem is that if the
17366 # term [$paren_depth] gets broken across a line then
17367 # the whitespace routine doesn't see both opening and closing
17368 # brackets and will format like '[ $paren_depth ]'. This
17369 # leads to an oscillation in length depending if we break
17370 # before the closing bracket or not.
17372 && $i_test + 1 < $imax
17373 && $next_nonblank_type !~ /^[,\}\]\)R]$/ )
17375 $too_long = $next_length >= $maximum_line_length;
17379 FORMATTER_DEBUG_FLAG_BREAK
17382 my $rtok = $next_nonblank_token ? $next_nonblank_token : "";
17383 my $i_testp2 = $i_test + 2;
17384 if ( $i_testp2 > $max_index_to_go + 1 ) {
17385 $i_testp2 = $max_index_to_go + 1;
17387 if ( length($ltok) > 6 ) { $ltok = substr( $ltok, 0, 8 ) }
17388 if ( length($rtok) > 6 ) { $rtok = substr( $rtok, 0, 8 ) }
17390 "BREAK: i=$i_test imax=$imax $types_to_go[$i_test] $next_nonblank_type sp=($leading_spaces) lnext= $summed_lengths_to_go[$i_testp2] 2long=$too_long str=$strength $ltok $rtok\n";
17393 # allow one extra terminal token after exceeding line length
17394 # if it would strand this token.
17395 if ( $rOpts_fuzzy_line_length
17397 && $i_lowest == $i_test
17398 && $token_lengths_to_go[$i_test] > 1
17399 && $next_nonblank_type =~ /^[\;\,]$/ )
17406 ( $i_test == $imax ) # we're done if no more tokens,
17408 ( $i_lowest >= 0 ) # or no more space and we have a break
17414 #-------------------------------------------------------
17415 # END of inner loop to find the best next breakpoint
17416 # Now decide exactly where to put the breakpoint
17417 #-------------------------------------------------------
17419 # it's always ok to break at imax if no other break was found
17420 if ( $i_lowest < 0 ) { $i_lowest = $imax }
17422 # semi-final index calculation
17423 my $i_next_nonblank = $inext_to_go[$i_lowest];
17424 my $next_nonblank_type = $types_to_go[$i_next_nonblank];
17425 my $next_nonblank_token = $tokens_to_go[$i_next_nonblank];
17427 #-------------------------------------------------------
17428 # ?/: rule 1 : if a break here will separate a '?' on this
17429 # line from its closing ':', then break at the '?' instead.
17430 #-------------------------------------------------------
17431 foreach my $i ( $i_begin + 1 .. $i_lowest - 1 ) {
17432 next unless ( $tokens_to_go[$i] eq '?' );
17434 # do not break if probable sequence of ?/: statements
17435 next if ($is_colon_chain);
17437 # do not break if statement is broken by side comment
17439 if ( $tokens_to_go[$max_index_to_go] eq '#'
17440 && $self->terminal_type_i( 0, $max_index_to_go ) !~
17443 # no break needed if matching : is also on the line
17445 if ( $mate_index_to_go[$i] >= 0
17446 && $mate_index_to_go[$i] <= $i_next_nonblank );
17449 if ( $want_break_before{'?'} ) { $i_lowest-- }
17453 #-------------------------------------------------------
17454 # END of inner loop to find the best next breakpoint:
17455 # Break the line after the token with index i=$i_lowest
17456 #-------------------------------------------------------
17458 # final index calculation
17459 $i_next_nonblank = $inext_to_go[$i_lowest];
17460 $next_nonblank_type = $types_to_go[$i_next_nonblank];
17461 $next_nonblank_token = $tokens_to_go[$i_next_nonblank];
17463 FORMATTER_DEBUG_FLAG_BREAK
17465 "BREAK: best is i = $i_lowest strength = $lowest_strength\n";
17467 #-------------------------------------------------------
17468 # ?/: rule 2 : if we break at a '?', then break at its ':'
17470 # Note: this rule is also in sub scan_list to handle a break
17471 # at the start and end of a line (in case breaks are dictated
17472 # by side comments).
17473 #-------------------------------------------------------
17474 if ( $next_nonblank_type eq '?' ) {
17475 set_closing_breakpoint($i_next_nonblank);
17477 elsif ( $types_to_go[$i_lowest] eq '?' ) {
17478 set_closing_breakpoint($i_lowest);
17481 #-------------------------------------------------------
17482 # ?/: rule 3 : if we break at a ':' then we save
17483 # its location for further work below. We may need to go
17484 # back and break at its '?'.
17485 #-------------------------------------------------------
17486 if ( $next_nonblank_type eq ':' ) {
17487 push @i_colon_breaks, $i_next_nonblank;
17489 elsif ( $types_to_go[$i_lowest] eq ':' ) {
17490 push @i_colon_breaks, $i_lowest;
17493 # here we should set breaks for all '?'/':' pairs which are
17494 # separated by this line
17498 # save this line segment, after trimming blanks at the ends
17500 ( $types_to_go[$i_begin] eq 'b' ) ? $i_begin + 1 : $i_begin );
17502 ( $types_to_go[$i_lowest] eq 'b' ) ? $i_lowest - 1 : $i_lowest );
17504 # set a forced breakpoint at a container opening, if necessary, to
17505 # signal a break at a closing container. Excepting '(' for now.
17506 if ( $tokens_to_go[$i_lowest] =~ /^[\{\[]$/
17507 && !$forced_breakpoint_to_go[$i_lowest] )
17509 set_closing_breakpoint($i_lowest);
17512 # get ready to go again
17513 $i_begin = $i_lowest + 1;
17514 $last_break_strength = $lowest_strength;
17515 $i_last_break = $i_lowest;
17516 $leading_alignment_token = "";
17517 $leading_alignment_type = "";
17518 $lowest_next_token = '';
17519 $lowest_next_type = 'b';
17521 if ( ( $i_begin <= $imax ) && ( $types_to_go[$i_begin] eq 'b' ) ) {
17525 # update indentation size
17526 if ( $i_begin <= $imax ) {
17527 $leading_spaces = leading_spaces_to_go($i_begin);
17531 #-------------------------------------------------------
17532 # END of main loop to set continuation breakpoints
17533 # Now go back and make any necessary corrections
17534 #-------------------------------------------------------
17536 #-------------------------------------------------------
17537 # ?/: rule 4 -- if we broke at a ':', then break at
17538 # corresponding '?' unless this is a chain of ?: expressions
17539 #-------------------------------------------------------
17540 if (@i_colon_breaks) {
17542 # using a simple method for deciding if we are in a ?/: chain --
17543 # this is a chain if it has multiple ?/: pairs all in order;
17545 # Note that if line starts in a ':' we count that above as a break
17546 my $is_chain = ( $colons_in_order && @i_colon_breaks > 1 );
17548 unless ($is_chain) {
17549 my @insert_list = ();
17550 foreach (@i_colon_breaks) {
17551 my $i_question = $mate_index_to_go[$_];
17552 if ( $i_question >= 0 ) {
17553 if ( $want_break_before{'?'} ) {
17554 $i_question = $iprev_to_go[$i_question];
17557 if ( $i_question >= 0 ) {
17558 push @insert_list, $i_question;
17561 insert_additional_breaks( \@insert_list, \@i_first, \@i_last );
17565 return ( \@i_first, \@i_last, $colon_count );
17568 sub insert_additional_breaks {
17570 # this routine will add line breaks at requested locations after
17571 # sub set_continuation_breaks has made preliminary breaks.
17573 my ( $ri_break_list, $ri_first, $ri_last ) = @_;
17576 my $line_number = 0;
17577 foreach my $i_break_left ( sort { $a <=> $b } @{$ri_break_list} ) {
17579 $i_f = $ri_first->[$line_number];
17580 $i_l = $ri_last->[$line_number];
17581 while ( $i_break_left >= $i_l ) {
17584 # shouldn't happen unless caller passes bad indexes
17585 if ( $line_number >= @{$ri_last} ) {
17587 "Non-fatal program bug: couldn't set break at $i_break_left\n"
17589 report_definite_bug();
17592 $i_f = $ri_first->[$line_number];
17593 $i_l = $ri_last->[$line_number];
17596 # Do not leave a blank at the end of a line; back up if necessary
17597 if ( $types_to_go[$i_break_left] eq 'b' ) { $i_break_left-- }
17599 my $i_break_right = $inext_to_go[$i_break_left];
17600 if ( $i_break_left >= $i_f
17601 && $i_break_left < $i_l
17602 && $i_break_right > $i_f
17603 && $i_break_right <= $i_l )
17605 splice( @{$ri_first}, $line_number, 1, ( $i_f, $i_break_right ) );
17606 splice( @{$ri_last}, $line_number, 1, ( $i_break_left, $i_l ) );
17612 sub set_closing_breakpoint {
17614 # set a breakpoint at a matching closing token
17615 # at present, this is only used to break at a ':' which matches a '?'
17616 my $i_break = shift;
17618 if ( $mate_index_to_go[$i_break] >= 0 ) {
17620 # CAUTION: infinite recursion possible here:
17621 # set_closing_breakpoint calls set_forced_breakpoint, and
17622 # set_forced_breakpoint call set_closing_breakpoint
17623 # ( test files attrib.t, BasicLyx.pm.html).
17624 # Don't reduce the '2' in the statement below
17625 if ( $mate_index_to_go[$i_break] > $i_break + 2 ) {
17627 # break before } ] and ), but sub set_forced_breakpoint will decide
17628 # to break before or after a ? and :
17629 my $inc = ( $tokens_to_go[$i_break] eq '?' ) ? 0 : 1;
17630 set_forced_breakpoint( $mate_index_to_go[$i_break] - $inc );
17634 my $type_sequence = $type_sequence_to_go[$i_break];
17635 if ($type_sequence) {
17636 my $closing_token = $matching_token{ $tokens_to_go[$i_break] };
17637 $postponed_breakpoint{$type_sequence} = 1;
17643 sub compare_indentation_levels {
17645 # check to see if output line tabbing agrees with input line
17646 # this can be very useful for debugging a script which has an extra
17648 my ( $guessed_indentation_level, $structural_indentation_level ) = @_;
17649 if ( $guessed_indentation_level ne $structural_indentation_level ) {
17650 $last_tabbing_disagreement = $input_line_number;
17652 if ($in_tabbing_disagreement) {
17655 $tabbing_disagreement_count++;
17657 if ( $tabbing_disagreement_count <= MAX_NAG_MESSAGES ) {
17658 write_logfile_entry(
17659 "Start indentation disagreement: input=$guessed_indentation_level; output=$structural_indentation_level\n"
17662 $in_tabbing_disagreement = $input_line_number;
17663 $first_tabbing_disagreement = $in_tabbing_disagreement
17664 unless ($first_tabbing_disagreement);
17669 if ($in_tabbing_disagreement) {
17671 if ( $tabbing_disagreement_count <= MAX_NAG_MESSAGES ) {
17672 write_logfile_entry(
17673 "End indentation disagreement from input line $in_tabbing_disagreement\n"
17676 if ( $tabbing_disagreement_count == MAX_NAG_MESSAGES ) {
17677 write_logfile_entry(
17678 "No further tabbing disagreements will be noted\n");
17681 $in_tabbing_disagreement = 0;