1 #####################################################################
3 # The Perl::Tidy::Formatter package adds indentation, whitespace, and
4 # line breaks to the token stream
6 # WARNING: This is not a real class for speed reasons. Only one
7 # Formatter may be used.
9 #####################################################################
11 package Perl::Tidy::Formatter;
15 our $VERSION = '20190601';
17 # The Tokenizer will be loaded with the Formatter
18 ##use Perl::Tidy::Tokenizer; # for is_keyword()
22 Perl::Tidy::Die($msg);
23 croak "unexpected return from Perl::Tidy::Die";
28 Perl::Tidy::Warn($msg);
34 Perl::Tidy::Exit($msg);
35 croak "unexpected return from Perl::Tidy::Exit";
40 # Codes for insertion and deletion of blanks
41 use constant DELETE => 0;
42 use constant STABLE => 1;
43 use constant INSERT => 2;
45 # Caution: these debug flags produce a lot of output
46 # They should all be 0 except when debugging small scripts
47 use constant FORMATTER_DEBUG_FLAG_RECOMBINE => 0;
48 use constant FORMATTER_DEBUG_FLAG_BOND_TABLES => 0;
49 use constant FORMATTER_DEBUG_FLAG_BOND => 0;
50 use constant FORMATTER_DEBUG_FLAG_BREAK => 0;
51 use constant FORMATTER_DEBUG_FLAG_CI => 0;
52 use constant FORMATTER_DEBUG_FLAG_FLUSH => 0;
53 use constant FORMATTER_DEBUG_FLAG_FORCE => 0;
54 use constant FORMATTER_DEBUG_FLAG_LIST => 0;
55 use constant FORMATTER_DEBUG_FLAG_NOBREAK => 0;
56 use constant FORMATTER_DEBUG_FLAG_OUTPUT => 0;
57 use constant FORMATTER_DEBUG_FLAG_SPARSE => 0;
58 use constant FORMATTER_DEBUG_FLAG_STORE => 0;
59 use constant FORMATTER_DEBUG_FLAG_UNDOBP => 0;
60 use constant FORMATTER_DEBUG_FLAG_WHITE => 0;
62 my $debug_warning = sub {
63 print STDOUT "FORMATTER_DEBUGGING with key $_[0]\n";
66 FORMATTER_DEBUG_FLAG_RECOMBINE && $debug_warning->('RECOMBINE');
67 FORMATTER_DEBUG_FLAG_BOND_TABLES && $debug_warning->('BOND_TABLES');
68 FORMATTER_DEBUG_FLAG_BOND && $debug_warning->('BOND');
69 FORMATTER_DEBUG_FLAG_BREAK && $debug_warning->('BREAK');
70 FORMATTER_DEBUG_FLAG_CI && $debug_warning->('CI');
71 FORMATTER_DEBUG_FLAG_FLUSH && $debug_warning->('FLUSH');
72 FORMATTER_DEBUG_FLAG_FORCE && $debug_warning->('FORCE');
73 FORMATTER_DEBUG_FLAG_LIST && $debug_warning->('LIST');
74 FORMATTER_DEBUG_FLAG_NOBREAK && $debug_warning->('NOBREAK');
75 FORMATTER_DEBUG_FLAG_OUTPUT && $debug_warning->('OUTPUT');
76 FORMATTER_DEBUG_FLAG_SPARSE && $debug_warning->('SPARSE');
77 FORMATTER_DEBUG_FLAG_STORE && $debug_warning->('STORE');
78 FORMATTER_DEBUG_FLAG_UNDOBP && $debug_warning->('UNDOBP');
79 FORMATTER_DEBUG_FLAG_WHITE && $debug_warning->('WHITE');
86 $gnu_position_predictor
87 $line_start_index_to_go
88 $last_indentation_written
89 $last_unadjusted_indentation
91 $last_output_short_opening_token
94 $saw_VERSION_in_this_file
100 $last_output_indentation
107 @container_environment_to_go
109 @forced_breakpoint_to_go
111 @summed_lengths_to_go
113 @leading_spaces_to_go
114 @reduced_spaces_to_go
115 @matching_token_to_go
120 @old_breakpoint_to_go
127 %saved_opening_indentation
130 $comma_count_in_batch
131 $last_nonblank_index_to_go
132 $last_nonblank_type_to_go
133 $last_nonblank_token_to_go
134 $last_last_nonblank_index_to_go
135 $last_last_nonblank_type_to_go
136 $last_last_nonblank_token_to_go
137 @nonblank_lines_at_depth
140 @whitespace_level_stack
141 $whitespace_last_level
143 $format_skipping_pattern_begin
144 $format_skipping_pattern_end
146 $forced_breakpoint_count
147 $forced_breakpoint_undo_count
148 @forced_breakpoint_undo_stack
149 %postponed_breakpoint
153 $first_embedded_tab_at
154 $last_embedded_tab_at
155 $deleted_semicolon_count
156 $first_deleted_semicolon_at
157 $last_deleted_semicolon_at
158 $added_semicolon_count
159 $first_added_semicolon_at
160 $last_added_semicolon_at
161 $first_tabbing_disagreement
162 $last_tabbing_disagreement
163 $in_tabbing_disagreement
164 $tabbing_disagreement_count
167 $last_line_leading_type
168 $last_line_leading_level
169 $last_last_line_leading_level
172 %block_opening_line_number
173 $csc_new_statement_ok
176 $accumulating_text_for_block
178 $rleading_block_if_elsif_text
179 $leading_block_text_level
180 $leading_block_text_length_exceeded
181 $leading_block_text_line_length
182 $leading_block_text_line_number
183 $closing_side_comment_prefix_pattern
184 $closing_side_comment_list_pattern
186 $blank_lines_after_opening_block_pattern
187 $blank_lines_before_closing_block_pattern
191 $last_last_nonblank_token
192 $last_last_nonblank_type
193 $last_nonblank_block_type
196 %is_if_brace_follower
200 %is_last_next_redo_return
201 %is_other_brace_follower
202 %is_else_brace_follower
203 %is_anon_sub_brace_follower
204 %is_anon_sub_1_brace_follower
206 %is_sort_map_grep_eval
207 %is_sort_map_grep_eval_do
208 %is_block_without_semicolon
213 %is_if_unless_and_or_last_next_redo_return
214 %ok_to_add_semicolon_for_block_type
220 $is_static_block_comment
221 $index_start_one_line_block
222 $semicolons_before_block_self_destruct
223 $index_max_forced_break
226 $vertical_aligner_object
233 $static_block_comment_pattern
234 $static_side_comment_pattern
235 %opening_vertical_tightness
236 %closing_vertical_tightness
237 %closing_token_indentation
238 $some_closing_token_indentation
244 $block_brace_vertical_tightness_pattern
245 $keyword_group_list_pattern
246 $keyword_group_list_comment_pattern
249 $rOpts_add_whitespace
250 $rOpts_block_brace_tightness
251 $rOpts_block_brace_vertical_tightness
252 $rOpts_brace_left_and_indent
253 $rOpts_comma_arrow_breakpoints
254 $rOpts_break_at_old_keyword_breakpoints
255 $rOpts_break_at_old_comma_breakpoints
256 $rOpts_break_at_old_logical_breakpoints
257 $rOpts_break_at_old_method_breakpoints
258 $rOpts_break_at_old_ternary_breakpoints
259 $rOpts_break_at_old_attribute_breakpoints
260 $rOpts_closing_side_comment_else_flag
261 $rOpts_closing_side_comment_maximum_text
262 $rOpts_continuation_indentation
263 $rOpts_delete_old_whitespace
264 $rOpts_fuzzy_line_length
265 $rOpts_indent_columns
266 $rOpts_line_up_parentheses
267 $rOpts_maximum_fields_per_table
268 $rOpts_maximum_line_length
269 $rOpts_variable_maximum_line_length
270 $rOpts_short_concatenation_item_length
271 $rOpts_keep_old_blank_lines
272 $rOpts_ignore_old_breakpoints
273 $rOpts_format_skipping
274 $rOpts_space_function_paren
275 $rOpts_space_keyword_paren
276 $rOpts_keep_interior_semicolons
277 $rOpts_ignore_side_comment_lengths
278 $rOpts_stack_closing_block_brace
279 $rOpts_space_backslash_quote
280 $rOpts_whitespace_cycle
281 $rOpts_one_line_block_semicolons
285 %is_keyword_returning_list
303 %weld_len_left_closing
304 %weld_len_right_closing
305 %weld_len_left_opening
306 %weld_len_right_opening
308 $rcuddled_block_types
319 # Array index names for token variables
322 _BLOCK_TYPE_ => $i++,
324 _CONTAINER_ENVIRONMENT_ => $i++,
325 _CONTAINER_TYPE_ => $i++,
326 _CUMULATIVE_LENGTH_ => $i++,
327 _LINE_INDEX_ => $i++,
328 _KNEXT_SEQ_ITEM_ => $i++,
330 _LEVEL_TRUE_ => $i++,
334 _TYPE_SEQUENCE_ => $i++,
336 $NVARS = 1 + _TYPE_SEQUENCE_;
338 # default list of block types for which -bli would apply
339 $bli_list_string = 'if else elsif unless while for foreach do : sub';
344 .. :: << >> ** && .. || // -> => += -= .= %= &= |= ^= *= <>
345 <= >= == =~ !~ != ++ -- /= x=
347 @is_digraph{@q} = (1) x scalar(@q);
349 @q = qw( ... **= <<= >>= &&= ||= //= <=> <<~ );
350 @is_trigraph{@q} = (1) x scalar(@q);
353 = **= += *= &= <<= &&=
358 @is_assignment{@q} = (1) x scalar(@q);
368 @is_keyword_returning_list{@q} = (1) x scalar(@q);
370 @q = qw(is if unless and or err last next redo return);
371 @is_if_unless_and_or_last_next_redo_return{@q} = (1) x scalar(@q);
373 @q = qw(last next redo return);
374 @is_last_next_redo_return{@q} = (1) x scalar(@q);
376 @q = qw(sort map grep);
377 @is_sort_map_grep{@q} = (1) x scalar(@q);
379 @q = qw(sort map grep eval);
380 @is_sort_map_grep_eval{@q} = (1) x scalar(@q);
382 @q = qw(sort map grep eval do);
383 @is_sort_map_grep_eval_do{@q} = (1) x scalar(@q);
386 @is_if_unless{@q} = (1) x scalar(@q);
389 @is_and_or{@q} = (1) x scalar(@q);
391 # Identify certain operators which often occur in chains.
392 # Note: the minus (-) causes a side effect of padding of the first line in
393 # something like this (by sub set_logical_padding):
394 # Checkbutton => 'Transmission checked',
395 # -variable => \$TRANS
396 # This usually improves appearance so it seems ok.
397 @q = qw(&& || and or : ? . + - * /);
398 @is_chain_operator{@q} = (1) x scalar(@q);
400 # We can remove semicolons after blocks preceded by these keywords
402 qw(BEGIN END CHECK INIT AUTOLOAD DESTROY UNITCHECK continue if elsif else
403 unless while until for foreach given when default);
404 @is_block_without_semicolon{@q} = (1) x scalar(@q);
406 # We will allow semicolons to be added within these block types
407 # as well as sub and package blocks.
409 # 1. Note that these keywords are omitted:
410 # switch case given when default sort map grep
411 # 2. It is also ok to add for sub and package blocks and a labeled block
412 # 3. But not okay for other perltidy types including:
414 # 4. Test files: blktype.t, blktype1.t, semicolon.t
416 qw( BEGIN END CHECK INIT AUTOLOAD DESTROY UNITCHECK continue if elsif else
417 unless do while until eval for foreach );
418 @ok_to_add_semicolon_for_block_type{@q} = (1) x scalar(@q);
420 # 'L' is token for opening { at hash key
422 @is_opening_type{@q} = (1) x scalar(@q);
424 # 'R' is token for closing } at hash key
426 @is_closing_type{@q} = (1) x scalar(@q);
429 @is_opening_token{@q} = (1) x scalar(@q);
432 @is_closing_token{@q} = (1) x scalar(@q);
434 # Patterns for standardizing matches to block types for regular subs and
435 # anonymous subs. Examples
436 # 'sub process' is a named sub
437 # 'sub ::m' is a named sub
438 # 'sub' is an anonymous sub
439 # 'sub:' is a label, not a sub
440 # 'substr' is a keyword
441 $SUB_PATTERN = '^sub\s+(::|\w)';
442 $ASUB_PATTERN = '^sub$';
446 use constant WS_YES => 1;
447 use constant WS_OPTIONAL => 0;
448 use constant WS_NO => -1;
450 # Token bond strengths.
451 use constant NO_BREAK => 10000;
452 use constant VERY_STRONG => 100;
453 use constant STRONG => 2.1;
454 use constant NOMINAL => 1.1;
455 use constant WEAK => 0.8;
456 use constant VERY_WEAK => 0.55;
458 # values for testing indexes in output array
459 use constant UNDEFINED_INDEX => -1;
461 # Maximum number of little messages; probably need not be changed.
462 use constant MAX_NAG_MESSAGES => 6;
464 # increment between sequence numbers for each type
465 # For example, ?: pairs might have numbers 7,11,15,...
466 use constant TYPE_SEQUENCE_INCREMENT => 4;
470 # methods to count instances
472 sub get_count { return $_count; }
473 sub _increment_count { return ++$_count }
474 sub _decrement_count { return --$_count }
479 # trim leading and trailing whitespace from a string
488 my $max = shift @vals;
489 foreach my $val (@vals) {
490 $max = ( $max < $val ) ? $val : $max;
497 my $min = shift @vals;
498 foreach my $val (@vals) {
499 $min = ( $min > $val ) ? $val : $min;
506 # given a string containing words separated by whitespace,
507 # return the list of words
512 return split( /\s+/, $str );
516 my ( $rtest, $rvalid, $msg, $exact_match ) = @_;
518 # Check the keys of a hash:
519 # $rtest = ref to hash to test
520 # $rvalid = ref to hash with valid keys
522 # $msg = a message to write in case of error
523 # $exact_match defines the type of check:
524 # = false: test hash must not have unknown key
525 # = true: test hash must have exactly same keys as known hash
527 grep { !exists $rvalid->{$_} } keys %{$rtest};
529 grep { !exists $rtest->{$_} } keys %{$rvalid};
530 my $error = @unknown_keys;
531 if ($exact_match) { $error ||= @missing_keys }
534 my @expected_keys = sort keys %{$rvalid};
535 @unknown_keys = sort @unknown_keys;
537 ------------------------------------------------------------------------
538 Program error detected checking hash keys
540 Expected keys: (@expected_keys)
541 Unknown key(s): (@unknown_keys)
542 Missing key(s): (@missing_keys)
543 ------------------------------------------------------------------------
549 # interface to Perl::Tidy::Logger routines
552 if ($logger_object) { $logger_object->warning($msg); }
558 if ($logger_object) {
559 $logger_object->complain($msg);
564 sub write_logfile_entry {
566 if ($logger_object) {
567 $logger_object->write_logfile_entry(@msg);
574 if ($logger_object) { $logger_object->black_box(@msg); }
578 sub report_definite_bug {
579 if ($logger_object) {
580 $logger_object->report_definite_bug();
585 sub get_saw_brace_error {
586 if ($logger_object) {
587 return $logger_object->get_saw_brace_error();
592 sub we_are_at_the_last_line {
593 if ($logger_object) {
594 $logger_object->we_are_at_the_last_line();
599 # interface to Perl::Tidy::Diagnostics routine
600 sub write_diagnostics {
602 if ($diagnostics_object) { $diagnostics_object->write_diagnostics($msg); }
606 sub get_added_semicolon_count {
608 return $added_semicolon_count;
613 $self->_decrement_count();
617 sub get_output_line_number {
618 return $vertical_aligner_object->get_output_line_number();
623 my ( $class, @args ) = @_;
625 # we are given an object with a write_line() method to take lines
627 sink_object => undef,
628 diagnostics_object => undef,
629 logger_object => undef,
631 my %args = ( %defaults, @args );
633 $logger_object = $args{logger_object};
634 $diagnostics_object = $args{diagnostics_object};
636 # we create another object with a get_line() and peek_ahead() method
637 my $sink_object = $args{sink_object};
638 $file_writer_object =
639 Perl::Tidy::FileWriter->new( $sink_object, $rOpts, $logger_object );
641 # initialize the leading whitespace stack to negative levels
642 # so that we can never run off the end of the stack
643 $peak_batch_size = 0; # flag to determine if we have output code
644 $gnu_position_predictor = 0; # where the current token is predicted to be
645 $max_gnu_stack_index = 0;
646 $max_gnu_item_index = -1;
647 $gnu_stack[0] = new_lp_indentation_item( 0, -1, -1, 0, 0 );
649 $last_output_indentation = 0;
650 $last_indentation_written = 0;
651 $last_unadjusted_indentation = 0;
652 $last_leading_token = "";
653 $last_output_short_opening_token = 0;
655 $saw_VERSION_in_this_file = !$rOpts->{'pass-version-line'};
656 $saw_END_or_DATA_ = 0;
658 @block_type_to_go = ();
659 @type_sequence_to_go = ();
660 @container_environment_to_go = ();
661 @bond_strength_to_go = ();
662 @forced_breakpoint_to_go = ();
663 @summed_lengths_to_go = (); # line length to start of ith token
664 @token_lengths_to_go = ();
666 @matching_token_to_go = ();
667 @mate_index_to_go = ();
668 @ci_levels_to_go = ();
669 @nesting_depth_to_go = (0);
671 @old_breakpoint_to_go = ();
675 @leading_spaces_to_go = ();
676 @reduced_spaces_to_go = ();
680 @whitespace_level_stack = ();
681 $whitespace_last_level = -1;
684 @has_broken_sublist = ();
685 @want_comma_break = ();
688 $first_tabbing_disagreement = 0;
689 $last_tabbing_disagreement = 0;
690 $tabbing_disagreement_count = 0;
691 $in_tabbing_disagreement = 0;
692 $input_line_tabbing = undef;
694 $last_last_line_leading_level = 0;
695 $last_line_leading_level = 0;
696 $last_line_leading_type = '#';
698 $last_nonblank_token = ';';
699 $last_nonblank_type = ';';
700 $last_last_nonblank_token = ';';
701 $last_last_nonblank_type = ';';
702 $last_nonblank_block_type = "";
703 $last_output_level = 0;
704 $looking_for_else = 0;
705 $embedded_tab_count = 0;
706 $first_embedded_tab_at = 0;
707 $last_embedded_tab_at = 0;
708 $deleted_semicolon_count = 0;
709 $first_deleted_semicolon_at = 0;
710 $last_deleted_semicolon_at = 0;
711 $added_semicolon_count = 0;
712 $first_added_semicolon_at = 0;
713 $last_added_semicolon_at = 0;
714 $is_static_block_comment = 0;
715 %postponed_breakpoint = ();
717 # variables for adding side comments
718 %block_leading_text = ();
719 %block_opening_line_number = ();
720 $csc_new_statement_ok = 1;
721 %csc_block_label = ();
723 %saved_opening_indentation = ();
725 reset_block_text_accumulator();
727 prepare_for_new_input_lines();
729 $vertical_aligner_object =
730 Perl::Tidy::VerticalAligner->initialize( $rOpts, $file_writer_object,
731 $logger_object, $diagnostics_object );
733 if ( $rOpts->{'entab-leading-whitespace'} ) {
735 "Leading whitespace will be entabbed with $rOpts->{'entab-leading-whitespace'} spaces per tab\n"
738 elsif ( $rOpts->{'tabs'} ) {
739 write_logfile_entry("Indentation will be with a tab character\n");
743 "Indentation will be with $rOpts->{'indent-columns'} spaces\n");
746 # This hash holds the main data structures for formatting
747 # All hash keys must be defined here.
749 rlines => [], # = ref to array of lines of the file
750 rlines_new => [], # = ref to array of output lines
751 # (FOR FUTURE DEVELOPMENT)
752 rLL => [], # = ref to array with all tokens
753 # in the file. LL originally meant
754 # 'Linked List'. Linked lists were a
755 # bad idea but LL is easy to type.
756 Klimit => undef, # = maximum K index for rLL. This is
757 # needed to catch any autovivification
759 rnested_pairs => [], # for welding decisions
760 K_opening_container => {}, # for quickly traversing structure
761 K_closing_container => {}, # for quickly traversing structure
762 K_opening_ternary => {}, # for quickly traversing structure
763 K_closing_ternary => {}, # for quickly traversing structure
764 rK_phantom_semicolons =>
765 undef, # for undoing phantom semicolons if iterating
766 rpaired_to_inner_container => {},
767 rbreak_container => {}, # prevent one-line blocks
768 rvalid_self_keys => [], # for checking
769 valign_batch_count => 0,
771 my @valid_keys = keys %{$formatter_self};
772 $formatter_self->{rvalid_self_keys} = \@valid_keys;
774 bless $formatter_self, $class;
776 # Safety check..this is not a class yet
777 if ( _increment_count() > 1 ) {
779 "Attempt to create more than 1 object in $class, which is not a true class yet\n";
781 return $formatter_self;
784 # Future routines for storing new lines
786 my ( $self, $rline ) = @_;
788 # my $rline = $rlines->[$index_old];
789 # push @{$rlines_new}, $rline;
794 my ( $self, $index_old ) = @_;
796 # TODO: This will copy line with index $index_old to the new line array
797 # my $rlines = $self->{rlines};
798 # my $rline = $rlines->[$index_old];
799 # $self->push_line($rline);
803 sub push_blank_line {
807 # $self->push_line($rline);
812 my ( $self, $Kmin, $Kmax ) = @_;
814 # TODO: This will store the values for one new line of CODE
815 # CHECK TOKEN RANGE HERE
816 # $self->push_line($rline);
820 sub increment_valign_batch_count {
822 return ++$self->{valign_batch_count};
825 sub get_valign_batch_count {
827 return $self->{valign_batch_count};
833 # This routine is called for errors that really should not occur
834 # except if there has been a bug introduced by a recent program change
835 my ( $package0, $filename0, $line0, $subroutine0 ) = caller(0);
836 my ( $package1, $filename1, $line1, $subroutine1 ) = caller(1);
837 my ( $package2, $filename2, $line2, $subroutine2 ) = caller(2);
840 ==============================================================================
841 Fault detected at line $line0 of sub '$subroutine1'
843 which was called from line $line1 of sub '$subroutine2'
845 This is probably an error introduced by a recent programming change.
846 ==============================================================================
849 # This is for Perl-Critic
853 sub check_self_hash {
855 my @valid_self_keys = @{ $self->{rvalid_self_keys} };
857 @valid_self_hash{@valid_self_keys} = (1) x scalar(@valid_self_keys);
858 check_keys( $self, \%valid_self_hash, "Checkpoint: self error", 1 );
862 sub check_token_array {
865 # Check for errors in the array of tokens
866 # Uses package variable $NVARS
867 $self->check_self_hash();
868 my $rLL = $self->{rLL};
869 for ( my $KK = 0 ; $KK < @{$rLL} ; $KK++ ) {
870 my $nvars = @{ $rLL->[$KK] };
871 if ( $nvars != $NVARS ) {
872 my $type = $rLL->[$KK]->[_TYPE_];
873 $type = '*' unless defined($type);
875 "number of vars for node $KK, type '$type', is $nvars but should be $NVARS"
878 foreach my $var ( _TOKEN_, _TYPE_ ) {
879 if ( !defined( $rLL->[$KK]->[$var] ) ) {
880 my $iline = $rLL->[$KK]->[_LINE_INDEX_];
881 Fault("Undefined variable $var for K=$KK, line=$iline\n");
888 sub set_rLL_max_index {
891 # Set the limit of the rLL array, assuming that it is correct.
892 # This should only be called by routines after they make changes
894 my $rLL = $self->{rLL};
895 if ( !defined($rLL) ) {
897 # Shouldn't happen because rLL was initialized to be an array ref
898 Fault("Undefined Memory rLL");
900 my $Klimit_old = $self->{Klimit};
903 if ( $num > 0 ) { $Klimit = $num - 1 }
904 $self->{Klimit} = $Klimit;
908 sub get_rLL_max_index {
911 # the memory location $rLL and number of tokens should be obtained
912 # from this routine so that any autovivication can be immediately caught.
913 my $rLL = $self->{rLL};
914 my $Klimit = $self->{Klimit};
915 if ( !defined($rLL) ) {
917 # Shouldn't happen because rLL was initialized to be an array ref
918 Fault("Undefined Memory rLL");
921 if ( $num == 0 && defined($Klimit)
922 || $num > 0 && !defined($Klimit)
923 || $num > 0 && $Klimit != $num - 1 )
926 # Possible autovivification problem...
927 if ( !defined($Klimit) ) { $Klimit = '*' }
928 Fault("Error getting rLL: Memory items=$num and Klimit=$Klimit");
933 sub prepare_for_new_input_lines {
935 # Remember the largest batch size processed. This is needed
936 # by the pad routine to avoid padding the first nonblank token
937 if ( $max_index_to_go && $max_index_to_go > $peak_batch_size ) {
938 $peak_batch_size = $max_index_to_go;
941 $gnu_sequence_number++; # increment output batch counter
942 %last_gnu_equals = ();
943 %gnu_comma_count = ();
944 %gnu_arrow_count = ();
945 $line_start_index_to_go = 0;
946 $max_gnu_item_index = UNDEFINED_INDEX;
947 $index_max_forced_break = UNDEFINED_INDEX;
948 $max_index_to_go = UNDEFINED_INDEX;
949 $last_nonblank_index_to_go = UNDEFINED_INDEX;
950 $last_nonblank_type_to_go = '';
951 $last_nonblank_token_to_go = '';
952 $last_last_nonblank_index_to_go = UNDEFINED_INDEX;
953 $last_last_nonblank_type_to_go = '';
954 $last_last_nonblank_token_to_go = '';
955 $forced_breakpoint_count = 0;
956 $forced_breakpoint_undo_count = 0;
957 $rbrace_follower = undef;
958 $summed_lengths_to_go[0] = 0;
959 $comma_count_in_batch = 0;
960 $starting_in_quote = 0;
962 destroy_one_line_block();
966 sub keyword_group_scan {
969 # Manipulate blank lines around keyword groups (kgb* flags)
970 # Scan all lines looking for runs of consecutive lines beginning with
971 # selected keywords. Example keywords are 'my', 'our', 'local', ... but
972 # they may be anything. We will set flags requesting that blanks be
973 # inserted around and withing them according to input parameters. Note
974 # that we are scanning the lines as they came in in the input stream, so
975 # they are not necessarily well formatted.
977 # The output of this sub is a return hash ref whose keys are the indexes of
978 # lines after which we desire a blank line. For line index i:
979 # $rhash_of_desires->{$i} = 1 means we want a blank line AFTER line $i
980 # $rhash_of_desires->{$i} = 2 means we want blank line $i removed
981 my $rhash_of_desires = {};
983 my $Opt_blanks_before = $rOpts->{'keyword-group-blanks-before'}; # '-kgbb'
984 my $Opt_blanks_after = $rOpts->{'keyword-group-blanks-after'}; # '-kgba'
985 my $Opt_blanks_inside = $rOpts->{'keyword-group-blanks-inside'}; # '-kgbi'
986 my $Opt_blanks_delete = $rOpts->{'keyword-group-blanks-delete'}; # '-kgbd'
987 my $Opt_size = $rOpts->{'keyword-group-blanks-size'}; # '-kgbs'
989 # A range of sizes can be input with decimal notation like 'min.max' with
990 # any number of dots between the two numbers. Examples:
991 # string => min max matches
999 my ( $Opt_size_min, $Opt_size_max ) = split /\.+/, $Opt_size;
1000 if ( $Opt_size_min && $Opt_size_min !~ /^\d+$/
1001 || $Opt_size_max && $Opt_size_max !~ /^\d+$/ )
1004 Unexpected value for -kgbs: '$Opt_size'; expecting 'min' or 'min.max';
1005 ignoring all -kgb flags
1007 return $rhash_of_desires;
1009 $Opt_size_min = 1 unless ($Opt_size_min);
1011 if ( $Opt_size_max && $Opt_size_max < $Opt_size_min ) {
1012 return $rhash_of_desires;
1015 # codes for $Opt_blanks_before and $Opt_blanks_after:
1016 # 0 = never (delete if exist)
1017 # 1 = stable (keep unchanged)
1018 # 2 = always (insert if missing)
1020 return $rhash_of_desires
1021 unless $Opt_size_min > 0
1022 && ( $Opt_blanks_before != 1
1023 || $Opt_blanks_after != 1
1024 || $Opt_blanks_inside
1025 || $Opt_blanks_delete );
1027 my $Opt_pattern = $keyword_group_list_pattern;
1028 my $Opt_comment_pattern = $keyword_group_list_comment_pattern;
1029 my $Opt_repeat_count =
1030 $rOpts->{'keyword-group-blanks-repeat-count'}; # '-kgbr'
1032 my $rlines = $self->{rlines};
1033 my $rLL = $self->{rLL};
1034 my $K_closing_container = $self->{K_closing_container};
1036 # variables for the current group and subgroups:
1037 my ( $ibeg, $iend, $count, $level_beg, $K_closing, @iblanks, @group,
1041 # ($ibeg, $iend) = starting and ending line indexes of this entire group
1042 # $count = total number of keywords seen in this entire group
1043 # $level_beg = indententation level of this group
1044 # @group = [ $i, $token, $count ] =list of all keywords & blanks
1045 # @subgroup = $j, index of group where token changes
1046 # @iblanks = line indexes of blank lines in input stream in this group
1047 # where i=starting line index
1048 # token (the keyword)
1049 # count = number of this token in this subgroup
1050 # j = index in group where token changes
1052 # These vars will contain values for the most recently seen line:
1053 my ( $line_type, $CODE_type, $K_first, $K_last );
1055 my $number_of_groups_seen = 0;
1057 ####################
1058 # helper subroutines
1059 ####################
1061 my $insert_blank_after = sub {
1063 $rhash_of_desires->{$i} = 1;
1065 if ( defined( $rhash_of_desires->{$ip} )
1066 && $rhash_of_desires->{$ip} == 2 )
1068 $rhash_of_desires->{$ip} = 0;
1073 my $split_into_sub_groups = sub {
1075 # place blanks around long sub-groups of keywords
1077 return unless ($Opt_blanks_inside);
1079 # loop over sub-groups, index k
1080 push @subgroup, scalar @group;
1082 my $kend = @subgroup - 1;
1083 for ( my $k = $kbeg ; $k <= $kend ; $k++ ) {
1085 # index j runs through all keywords found
1086 my $j_b = $subgroup[ $k - 1 ];
1087 my $j_e = $subgroup[$k] - 1;
1089 # index i is the actual line number of a keyword
1090 my ( $i_b, $tok_b, $count_b ) = @{ $group[$j_b] };
1091 my ( $i_e, $tok_e, $count_e ) = @{ $group[$j_e] };
1092 my $num = $count_e - $count_b + 1;
1094 # This subgroup runs from line $ib to line $ie-1, but may contain
1096 if ( $num >= $Opt_size_min ) {
1098 # if there are blank lines, we require that at least $num lines
1099 # be non-blank up to the boundary with the next subgroup.
1100 my $nog_b = my $nog_e = 1;
1101 if ( @iblanks && !$Opt_blanks_delete ) {
1102 my $j_bb = $j_b + $num - 1;
1103 my ( $i_bb, $tok_bb, $count_bb ) = @{ $group[$j_bb] };
1104 $nog_b = $count_bb - $count_b + 1 == $num;
1106 my $j_ee = $j_e - ( $num - 1 );
1107 my ( $i_ee, $tok_ee, $count_ee ) = @{ $group[$j_ee] };
1108 $nog_e = $count_e - $count_ee + 1 == $num;
1110 if ( $nog_b && $k > $kbeg ) {
1111 $insert_blank_after->( $i_b - 1 );
1113 if ( $nog_e && $k < $kend ) {
1114 my ( $i_ep, $tok_ep, $count_ep ) = @{ $group[ $j_e + 1 ] };
1115 $insert_blank_after->( $i_ep - 1 );
1121 my $delete_if_blank = sub {
1124 # delete line $i if it is blank
1125 return unless ( $i >= 0 && $i < @{$rlines} );
1126 my $line_type = $rlines->[$i]->{_line_type};
1127 return if ( $line_type ne 'CODE' );
1128 my $code_type = $rlines->[$i]->{_code_type};
1129 if ( $code_type eq 'BL' ) { $rhash_of_desires->{$i} = 2; }
1133 my $delete_inner_blank_lines = sub {
1135 # always remove unwanted trailing blank lines from our list
1136 return unless (@iblanks);
1137 while ( my $ibl = pop(@iblanks) ) {
1138 if ( $ibl < $iend ) { push @iblanks, $ibl; last }
1142 # now mark mark interior blank lines for deletion if requested
1143 return unless ($Opt_blanks_delete);
1145 while ( my $ibl = pop(@iblanks) ) { $rhash_of_desires->{$ibl} = 2 }
1149 my $end_group = sub {
1151 # end a group of keywords
1152 my ($bad_ending) = @_;
1153 if ( defined($ibeg) && $ibeg >= 0 ) {
1155 # then handle sufficiently large groups
1156 if ( $count >= $Opt_size_min ) {
1158 $number_of_groups_seen++;
1160 # do any blank deletions regardless of the count
1161 $delete_inner_blank_lines->();
1164 my $code_type = $rlines->[ $ibeg - 1 ]->{_code_type};
1166 # patch for hash bang line which is not currently marked as
1167 # a comment; mark it as a comment
1168 if ( $ibeg == 1 && !$code_type ) {
1169 my $line_text = $rlines->[ $ibeg - 1 ]->{_line_text};
1171 if ( $line_text && $line_text =~ /^#/ );
1174 # Do not insert a blank after a comment
1175 # (this could be subject to a flag in the future)
1176 if ( $code_type !~ /(BC|SBC|SBCX)/ ) {
1177 if ( $Opt_blanks_before == INSERT ) {
1178 $insert_blank_after->( $ibeg - 1 );
1181 elsif ( $Opt_blanks_before == DELETE ) {
1182 $delete_if_blank->( $ibeg - 1 );
1187 # We will only put blanks before code lines. We could loosen
1188 # this rule a little, but we have to be very careful because
1189 # for example we certainly don't want to drop a blank line
1190 # after a line like this:
1192 if ( $line_type eq 'CODE' && defined($K_first) ) {
1194 # - Do not put a blank before a line of different level
1195 # - Do not put a blank line if we ended the search badly
1196 # - Do not put a blank at the end of the file
1197 # - Do not put a blank line before a hanging side comment
1198 my $level = $rLL->[$K_first]->[_LEVEL_];
1199 my $ci_level = $rLL->[$K_first]->[_CI_LEVEL_];
1201 if ( $level == $level_beg
1204 && $iend < @{$rlines}
1205 && $CODE_type ne 'HSC' )
1207 if ( $Opt_blanks_after == INSERT ) {
1208 $insert_blank_after->($iend);
1210 elsif ( $Opt_blanks_after == DELETE ) {
1211 $delete_if_blank->( $iend + 1 );
1216 $split_into_sub_groups->();
1219 # reset for another group
1229 my $find_container_end = sub {
1231 # If the keyword lines ends with an open token, find the closing token
1232 # '$K_closing' so that we can easily skip past the contents of the
1234 return if ( $K_last <= $K_first );
1236 my $type_last = $rLL->[$KK]->[_TYPE_];
1237 my $tok_last = $rLL->[$KK]->[_TOKEN_];
1238 if ( $type_last eq '#' ) {
1239 $KK = $self->K_previous_nonblank($KK);
1240 $tok_last = $rLL->[$KK]->[_TOKEN_];
1242 if ( $KK > $K_first && $tok_last =~ /^[\(\{\[]$/ ) {
1244 my $type_sequence = $rLL->[$KK]->[_TYPE_SEQUENCE_];
1245 my $lev = $rLL->[$KK]->[_LEVEL_];
1246 if ( $lev == $level_beg ) {
1247 $K_closing = $K_closing_container->{$type_sequence};
1252 my $add_to_group = sub {
1253 my ( $i, $token, $level ) = @_;
1255 # End the previous group if we have reached the maximum
1257 if ( $Opt_size_max && @group >= $Opt_size_max ) {
1261 if ( @group == 0 ) {
1263 $level_beg = $level;
1271 if ( !@group || $token ne $group[-1]->[1] ) {
1272 push @subgroup, scalar(@group);
1274 push @group, [ $i, $token, $count ];
1276 # remember if this line ends in an open container
1277 $find_container_end->();
1282 ###################################
1283 # loop over all lines of the source
1284 ###################################
1287 foreach my $line_of_tokens ( @{$rlines} ) {
1291 if ( $Opt_repeat_count > 0
1292 && $number_of_groups_seen >= $Opt_repeat_count );
1297 $line_type = $line_of_tokens->{_line_type};
1299 # always end a group at non-CODE
1300 if ( $line_type ne 'CODE' ) { $end_group->(); next }
1302 $CODE_type = $line_of_tokens->{_code_type};
1304 # end any group at a format skipping line
1305 if ( $CODE_type && $CODE_type eq 'FS' ) {
1310 # continue in a verbatim (VB) type; it may be quoted text
1311 if ( $CODE_type eq 'VB' ) {
1312 if ( $ibeg >= 0 ) { $iend = $i; }
1316 # and continue in blank (BL) types
1317 if ( $CODE_type eq 'BL' ) {
1320 push @{iblanks}, $i;
1322 # propagate current subgroup token
1323 my $tok = $group[-1]->[1];
1324 push @group, [ $i, $tok, $count ];
1329 # examine the first token of this line
1330 my $rK_range = $line_of_tokens->{_rK_range};
1331 ( $K_first, $K_last ) = @{$rK_range};
1332 if ( !defined($K_first) ) {
1334 # Unexpected blank line..shouldn't happen
1335 # $rK_range should be defined for line type CODE
1337 "Programming Error: Unexpected Blank Line in sub 'keyword_group_scan'. Ignoring"
1339 return $rhash_of_desires;
1342 my $level = $rLL->[$K_first]->[_LEVEL_];
1343 my $type = $rLL->[$K_first]->[_TYPE_];
1344 my $token = $rLL->[$K_first]->[_TOKEN_];
1345 my $ci_level = $rLL->[$K_first]->[_CI_LEVEL_];
1347 # see if this is a code type we seek (i.e. comment)
1349 && $Opt_comment_pattern
1350 && $CODE_type =~ /$Opt_comment_pattern/o )
1353 my $tok = $CODE_type;
1355 # Continuing a group
1356 if ( $ibeg >= 0 && $level == $level_beg ) {
1357 $add_to_group->( $i, $tok, $level );
1363 # first end old group if any; we might be starting new
1364 # keywords at different level
1365 if ( $ibeg > 0 ) { $end_group->(); }
1366 $add_to_group->( $i, $tok, $level );
1371 # See if it is a keyword we seek, but never start a group in a
1372 # continuation line; the code may be badly formatted.
1375 && $token =~ /$Opt_pattern/o )
1378 # Continuing a keyword group
1379 if ( $ibeg >= 0 && $level == $level_beg ) {
1380 $add_to_group->( $i, $token, $level );
1383 # Start new keyword group
1386 # first end old group if any; we might be starting new
1387 # keywords at different level
1388 if ( $ibeg > 0 ) { $end_group->(); }
1389 $add_to_group->( $i, $token, $level );
1394 # This is not one of our keywords, but we are in a keyword group
1395 # so see if we should continue or quit
1396 elsif ( $ibeg >= 0 ) {
1398 # - bail out on a large level change; we may have walked into a
1399 # data structure or anoymous sub code.
1400 if ( $level > $level_beg + 1 || $level < $level_beg ) {
1405 # - keep going on a continuation line of the same level, since
1406 # it is probably a continuation of our previous keyword,
1407 # - and keep going past hanging side comments because we never
1408 # want to interrupt them.
1409 if ( ( ( $level == $level_beg ) && $ci_level > 0 )
1410 || $CODE_type eq 'HSC' )
1416 # - continue if if we are within in a container which started with
1417 # the line of the previous keyword.
1418 if ( defined($K_closing) && $K_first <= $K_closing ) {
1420 # continue if entire line is within container
1421 if ( $K_last <= $K_closing ) { $iend = $i; next }
1423 # continue at ); or }; or ];
1424 my $KK = $K_closing + 1;
1425 if ( $rLL->[$KK]->[_TYPE_] eq ';' ) {
1426 if ( $KK < $K_last ) {
1427 if ( $rLL->[ ++$KK ]->[_TYPE_] eq 'b' ) { ++$KK }
1428 if ( $KK > $K_last || $rLL->[$KK]->[_TYPE_] ne '#' ) {
1441 # - end the group if none of the above
1446 # not in a keyword group; continue
1450 # end of loop over all lines
1452 return $rhash_of_desires;
1457 # Loop over old lines to set new line break points
1460 my $rlines = $self->{rlines};
1462 # Note for RT#118553, leave only one newline at the end of a file.
1463 # Example code to do this is in comments below:
1464 # my $Opt_trim_ending_blank_lines = 0;
1465 # if ($Opt_trim_ending_blank_lines) {
1466 # while ( my $line_of_tokens = pop @{$rlines} ) {
1467 # my $line_type = $line_of_tokens->{_line_type};
1468 # if ( $line_type eq 'CODE' ) {
1469 # my $CODE_type = $line_of_tokens->{_code_type};
1470 # next if ( $CODE_type eq 'BL' );
1472 # push @{$rlines}, $line_of_tokens;
1477 # But while this would be a trivial update, it would have very undesirable
1478 # side effects when perltidy is run from within an editor on a small snippet.
1479 # So this is best done with a separate filter, such
1480 # as 'delete_ending_blank_lines.pl' in the examples folder.
1482 # Flag to prevent blank lines when POD occurs in a format skipping sect.
1483 my $in_format_skipping_section;
1485 # set locations for blanks around long runs of keywords
1486 my $rwant_blank_line_after = $self->keyword_group_scan();
1490 foreach my $line_of_tokens ( @{$rlines} ) {
1493 # insert blank lines requested for keyword sequences
1495 && defined( $rwant_blank_line_after->{ $i - 1 } )
1496 && $rwant_blank_line_after->{ $i - 1 } == 1 )
1498 $self->want_blank_line();
1501 my $last_line_type = $line_type;
1502 $line_type = $line_of_tokens->{_line_type};
1503 my $input_line = $line_of_tokens->{_line_text};
1505 # _line_type codes are:
1506 # SYSTEM - system-specific code before hash-bang line
1507 # CODE - line of perl code (including comments)
1508 # POD_START - line starting pod, such as '=head'
1509 # POD - pod documentation text
1510 # POD_END - last line of pod section, '=cut'
1511 # HERE - text of here-document
1512 # HERE_END - last line of here-doc (target word)
1513 # FORMAT - format section
1514 # FORMAT_END - last line of format section, '.'
1515 # DATA_START - __DATA__ line
1516 # DATA - unidentified text following __DATA__
1517 # END_START - __END__ line
1518 # END - unidentified text following __END__
1519 # ERROR - we are in big trouble, probably not a perl script
1521 # put a blank line after an =cut which comes before __END__ and __DATA__
1522 # (required by podchecker)
1523 if ( $last_line_type eq 'POD_END' && !$saw_END_or_DATA_ ) {
1524 $file_writer_object->reset_consecutive_blank_lines();
1525 if ( !$in_format_skipping_section && $input_line !~ /^\s*$/ ) {
1526 $self->want_blank_line();
1530 # handle line of code..
1531 if ( $line_type eq 'CODE' ) {
1533 my $CODE_type = $line_of_tokens->{_code_type};
1534 $in_format_skipping_section = $CODE_type eq 'FS';
1536 # Handle blank lines
1537 if ( $CODE_type eq 'BL' ) {
1539 # If keep-old-blank-lines is zero, we delete all
1540 # old blank lines and let the blank line rules generate any
1543 # We also delete lines requested by the keyword-group logic
1544 my $kgb_keep = !( defined( $rwant_blank_line_after->{$i} )
1545 && $rwant_blank_line_after->{$i} == 2 );
1547 # But the keep-old-blank-lines flag has priority over kgb flags
1548 $kgb_keep = 1 if ( $rOpts_keep_old_blank_lines == 2 );
1550 if ( $rOpts_keep_old_blank_lines && $kgb_keep ) {
1552 $file_writer_object->write_blank_code_line(
1553 $rOpts_keep_old_blank_lines == 2 );
1554 $last_line_leading_type = 'b';
1560 # let logger see all non-blank lines of code
1561 my $output_line_number = get_output_line_number();
1562 black_box( $line_of_tokens, $output_line_number );
1565 # Handle Format Skipping (FS) and Verbatim (VB) Lines
1566 if ( $CODE_type eq 'VB' || $CODE_type eq 'FS' ) {
1567 $self->write_unindented_line("$input_line");
1568 $file_writer_object->reset_consecutive_blank_lines();
1572 # Handle all other lines of code
1573 $self->print_line_of_tokens($line_of_tokens);
1576 # handle line of non-code..
1582 if ( $line_type =~ /^POD/ ) {
1584 # Pod docs should have a preceding blank line. But stay
1585 # out of __END__ and __DATA__ sections, because
1586 # the user may be using this section for any purpose whatsoever
1587 if ( $rOpts->{'delete-pod'} ) { $skip_line = 1; }
1588 if ( $rOpts->{'tee-pod'} ) { $tee_line = 1; }
1589 if ( $rOpts->{'trim-pod'} ) { $input_line =~ s/\s+$// }
1591 && !$in_format_skipping_section
1592 && $line_type eq 'POD_START'
1593 && !$saw_END_or_DATA_ )
1595 $self->want_blank_line();
1599 # leave the blank counters in a predictable state
1600 # after __END__ or __DATA__
1601 elsif ( $line_type =~ /^(END_START|DATA_START)$/ ) {
1602 $file_writer_object->reset_consecutive_blank_lines();
1603 $saw_END_or_DATA_ = 1;
1606 # write unindented non-code line
1607 if ( !$skip_line ) {
1608 if ($tee_line) { $file_writer_object->tee_on() }
1609 $self->write_unindented_line($input_line);
1610 if ($tee_line) { $file_writer_object->tee_off() }
1617 { ## Beginning of routine to check line hashes
1619 my %valid_line_hash;
1623 # These keys are defined for each line in the formatter
1624 # Each line must have exactly these quantities
1625 my @valid_line_keys = qw(
1628 _guessed_indentation_level
1635 _square_bracket_depth
1637 _ended_in_blank_token
1646 @valid_line_hash{@valid_line_keys} = (1) x scalar(@valid_line_keys);
1649 sub check_line_hashes {
1651 $self->check_self_hash();
1652 my $rlines = $self->{rlines};
1653 foreach my $rline ( @{$rlines} ) {
1654 my $iline = $rline->{_line_number};
1655 my $line_type = $rline->{_line_type};
1656 check_keys( $rline, \%valid_line_hash,
1657 "Checkpoint: line number =$iline, line_type=$line_type", 1 );
1662 } ## End check line hashes
1666 # We are caching tokenized lines as they arrive and converting them to the
1667 # format needed for the final formatting.
1668 my ( $self, $line_of_tokens_old ) = @_;
1669 my $rLL = $self->{rLL};
1670 my $Klimit = $self->{Klimit};
1671 my $rlines_new = $self->{rlines};
1674 my $line_of_tokens = {};
1679 _guessed_indentation_level
1685 _square_bracket_depth
1690 $line_of_tokens->{$key} = $line_of_tokens_old->{$key};
1693 # Data needed by Logger
1694 $line_of_tokens->{_level_0} = 0;
1695 $line_of_tokens->{_ci_level_0} = 0;
1696 $line_of_tokens->{_nesting_blocks_0} = "";
1697 $line_of_tokens->{_nesting_tokens_0} = "";
1699 # Needed to avoid trimming quotes
1700 $line_of_tokens->{_ended_in_blank_token} = undef;
1702 my $line_type = $line_of_tokens_old->{_line_type};
1703 my $input_line_no = $line_of_tokens_old->{_line_number} - 1;
1704 if ( $line_type eq 'CODE' ) {
1706 my $rtokens = $line_of_tokens_old->{_rtokens};
1707 my $rtoken_type = $line_of_tokens_old->{_rtoken_type};
1708 my $rblock_type = $line_of_tokens_old->{_rblock_type};
1709 my $rcontainer_type = $line_of_tokens_old->{_rcontainer_type};
1710 my $rcontainer_environment =
1711 $line_of_tokens_old->{_rcontainer_environment};
1712 my $rtype_sequence = $line_of_tokens_old->{_rtype_sequence};
1713 my $rlevels = $line_of_tokens_old->{_rlevels};
1714 my $rslevels = $line_of_tokens_old->{_rslevels};
1715 my $rci_levels = $line_of_tokens_old->{_rci_levels};
1716 my $rnesting_blocks = $line_of_tokens_old->{_rnesting_blocks};
1717 my $rnesting_tokens = $line_of_tokens_old->{_rnesting_tokens};
1719 my $jmax = @{$rtokens} - 1;
1721 $Kfirst = defined($Klimit) ? $Klimit + 1 : 0;
1722 foreach my $j ( 0 .. $jmax ) {
1726 _BLOCK_TYPE_, _CONTAINER_TYPE_,
1727 _CONTAINER_ENVIRONMENT_, _TYPE_SEQUENCE_,
1728 _LEVEL_, _LEVEL_TRUE_,
1729 _SLEVEL_, _CI_LEVEL_,
1733 $rtokens->[$j], $rtoken_type->[$j],
1734 $rblock_type->[$j], $rcontainer_type->[$j],
1735 $rcontainer_environment->[$j], $rtype_sequence->[$j],
1736 $rlevels->[$j], $rlevels->[$j],
1737 $rslevels->[$j], $rci_levels->[$j],
1740 push @{$rLL}, \@tokary;
1743 $Klimit = @{$rLL} - 1;
1745 # Need to remember if we can trim the input line
1746 $line_of_tokens->{_ended_in_blank_token} =
1747 $rtoken_type->[$jmax] eq 'b';
1749 $line_of_tokens->{_level_0} = $rlevels->[0];
1750 $line_of_tokens->{_ci_level_0} = $rci_levels->[0];
1751 $line_of_tokens->{_nesting_blocks_0} = $rnesting_blocks->[0];
1752 $line_of_tokens->{_nesting_tokens_0} = $rnesting_tokens->[0];
1756 $line_of_tokens->{_rK_range} = [ $Kfirst, $Klimit ];
1757 $line_of_tokens->{_code_type} = "";
1758 $self->{Klimit} = $Klimit;
1760 push @{$rlines_new}, $line_of_tokens;
1764 sub initialize_whitespace_hashes {
1766 # initialize these global hashes, which control the use of
1767 # whitespace around tokens:
1772 # %space_after_keyword
1774 # Many token types are identical to the tokens themselves.
1775 # See the tokenizer for a complete list. Here are some special types:
1777 # f = semicolon in for statement
1780 # Note that :: is excluded since it should be contained in an identifier
1781 # Note that '->' is excluded because it never gets space
1782 # parentheses and brackets are excluded since they are handled specially
1783 # curly braces are included but may be overridden by logic, such as
1786 # NEW_TOKENS: create a whitespace rule here. This can be as
1787 # simple as adding your new letter to @spaces_both_sides, for
1790 my @opening_type = qw< L { ( [ >;
1791 @is_opening_type{@opening_type} = (1) x scalar(@opening_type);
1793 my @closing_type = qw< R } ) ] >;
1794 @is_closing_type{@closing_type} = (1) x scalar(@closing_type);
1796 my @spaces_both_sides = qw#
1797 + - * / % ? = . : x < > | & ^ .. << >> ** && .. || // => += -=
1798 .= %= x= &= |= ^= *= <> <= >= == =~ !~ /= != ... <<= >>= ~~ !~~
1799 &&= ||= //= <=> A k f w F n C Y U G v
1802 my @spaces_left_side = qw<
1803 t ! ~ m p { \ h pp mm Z j
1805 push( @spaces_left_side, '#' ); # avoids warning message
1807 my @spaces_right_side = qw<
1808 ; } ) ] R J ++ -- **=
1810 push( @spaces_right_side, ',' ); # avoids warning message
1812 # Note that we are in a BEGIN block here. Later in processing
1813 # the values of %want_left_space and %want_right_space
1814 # may be overridden by any user settings specified by the
1815 # -wls and -wrs parameters. However the binary_whitespace_rules
1816 # are hardwired and have priority.
1817 @want_left_space{@spaces_both_sides} =
1818 (1) x scalar(@spaces_both_sides);
1819 @want_right_space{@spaces_both_sides} =
1820 (1) x scalar(@spaces_both_sides);
1821 @want_left_space{@spaces_left_side} =
1822 (1) x scalar(@spaces_left_side);
1823 @want_right_space{@spaces_left_side} =
1824 (-1) x scalar(@spaces_left_side);
1825 @want_left_space{@spaces_right_side} =
1826 (-1) x scalar(@spaces_right_side);
1827 @want_right_space{@spaces_right_side} =
1828 (1) x scalar(@spaces_right_side);
1829 $want_left_space{'->'} = WS_NO;
1830 $want_right_space{'->'} = WS_NO;
1831 $want_left_space{'**'} = WS_NO;
1832 $want_right_space{'**'} = WS_NO;
1833 $want_right_space{'CORE::'} = WS_NO;
1835 # These binary_ws_rules are hardwired and have priority over the above
1836 # settings. It would be nice to allow adjustment by the user,
1837 # but it would be complicated to specify.
1839 # hash type information must stay tightly bound
1841 $binary_ws_rules{'i'}{'L'} = WS_NO;
1842 $binary_ws_rules{'i'}{'{'} = WS_YES;
1843 $binary_ws_rules{'k'}{'{'} = WS_YES;
1844 $binary_ws_rules{'U'}{'{'} = WS_YES;
1845 $binary_ws_rules{'i'}{'['} = WS_NO;
1846 $binary_ws_rules{'R'}{'L'} = WS_NO;
1847 $binary_ws_rules{'R'}{'{'} = WS_NO;
1848 $binary_ws_rules{'t'}{'L'} = WS_NO;
1849 $binary_ws_rules{'t'}{'{'} = WS_NO;
1850 $binary_ws_rules{'}'}{'L'} = WS_NO;
1851 $binary_ws_rules{'}'}{'{'} = WS_NO;
1852 $binary_ws_rules{'$'}{'L'} = WS_NO;
1853 $binary_ws_rules{'$'}{'{'} = WS_NO;
1854 $binary_ws_rules{'@'}{'L'} = WS_NO;
1855 $binary_ws_rules{'@'}{'{'} = WS_NO;
1856 $binary_ws_rules{'='}{'L'} = WS_YES;
1857 $binary_ws_rules{'J'}{'J'} = WS_YES;
1859 # the following includes ') {'
1860 # as in : if ( xxx ) { yyy }
1861 $binary_ws_rules{']'}{'L'} = WS_NO;
1862 $binary_ws_rules{']'}{'{'} = WS_NO;
1863 $binary_ws_rules{')'}{'{'} = WS_YES;
1864 $binary_ws_rules{')'}{'['} = WS_NO;
1865 $binary_ws_rules{']'}{'['} = WS_NO;
1866 $binary_ws_rules{']'}{'{'} = WS_NO;
1867 $binary_ws_rules{'}'}{'['} = WS_NO;
1868 $binary_ws_rules{'R'}{'['} = WS_NO;
1870 $binary_ws_rules{']'}{'++'} = WS_NO;
1871 $binary_ws_rules{']'}{'--'} = WS_NO;
1872 $binary_ws_rules{')'}{'++'} = WS_NO;
1873 $binary_ws_rules{')'}{'--'} = WS_NO;
1875 $binary_ws_rules{'R'}{'++'} = WS_NO;
1876 $binary_ws_rules{'R'}{'--'} = WS_NO;
1878 $binary_ws_rules{'i'}{'Q'} = WS_YES;
1879 $binary_ws_rules{'n'}{'('} = WS_YES; # occurs in 'use package n ()'
1881 # FIXME: we could to split 'i' into variables and functions
1882 # and have no space for functions but space for variables. For now,
1883 # I have a special patch in the special rules below
1884 $binary_ws_rules{'i'}{'('} = WS_NO;
1886 $binary_ws_rules{'w'}{'('} = WS_NO;
1887 $binary_ws_rules{'w'}{'{'} = WS_YES;
1890 } ## end initialize_whitespace_hashes
1892 sub set_whitespace_flags {
1894 # This routine examines each pair of nonblank tokens and
1895 # sets a flag indicating if white space is needed.
1897 # $rwhitespace_flags->[$j] is a flag indicating whether a white space
1898 # BEFORE token $j is needed, with the following values:
1900 # WS_NO = -1 do not want a space before token $j
1901 # WS_OPTIONAL= 0 optional space or $j is a whitespace
1902 # WS_YES = 1 want a space before token $j
1906 my $rLL = $self->{rLL};
1908 my $rwhitespace_flags = [];
1910 my ( $last_token, $last_type, $last_block_type, $last_input_line_no,
1911 $token, $type, $block_type, $input_line_no );
1912 my $j_tight_closing_paren = -1;
1920 $last_block_type = '';
1921 $last_input_line_no = 0;
1923 my $jmax = @{$rLL} - 1;
1927 # This is some logic moved to a sub to avoid deep nesting of if stmts
1928 my $ws_in_container = sub {
1932 if ( $j + 1 > $jmax ) { return (WS_NO) }
1934 # Patch to count '-foo' as single token so that
1935 # each of $a{-foo} and $a{foo} and $a{'foo'} do
1936 # not get spaces with default formatting.
1940 && $last_token eq '{'
1941 && $rLL->[ $j + 1 ]->[_TYPE_] eq 'w' );
1943 # $j_next is where a closing token should be if
1944 # the container has a single token
1945 if ( $j_here + 1 > $jmax ) { return (WS_NO) }
1947 ( $rLL->[ $j_here + 1 ]->[_TYPE_] eq 'b' )
1951 if ( $j_next > $jmax ) { return WS_NO }
1952 my $tok_next = $rLL->[$j_next]->[_TOKEN_];
1953 my $type_next = $rLL->[$j_next]->[_TYPE_];
1955 # for tightness = 1, if there is just one token
1956 # within the matching pair, we will keep it tight
1958 $tok_next eq $matching_token{$last_token}
1960 # but watch out for this: [ [ ] (misc.t)
1961 && $last_token ne $token
1963 # double diamond is usually spaced
1969 # remember where to put the space for the closing paren
1970 $j_tight_closing_paren = $j_next;
1976 # main loop over all tokens to define the whitespace flags
1977 for ( my $j = 0 ; $j <= $jmax ; $j++ ) {
1979 my $rtokh = $rLL->[$j];
1982 $rwhitespace_flags->[$j] = WS_OPTIONAL;
1984 if ( $rtokh->[_TYPE_] eq 'b' ) {
1988 # set a default value, to be changed as needed
1990 $last_token = $token;
1992 $last_block_type = $block_type;
1993 $last_input_line_no = $input_line_no;
1994 $token = $rtokh->[_TOKEN_];
1995 $type = $rtokh->[_TYPE_];
1996 $block_type = $rtokh->[_BLOCK_TYPE_];
1997 $input_line_no = $rtokh->[_LINE_INDEX_];
1999 #---------------------------------------------------------------
2000 # Whitespace Rules Section 1:
2001 # Handle space on the inside of opening braces.
2002 #---------------------------------------------------------------
2005 if ( $is_opening_type{$last_type} ) {
2007 $j_tight_closing_paren = -1;
2009 # let us keep empty matched braces together: () {} []
2011 if ( $token eq $matching_token{$last_token} ) {
2021 # we're considering the right of an opening brace
2022 # tightness = 0 means always pad inside with space
2023 # tightness = 1 means pad inside if "complex"
2024 # tightness = 2 means never pad inside with space
2027 if ( $last_type eq '{'
2028 && $last_token eq '{'
2029 && $last_block_type )
2031 $tightness = $rOpts_block_brace_tightness;
2033 else { $tightness = $tightness{$last_token} }
2035 #=============================================================
2036 # Patch for test problem <<snippets/fabrice_bug.in>>
2037 # We must always avoid spaces around a bare word beginning
2039 # my $before = ${^PREMATCH};
2040 # Because all of the following cause an error in perl:
2041 # my $before = ${ ^PREMATCH };
2042 # my $before = ${ ^PREMATCH};
2043 # my $before = ${^PREMATCH };
2044 # So if brace tightness flag is -bt=0 we must temporarily reset
2045 # to bt=1. Note that here we must set tightness=1 and not 2 so
2046 # that the closing space
2047 # is also avoided (via the $j_tight_closing_paren flag in coding)
2048 if ( $type eq 'w' && $token =~ /^\^/ ) { $tightness = 1 }
2050 #=============================================================
2052 if ( $tightness <= 0 ) {
2055 elsif ( $tightness > 1 ) {
2059 $ws = $ws_in_container->($j);
2062 } # end setting space flag inside opening tokens
2065 if FORMATTER_DEBUG_FLAG_WHITE;
2067 #---------------------------------------------------------------
2068 # Whitespace Rules Section 2:
2069 # Handle space on inside of closing brace pairs.
2070 #---------------------------------------------------------------
2073 if ( $is_closing_type{$type} ) {
2075 if ( $j == $j_tight_closing_paren ) {
2077 $j_tight_closing_paren = -1;
2082 if ( !defined($ws) ) {
2085 if ( $type eq '}' && $token eq '}' && $block_type ) {
2086 $tightness = $rOpts_block_brace_tightness;
2088 else { $tightness = $tightness{$token} }
2090 $ws = ( $tightness > 1 ) ? WS_NO : WS_YES;
2093 } # end setting space flag inside closing tokens
2097 if FORMATTER_DEBUG_FLAG_WHITE;
2099 #---------------------------------------------------------------
2100 # Whitespace Rules Section 3:
2101 # Use the binary rule table.
2102 #---------------------------------------------------------------
2103 if ( !defined($ws) ) {
2104 $ws = $binary_ws_rules{$last_type}{$type};
2108 if FORMATTER_DEBUG_FLAG_WHITE;
2110 #---------------------------------------------------------------
2111 # Whitespace Rules Section 4:
2112 # Handle some special cases.
2113 #---------------------------------------------------------------
2114 if ( $token eq '(' ) {
2116 # This will have to be tweaked as tokenization changes.
2117 # We usually want a space at '} (', for example:
2118 # <<snippets/space1.in>>
2119 # map { 1 * $_; } ( $y, $M, $w, $d, $h, $m, $s );
2122 # &{ $_->[1] }( delete $_[$#_]{ $_->[0] } );
2123 # At present, the above & block is marked as type L/R so this case
2124 # won't go through here.
2125 if ( $last_type eq '}' ) { $ws = WS_YES }
2127 # NOTE: some older versions of Perl had occasional problems if
2128 # spaces are introduced between keywords or functions and opening
2129 # parens. So the default is not to do this except is certain
2130 # cases. The current Perl seems to tolerate spaces.
2132 # Space between keyword and '('
2133 elsif ( $last_type eq 'k' ) {
2135 unless ( $rOpts_space_keyword_paren
2136 || $space_after_keyword{$last_token} );
2139 # Space between function and '('
2140 # -----------------------------------------------------
2141 # 'w' and 'i' checks for something like:
2142 # myfun( &myfun( ->myfun(
2143 # -----------------------------------------------------
2144 elsif (( $last_type =~ /^[wUG]$/ )
2145 || ( $last_type =~ /^[wi]$/ && $last_token =~ /^(\&|->)/ ) )
2147 $ws = WS_NO unless ($rOpts_space_function_paren);
2150 # space between something like $i and ( in <<snippets/space2.in>>
2151 # for $i ( 0 .. 20 ) {
2152 # FIXME: eventually, type 'i' needs to be split into multiple
2153 # token types so this can be a hardwired rule.
2154 elsif ( $last_type eq 'i' && $last_token =~ /^[\$\%\@]/ ) {
2158 # allow constant function followed by '()' to retain no space
2159 elsif ($last_type eq 'C'
2160 && $rLL->[ $j + 1 ]->[_TOKEN_] eq ')' )
2166 # patch for SWITCH/CASE: make space at ']{' optional
2167 # since the '{' might begin a case or when block
2168 elsif ( ( $token eq '{' && $type ne 'L' ) && $last_token eq ']' ) {
2172 # keep space between 'sub' and '{' for anonymous sub definition
2173 if ( $type eq '{' ) {
2174 if ( $last_token eq 'sub' ) {
2178 # this is needed to avoid no space in '){'
2179 if ( $last_token eq ')' && $token eq '{' ) { $ws = WS_YES }
2181 # avoid any space before the brace or bracket in something like
2182 # @opts{'a','b',...}
2183 if ( $last_type eq 'i' && $last_token =~ /^\@/ ) {
2188 elsif ( $type eq 'i' ) {
2190 # never a space before ->
2191 if ( $token =~ /^\-\>/ ) {
2196 # retain any space between '-' and bare word
2197 elsif ( $type eq 'w' || $type eq 'C' ) {
2198 $ws = WS_OPTIONAL if $last_type eq '-';
2200 # never a space before ->
2201 if ( $token =~ /^\-\>/ ) {
2206 # retain any space between '-' and bare word; for example
2207 # avoid space between 'USER' and '-' here: <<snippets/space2.in>>
2208 # $myhash{USER-NAME}='steve';
2209 elsif ( $type eq 'm' || $type eq '-' ) {
2210 $ws = WS_OPTIONAL if ( $last_type eq 'w' );
2213 # always space before side comment
2214 elsif ( $type eq '#' ) { $ws = WS_YES if $j > 0 }
2216 # always preserver whatever space was used after a possible
2217 # filehandle (except _) or here doc operator
2220 && ( ( $last_type eq 'Z' && $last_token ne '_' )
2221 || $last_type eq 'h' )
2227 # space_backslash_quote; RT #123774 <<snippets/rt123774.in>>
2228 # allow a space between a backslash and single or double quote
2229 # to avoid fooling html formatters
2230 elsif ( $last_type eq '\\' && $type eq 'Q' && $token =~ /^[\"\']/ ) {
2231 if ($rOpts_space_backslash_quote) {
2232 if ( $rOpts_space_backslash_quote == 1 ) {
2235 elsif ( $rOpts_space_backslash_quote == 2 ) { $ws = WS_YES }
2236 else { } # shouldnt happen
2245 if FORMATTER_DEBUG_FLAG_WHITE;
2247 #---------------------------------------------------------------
2248 # Whitespace Rules Section 5:
2249 # Apply default rules not covered above.
2250 #---------------------------------------------------------------
2252 # If we fall through to here, look at the pre-defined hash tables for
2253 # the two tokens, and:
2254 # if (they are equal) use the common value
2255 # if (either is zero or undef) use the other
2256 # if (either is -1) use it
2270 if ( !defined($ws) ) {
2271 my $wl = $want_left_space{$type};
2272 my $wr = $want_right_space{$last_type};
2273 if ( !defined($wl) ) { $wl = 0 }
2274 if ( !defined($wr) ) { $wr = 0 }
2275 $ws = ( ( $wl == $wr ) || ( $wl == -1 ) || !$wr ) ? $wl : $wr;
2278 if ( !defined($ws) ) {
2281 "WS flag is undefined for tokens $last_token $token\n");
2284 # Treat newline as a whitespace. Otherwise, we might combine
2285 # 'Send' and '-recipients' here according to the above rules:
2286 # <<snippets/space3.in>>
2287 # my $msg = new Fax::Send
2288 # -recipients => $to,
2290 if ( $ws == 0 && $input_line_no != $last_input_line_no ) { $ws = 1 }
2295 && ( $last_type !~ /^[Zh]$/ ) )
2298 # If this happens, we have a non-fatal but undesirable
2299 # hole in the above rules which should be patched.
2301 "WS flag is zero for tokens $last_token $token\n");
2304 $rwhitespace_flags->[$j] = $ws;
2306 FORMATTER_DEBUG_FLAG_WHITE && do {
2307 my $str = substr( $last_token, 0, 15 );
2308 $str .= ' ' x ( 16 - length($str) );
2309 if ( !defined($ws_1) ) { $ws_1 = "*" }
2310 if ( !defined($ws_2) ) { $ws_2 = "*" }
2311 if ( !defined($ws_3) ) { $ws_3 = "*" }
2312 if ( !defined($ws_4) ) { $ws_4 = "*" }
2314 "NEW WHITE: i=$j $str $last_type $type $ws_1 : $ws_2 : $ws_3 : $ws_4 : $ws \n";
2318 if ( $rOpts->{'tight-secret-operators'} ) {
2319 new_secret_operator_whitespace( $rLL, $rwhitespace_flags );
2321 return $rwhitespace_flags;
2322 } ## end sub set_whitespace_flags
2324 sub respace_tokens {
2327 return if $rOpts->{'indent-only'};
2329 # This routine makes all necessary changes to the tokenization after the
2330 # file has been read. This consists mostly of inserting and deleting spaces
2331 # according to the selected parameters. In a few cases non-space characters
2332 # are added, deleted or modified.
2334 # The old tokens are copied one-by-one, with changes, from the old
2335 # linear storage array to a new array.
2337 my $rLL = $self->{rLL};
2338 my $Klimit_old = $self->{Klimit};
2339 my $rlines = $self->{rlines};
2340 my $rpaired_to_inner_container = $self->{rpaired_to_inner_container};
2342 my $rLL_new = []; # This is the new array
2345 my $Kmax = @{$rLL} - 1;
2347 # Set the whitespace flags, which indicate the token spacing preference.
2348 my $rwhitespace_flags = $self->set_whitespace_flags();
2350 # we will be setting token lengths as we go
2351 my $cumulative_length = 0;
2353 # We also define these hash indexes giving container token array indexes
2354 # as a function of the container sequence numbers. For example,
2355 my $K_opening_container = {}; # opening [ { or (
2356 my $K_closing_container = {}; # closing ] } or )
2357 my $K_opening_ternary = {}; # opening ? of ternary
2358 my $K_closing_ternary = {}; # closing : of ternary
2360 # List of new K indexes of phantom semicolons
2361 # This will be needed if we want to undo them for iterations
2362 my $rK_phantom_semicolons = [];
2364 # Temporary hashes for adding semicolons
2365 ##my $rKfirst_new = {};
2367 # a sub to link preceding nodes forward to a new node type
2368 my $link_back = sub {
2369 my ( $Ktop, $key ) = @_;
2371 my $Kprev = $Ktop - 1;
2373 && !defined( $rLL_new->[$Kprev]->[$key] ) )
2375 $rLL_new->[$Kprev]->[$key] = $Ktop;
2380 # A sub to store one token in the new array
2381 # All new tokens must be stored by this sub so that it can update
2382 # all data structures on the fly.
2383 my $last_nonblank_type = ';';
2384 my $store_token = sub {
2387 # This will be the index of this item in the new array
2388 my $KK_new = @{$rLL_new};
2390 # check for a sequenced item (i.e., container or ?/:)
2391 my $type_sequence = $item->[_TYPE_SEQUENCE_];
2392 if ($type_sequence) {
2394 $link_back->( $KK_new, _KNEXT_SEQ_ITEM_ );
2396 my $token = $item->[_TOKEN_];
2397 if ( $is_opening_token{$token} ) {
2399 $K_opening_container->{$type_sequence} = $KK_new;
2401 elsif ( $is_closing_token{$token} ) {
2403 $K_closing_container->{$type_sequence} = $KK_new;
2406 # These are not yet used but could be useful
2408 if ( $token eq '?' ) {
2409 $K_opening_ternary->{$type_sequence} = $KK_new;
2411 elsif ( $token eq ':' ) {
2412 $K_closing_ternary->{$type_sequence} = $KK_new;
2416 Fault("Ugh: shouldn't happen");
2421 # find the length of this token
2422 my $token_length = length( $item->[_TOKEN_] );
2424 # and update the cumulative length
2425 $cumulative_length += $token_length;
2427 # Save the length sum to just AFTER this token
2428 $item->[_CUMULATIVE_LENGTH_] = $cumulative_length;
2430 my $type = $item->[_TYPE_];
2431 if ( $type ne 'b' ) { $last_nonblank_type = $type }
2433 # and finally, add this item to the new array
2434 push @{$rLL_new}, $item;
2437 my $store_token_and_space = sub {
2438 my ( $item, $want_space ) = @_;
2440 # store a token with preceding space if requested and needed
2442 # First store the space
2445 && $rLL_new->[-1]->[_TYPE_] ne 'b'
2446 && $rOpts_add_whitespace )
2448 my $rcopy = copy_token_as_type( $item, 'b', ' ' );
2449 $rcopy->[_LINE_INDEX_] =
2450 $rLL_new->[-1]->[_LINE_INDEX_];
2451 $store_token->($rcopy);
2455 $store_token->($item);
2461 my $Kn = $self->K_next_nonblank($KK);
2462 while ( defined($Kn) && $rLL->[$Kn]->[_TYPE_] eq 'q' ) {
2464 $Kn = $self->K_next_nonblank($Kn);
2469 my $add_phantom_semicolon = sub {
2473 my $Kp = $self->K_previous_nonblank( undef, $rLL_new );
2474 return unless ( defined($Kp) );
2476 # we are only adding semicolons for certain block types
2477 my $block_type = $rLL->[$KK]->[_BLOCK_TYPE_];
2479 unless ( $ok_to_add_semicolon_for_block_type{$block_type}
2480 || $block_type =~ /^(sub|package)/
2481 || $block_type =~ /^\w+\:$/ );
2483 my $type_sequence = $rLL->[$KK]->[_TYPE_SEQUENCE_];
2485 my $previous_nonblank_type = $rLL_new->[$Kp]->[_TYPE_];
2486 my $previous_nonblank_token = $rLL_new->[$Kp]->[_TOKEN_];
2488 # Do not add a semicolon if...
2492 # it would follow a comment (and be isolated)
2493 $previous_nonblank_type eq '#'
2495 # it follows a code block ( because they are not always wanted
2496 # there and may add clutter)
2497 || $rLL_new->[$Kp]->[_BLOCK_TYPE_]
2499 # it would follow a label
2500 || $previous_nonblank_type eq 'J'
2502 # it would be inside a 'format' statement (and cause syntax error)
2503 || ( $previous_nonblank_type eq 'k'
2504 && $previous_nonblank_token =~ /format/ )
2506 # if it would prevent welding two containers
2507 || $rpaired_to_inner_container->{$type_sequence}
2511 # We will insert an empty semicolon here as a placeholder. Later, if
2512 # it becomes the last token on a line, we will bring it to life. The
2513 # advantage of doing this is that (1) we just have to check line
2514 # endings, and (2) the phantom semicolon has zero width and therefore
2515 # won't cause needless breaks of one-line blocks.
2517 if ( $rLL_new->[$Ktop]->[_TYPE_] eq 'b'
2518 && $want_left_space{';'} == WS_NO )
2521 # convert the blank into a semicolon..
2522 # be careful: we are working on the new stack top
2523 # on a token which has been stored.
2524 my $rcopy = copy_token_as_type( $rLL_new->[$Ktop], 'b', ' ' );
2526 # Convert the existing blank to:
2527 # a phantom semicolon for one_line_block option = 0 or 1
2528 # a real semicolon for one_line_block option = 2
2529 my $tok = $rOpts_one_line_block_semicolons == 2 ? ';' : '';
2531 $rLL_new->[$Ktop]->[_TOKEN_] = $tok; # zero length if phantom
2532 $rLL_new->[$Ktop]->[_TYPE_] = ';';
2533 $rLL_new->[$Ktop]->[_SLEVEL_] =
2534 $rLL->[$KK]->[_SLEVEL_];
2536 push @{$rK_phantom_semicolons}, @{$rLL_new} - 1;
2538 # Then store a new blank
2539 $store_token->($rcopy);
2543 # insert a new token
2544 my $rcopy = copy_token_as_type( $rLL_new->[$Kp], ';', '' );
2545 $rcopy->[_SLEVEL_] = $rLL->[$KK]->[_SLEVEL_];
2546 $store_token->($rcopy);
2547 push @{$rK_phantom_semicolons}, @{$rLL_new} - 1;
2553 # Check that a quote looks okay
2554 # This sub works but needs to by sync'd with the log file output
2555 # before it can be used.
2556 my ( $KK, $Kfirst ) = @_;
2557 my $token = $rLL->[$KK]->[_TOKEN_];
2558 note_embedded_tab() if ( $token =~ "\t" );
2560 my $Kp = $self->K_previous_nonblank( undef, $rLL_new );
2561 return unless ( defined($Kp) );
2562 my $previous_nonblank_type = $rLL_new->[$Kp]->[_TYPE_];
2563 my $previous_nonblank_token = $rLL_new->[$Kp]->[_TOKEN_];
2565 my $previous_nonblank_type_2 = 'b';
2566 my $previous_nonblank_token_2 = "";
2567 my $Kpp = $self->K_previous_nonblank( $Kp, $rLL_new );
2568 if ( defined($Kpp) ) {
2569 $previous_nonblank_type_2 = $rLL_new->[$Kpp]->[_TYPE_];
2570 $previous_nonblank_token_2 = $rLL_new->[$Kpp]->[_TOKEN_];
2573 my $Kn = $self->K_next_nonblank($KK);
2574 my $next_nonblank_token = "";
2575 if ( defined($Kn) ) {
2576 $next_nonblank_token = $rLL->[$Kn]->[_TOKEN_];
2579 my $token_0 = $rLL->[$Kfirst]->[_TOKEN_];
2580 my $type_0 = $rLL->[$Kfirst]->[_TYPE_];
2582 # make note of something like '$var = s/xxx/yyy/;'
2583 # in case it should have been '$var =~ s/xxx/yyy/;'
2585 $token =~ /^(s|tr|y|m|\/)/
2586 && $previous_nonblank_token =~ /^(=|==|!=)$/
2588 # preceded by simple scalar
2589 && $previous_nonblank_type_2 eq 'i'
2590 && $previous_nonblank_token_2 =~ /^\$/
2592 # followed by some kind of termination
2593 # (but give complaint if we can not see far enough ahead)
2594 && $next_nonblank_token =~ /^[; \)\}]$/
2596 # scalar is not declared
2597 && !( $type_0 eq 'k' && $token_0 =~ /^(my|our|local)$/ )
2600 my $guess = substr( $last_nonblank_token, 0, 1 ) . '~';
2602 "Note: be sure you want '$previous_nonblank_token' instead of '$guess' here\n"
2607 # Main loop over all lines of the file
2612 # Testing option to break qw. Do not use; it can make a mess.
2613 my $ALLOW_BREAK_MULTILINE_QW = 0;
2614 my $in_multiline_qw;
2615 foreach my $line_of_tokens ( @{$rlines} ) {
2617 $input_line_number = $line_of_tokens->{_line_number};
2618 my $last_line_type = $line_type;
2619 $line_type = $line_of_tokens->{_line_type};
2620 next unless ( $line_type eq 'CODE' );
2621 my $last_CODE_type = $CODE_type;
2622 $CODE_type = $line_of_tokens->{_code_type};
2623 my $rK_range = $line_of_tokens->{_rK_range};
2624 my ( $Kfirst, $Klast ) = @{$rK_range};
2625 next unless defined($Kfirst);
2627 # Check for correct sequence of token indexes...
2628 # An error here means that sub write_line() did not correctly
2629 # package the tokenized lines as it received them.
2630 if ( defined($last_K_out) ) {
2631 if ( $Kfirst != $last_K_out + 1 ) {
2633 "Program Bug: last K out was $last_K_out but Kfirst=$Kfirst"
2638 if ( $Kfirst != 0 ) {
2639 Fault("Program Bug: first K is $Kfirst but should be 0");
2642 $last_K_out = $Klast;
2644 # Handle special lines of code
2645 if ( $CODE_type && $CODE_type ne 'NIN' && $CODE_type ne 'VER' ) {
2647 # CODE_types are as follows.
2649 # 'VB' = Verbatim - line goes out verbatim
2650 # 'FS' = Format Skipping - line goes out verbatim, no blanks
2651 # 'IO' = Indent Only - only indentation may be changed
2652 # 'NIN' = No Internal Newlines - line does not get broken
2653 # 'HSC'=Hanging Side Comment - fix this hanging side comment
2654 # 'BC'=Block Comment - an ordinary full line comment
2655 # 'SBC'=Static Block Comment - a block comment which does not get
2657 # 'SBCX'=Static Block Comment Without Leading Space
2658 # 'DEL'=Delete this line
2659 # 'VER'=VERSION statement
2660 # '' or (undefined) - no restructions
2662 # For a hanging side comment we insert an empty quote before
2663 # the comment so that it becomes a normal side comment and
2664 # will be aligned by the vertical aligner
2665 if ( $CODE_type eq 'HSC' ) {
2667 # Safety Check: This must be a line with one token (a comment)
2668 my $rtoken_vars = $rLL->[$Kfirst];
2669 if ( $Kfirst == $Klast && $rtoken_vars->[_TYPE_] eq '#' ) {
2671 # Note that even if the flag 'noadd-whitespace' is set, we
2672 # will make an exception here and allow a blank to be
2673 # inserted to push the comment to the right. We can think
2674 # of this as an adjustment of indentation rather than
2675 # whitespace between tokens. This will also prevent the
2676 # hanging side comment from getting converted to a block
2677 # comment if whitespace gets deleted, as for example with
2678 # the -extrude and -mangle options.
2679 my $rcopy = copy_token_as_type( $rtoken_vars, 'q', '' );
2680 $store_token->($rcopy);
2681 $rcopy = copy_token_as_type( $rtoken_vars, 'b', ' ' );
2682 $store_token->($rcopy);
2683 $store_token->($rtoken_vars);
2688 # This line was mis-marked by sub scan_comment
2690 "Program bug. A hanging side comment has been mismarked"
2695 # Copy tokens unchanged
2696 foreach my $KK ( $Kfirst .. $Klast ) {
2697 $store_token->( $rLL->[$KK] );
2702 # Handle normal line..
2704 # Insert any essential whitespace between lines
2705 # if last line was normal CODE.
2706 # Patch for rt #125012: use K_previous_code rather than '_nonblank'
2707 # because comments may disappear.
2708 my $type_next = $rLL->[$Kfirst]->[_TYPE_];
2709 my $token_next = $rLL->[$Kfirst]->[_TOKEN_];
2710 my $Kp = $self->K_previous_code( undef, $rLL_new );
2711 if ( $last_line_type eq 'CODE'
2712 && $type_next ne 'b'
2715 my $token_p = $rLL_new->[$Kp]->[_TOKEN_];
2716 my $type_p = $rLL_new->[$Kp]->[_TYPE_];
2718 my ( $token_pp, $type_pp );
2719 my $Kpp = $self->K_previous_code( $Kp, $rLL_new );
2720 if ( defined($Kpp) ) {
2721 $token_pp = $rLL_new->[$Kpp]->[_TOKEN_];
2722 $type_pp = $rLL_new->[$Kpp]->[_TYPE_];
2730 is_essential_whitespace(
2731 $token_pp, $type_pp, $token_p,
2732 $type_p, $token_next, $type_next,
2737 # Copy this first token as blank, but use previous line number
2738 my $rcopy = copy_token_as_type( $rLL->[$Kfirst], 'b', ' ' );
2739 $rcopy->[_LINE_INDEX_] =
2740 $rLL_new->[-1]->[_LINE_INDEX_];
2741 $store_token->($rcopy);
2745 # loop to copy all tokens on this line, with any changes
2747 for ( my $KK = $Kfirst ; $KK <= $Klast ; $KK++ ) {
2748 $rtoken_vars = $rLL->[$KK];
2749 my $token = $rtoken_vars->[_TOKEN_];
2750 my $type = $rtoken_vars->[_TYPE_];
2751 my $last_type_sequence = $type_sequence;
2752 $type_sequence = $rtoken_vars->[_TYPE_SEQUENCE_];
2754 # Handle a blank space ...
2755 if ( $type eq 'b' ) {
2757 # Delete it if not wanted by whitespace rules
2758 # or we are deleting all whitespace
2759 # Note that whitespace flag is a flag indicating whether a
2760 # white space BEFORE the token is needed
2761 next if ( $KK >= $Kmax ); # skip terminal blank
2762 my $Knext = $KK + 1;
2763 my $ws = $rwhitespace_flags->[$Knext];
2765 || $rOpts_delete_old_whitespace )
2768 # FIXME: maybe switch to using _new
2769 my $Kp = $self->K_previous_nonblank($KK);
2770 next unless defined($Kp);
2771 my $token_p = $rLL->[$Kp]->[_TOKEN_];
2772 my $type_p = $rLL->[$Kp]->[_TYPE_];
2774 my ( $token_pp, $type_pp );
2776 #my $Kpp = $K_previous_nonblank->($Kp);
2777 my $Kpp = $self->K_previous_nonblank($Kp);
2778 if ( defined($Kpp) ) {
2779 $token_pp = $rLL->[$Kpp]->[_TOKEN_];
2780 $type_pp = $rLL->[$Kpp]->[_TYPE_];
2786 my $token_next = $rLL->[$Knext]->[_TOKEN_];
2787 my $type_next = $rLL->[$Knext]->[_TYPE_];
2789 my $do_not_delete = is_essential_whitespace(
2790 $token_pp, $type_pp, $token_p,
2791 $type_p, $token_next, $type_next,
2794 next unless ($do_not_delete);
2797 # make it just one character if allowed
2798 if ($rOpts_add_whitespace) {
2799 $rtoken_vars->[_TOKEN_] = ' ';
2801 $store_token->($rtoken_vars);
2805 # Handle a nonblank token...
2807 # check for a qw quote
2808 if ( $type eq 'q' ) {
2810 # trim blanks from right of qw quotes
2811 # (To avoid trimming qw quotes use -ntqw; the tokenizer handles
2814 $rtoken_vars->[_TOKEN_] = $token;
2815 note_embedded_tab() if ( $token =~ "\t" );
2817 if ($in_multiline_qw) {
2819 # If we are at the end of a multiline qw ..
2820 if ( $in_multiline_qw == $KK ) {
2822 # Split off the closing delimiter character
2823 # so that the formatter can put a line break there if necessary
2825 my $part2 = substr( $part1, -1, 1, "" );
2829 copy_token_as_type( $rtoken_vars, 'q', $part1 );
2830 $store_token->($rcopy);
2832 $rtoken_vars->[_TOKEN_] = $token;
2835 $in_multiline_qw = undef;
2837 # store without preceding blank
2838 $store_token->($rtoken_vars);
2842 # continuing a multiline qw
2843 $store_token->($rtoken_vars);
2850 # we are encountered new qw token...see if multiline
2851 my $K_end = $K_end_q->($KK);
2852 if ( $ALLOW_BREAK_MULTILINE_QW && $K_end != $KK ) {
2854 # Starting multiline qw...
2855 # set flag equal to the ending K
2856 $in_multiline_qw = $K_end;
2858 # Split off the leading part
2859 # so that the formatter can put a line break there if necessary
2860 if ( $token =~ /^(qw\s*.)(.*)$/ ) {
2865 copy_token_as_type( $rtoken_vars, 'q',
2867 $store_token_and_space->(
2868 $rcopy, $rwhitespace_flags->[$KK] == WS_YES
2871 $rtoken_vars->[_TOKEN_] = $token;
2873 # Second part goes without intermediate blank
2874 $store_token->($rtoken_vars);
2881 # this is a new single token qw -
2882 # store with possible preceding blank
2883 $store_token_and_space->(
2884 $rtoken_vars, $rwhitespace_flags->[$KK] == WS_YES
2889 } ## end if ( $type eq 'q' )
2891 # Modify certain tokens here for whitespace
2892 # The following is not yet done, but could be:
2894 elsif ( $type =~ /^[wit]$/ ) {
2896 # Examples: <<snippets/space1.in>>
2897 # change '$ var' to '$var' etc
2898 # '-> new' to '->new'
2899 if ( $token =~ /^([\$\&\%\*\@]|\-\>)\s/ ) {
2901 $rtoken_vars->[_TOKEN_] = $token;
2904 # Split identifiers with leading arrows, inserting blanks if
2905 # necessary. It is easier and safer here than in the
2906 # tokenizer. For example '->new' becomes two tokens, '->' and
2907 # 'new' with a possible blank between.
2909 # Note: there is a related patch in sub set_whitespace_flags
2910 if ( $token =~ /^\-\>(.*)$/ && $1 ) {
2911 my $token_save = $1;
2912 my $type_save = $type;
2914 # store a blank to left of arrow if necessary
2915 my $Kprev = $self->K_previous_nonblank($KK);
2916 if ( defined($Kprev)
2917 && $rLL->[$Kprev]->[_TYPE_] ne 'b'
2918 && $rOpts_add_whitespace
2919 && $want_left_space{'->'} == WS_YES )
2922 copy_token_as_type( $rtoken_vars, 'b', ' ' );
2923 $store_token->($rcopy);
2926 # then store the arrow
2927 my $rcopy = copy_token_as_type( $rtoken_vars, '->', '->' );
2928 $store_token->($rcopy);
2930 # then reset the current token to be the remainder,
2931 # and reset the whitespace flag according to the arrow
2932 $token = $rtoken_vars->[_TOKEN_] = $token_save;
2933 $type = $rtoken_vars->[_TYPE_] = $type_save;
2934 $store_token->($rtoken_vars);
2938 if ( $token =~ /$SUB_PATTERN/ ) {
2939 $token =~ s/\s+/ /g;
2940 $rtoken_vars->[_TOKEN_] = $token;
2943 # trim identifiers of trailing blanks which can occur
2944 # under some unusual circumstances, such as if the
2945 # identifier 'witch' has trailing blanks on input here:
2949 # () # prototype may be on new line ...
2951 if ( $type eq 'i' ) {
2952 $token =~ s/\s+$//g;
2953 $rtoken_vars->[_TOKEN_] = $token;
2957 # change 'LABEL :' to 'LABEL:'
2958 elsif ( $type eq 'J' ) {
2960 $rtoken_vars->[_TOKEN_] = $token;
2963 # patch to add space to something like "x10"
2964 # This avoids having to split this token in the pre-tokenizer
2965 elsif ( $type eq 'n' ) {
2966 if ( $token =~ /^x\d+/ ) {
2968 $rtoken_vars->[_TOKEN_] = $token;
2972 # check a quote for problems
2973 elsif ( $type eq 'Q' ) {
2975 # This is ready to go but is commented out because there is
2976 # still identical logic in sub break_lines.
2977 # $check_Q->($KK, $Kfirst);
2980 elsif ($type_sequence) {
2982 # if ( $is_opening_token{$token} ) {
2985 if ( $is_closing_token{$token} ) {
2987 # Insert a tentative missing semicolon if the next token is
2988 # a closing block brace
2993 # not preceded by a ';'
2994 && $last_nonblank_type ne ';'
2996 # and this is not a VERSION stmt (is all one line, we are not
2997 # inserting semicolons on one-line blocks)
2998 && $CODE_type ne 'VER'
3000 # and we are allowed to add semicolons
3001 && $rOpts->{'add-semicolons'}
3004 $add_phantom_semicolon->($KK);
3009 # Store this token with possible previous blank
3010 $store_token_and_space->(
3011 $rtoken_vars, $rwhitespace_flags->[$KK] == WS_YES
3017 # Reset memory to be the new array
3018 $self->{rLL} = $rLL_new;
3019 $self->set_rLL_max_index();
3020 $self->{K_opening_container} = $K_opening_container;
3021 $self->{K_closing_container} = $K_closing_container;
3022 $self->{K_opening_ternary} = $K_opening_ternary;
3023 $self->{K_closing_ternary} = $K_closing_ternary;
3024 $self->{rK_phantom_semicolons} = $rK_phantom_semicolons;
3026 # make sure the new array looks okay
3027 $self->check_token_array();
3029 # reset the token limits of each line
3030 $self->resync_lines_and_tokens();
3037 my $Last_line_had_side_comment;
3038 my $In_format_skipping_section;
3039 my $Saw_VERSION_in_this_file;
3043 my $rlines = $self->{rlines};
3045 $Last_line_had_side_comment = undef;
3046 $In_format_skipping_section = undef;
3047 $Saw_VERSION_in_this_file = undef;
3049 # Loop over all lines
3050 foreach my $line_of_tokens ( @{$rlines} ) {
3051 my $line_type = $line_of_tokens->{_line_type};
3052 next unless ( $line_type eq 'CODE' );
3053 my $CODE_type = $self->get_CODE_type($line_of_tokens);
3054 $line_of_tokens->{_code_type} = $CODE_type;
3060 my ( $self, $line_of_tokens ) = @_;
3062 # We are looking at a line of code and setting a flag to
3063 # describe any special processing that it requires
3065 # Possible CODE_types are as follows.
3067 # 'VB' = Verbatim - line goes out verbatim
3068 # 'IO' = Indent Only - line goes out unchanged except for indentation
3069 # 'NIN' = No Internal Newlines - line does not get broken
3070 # 'HSC'=Hanging Side Comment - fix this hanging side comment
3071 # 'BC'=Block Comment - an ordinary full line comment
3072 # 'SBC'=Static Block Comment - a block comment which does not get
3074 # 'SBCX'=Static Block Comment Without Leading Space
3075 # 'DEL'=Delete this line
3076 # 'VER'=VERSION statement
3077 # '' or (undefined) - no restructions
3079 my $rLL = $self->{rLL};
3080 my $Klimit = $self->{Klimit};
3082 my $CODE_type = $rOpts->{'indent-only'} ? 'IO' : "";
3083 my $no_internal_newlines = 1 - $rOpts_add_newlines;
3084 if ( !$CODE_type && $no_internal_newlines ) { $CODE_type = 'NIN' }
3086 # extract what we need for this line..
3088 # Global value for error messages:
3089 $input_line_number = $line_of_tokens->{_line_number};
3091 my $rK_range = $line_of_tokens->{_rK_range};
3092 my ( $Kfirst, $Klast ) = @{$rK_range};
3094 if ( defined($Kfirst) ) { $jmax = $Klast - $Kfirst }
3095 my $input_line = $line_of_tokens->{_line_text};
3096 my $in_continued_quote = my $starting_in_quote =
3097 $line_of_tokens->{_starting_in_quote};
3098 my $in_quote = $line_of_tokens->{_ending_in_quote};
3099 my $ending_in_quote = $in_quote;
3100 my $guessed_indentation_level =
3101 $line_of_tokens->{_guessed_indentation_level};
3103 my $is_static_block_comment = 0;
3105 # Handle a continued quote..
3106 if ($in_continued_quote) {
3108 # A line which is entirely a quote or pattern must go out
3109 # verbatim. Note: the \n is contained in $input_line.
3111 if ( ( $input_line =~ "\t" ) ) {
3112 note_embedded_tab();
3114 $Last_line_had_side_comment = 0;
3119 my $is_block_comment =
3120 ( $jmax == 0 && $rLL->[$Kfirst]->[_TYPE_] eq '#' );
3122 # Write line verbatim if we are in a formatting skip section
3123 if ($In_format_skipping_section) {
3124 $Last_line_had_side_comment = 0;
3126 # Note: extra space appended to comment simplifies pattern matching
3127 if ( $is_block_comment
3128 && ( $rLL->[$Kfirst]->[_TOKEN_] . " " ) =~
3129 /$format_skipping_pattern_end/o )
3131 $In_format_skipping_section = 0;
3132 write_logfile_entry("Exiting formatting skip section\n");
3137 # See if we are entering a formatting skip section
3138 if ( $rOpts_format_skipping
3139 && $is_block_comment
3140 && ( $rLL->[$Kfirst]->[_TOKEN_] . " " ) =~
3141 /$format_skipping_pattern_begin/o )
3143 $In_format_skipping_section = 1;
3144 write_logfile_entry("Entering formatting skip section\n");
3145 $Last_line_had_side_comment = 0;
3149 # ignore trailing blank tokens (they will get deleted later)
3150 if ( $jmax > 0 && $rLL->[$Klast]->[_TYPE_] eq 'b' ) {
3154 # Handle a blank line..
3156 $Last_line_had_side_comment = 0;
3160 # see if this is a static block comment (starts with ## by default)
3161 my $is_static_block_comment_without_leading_space = 0;
3162 if ( $is_block_comment
3163 && $rOpts->{'static-block-comments'}
3164 && $input_line =~ /$static_block_comment_pattern/o )
3166 $is_static_block_comment = 1;
3167 $is_static_block_comment_without_leading_space =
3168 substr( $input_line, 0, 1 ) eq '#';
3171 # Check for comments which are line directives
3172 # Treat exactly as static block comments without leading space
3173 # reference: perlsyn, near end, section Plain Old Comments (Not!)
3174 # example: '# line 42 "new_filename.plx"'
3177 && $input_line =~ /^\# \s*
3179 (?:\s("?)([^"]+)\2)? \s*
3183 $is_static_block_comment = 1;
3184 $is_static_block_comment_without_leading_space = 1;
3187 # look for hanging side comment
3190 && $Last_line_had_side_comment # last line had side comment
3191 && $input_line =~ /^\s/ # there is some leading space
3192 && !$is_static_block_comment # do not make static comment hanging
3193 && $rOpts->{'hanging-side-comments'} # user is allowing
3194 # hanging side comments
3198 $Last_line_had_side_comment = 1;
3202 # remember if this line has a side comment
3203 $Last_line_had_side_comment =
3204 ( $jmax > 0 && $rLL->[$Klast]->[_TYPE_] eq '#' );
3206 # Handle a block (full-line) comment..
3207 if ($is_block_comment) {
3209 if ( $rOpts->{'delete-block-comments'} ) { return 'DEL' }
3211 # TRIM COMMENTS -- This could be turned off as a option
3212 $rLL->[$Kfirst]->[_TOKEN_] =~ s/\s*$//; # trim right end
3214 if ($is_static_block_comment_without_leading_space) {
3217 elsif ($is_static_block_comment) {
3226 # NOTE: This does not work yet. Version in print-line-of-tokens
3227 # is Still used until fixed
3229 # compare input/output indentation except for continuation lines
3230 # (because they have an unknown amount of initial blank space)
3231 # and lines which are quotes (because they may have been outdented)
3232 # Note: this test is placed here because we know the continuation flag
3233 # at this point, which allows us to avoid non-meaningful checks.
3234 my $structural_indentation_level = $rLL->[$Kfirst]->[_LEVEL_];
3235 compare_indentation_levels( $guessed_indentation_level,
3236 $structural_indentation_level )
3237 unless ( $rLL->[$Kfirst]->[_CI_LEVEL_] > 0
3238 || $guessed_indentation_level == 0
3239 && $rLL->[$Kfirst]->[_TYPE_] eq 'Q' );
3242 # Patch needed for MakeMaker. Do not break a statement
3243 # in which $VERSION may be calculated. See MakeMaker.pm;
3244 # this is based on the coding in it.
3245 # The first line of a file that matches this will be eval'd:
3246 # /([\$*])(([\w\:\']*)\bVERSION)\b.*\=/
3248 # *VERSION = \'1.01';
3249 # ( $VERSION ) = '$Revision: 1.74 $ ' =~ /\$Revision:\s+([^\s]+)/;
3250 # We will pass such a line straight through without breaking
3251 # it unless -npvl is used.
3253 # Patch for problem reported in RT #81866, where files
3254 # had been flattened into a single line and couldn't be
3255 # tidied without -npvl. There are two parts to this patch:
3256 # First, it is not done for a really long line (80 tokens for now).
3257 # Second, we will only allow up to one semicolon
3258 # before the VERSION. We need to allow at least one semicolon
3259 # for statements like this:
3260 # require Exporter; our $VERSION = $Exporter::VERSION;
3261 # where both statements must be on a single line for MakeMaker
3263 my $is_VERSION_statement = 0;
3264 if ( !$Saw_VERSION_in_this_file
3267 /^[^;]*;?[^;]*([\$*])(([\w\:\']*)\bVERSION)\b.*\=/ )
3269 $Saw_VERSION_in_this_file = 1;
3270 write_logfile_entry("passing VERSION line; -npvl deactivates\n");
3277 sub find_nested_pairs {
3280 my $rLL = $self->{rLL};
3281 return unless ( defined($rLL) && @{$rLL} );
3283 # We define an array of pairs of nested containers
3286 # We also set the following hash values to identify container pairs for
3287 # which the opening and closing tokens are adjacent in the token stream:
3288 # $rpaired_to_inner_container->{$seqno_out}=$seqno_in where $seqno_out and
3289 # $seqno_in are the seqence numbers of the outer and inner containers of
3290 # the pair We need these later to decide if we can insert a missing
3292 my $rpaired_to_inner_container = {};
3294 # This local hash remembers if an outer container has a close following
3296 # The key is the outer sequence number
3297 # The value is the token_hash of the inner container
3299 my %has_close_following_opening;
3301 # Names of calling routines can either be marked as 'i' or 'w',
3302 # and they may invoke a sub call with an '->'. We will consider
3303 # any consecutive string of such types as a single unit when making
3304 # weld decisions. We also allow a leading !
3305 my $is_name_type = {
3315 return $type && $is_name_type->{$type};
3319 my $last_last_container;
3320 my $last_nonblank_token_vars;
3323 my $nonblank_token_count = 0;
3325 # loop over all tokens
3326 foreach my $rtoken_vars ( @{$rLL} ) {
3328 my $type = $rtoken_vars->[_TYPE_];
3330 next if ( $type eq 'b' );
3332 # long identifier-like items are counted as a single item
3333 $nonblank_token_count++
3334 unless ( $is_name->($type)
3335 && $is_name->( $last_nonblank_token_vars->[_TYPE_] ) );
3337 my $type_sequence = $rtoken_vars->[_TYPE_SEQUENCE_];
3338 if ($type_sequence) {
3340 my $token = $rtoken_vars->[_TOKEN_];
3342 if ( $is_opening_token{$token} ) {
3344 # following previous opening token ...
3345 if ( $last_container
3346 && $is_opening_token{ $last_container->[_TOKEN_] } )
3349 # adjacent to this one
3350 my $tok_diff = $nonblank_token_count - $last_count;
3352 my $last_tok = $last_nonblank_token_vars->[_TOKEN_];
3355 || $tok_diff == 2 && $last_container->[_TOKEN_] eq '(' )
3358 # remember this pair...
3359 my $outer_seqno = $last_container->[_TYPE_SEQUENCE_];
3360 my $inner_seqno = $type_sequence;
3361 $has_close_following_opening{$outer_seqno} =
3367 elsif ( $is_closing_token{$token} ) {
3369 # if the corresponding opening token had an adjacent opening
3370 if ( $has_close_following_opening{$type_sequence}
3371 && $is_closing_token{ $last_container->[_TOKEN_] }
3372 && $has_close_following_opening{$type_sequence}
3373 ->[_TYPE_SEQUENCE_] == $last_container->[_TYPE_SEQUENCE_] )
3376 # The closing weld tokens must be adjacent
3377 # NOTE: so intermediate commas and semicolons
3378 # can currently block a weld. This is something
3379 # that could be fixed in the future by including
3380 # a flag to delete un-necessary commas and semicolons.
3381 my $tok_diff = $nonblank_token_count - $last_count;
3383 if ( $tok_diff == 1 ) {
3385 # This is a closely nested pair ..
3386 my $inner_seqno = $last_container->[_TYPE_SEQUENCE_];
3387 my $outer_seqno = $type_sequence;
3388 $rpaired_to_inner_container->{$outer_seqno} =
3391 push @nested_pairs, [ $inner_seqno, $outer_seqno ];
3396 $last_last_container = $last_container;
3397 $last_container = $rtoken_vars;
3398 $last_count = $nonblank_token_count;
3400 $last_nonblank_token_vars = $rtoken_vars;
3402 $self->{rnested_pairs} = \@nested_pairs;
3403 $self->{rpaired_to_inner_container} = $rpaired_to_inner_container;
3409 # a debug routine, not normally used
3410 my ( $self, $msg ) = @_;
3411 my $rLL = $self->{rLL};
3412 my $nvars = @{$rLL};
3413 print STDERR "$msg\n";
3414 print STDERR "ntokens=$nvars\n";
3415 print STDERR "K\t_TOKEN_\t_TYPE_\n";
3418 foreach my $item ( @{$rLL} ) {
3419 print STDERR "$K\t$item->[_TOKEN_]\t$item->[_TYPE_]\n";
3425 sub get_old_line_index {
3426 my ( $self, $K ) = @_;
3427 my $rLL = $self->{rLL};
3428 return 0 unless defined($K);
3429 return $rLL->[$K]->[_LINE_INDEX_];
3432 sub get_old_line_count {
3433 my ( $self, $Kbeg, $Kend ) = @_;
3434 my $rLL = $self->{rLL};
3435 return 0 unless defined($Kbeg);
3436 return 0 unless defined($Kend);
3437 return $rLL->[$Kend]->[_LINE_INDEX_] - $rLL->[$Kbeg]->[_LINE_INDEX_] + 1;
3441 my ( $self, $KK, $rLL ) = @_;
3443 # return the index K of the next nonblank, non-comment token
3444 return unless ( defined($KK) && $KK >= 0 );
3446 # use the standard array unless given otherwise
3447 $rLL = $self->{rLL} unless ( defined($rLL) );
3450 while ( $Knnb < $Num ) {
3451 if ( !defined( $rLL->[$Knnb] ) ) {
3452 Fault("Undefined entry for k=$Knnb");
3454 if ( $rLL->[$Knnb]->[_TYPE_] ne 'b'
3455 && $rLL->[$Knnb]->[_TYPE_] ne '#' )
3464 sub K_next_nonblank {
3465 my ( $self, $KK, $rLL ) = @_;
3467 # return the index K of the next nonblank token
3468 return unless ( defined($KK) && $KK >= 0 );
3470 # use the standard array unless given otherwise
3471 $rLL = $self->{rLL} unless ( defined($rLL) );
3474 while ( $Knnb < $Num ) {
3475 if ( !defined( $rLL->[$Knnb] ) ) {
3476 Fault("Undefined entry for k=$Knnb");
3478 if ( $rLL->[$Knnb]->[_TYPE_] ne 'b' ) { return $Knnb }
3484 sub K_previous_code {
3486 # return the index K of the previous nonblank, non-comment token
3487 # Call with $KK=undef to start search at the top of the array
3488 my ( $self, $KK, $rLL ) = @_;
3490 # use the standard array unless given otherwise
3491 $rLL = $self->{rLL} unless ( defined($rLL) );
3493 if ( !defined($KK) ) { $KK = $Num }
3494 elsif ( $KK > $Num ) {
3496 # The caller should make the first call with KK_new=undef to
3499 "Program Bug: K_previous_nonblank_new called with K=$KK which exceeds $Num"
3503 while ( $Kpnb >= 0 ) {
3504 if ( $rLL->[$Kpnb]->[_TYPE_] ne 'b'
3505 && $rLL->[$Kpnb]->[_TYPE_] ne '#' )
3514 sub K_previous_nonblank {
3516 # return index of previous nonblank token before item K;
3517 # Call with $KK=undef to start search at the top of the array
3518 my ( $self, $KK, $rLL ) = @_;
3520 # use the standard array unless given otherwise
3521 $rLL = $self->{rLL} unless ( defined($rLL) );
3523 if ( !defined($KK) ) { $KK = $Num }
3524 elsif ( $KK > $Num ) {
3526 # The caller should make the first call with KK_new=undef to
3529 "Program Bug: K_previous_nonblank_new called with K=$KK which exceeds $Num"
3533 while ( $Kpnb >= 0 ) {
3534 if ( $rLL->[$Kpnb]->[_TYPE_] ne 'b' ) { return $Kpnb }
3540 sub weld_containers {
3542 # do any welding operations
3545 # initialize weld length hashes needed later for checking line lengths
3546 # TODO: These should eventually be stored in $self rather than be package vars
3547 %weld_len_left_closing = ();
3548 %weld_len_right_closing = ();
3549 %weld_len_left_opening = ();
3550 %weld_len_right_opening = ();
3552 return if ( $rOpts->{'indent-only'} );
3553 return unless ($rOpts_add_newlines);
3555 if ( $rOpts->{'weld-nested-containers'} ) {
3557 # if called, weld_nested_containers must be called before other weld
3558 # operations. # This is because weld_nested_containers could overwrite
3559 # hash values written by weld_cuddled_blocks and weld_nested_quotes.
3560 $self->weld_nested_containers();
3562 $self->weld_nested_quotes();
3565 # Note that weld_nested_containers() changes the _LEVEL_ values, so
3566 # weld_cuddled_blocks must use the _TRUE_LEVEL_ values instead.
3568 # Here is a good test case to Be sure that both cuddling and welding
3569 # are working and not interfering with each other: <<snippets/ce_wn1.in>>
3573 # if ($BOLD_MATH) { (
3574 # $labels, $comment,
3575 # join( '', '<B>', &make_math( $mode, '', '', $_ ), '</B>' )
3577 # &process_math_in_latex( $mode, $math_style, $slevel, "\\mbox{$text}" ),
3581 $self->weld_cuddled_blocks();
3586 sub cumulative_length_before_K {
3587 my ( $self, $KK ) = @_;
3588 my $rLL = $self->{rLL};
3589 return ( $KK <= 0 ) ? 0 : $rLL->[ $KK - 1 ]->[_CUMULATIVE_LENGTH_];
3592 sub cumulative_length_after_K {
3593 my ( $self, $KK ) = @_;
3594 my $rLL = $self->{rLL};
3595 return $rLL->[$KK]->[_CUMULATIVE_LENGTH_];
3598 sub weld_cuddled_blocks {
3601 # This routine implements the -cb flag by finding the appropriate
3602 # closing and opening block braces and welding them together.
3603 return unless ( %{$rcuddled_block_types} );
3605 my $rLL = $self->{rLL};
3606 return unless ( defined($rLL) && @{$rLL} );
3607 my $rbreak_container = $self->{rbreak_container};
3609 my $K_opening_container = $self->{K_opening_container};
3610 my $K_closing_container = $self->{K_closing_container};
3612 my $length_to_opening_seqno = sub {
3614 my $KK = $K_opening_container->{$seqno};
3615 my $lentot = $KK <= 0 ? 0 : $rLL->[ $KK - 1 ]->[_CUMULATIVE_LENGTH_];
3618 my $length_to_closing_seqno = sub {
3620 my $KK = $K_closing_container->{$seqno};
3621 my $lentot = $KK <= 0 ? 0 : $rLL->[ $KK - 1 ]->[_CUMULATIVE_LENGTH_];
3625 my $is_broken_block = sub {
3627 # a block is broken if the input line numbers of the braces differ
3628 # we can only cuddle between broken blocks
3630 my $K_opening = $K_opening_container->{$seqno};
3631 return unless ( defined($K_opening) );
3632 my $K_closing = $K_closing_container->{$seqno};
3633 return unless ( defined($K_closing) );
3634 return $rbreak_container->{$seqno}
3635 || $rLL->[$K_closing]->[_LINE_INDEX_] !=
3636 $rLL->[$K_opening]->[_LINE_INDEX_];
3639 # A stack to remember open chains at all levels:
3640 # $in_chain[$level] = [$chain_type, $type_sequence];
3642 my $CBO = $rOpts->{'cuddled-break-option'};
3644 # loop over structure items to find cuddled pairs
3647 while ( defined( $KK = $rLL->[$KK]->[_KNEXT_SEQ_ITEM_] ) ) {
3648 my $rtoken_vars = $rLL->[$KK];
3649 my $type_sequence = $rtoken_vars->[_TYPE_SEQUENCE_];
3650 if ( !$type_sequence ) {
3651 Fault("sequence = $type_sequence not defined");
3654 # We use the original levels because they get changed by sub
3655 # 'weld_nested_containers'. So if this were to be called before that
3656 # routine, the levels would be wrong and things would go bad.
3657 my $last_level = $level;
3658 $level = $rtoken_vars->[_LEVEL_TRUE_];
3660 if ( $level < $last_level ) { $in_chain[$last_level] = undef }
3661 elsif ( $level > $last_level ) { $in_chain[$level] = undef }
3663 # We are only looking at code blocks
3664 my $token = $rtoken_vars->[_TOKEN_];
3665 my $type = $rtoken_vars->[_TYPE_];
3666 next unless ( $type eq $token );
3668 if ( $token eq '{' ) {
3670 my $block_type = $rtoken_vars->[_BLOCK_TYPE_];
3671 if ( !$block_type ) {
3673 # patch for unrecognized block types which may not be labeled
3674 my $Kp = $self->K_previous_nonblank($KK);
3675 while ( $Kp && $rLL->[$Kp]->[_TYPE_] eq '#' ) {
3676 $Kp = $self->K_previous_nonblank($Kp);
3679 $block_type = $rLL->[$Kp]->[_TOKEN_];
3681 if ( $in_chain[$level] ) {
3683 # we are in a chain and are at an opening block brace.
3684 # See if we are welding this opening brace with the previous
3685 # block brace. Get their identification numbers:
3686 my $closing_seqno = $in_chain[$level]->[1];
3687 my $opening_seqno = $type_sequence;
3689 # The preceding block must be on multiple lines so that its
3690 # closing brace will start a new line.
3691 if ( !$is_broken_block->($closing_seqno) ) {
3692 next unless ( $CBO == 2 );
3693 $rbreak_container->{$closing_seqno} = 1;
3696 # we will let the trailing block be either broken or intact
3697 ## && $is_broken_block->($opening_seqno);
3699 # We can weld the closing brace to its following word ..
3700 my $Ko = $K_closing_container->{$closing_seqno};
3701 my $Kon = $self->K_next_nonblank($Ko);
3703 # ..unless it is a comment
3704 if ( $rLL->[$Kon]->[_TYPE_] ne '#' ) {
3706 $rLL->[$Kon]->[_CUMULATIVE_LENGTH_] -
3707 $rLL->[ $Ko - 1 ]->[_CUMULATIVE_LENGTH_];
3708 $weld_len_right_closing{$closing_seqno} = $dlen;
3710 # Set flag that we want to break the next container
3711 # so that the cuddled line is balanced.
3712 $rbreak_container->{$opening_seqno} = 1
3719 # We are not in a chain. Start a new chain if we see the
3720 # starting block type.
3721 if ( $rcuddled_block_types->{$block_type} ) {
3722 $in_chain[$level] = [ $block_type, $type_sequence ];
3726 $in_chain[$level] = [ $block_type, $type_sequence ];
3730 elsif ( $token eq '}' ) {
3731 if ( $in_chain[$level] ) {
3733 # We are in a chain at a closing brace. See if this chain
3735 my $Knn = $self->K_next_code($KK);
3738 my $chain_type = $in_chain[$level]->[0];
3739 my $next_nonblank_token = $rLL->[$Knn]->[_TOKEN_];
3741 $rcuddled_block_types->{$chain_type}->{$next_nonblank_token}
3745 # Note that we do not weld yet because we must wait until
3746 # we we are sure that an opening brace for this follows.
3747 $in_chain[$level]->[1] = $type_sequence;
3749 else { $in_chain[$level] = undef }
3757 sub weld_nested_containers {
3760 # This routine implements the -wn flag by "welding together"
3761 # the nested closing and opening tokens which were previously
3762 # identified by sub 'find_nested_pairs'. "welding" simply
3763 # involves setting certain hash values which will be checked
3764 # later during formatting.
3766 my $rLL = $self->{rLL};
3767 my $Klimit = $self->get_rLL_max_index();
3768 my $rnested_pairs = $self->{rnested_pairs};
3769 my $rlines = $self->{rlines};
3770 my $K_opening_container = $self->{K_opening_container};
3771 my $K_closing_container = $self->{K_closing_container};
3773 # Return unless there are nested pairs to weld
3774 return unless defined($rnested_pairs) && @{$rnested_pairs};
3776 # This array will hold the sequence numbers of the tokens to be welded.
3779 # Variables needed for estimating line lengths
3780 my $starting_indent;
3781 my $starting_lentot;
3783 # A tolerance to the length for length estimates. In some rare cases
3784 # this can avoid problems where a final weld slightly exceeds the
3785 # line length and gets broken in a bad spot.
3788 my $excess_length_to_K = sub {
3791 # Estimate the length from the line start to a given token
3792 my $length = $self->cumulative_length_before_K($K) - $starting_lentot;
3794 $starting_indent + $length + $length_tol - $rOpts_maximum_line_length;
3795 return ($excess_length);
3798 my $length_to_opening_seqno = sub {
3800 my $KK = $K_opening_container->{$seqno};
3801 my $lentot = $KK <= 0 ? 0 : $rLL->[ $KK - 1 ]->[_CUMULATIVE_LENGTH_];
3805 my $length_to_closing_seqno = sub {
3807 my $KK = $K_closing_container->{$seqno};
3808 my $lentot = $KK <= 0 ? 0 : $rLL->[ $KK - 1 ]->[_CUMULATIVE_LENGTH_];
3813 # _oo=outer opening, i.e. first of { {
3814 # _io=inner opening, i.e. second of { {
3815 # _oc=outer closing, i.e. second of } {
3816 # _ic=inner closing, i.e. first of } }
3820 # We are working from outermost to innermost pairs so that
3821 # level changes will be complete when we arrive at the inner pairs.
3823 while ( my $item = pop( @{$rnested_pairs} ) ) {
3824 my ( $inner_seqno, $outer_seqno ) = @{$item};
3826 my $Kouter_opening = $K_opening_container->{$outer_seqno};
3827 my $Kinner_opening = $K_opening_container->{$inner_seqno};
3828 my $Kouter_closing = $K_closing_container->{$outer_seqno};
3829 my $Kinner_closing = $K_closing_container->{$inner_seqno};
3831 my $outer_opening = $rLL->[$Kouter_opening];
3832 my $inner_opening = $rLL->[$Kinner_opening];
3833 my $outer_closing = $rLL->[$Kouter_closing];
3834 my $inner_closing = $rLL->[$Kinner_closing];
3836 my $iline_oo = $outer_opening->[_LINE_INDEX_];
3837 my $iline_io = $inner_opening->[_LINE_INDEX_];
3839 # Set flag saying if this pair starts a new weld
3840 my $starting_new_weld = !( @welds && $outer_seqno == $welds[-1]->[0] );
3842 # Set flag saying if this pair is adjacent to the previous nesting pair
3843 # (even if previous pair was rejected as a weld)
3844 my $touch_previous_pair =
3845 defined($previous_pair) && $outer_seqno == $previous_pair->[0];
3846 $previous_pair = $item;
3848 # Set a flag if we should not weld. It sometimes looks best not to weld
3849 # when the opening and closing tokens are very close. However, there
3850 # is a danger that we will create a "blinker", which oscillates between
3851 # two semi-stable states, if we do not weld. So the rules for
3852 # not welding have to be carefully defined and tested.
3854 if ( !$touch_previous_pair ) {
3856 # If this pair is not adjacent to the previous pair (skipped or
3857 # not), then measure lengths from the start of line of oo
3859 my $rK_range = $rlines->[$iline_oo]->{_rK_range};
3860 my ( $Kfirst, $Klast ) = @{$rK_range};
3862 $Kfirst <= 0 ? 0 : $rLL->[ $Kfirst - 1 ]->[_CUMULATIVE_LENGTH_];
3863 $starting_indent = 0;
3864 if ( !$rOpts_variable_maximum_line_length ) {
3865 my $level = $rLL->[$Kfirst]->[_LEVEL_];
3866 $starting_indent = $rOpts_indent_columns * $level;
3869 # DO-NOT-WELD RULE 1:
3870 # Do not weld something that looks like the start of a two-line
3871 # function call, like this: <<snippets/wn6.in>>
3872 # $trans->add_transformation(
3873 # PDL::Graphics::TriD::Scale->new( $sx, $sy, $sz ) );
3874 # We will look for a semicolon after the closing paren.
3876 # We want to weld something complex, like this though
3877 # my $compass = uc( opposite_direction( line_to_canvas_direction(
3878 # @{ $coords[0] }, @{ $coords[1] } ) ) );
3879 # Otherwise we will get a 'blinker'
3881 my $iline_oc = $outer_closing->[_LINE_INDEX_];
3882 if ( $iline_oc <= $iline_oo + 1 ) {
3884 # Look for following semicolon...
3885 my $Knext_nonblank = $self->K_next_nonblank($Kouter_closing);
3886 my $next_nonblank_type =
3887 defined($Knext_nonblank)
3888 ? $rLL->[$Knext_nonblank]->[_TYPE_]
3890 if ( $next_nonblank_type eq ';' ) {
3892 # Then do not weld if no other containers between inner
3893 # opening and closing.
3894 my $Knext_seq_item = $inner_opening->[_KNEXT_SEQ_ITEM_];
3895 if ( $Knext_seq_item == $Kinner_closing ) {
3902 my $iline_ic = $inner_closing->[_LINE_INDEX_];
3904 # DO-NOT-WELD RULE 2:
3905 # Do not weld an opening paren to an inner one line brace block
3906 # We will just use old line numbers for this test and require
3907 # iterations if necessary for convergence
3909 # For example, otherwise we could cause the opening paren
3910 # in the following example to separate from the caller name
3913 # $_[0]->code_handler
3914 # ( sub { $more .= $_[1] . ":" . $_[0] . "\n" } );
3916 # Here is another example where we do not want to weld:
3917 # $wrapped->add_around_modifier(
3918 # sub { push @tracelog => 'around 1'; $_[0]->(); } );
3920 # If the one line sub block gets broken due to length or by the
3921 # user, then we can weld. The result will then be:
3922 # $wrapped->add_around_modifier( sub {
3923 # push @tracelog => 'around 1';
3927 if ( $iline_ic == $iline_io ) {
3929 my $token_oo = $outer_opening->[_TOKEN_];
3930 my $block_type_io = $inner_opening->[_BLOCK_TYPE_];
3931 my $token_io = $inner_opening->[_TOKEN_];
3932 $do_not_weld ||= $token_oo eq '(' && $token_io eq '{';
3935 # DO-NOT-WELD RULE 3:
3936 # Do not weld if this makes our line too long
3937 $do_not_weld ||= $excess_length_to_K->($Kinner_opening) > 0;
3941 # After neglecting a pair, we start measuring from start of point io
3943 $self->cumulative_length_before_K($Kinner_opening);
3944 $starting_indent = 0;
3945 if ( !$rOpts_variable_maximum_line_length ) {
3946 my $level = $inner_opening->[_LEVEL_];
3947 $starting_indent = $rOpts_indent_columns * $level;
3950 # Normally, a broken pair should not decrease indentation of
3951 # intermediate tokens:
3952 ## if ( $last_pair_broken ) { next }
3953 # However, for long strings of welded tokens, such as '{{{{{{...'
3954 # we will allow broken pairs to also remove indentation.
3955 # This will keep very long strings of opening and closing
3956 # braces from marching off to the right. We will do this if the
3957 # number of tokens in a weld before the broken weld is 4 or more.
3958 # This rule will mainly be needed for test scripts, since typical
3959 # welds have fewer than about 4 welded tokens.
3960 if ( !@welds || @{ $welds[-1] } < 4 ) { next }
3963 # otherwise start new weld ...
3964 elsif ($starting_new_weld) {
3968 # ... or extend current weld
3970 unshift @{ $welds[-1] }, $inner_seqno;
3973 # After welding, reduce the indentation level if all intermediate tokens
3974 my $dlevel = $outer_opening->[_LEVEL_] - $inner_opening->[_LEVEL_];
3975 if ( $dlevel != 0 ) {
3976 my $Kstart = $Kinner_opening;
3977 my $Kstop = $Kinner_closing;
3978 for ( my $KK = $Kstart ; $KK <= $Kstop ; $KK++ ) {
3979 $rLL->[$KK]->[_LEVEL_] += $dlevel;
3984 # Define weld lengths needed later to set line breaks
3985 foreach my $item (@welds) {
3987 # sweep from inner to outer
3992 foreach my $outer_seqno ( @{$item} ) {
3996 $length_to_opening_seqno->($inner_seqno) -
3997 $length_to_opening_seqno->($outer_seqno);
4000 $length_to_closing_seqno->($outer_seqno) -
4001 $length_to_closing_seqno->($inner_seqno);
4003 $len_open += $dlen_opening;
4004 $len_close += $dlen_closing;
4008 $weld_len_left_closing{$outer_seqno} = $len_close;
4009 $weld_len_right_opening{$outer_seqno} = $len_open;
4011 $inner_seqno = $outer_seqno;
4014 # sweep from outer to inner
4015 foreach my $seqno ( reverse @{$item} ) {
4016 $weld_len_right_closing{$seqno} =
4017 $len_close - $weld_len_left_closing{$seqno};
4018 $weld_len_left_opening{$seqno} =
4019 $len_open - $weld_len_right_opening{$seqno};
4023 #####################################
4025 #####################################
4029 foreach my $weld (@welds) {
4030 print "\nWeld number $count has seq: (@{$weld})\n";
4031 foreach my $seq ( @{$weld} ) {
4034 left_opening=$weld_len_left_opening{$seq};
4035 right_opening=$weld_len_right_opening{$seq};
4036 left_closing=$weld_len_left_closing{$seq};
4037 right_closing=$weld_len_right_closing{$seq};
4047 sub weld_nested_quotes {
4050 my $rLL = $self->{rLL};
4051 return unless ( defined($rLL) && @{$rLL} );
4053 my $K_opening_container = $self->{K_opening_container};
4054 my $K_closing_container = $self->{K_closing_container};
4055 my $rlines = $self->{rlines};
4057 my $is_single_quote = sub {
4058 my ( $Kbeg, $Kend, $quote_type ) = @_;
4059 foreach my $K ( $Kbeg .. $Kend ) {
4060 my $test_type = $rLL->[$K]->[_TYPE_];
4061 next if ( $test_type eq 'b' );
4062 return if ( $test_type ne $quote_type );
4067 my $excess_line_length = sub {
4068 my ( $KK, $Ktest ) = @_;
4070 # what is the excess length if we add token $Ktest to the line with $KK?
4071 my $iline = $rLL->[$KK]->[_LINE_INDEX_];
4072 my $rK_range = $rlines->[$iline]->{_rK_range};
4073 my ( $Kfirst, $Klast ) = @{$rK_range};
4074 my $starting_lentot =
4075 $Kfirst <= 0 ? 0 : $rLL->[ $Kfirst - 1 ]->[_CUMULATIVE_LENGTH_];
4076 my $starting_indent = 0;
4078 if ( !$rOpts_variable_maximum_line_length ) {
4079 my $level = $rLL->[$Kfirst]->[_LEVEL_];
4080 $starting_indent = $rOpts_indent_columns * $level;
4083 my $length = $rLL->[$Ktest]->[_CUMULATIVE_LENGTH_] - $starting_lentot;
4085 $starting_indent + $length + $length_tol - $rOpts_maximum_line_length;
4086 return $excess_length;
4089 # look for single qw quotes nested in containers
4091 while ( defined( $KK = $rLL->[$KK]->[_KNEXT_SEQ_ITEM_] ) ) {
4092 my $rtoken_vars = $rLL->[$KK];
4093 my $outer_seqno = $rtoken_vars->[_TYPE_SEQUENCE_];
4094 if ( !$outer_seqno ) {
4095 Fault("sequence = $outer_seqno not defined");
4098 my $token = $rtoken_vars->[_TOKEN_];
4099 if ( $is_opening_token{$token} ) {
4101 # see if the next token is a quote of some type
4102 my $Kn = $self->K_next_nonblank($KK);
4104 my $next_token = $rLL->[$Kn]->[_TOKEN_];
4105 my $next_type = $rLL->[$Kn]->[_TYPE_];
4107 unless ( ( $next_type eq 'q' || $next_type eq 'Q' )
4108 && $next_token =~ /^q/ );
4110 # The token before the closing container must also be a quote
4111 my $K_closing = $K_closing_container->{$outer_seqno};
4112 my $Kt_end = $self->K_previous_nonblank($K_closing);
4113 next unless $rLL->[$Kt_end]->[_TYPE_] eq $next_type;
4115 # Do not weld to single-line quotes. Nothing is gained, and it may
4117 next if ( $Kt_end == $Kn );
4119 # Only weld to quotes delimited with container tokens. This is
4120 # because welding to arbitrary quote delimiters can produce code
4121 # which is less readable than without welding.
4122 my $closing_delimiter = substr( $rLL->[$Kt_end]->[_TOKEN_], -1, 1 );
4124 unless ( $is_closing_token{$closing_delimiter}
4125 || $closing_delimiter eq '>' );
4127 # Now make sure that there is just a single quote in the container
4129 unless ( $is_single_quote->( $Kn + 1, $Kt_end - 1, $next_type ) );
4131 # If welded, the line must not exceed allowed line length
4132 # Assume old line breaks for this estimate.
4133 next if ( $excess_line_length->( $KK, $Kn ) > 0 );
4136 # FIXME: Are these always correct?
4137 $weld_len_left_closing{$outer_seqno} = 1;
4138 $weld_len_right_opening{$outer_seqno} = 2;
4140 # QW PATCH 1 (Testing)
4141 # undo CI for welded quotes
4142 foreach my $K ( $Kn .. $Kt_end ) {
4143 $rLL->[$K]->[_CI_LEVEL_] = 0;
4146 # Change the level of a closing qw token to be that of the outer
4147 # containing token. This will allow -lp indentation to function
4148 # correctly in the vertical aligner.
4149 $rLL->[$Kt_end]->[_LEVEL_] = $rLL->[$K_closing]->[_LEVEL_];
4157 my ( $seqno, $type_or_tok ) = @_;
4159 # Given the sequence number of a token, and the token or its type,
4160 # return the length of any weld to its left
4164 if ( $is_closing_type{$type_or_tok} ) {
4165 $weld_len = $weld_len_left_closing{$seqno};
4167 elsif ( $is_opening_type{$type_or_tok} ) {
4168 $weld_len = $weld_len_left_opening{$seqno};
4171 if ( !defined($weld_len) ) { $weld_len = 0 }
4175 sub weld_len_right {
4177 my ( $seqno, $type_or_tok ) = @_;
4179 # Given the sequence number of a token, and the token or its type,
4180 # return the length of any weld to its right
4184 if ( $is_closing_type{$type_or_tok} ) {
4185 $weld_len = $weld_len_right_closing{$seqno};
4187 elsif ( $is_opening_type{$type_or_tok} ) {
4188 $weld_len = $weld_len_right_opening{$seqno};
4191 if ( !defined($weld_len) ) { $weld_len = 0 }
4195 sub weld_len_left_to_go {
4198 # Given the index of a token in the 'to_go' array
4199 # return the length of any weld to its left
4200 return if ( $i < 0 );
4202 weld_len_left( $type_sequence_to_go[$i], $types_to_go[$i] );
4206 sub weld_len_right_to_go {
4209 # Given the index of a token in the 'to_go' array
4210 # return the length of any weld to its right
4211 return if ( $i < 0 );
4212 if ( $i > 0 && $types_to_go[$i] eq 'b' ) { $i-- }
4214 weld_len_right( $type_sequence_to_go[$i], $types_to_go[$i] );
4218 sub link_sequence_items {
4220 # This has been merged into 'respace_tokens' but retained for reference
4222 my $rlines = $self->{rlines};
4223 my $rLL = $self->{rLL};
4225 # We walk the token list and make links to the next sequence item.
4226 # We also define these hashes to container tokens using sequence number as
4228 my $K_opening_container = {}; # opening [ { or (
4229 my $K_closing_container = {}; # closing ] } or )
4230 my $K_opening_ternary = {}; # opening ? of ternary
4231 my $K_closing_ternary = {}; # closing : of ternary
4233 # sub to link preceding nodes forward to a new node type
4234 my $link_back = sub {
4235 my ( $Ktop, $key ) = @_;
4237 my $Kprev = $Ktop - 1;
4239 && !defined( $rLL->[$Kprev]->[$key] ) )
4241 $rLL->[$Kprev]->[$key] = $Ktop;
4246 for ( my $KK = 0 ; $KK < @{$rLL} ; $KK++ ) {
4248 $rLL->[$KK]->[_KNEXT_SEQ_ITEM_] = undef;
4250 my $type = $rLL->[$KK]->[_TYPE_];
4252 next if ( $type eq 'b' );
4254 my $type_sequence = $rLL->[$KK]->[_TYPE_SEQUENCE_];
4255 if ($type_sequence) {
4257 $link_back->( $KK, _KNEXT_SEQ_ITEM_ );
4259 my $token = $rLL->[$KK]->[_TOKEN_];
4260 if ( $is_opening_token{$token} ) {
4262 $K_opening_container->{$type_sequence} = $KK;
4264 elsif ( $is_closing_token{$token} ) {
4266 $K_closing_container->{$type_sequence} = $KK;
4269 # These are not yet used but could be useful
4271 if ( $token eq '?' ) {
4272 $K_opening_ternary->{$type_sequence} = $KK;
4274 elsif ( $token eq ':' ) {
4275 $K_closing_ternary->{$type_sequence} = $KK;
4279 Unknown sequenced token type '$type'. Expecting one of '{[(?:)]}'
4286 $self->{K_opening_container} = $K_opening_container;
4287 $self->{K_closing_container} = $K_closing_container;
4288 $self->{K_opening_ternary} = $K_opening_ternary;
4289 $self->{K_closing_ternary} = $K_closing_ternary;
4293 sub sum_token_lengths {
4296 # This has been merged into 'respace_tokens' but retained for reference
4297 my $rLL = $self->{rLL};
4298 my $cumulative_length = 0;
4299 for ( my $KK = 0 ; $KK < @{$rLL} ; $KK++ ) {
4301 # now set the length of this token
4302 my $token_length = length( $rLL->[$KK]->[_TOKEN_] );
4304 $cumulative_length += $token_length;
4306 # Save the length sum to just AFTER this token
4307 $rLL->[$KK]->[_CUMULATIVE_LENGTH_] = $cumulative_length;
4313 sub resync_lines_and_tokens {
4316 my $rLL = $self->{rLL};
4317 my $Klimit = $self->{Klimit};
4318 my $rlines = $self->{rlines};
4320 # Re-construct the arrays of tokens associated with the original input lines
4321 # since they have probably changed due to inserting and deleting blanks
4322 # and a few other tokens.
4326 # This is the next token and its line index:
4329 if ( defined($rLL) && @{$rLL} ) {
4330 $Kmax = @{$rLL} - 1;
4331 $inext = $rLL->[$Knext]->[_LINE_INDEX_];
4334 my $get_inext = sub {
4335 if ( $Knext < 0 || $Knext > $Kmax ) { $inext = undef }
4337 $inext = $rLL->[$Knext]->[_LINE_INDEX_];
4342 # Remember the most recently output token index
4346 foreach my $line_of_tokens ( @{$rlines} ) {
4348 my $line_type = $line_of_tokens->{_line_type};
4349 if ( $line_type eq 'CODE' ) {
4353 $inext = $get_inext->();
4354 while ( defined($inext) && $inext <= $iline ) {
4355 push @{K_array}, $Knext;
4357 $inext = $get_inext->();
4360 # Delete any terminal blank token
4362 if ( $rLL->[ $K_array[-1] ]->[_TYPE_] eq 'b' ) {
4367 # Define the range of K indexes for the line:
4368 # $Kfirst = index of first token on line
4369 # $Klast_out = index of last token on line
4370 my ( $Kfirst, $Klast );
4372 $Kfirst = $K_array[0];
4373 $Klast = $K_array[-1];
4374 $Klast_out = $Klast;
4377 # It is only safe to trim the actual line text if the input
4378 # line had a terminal blank token. Otherwise, we may be
4380 if ( $line_of_tokens->{_ended_in_blank_token} ) {
4381 $line_of_tokens->{_line_text} =~ s/\s+$//;
4383 $line_of_tokens->{_rK_range} = [ $Kfirst, $Klast ];
4387 # There shouldn't be any nodes beyond the last one unless we start
4388 # allowing 'link_after' calls
4389 if ( defined($inext) ) {
4391 Fault("unexpected tokens at end of file when reconstructing lines");
4399 my $rlines = $self->{rlines};
4400 foreach my $line ( @{$rlines} ) {
4401 my $input_line = $line->{_line_text};
4402 $self->write_unindented_line($input_line);
4407 sub finish_formatting {
4409 my ( $self, $severe_error ) = @_;
4411 # The file has been tokenized and is ready to be formatted.
4412 # All of the relevant data is stored in $self, ready to go.
4414 # output file verbatim if severe error or no formatting requested
4415 if ( $severe_error || $rOpts->{notidy} ) {
4416 $self->dump_verbatim();
4421 # Make a pass through the lines, looking at lines of CODE and identifying
4422 # special processing needs, such format skipping sections marked by
4424 $self->scan_comments();
4426 # Find nested pairs of container tokens for any welding. This information
4427 # is also needed for adding semicolons, so it is split apart from the
4429 $self->find_nested_pairs();
4431 # Make sure everything looks good
4432 $self->check_line_hashes();
4434 # Future: Place to Begin future Iteration Loop
4435 # foreach my $it_count(1..$maxit) {
4437 # Future: We must reset some things after the first iteration.
4439 # - resetting levels if there was any welding
4440 # - resetting any phantom semicolons
4441 # - dealing with any line numbering issues so we can relate final lines
4442 # line numbers with input line numbers.
4444 # If ($it_count>1) {
4445 # Copy {level_raw} to [_LEVEL_] if ($it_count>1)
4449 # Make a pass through all tokens, adding or deleting any whitespace as
4450 # required. Also make any other changes, such as adding semicolons.
4451 # All token changes must be made here so that the token data structure
4452 # remains fixed for the rest of this iteration.
4453 $self->respace_tokens();
4455 # Implement any welding needed for the -wn or -cb options
4456 $self->weld_containers();
4458 # Finishes formatting and write the result to the line sink.
4459 # Eventually this call should just change the 'rlines' data according to the
4460 # new line breaks and then return so that we can do an internal iteration
4461 # before continuing with the next stages of formatting.
4462 $self->break_lines();
4464 ############################################################
4465 # A possible future decomposition of 'break_lines()' follows.
4467 # - allow perltidy to do an internal iteration which eliminates
4468 # many unnecessary steps, such as re-parsing and vertical alignment.
4469 # This will allow iterations to be automatic.
4470 # - consolidate all length calculations to allow utf8 alignment
4471 ############################################################
4473 # Future: Check for convergence of beginning tokens on CODE lines
4475 # Future: End of Iteration Loop
4477 # Future: add_padding($rargs);
4479 # Future: add_closing_side_comments($rargs);
4481 # Future: vertical_alignment($rargs);
4483 # Future: output results
4485 # A final routine to tie up any loose ends
4490 sub create_one_line_block {
4491 ( $index_start_one_line_block, $semicolons_before_block_self_destruct ) =
4496 sub destroy_one_line_block {
4497 $index_start_one_line_block = UNDEFINED_INDEX;
4498 $semicolons_before_block_self_destruct = 0;
4502 sub leading_spaces_to_go {
4504 # return the number of indentation spaces for a token in the output stream;
4505 # these were previously stored by 'set_leading_whitespace'.
4508 if ( $ii < 0 ) { $ii = 0 }
4509 return get_spaces( $leading_spaces_to_go[$ii] );
4515 # return the number of leading spaces associated with an indentation
4516 # variable $indentation is either a constant number of spaces or an object
4517 # with a get_spaces method.
4518 my $indentation = shift;
4519 return ref($indentation) ? $indentation->get_spaces() : $indentation;
4522 sub get_recoverable_spaces {
4524 # return the number of spaces (+ means shift right, - means shift left)
4525 # that we would like to shift a group of lines with the same indentation
4526 # to get them to line up with their opening parens
4527 my $indentation = shift;
4528 return ref($indentation) ? $indentation->get_recoverable_spaces() : 0;
4531 sub get_available_spaces_to_go {
4534 my $item = $leading_spaces_to_go[$ii];
4536 # return the number of available leading spaces associated with an
4537 # indentation variable. $indentation is either a constant number of
4538 # spaces or an object with a get_available_spaces method.
4539 return ref($item) ? $item->get_available_spaces() : 0;
4542 sub new_lp_indentation_item {
4544 # this is an interface to the IndentationItem class
4545 my ( $spaces, $level, $ci_level, $available_spaces, $align_paren ) = @_;
4547 # A negative level implies not to store the item in the item_list
4549 if ( $level >= 0 ) { $index = ++$max_gnu_item_index; }
4551 my $item = Perl::Tidy::IndentationItem->new(
4553 $ci_level, $available_spaces,
4554 $index, $gnu_sequence_number,
4555 $align_paren, $max_gnu_stack_index,
4556 $line_start_index_to_go,
4559 if ( $level >= 0 ) {
4560 $gnu_item_list[$max_gnu_item_index] = $item;
4566 sub set_leading_whitespace {
4568 # This routine defines leading whitespace
4569 # given: the level and continuation_level of a token,
4570 # define: space count of leading string which would apply if it
4571 # were the first token of a new line.
4573 my ( $level_abs, $ci_level, $in_continued_quote ) = @_;
4575 # Adjust levels if necessary to recycle whitespace:
4576 # given $level_abs, the absolute level
4577 # define $level, a possibly reduced level for whitespace
4578 my $level = $level_abs;
4579 if ( $rOpts_whitespace_cycle && $rOpts_whitespace_cycle > 0 ) {
4580 if ( $level_abs < $whitespace_last_level ) {
4581 pop(@whitespace_level_stack);
4583 if ( !@whitespace_level_stack ) {
4584 push @whitespace_level_stack, $level_abs;
4586 elsif ( $level_abs > $whitespace_last_level ) {
4587 $level = $whitespace_level_stack[-1] +
4588 ( $level_abs - $whitespace_last_level );
4591 # 1 Try to break at a block brace
4593 $level > $rOpts_whitespace_cycle
4594 && $last_nonblank_type eq '{'
4595 && $last_nonblank_token eq '{'
4598 # 2 Then either a brace or bracket
4599 || ( $level > $rOpts_whitespace_cycle + 1
4600 && $last_nonblank_token =~ /^[\{\[]$/ )
4602 # 3 Then a paren too
4603 || $level > $rOpts_whitespace_cycle + 2
4608 push @whitespace_level_stack, $level;
4610 $level = $whitespace_level_stack[-1];
4612 $whitespace_last_level = $level_abs;
4614 # modify for -bli, which adds one continuation indentation for
4616 if ( $rOpts_brace_left_and_indent
4617 && $max_index_to_go == 0
4618 && $block_type_to_go[$max_index_to_go] =~ /$bli_pattern/o )
4623 # patch to avoid trouble when input file has negative indentation.
4624 # other logic should catch this error.
4625 if ( $level < 0 ) { $level = 0 }
4627 #-------------------------------------------
4628 # handle the standard indentation scheme
4629 #-------------------------------------------
4630 unless ($rOpts_line_up_parentheses) {
4632 $ci_level * $rOpts_continuation_indentation +
4633 $level * $rOpts_indent_columns;
4635 ( $ci_level == 0 ) ? 0 : $rOpts_continuation_indentation;
4637 if ($in_continued_quote) {
4641 $leading_spaces_to_go[$max_index_to_go] = $space_count;
4642 $reduced_spaces_to_go[$max_index_to_go] = $space_count - $ci_spaces;
4646 #-------------------------------------------------------------
4647 # handle case of -lp indentation..
4648 #-------------------------------------------------------------
4650 # The continued_quote flag means that this is the first token of a
4651 # line, and it is the continuation of some kind of multi-line quote
4652 # or pattern. It requires special treatment because it must have no
4653 # added leading whitespace. So we create a special indentation item
4654 # which is not in the stack.
4655 if ($in_continued_quote) {
4656 my $space_count = 0;
4657 my $available_space = 0;
4658 $level = -1; # flag to prevent storing in item_list
4659 $leading_spaces_to_go[$max_index_to_go] =
4660 $reduced_spaces_to_go[$max_index_to_go] =
4661 new_lp_indentation_item( $space_count, $level, $ci_level,
4662 $available_space, 0 );
4666 # get the top state from the stack
4667 my $space_count = $gnu_stack[$max_gnu_stack_index]->get_spaces();
4668 my $current_level = $gnu_stack[$max_gnu_stack_index]->get_level();
4669 my $current_ci_level = $gnu_stack[$max_gnu_stack_index]->get_ci_level();
4671 my $type = $types_to_go[$max_index_to_go];
4672 my $token = $tokens_to_go[$max_index_to_go];
4673 my $total_depth = $nesting_depth_to_go[$max_index_to_go];
4675 if ( $type eq '{' || $type eq '(' ) {
4677 $gnu_comma_count{ $total_depth + 1 } = 0;
4678 $gnu_arrow_count{ $total_depth + 1 } = 0;
4680 # If we come to an opening token after an '=' token of some type,
4681 # see if it would be helpful to 'break' after the '=' to save space
4682 my $last_equals = $last_gnu_equals{$total_depth};
4683 if ( $last_equals && $last_equals > $line_start_index_to_go ) {
4685 # find the position if we break at the '='
4686 my $i_test = $last_equals;
4687 if ( $types_to_go[ $i_test + 1 ] eq 'b' ) { $i_test++ }
4690 ##my $too_close = ($i_test==$max_index_to_go-1);
4692 my $test_position = total_line_length( $i_test, $max_index_to_go );
4693 my $mll = maximum_line_length($i_test);
4697 # the equals is not just before an open paren (testing)
4700 # if we are beyond the midpoint
4701 $gnu_position_predictor > $mll - $rOpts_maximum_line_length / 2
4703 # or we are beyond the 1/4 point and there was an old
4704 # break at the equals
4706 $gnu_position_predictor >
4707 $mll - $rOpts_maximum_line_length * 3 / 4
4709 $old_breakpoint_to_go[$last_equals]
4710 || ( $last_equals > 0
4711 && $old_breakpoint_to_go[ $last_equals - 1 ] )
4712 || ( $last_equals > 1
4713 && $types_to_go[ $last_equals - 1 ] eq 'b'
4714 && $old_breakpoint_to_go[ $last_equals - 2 ] )
4720 # then make the switch -- note that we do not set a real
4721 # breakpoint here because we may not really need one; sub
4722 # scan_list will do that if necessary
4723 $line_start_index_to_go = $i_test + 1;
4724 $gnu_position_predictor = $test_position;
4730 maximum_line_length_for_level($level) - $rOpts_maximum_line_length / 2;
4732 # Check for decreasing depth ..
4733 # Note that one token may have both decreasing and then increasing
4734 # depth. For example, (level, ci) can go from (1,1) to (2,0). So,
4735 # in this example we would first go back to (1,0) then up to (2,0)
4737 if ( $level < $current_level || $ci_level < $current_ci_level ) {
4739 # loop to find the first entry at or completely below this level
4740 my ( $lev, $ci_lev );
4742 if ($max_gnu_stack_index) {
4744 # save index of token which closes this level
4745 $gnu_stack[$max_gnu_stack_index]->set_closed($max_index_to_go);
4747 # Undo any extra indentation if we saw no commas
4748 my $available_spaces =
4749 $gnu_stack[$max_gnu_stack_index]->get_available_spaces();
4751 my $comma_count = 0;
4752 my $arrow_count = 0;
4753 if ( $type eq '}' || $type eq ')' ) {
4754 $comma_count = $gnu_comma_count{$total_depth};
4755 $arrow_count = $gnu_arrow_count{$total_depth};
4756 $comma_count = 0 unless $comma_count;
4757 $arrow_count = 0 unless $arrow_count;
4759 $gnu_stack[$max_gnu_stack_index]->set_comma_count($comma_count);
4760 $gnu_stack[$max_gnu_stack_index]->set_arrow_count($arrow_count);
4762 if ( $available_spaces > 0 ) {
4764 if ( $comma_count <= 0 || $arrow_count > 0 ) {
4766 my $i = $gnu_stack[$max_gnu_stack_index]->get_index();
4768 $gnu_stack[$max_gnu_stack_index]
4769 ->get_sequence_number();
4771 # Be sure this item was created in this batch. This
4772 # should be true because we delete any available
4773 # space from open items at the end of each batch.
4774 if ( $gnu_sequence_number != $seqno
4775 || $i > $max_gnu_item_index )
4778 "Program bug with -lp. seqno=$seqno should be $gnu_sequence_number and i=$i should be less than max=$max_gnu_item_index\n"
4780 report_definite_bug();
4784 if ( $arrow_count == 0 ) {
4786 ->permanently_decrease_available_spaces(
4791 ->tentatively_decrease_available_spaces(
4794 foreach my $j ( $i + 1 .. $max_gnu_item_index ) {
4796 ->decrease_SPACES($available_spaces);
4803 --$max_gnu_stack_index;
4804 $lev = $gnu_stack[$max_gnu_stack_index]->get_level();
4805 $ci_lev = $gnu_stack[$max_gnu_stack_index]->get_ci_level();
4807 # stop when we reach a level at or below the current level
4808 if ( $lev <= $level && $ci_lev <= $ci_level ) {
4810 $gnu_stack[$max_gnu_stack_index]->get_spaces();
4811 $current_level = $lev;
4812 $current_ci_level = $ci_lev;
4817 # reached bottom of stack .. should never happen because
4818 # only negative levels can get here, and $level was forced
4819 # to be positive above.
4822 "program bug with -lp: stack_error. level=$level; lev=$lev; ci_level=$ci_level; ci_lev=$ci_lev; rerun with -nlp\n"
4824 report_definite_bug();
4830 # handle increasing depth
4831 if ( $level > $current_level || $ci_level > $current_ci_level ) {
4833 # Compute the standard incremental whitespace. This will be
4834 # the minimum incremental whitespace that will be used. This
4835 # choice results in a smooth transition between the gnu-style
4836 # and the standard style.
4837 my $standard_increment =
4838 ( $level - $current_level ) * $rOpts_indent_columns +
4839 ( $ci_level - $current_ci_level ) * $rOpts_continuation_indentation;
4841 # Now we have to define how much extra incremental space
4842 # ("$available_space") we want. This extra space will be
4843 # reduced as necessary when long lines are encountered or when
4844 # it becomes clear that we do not have a good list.
4845 my $available_space = 0;
4846 my $align_paren = 0;
4849 # initialization on empty stack..
4850 if ( $max_gnu_stack_index == 0 ) {
4851 $space_count = $level * $rOpts_indent_columns;
4854 # if this is a BLOCK, add the standard increment
4855 elsif ($last_nonblank_block_type) {
4856 $space_count += $standard_increment;
4859 # if last nonblank token was not structural indentation,
4860 # just use standard increment
4861 elsif ( $last_nonblank_type ne '{' ) {
4862 $space_count += $standard_increment;
4865 # otherwise use the space to the first non-blank level change token
4868 $space_count = $gnu_position_predictor;
4870 my $min_gnu_indentation =
4871 $gnu_stack[$max_gnu_stack_index]->get_spaces();
4873 $available_space = $space_count - $min_gnu_indentation;
4874 if ( $available_space >= $standard_increment ) {
4875 $min_gnu_indentation += $standard_increment;
4877 elsif ( $available_space > 1 ) {
4878 $min_gnu_indentation += $available_space + 1;
4880 elsif ( $last_nonblank_token =~ /^[\{\[\(]$/ ) {
4881 if ( ( $tightness{$last_nonblank_token} < 2 ) ) {
4882 $min_gnu_indentation += 2;
4885 $min_gnu_indentation += 1;
4889 $min_gnu_indentation += $standard_increment;
4891 $available_space = $space_count - $min_gnu_indentation;
4893 if ( $available_space < 0 ) {
4894 $space_count = $min_gnu_indentation;
4895 $available_space = 0;
4900 # update state, but not on a blank token
4901 if ( $types_to_go[$max_index_to_go] ne 'b' ) {
4903 $gnu_stack[$max_gnu_stack_index]->set_have_child(1);
4905 ++$max_gnu_stack_index;
4906 $gnu_stack[$max_gnu_stack_index] =
4907 new_lp_indentation_item( $space_count, $level, $ci_level,
4908 $available_space, $align_paren );
4910 # If the opening paren is beyond the half-line length, then
4911 # we will use the minimum (standard) indentation. This will
4912 # help avoid problems associated with running out of space
4913 # near the end of a line. As a result, in deeply nested
4914 # lists, there will be some indentations which are limited
4915 # to this minimum standard indentation. But the most deeply
4916 # nested container will still probably be able to shift its
4917 # parameters to the right for proper alignment, so in most
4918 # cases this will not be noticeable.
4919 if ( $available_space > 0 && $space_count > $halfway ) {
4920 $gnu_stack[$max_gnu_stack_index]
4921 ->tentatively_decrease_available_spaces($available_space);
4926 # Count commas and look for non-list characters. Once we see a
4927 # non-list character, we give up and don't look for any more commas.
4928 if ( $type eq '=>' ) {
4929 $gnu_arrow_count{$total_depth}++;
4931 # tentatively treating '=>' like '=' for estimating breaks
4932 # TODO: this could use some experimentation
4933 $last_gnu_equals{$total_depth} = $max_index_to_go;
4936 elsif ( $type eq ',' ) {
4937 $gnu_comma_count{$total_depth}++;
4940 elsif ( $is_assignment{$type} ) {
4941 $last_gnu_equals{$total_depth} = $max_index_to_go;
4944 # this token might start a new line
4945 # if this is a non-blank..
4946 if ( $type ne 'b' ) {
4951 # this is the first nonblank token of the line
4952 $max_index_to_go == 1 && $types_to_go[0] eq 'b'
4954 # or previous character was one of these:
4955 || $last_nonblank_type_to_go =~ /^([\:\?\,f])$/
4957 # or previous character was opening and this does not close it
4958 || ( $last_nonblank_type_to_go eq '{' && $type ne '}' )
4959 || ( $last_nonblank_type_to_go eq '(' and $type ne ')' )
4961 # or this token is one of these:
4962 || $type =~ /^([\.]|\|\||\&\&)$/
4964 # or this is a closing structure
4965 || ( $last_nonblank_type_to_go eq '}'
4966 && $last_nonblank_token_to_go eq $last_nonblank_type_to_go )
4968 # or previous token was keyword 'return'
4969 || ( $last_nonblank_type_to_go eq 'k'
4970 && ( $last_nonblank_token_to_go eq 'return' && $type ne '{' ) )
4972 # or starting a new line at certain keywords is fine
4974 && $is_if_unless_and_or_last_next_redo_return{$token} )
4976 # or this is after an assignment after a closing structure
4978 $is_assignment{$last_nonblank_type_to_go}
4980 $last_last_nonblank_type_to_go =~ /^[\}\)\]]$/
4982 # and it is significantly to the right
4983 || $gnu_position_predictor > $halfway
4988 check_for_long_gnu_style_lines();
4989 $line_start_index_to_go = $max_index_to_go;
4991 # back up 1 token if we want to break before that type
4992 # otherwise, we may strand tokens like '?' or ':' on a line
4993 if ( $line_start_index_to_go > 0 ) {
4994 if ( $last_nonblank_type_to_go eq 'k' ) {
4996 if ( $want_break_before{$last_nonblank_token_to_go} ) {
4997 $line_start_index_to_go--;
5000 elsif ( $want_break_before{$last_nonblank_type_to_go} ) {
5001 $line_start_index_to_go--;
5007 # remember the predicted position of this token on the output line
5008 if ( $max_index_to_go > $line_start_index_to_go ) {
5009 $gnu_position_predictor =
5010 total_line_length( $line_start_index_to_go, $max_index_to_go );
5013 $gnu_position_predictor =
5014 $space_count + $token_lengths_to_go[$max_index_to_go];
5017 # store the indentation object for this token
5018 # this allows us to manipulate the leading whitespace
5019 # (in case we have to reduce indentation to fit a line) without
5020 # having to change any token values
5021 $leading_spaces_to_go[$max_index_to_go] = $gnu_stack[$max_gnu_stack_index];
5022 $reduced_spaces_to_go[$max_index_to_go] =
5023 ( $max_gnu_stack_index > 0 && $ci_level )
5024 ? $gnu_stack[ $max_gnu_stack_index - 1 ]
5025 : $gnu_stack[$max_gnu_stack_index];
5029 sub check_for_long_gnu_style_lines {
5031 # look at the current estimated maximum line length, and
5032 # remove some whitespace if it exceeds the desired maximum
5034 # this is only for the '-lp' style
5035 return unless ($rOpts_line_up_parentheses);
5037 # nothing can be done if no stack items defined for this line
5038 return if ( $max_gnu_item_index == UNDEFINED_INDEX );
5040 # see if we have exceeded the maximum desired line length
5041 # keep 2 extra free because they are needed in some cases
5042 # (result of trial-and-error testing)
5044 $gnu_position_predictor - maximum_line_length($max_index_to_go) + 2;
5046 return if ( $spaces_needed <= 0 );
5048 # We are over the limit, so try to remove a requested number of
5049 # spaces from leading whitespace. We are only allowed to remove
5050 # from whitespace items created on this batch, since others have
5051 # already been used and cannot be undone.
5052 my @candidates = ();
5055 # loop over all whitespace items created for the current batch
5056 for ( $i = 0 ; $i <= $max_gnu_item_index ; $i++ ) {
5057 my $item = $gnu_item_list[$i];
5059 # item must still be open to be a candidate (otherwise it
5060 # cannot influence the current token)
5061 next if ( $item->get_closed() >= 0 );
5063 my $available_spaces = $item->get_available_spaces();
5065 if ( $available_spaces > 0 ) {
5066 push( @candidates, [ $i, $available_spaces ] );
5070 return unless (@candidates);
5072 # sort by available whitespace so that we can remove whitespace
5073 # from the maximum available first
5074 @candidates = sort { $b->[1] <=> $a->[1] } @candidates;
5076 # keep removing whitespace until we are done or have no more
5077 foreach my $candidate (@candidates) {
5078 my ( $i, $available_spaces ) = @{$candidate};
5079 my $deleted_spaces =
5080 ( $available_spaces > $spaces_needed )
5082 : $available_spaces;
5084 # remove the incremental space from this item
5085 $gnu_item_list[$i]->decrease_available_spaces($deleted_spaces);
5089 # update the leading whitespace of this item and all items
5090 # that came after it
5091 for ( ; $i <= $max_gnu_item_index ; $i++ ) {
5093 my $old_spaces = $gnu_item_list[$i]->get_spaces();
5094 if ( $old_spaces >= $deleted_spaces ) {
5095 $gnu_item_list[$i]->decrease_SPACES($deleted_spaces);
5098 # shouldn't happen except for code bug:
5100 my $level = $gnu_item_list[$i_debug]->get_level();
5101 my $ci_level = $gnu_item_list[$i_debug]->get_ci_level();
5102 my $old_level = $gnu_item_list[$i]->get_level();
5103 my $old_ci_level = $gnu_item_list[$i]->get_ci_level();
5105 "program bug with -lp: want to delete $deleted_spaces from item $i, but old=$old_spaces deleted: lev=$level ci=$ci_level deleted: level=$old_level ci=$ci_level\n"
5107 report_definite_bug();
5110 $gnu_position_predictor -= $deleted_spaces;
5111 $spaces_needed -= $deleted_spaces;
5112 last unless ( $spaces_needed > 0 );
5117 sub finish_lp_batch {
5119 # This routine is called once after each output stream batch is
5120 # finished to undo indentation for all incomplete -lp
5121 # indentation levels. It is too risky to leave a level open,
5122 # because then we can't backtrack in case of a long line to follow.
5123 # This means that comments and blank lines will disrupt this
5124 # indentation style. But the vertical aligner may be able to
5125 # get the space back if there are side comments.
5127 # this is only for the 'lp' style
5128 return unless ($rOpts_line_up_parentheses);
5130 # nothing can be done if no stack items defined for this line
5131 return if ( $max_gnu_item_index == UNDEFINED_INDEX );
5133 # loop over all whitespace items created for the current batch
5134 foreach my $i ( 0 .. $max_gnu_item_index ) {
5135 my $item = $gnu_item_list[$i];
5137 # only look for open items
5138 next if ( $item->get_closed() >= 0 );
5140 # Tentatively remove all of the available space
5141 # (The vertical aligner will try to get it back later)
5142 my $available_spaces = $item->get_available_spaces();
5143 if ( $available_spaces > 0 ) {
5145 # delete incremental space for this item
5147 ->tentatively_decrease_available_spaces($available_spaces);
5149 # Reduce the total indentation space of any nodes that follow
5150 # Note that any such nodes must necessarily be dependents
5152 foreach ( $i + 1 .. $max_gnu_item_index ) {
5153 $gnu_item_list[$_]->decrease_SPACES($available_spaces);
5160 sub reduce_lp_indentation {
5162 # reduce the leading whitespace at token $i if possible by $spaces_needed
5163 # (a large value of $spaces_needed will remove all excess space)
5164 # NOTE: to be called from scan_list only for a sequence of tokens
5165 # contained between opening and closing parens/braces/brackets
5167 my ( $i, $spaces_wanted ) = @_;
5168 my $deleted_spaces = 0;
5170 my $item = $leading_spaces_to_go[$i];
5171 my $available_spaces = $item->get_available_spaces();
5174 $available_spaces > 0
5175 && ( ( $spaces_wanted <= $available_spaces )
5176 || !$item->get_have_child() )
5180 # we'll remove these spaces, but mark them as recoverable
5182 $item->tentatively_decrease_available_spaces($spaces_wanted);
5185 return $deleted_spaces;
5188 sub token_sequence_length {
5190 # return length of tokens ($ibeg .. $iend) including $ibeg & $iend
5191 # returns 0 if $ibeg > $iend (shouldn't happen)
5192 my ( $ibeg, $iend ) = @_;
5193 return 0 if ( $iend < 0 || $ibeg > $iend );
5194 return $summed_lengths_to_go[ $iend + 1 ] if ( $ibeg < 0 );
5195 return $summed_lengths_to_go[ $iend + 1 ] - $summed_lengths_to_go[$ibeg];
5198 sub total_line_length {
5200 # return length of a line of tokens ($ibeg .. $iend)
5201 my ( $ibeg, $iend ) = @_;
5202 return leading_spaces_to_go($ibeg) + token_sequence_length( $ibeg, $iend );
5205 sub maximum_line_length_for_level {
5207 # return maximum line length for line starting with a given level
5208 my $maximum_line_length = $rOpts_maximum_line_length;
5210 # Modify if -vmll option is selected
5211 if ($rOpts_variable_maximum_line_length) {
5213 if ( $level < 0 ) { $level = 0 }
5214 $maximum_line_length += $level * $rOpts_indent_columns;
5216 return $maximum_line_length;
5219 sub maximum_line_length {
5221 # return maximum line length for line starting with the token at given index
5223 return maximum_line_length_for_level( $levels_to_go[$ii] );
5226 sub excess_line_length {
5228 # return number of characters by which a line of tokens ($ibeg..$iend)
5229 # exceeds the allowable line length.
5230 my ( $ibeg, $iend, $ignore_left_weld, $ignore_right_weld ) = @_;
5232 # Include left and right weld lengths unless requested not to
5233 my $wl = $ignore_left_weld ? 0 : weld_len_left_to_go($iend);
5234 my $wr = $ignore_right_weld ? 0 : weld_len_right_to_go($iend);
5236 return total_line_length( $ibeg, $iend ) + $wl + $wr -
5237 maximum_line_length($ibeg);
5242 # flush buffer and write any informative messages
5246 $file_writer_object->decrement_output_line_number()
5247 ; # fix up line number since it was incremented
5248 we_are_at_the_last_line();
5249 if ( $added_semicolon_count > 0 ) {
5250 my $first = ( $added_semicolon_count > 1 ) ? "First" : "";
5252 ( $added_semicolon_count > 1 ) ? "semicolons were" : "semicolon was";
5253 write_logfile_entry("$added_semicolon_count $what added:\n");
5254 write_logfile_entry(
5255 " $first at input line $first_added_semicolon_at\n");
5257 if ( $added_semicolon_count > 1 ) {
5258 write_logfile_entry(
5259 " Last at input line $last_added_semicolon_at\n");
5261 write_logfile_entry(" (Use -nasc to prevent semicolon addition)\n");
5262 write_logfile_entry("\n");
5265 if ( $deleted_semicolon_count > 0 ) {
5266 my $first = ( $deleted_semicolon_count > 1 ) ? "First" : "";
5268 ( $deleted_semicolon_count > 1 )
5271 write_logfile_entry(
5272 "$deleted_semicolon_count unnecessary $what deleted:\n");
5273 write_logfile_entry(
5274 " $first at input line $first_deleted_semicolon_at\n");
5276 if ( $deleted_semicolon_count > 1 ) {
5277 write_logfile_entry(
5278 " Last at input line $last_deleted_semicolon_at\n");
5280 write_logfile_entry(" (Use -ndsc to prevent semicolon deletion)\n");
5281 write_logfile_entry("\n");
5284 if ( $embedded_tab_count > 0 ) {
5285 my $first = ( $embedded_tab_count > 1 ) ? "First" : "";
5287 ( $embedded_tab_count > 1 )
5288 ? "quotes or patterns"
5289 : "quote or pattern";
5290 write_logfile_entry("$embedded_tab_count $what had embedded tabs:\n");
5291 write_logfile_entry(
5292 "This means the display of this script could vary with device or software\n"
5294 write_logfile_entry(" $first at input line $first_embedded_tab_at\n");
5296 if ( $embedded_tab_count > 1 ) {
5297 write_logfile_entry(
5298 " Last at input line $last_embedded_tab_at\n");
5300 write_logfile_entry("\n");
5303 if ($first_tabbing_disagreement) {
5304 write_logfile_entry(
5305 "First indentation disagreement seen at input line $first_tabbing_disagreement\n"
5309 if ($in_tabbing_disagreement) {
5310 write_logfile_entry(
5311 "Ending with indentation disagreement which started at input line $in_tabbing_disagreement\n"
5316 if ($last_tabbing_disagreement) {
5318 write_logfile_entry(
5319 "Last indentation disagreement seen at input line $last_tabbing_disagreement\n"
5323 write_logfile_entry("No indentation disagreement seen\n");
5326 if ($first_tabbing_disagreement) {
5327 write_logfile_entry(
5328 "Note: Indentation disagreement detection is not accurate for outdenting and -lp.\n"
5331 write_logfile_entry("\n");
5333 $vertical_aligner_object->report_anything_unusual();
5335 $file_writer_object->report_line_length_errors();
5342 # This routine is called to check the Opts hash after it is defined
5345 initialize_whitespace_hashes();
5346 initialize_bond_strength_hashes();
5348 make_static_block_comment_pattern();
5349 make_static_side_comment_pattern();
5350 make_closing_side_comment_prefix();
5351 make_closing_side_comment_list_pattern();
5352 $format_skipping_pattern_begin =
5353 make_format_skipping_pattern( 'format-skipping-begin', '#<<<' );
5354 $format_skipping_pattern_end =
5355 make_format_skipping_pattern( 'format-skipping-end', '#>>>' );
5357 # If closing side comments ARE selected, then we can safely
5358 # delete old closing side comments unless closing side comment
5359 # warnings are requested. This is a good idea because it will
5360 # eliminate any old csc's which fall below the line count threshold.
5361 # We cannot do this if warnings are turned on, though, because we
5362 # might delete some text which has been added. So that must
5363 # be handled when comments are created.
5364 if ( $rOpts->{'closing-side-comments'} ) {
5365 if ( !$rOpts->{'closing-side-comment-warnings'} ) {
5366 $rOpts->{'delete-closing-side-comments'} = 1;
5370 # If closing side comments ARE NOT selected, but warnings ARE
5371 # selected and we ARE DELETING csc's, then we will pretend to be
5372 # adding with a huge interval. This will force the comments to be
5373 # generated for comparison with the old comments, but not added.
5374 elsif ( $rOpts->{'closing-side-comment-warnings'} ) {
5375 if ( $rOpts->{'delete-closing-side-comments'} ) {
5376 $rOpts->{'delete-closing-side-comments'} = 0;
5377 $rOpts->{'closing-side-comments'} = 1;
5378 $rOpts->{'closing-side-comment-interval'} = 100000000;
5383 make_block_brace_vertical_tightness_pattern();
5384 make_blank_line_pattern();
5385 make_keyword_group_list_pattern();
5387 prepare_cuddled_block_types();
5388 if ( $rOpts->{'dump-cuddled-block-list'} ) {
5389 dump_cuddled_block_list(*STDOUT);
5393 if ( $rOpts->{'line-up-parentheses'} ) {
5395 if ( $rOpts->{'indent-only'}
5396 || !$rOpts->{'add-newlines'}
5397 || !$rOpts->{'delete-old-newlines'} )
5400 -----------------------------------------------------------------------
5401 Conflict: -lp conflicts with -io, -fnl, -nanl, or -ndnl; ignoring -lp
5403 The -lp indentation logic requires that perltidy be able to coordinate
5404 arbitrarily large numbers of line breakpoints. This isn't possible
5405 with these flags. Sometimes an acceptable workaround is to use -wocb=3
5406 -----------------------------------------------------------------------
5408 $rOpts->{'line-up-parentheses'} = 0;
5412 # At present, tabs are not compatible with the line-up-parentheses style
5413 # (it would be possible to entab the total leading whitespace
5414 # just prior to writing the line, if desired).
5415 if ( $rOpts->{'line-up-parentheses'} && $rOpts->{'tabs'} ) {
5417 Conflict: -t (tabs) cannot be used with the -lp option; ignoring -t; see -et.
5419 $rOpts->{'tabs'} = 0;
5422 # Likewise, tabs are not compatible with outdenting..
5423 if ( $rOpts->{'outdent-keywords'} && $rOpts->{'tabs'} ) {
5425 Conflict: -t (tabs) cannot be used with the -okw options; ignoring -t; see -et.
5427 $rOpts->{'tabs'} = 0;
5430 if ( $rOpts->{'outdent-labels'} && $rOpts->{'tabs'} ) {
5432 Conflict: -t (tabs) cannot be used with the -ola option; ignoring -t; see -et.
5434 $rOpts->{'tabs'} = 0;
5437 if ( !$rOpts->{'space-for-semicolon'} ) {
5438 $want_left_space{'f'} = -1;
5441 if ( $rOpts->{'space-terminal-semicolon'} ) {
5442 $want_left_space{';'} = 1;
5445 # implement outdenting preferences for keywords
5446 %outdent_keyword = ();
5447 my @okw = split_words( $rOpts->{'outdent-keyword-okl'} );
5449 @okw = qw(next last redo goto return); # defaults
5452 # FUTURE: if not a keyword, assume that it is an identifier
5454 if ( $Perl::Tidy::Tokenizer::is_keyword{$_} ) {
5455 $outdent_keyword{$_} = 1;
5458 Warn("ignoring '$_' in -okwl list; not a perl keyword");
5462 # implement user whitespace preferences
5463 if ( my @q = split_words( $rOpts->{'want-left-space'} ) ) {
5464 @want_left_space{@q} = (1) x scalar(@q);
5467 if ( my @q = split_words( $rOpts->{'want-right-space'} ) ) {
5468 @want_right_space{@q} = (1) x scalar(@q);
5471 if ( my @q = split_words( $rOpts->{'nowant-left-space'} ) ) {
5472 @want_left_space{@q} = (-1) x scalar(@q);
5475 if ( my @q = split_words( $rOpts->{'nowant-right-space'} ) ) {
5476 @want_right_space{@q} = (-1) x scalar(@q);
5478 if ( $rOpts->{'dump-want-left-space'} ) {
5479 dump_want_left_space(*STDOUT);
5483 if ( $rOpts->{'dump-want-right-space'} ) {
5484 dump_want_right_space(*STDOUT);
5488 # default keywords for which space is introduced before an opening paren
5489 # (at present, including them messes up vertical alignment)
5490 my @sak = qw(my local our and or err eq ne if else elsif until
5491 unless while for foreach return switch case given when catch);
5492 @space_after_keyword{@sak} = (1) x scalar(@sak);
5494 # first remove any or all of these if desired
5495 if ( my @q = split_words( $rOpts->{'nospace-after-keyword'} ) ) {
5497 # -nsak='*' selects all the above keywords
5498 if ( @q == 1 && $q[0] eq '*' ) { @q = keys(%space_after_keyword) }
5499 @space_after_keyword{@q} = (0) x scalar(@q);
5502 # then allow user to add to these defaults
5503 if ( my @q = split_words( $rOpts->{'space-after-keyword'} ) ) {
5504 @space_after_keyword{@q} = (1) x scalar(@q);
5507 # implement user break preferences
5508 my @all_operators = qw(% + - * / x != == >= <= =~ !~ < > | &
5509 = **= += *= &= <<= &&= -= /= |= >>= ||= //= .= %= ^= x=
5510 . : ? && || and or err xor
5513 my $break_after = sub {
5515 foreach my $tok (@toks) {
5516 if ( $tok eq '?' ) { $tok = ':' } # patch to coordinate ?/:
5517 my $lbs = $left_bond_strength{$tok};
5518 my $rbs = $right_bond_strength{$tok};
5519 if ( defined($lbs) && defined($rbs) && $lbs < $rbs ) {
5520 ( $right_bond_strength{$tok}, $left_bond_strength{$tok} ) =
5526 my $break_before = sub {
5528 foreach my $tok (@toks) {
5529 my $lbs = $left_bond_strength{$tok};
5530 my $rbs = $right_bond_strength{$tok};
5531 if ( defined($lbs) && defined($rbs) && $rbs < $lbs ) {
5532 ( $right_bond_strength{$tok}, $left_bond_strength{$tok} ) =
5538 $break_after->(@all_operators) if ( $rOpts->{'break-after-all-operators'} );
5539 $break_before->(@all_operators)
5540 if ( $rOpts->{'break-before-all-operators'} );
5542 $break_after->( split_words( $rOpts->{'want-break-after'} ) );
5543 $break_before->( split_words( $rOpts->{'want-break-before'} ) );
5545 # make note if breaks are before certain key types
5546 %want_break_before = ();
5547 foreach my $tok ( @all_operators, ',' ) {
5548 $want_break_before{$tok} =
5549 $left_bond_strength{$tok} < $right_bond_strength{$tok};
5552 # Coordinate ?/: breaks, which must be similar
5553 if ( !$want_break_before{':'} ) {
5554 $want_break_before{'?'} = $want_break_before{':'};
5555 $right_bond_strength{'?'} = $right_bond_strength{':'} + 0.01;
5556 $left_bond_strength{'?'} = NO_BREAK;
5559 # Define here tokens which may follow the closing brace of a do statement
5560 # on the same line, as in:
5561 # } while ( $something);
5562 my @dof = qw(until while unless if ; : );
5564 @is_do_follower{@dof} = (1) x scalar(@dof);
5566 # What tokens may follow the closing brace of an if or elsif block?
5567 # Not used. Previously used for cuddled else, but no longer needed.
5568 %is_if_brace_follower = ();
5570 # nothing can follow the closing curly of an else { } block:
5571 %is_else_brace_follower = ();
5573 # what can follow a multi-line anonymous sub definition closing curly:
5574 my @asf = qw# ; : => or and && || ~~ !~~ ) #;
5576 @is_anon_sub_brace_follower{@asf} = (1) x scalar(@asf);
5578 # what can follow a one-line anonymous sub closing curly:
5579 # one-line anonymous subs also have ']' here...
5580 # see tk3.t and PP.pm
5581 my @asf1 = qw# ; : => or and && || ) ] ~~ !~~ #;
5583 @is_anon_sub_1_brace_follower{@asf1} = (1) x scalar(@asf1);
5585 # What can follow a closing curly of a block
5586 # which is not an if/elsif/else/do/sort/map/grep/eval/sub
5587 # Testfiles: 'Toolbar.pm', 'Menubar.pm', bless.t, '3rules.pl'
5588 my @obf = qw# ; : => or and && || ) #;
5590 @is_other_brace_follower{@obf} = (1) x scalar(@obf);
5592 $right_bond_strength{'{'} = WEAK;
5593 $left_bond_strength{'{'} = VERY_STRONG;
5595 # make -l=0 equal to -l=infinite
5596 if ( !$rOpts->{'maximum-line-length'} ) {
5597 $rOpts->{'maximum-line-length'} = 1000000;
5600 # make -lbl=0 equal to -lbl=infinite
5601 if ( !$rOpts->{'long-block-line-count'} ) {
5602 $rOpts->{'long-block-line-count'} = 1000000;
5605 my $enc = $rOpts->{'character-encoding'};
5606 if ( $enc && $enc !~ /^(none|utf8)$/i ) {
5608 Unrecognized character-encoding '$enc'; expecting one of: (none, utf8)
5612 my $ole = $rOpts->{'output-line-ending'};
5621 # Patch for RT #99514, a memoization issue.
5622 # Normally, the user enters one of 'dos', 'win', etc, and we change the
5623 # value in the options parameter to be the corresponding line ending
5624 # character. But, if we are using memoization, on later passes through
5625 # here the option parameter will already have the desired ending
5626 # character rather than the keyword 'dos', 'win', etc. So
5627 # we must check to see if conversion has already been done and, if so,
5628 # bypass the conversion step.
5629 my %endings_inverted = (
5630 "\015\012" => 'dos',
5631 "\015\012" => 'win',
5636 if ( defined( $endings_inverted{$ole} ) ) {
5638 # we already have valid line ending, nothing more to do
5642 unless ( $rOpts->{'output-line-ending'} = $endings{$ole} ) {
5643 my $str = join " ", keys %endings;
5645 Unrecognized line ending '$ole'; expecting one of: $str
5648 if ( $rOpts->{'preserve-line-endings'} ) {
5649 Warn("Ignoring -ple; conflicts with -ole\n");
5650 $rOpts->{'preserve-line-endings'} = undef;
5655 # hashes used to simplify setting whitespace
5657 '{' => $rOpts->{'brace-tightness'},
5658 '}' => $rOpts->{'brace-tightness'},
5659 '(' => $rOpts->{'paren-tightness'},
5660 ')' => $rOpts->{'paren-tightness'},
5661 '[' => $rOpts->{'square-bracket-tightness'},
5662 ']' => $rOpts->{'square-bracket-tightness'},
5671 # frequently used parameters
5672 $rOpts_add_newlines = $rOpts->{'add-newlines'};
5673 $rOpts_add_whitespace = $rOpts->{'add-whitespace'};
5674 $rOpts_block_brace_tightness = $rOpts->{'block-brace-tightness'};
5675 $rOpts_block_brace_vertical_tightness =
5676 $rOpts->{'block-brace-vertical-tightness'};
5677 $rOpts_brace_left_and_indent = $rOpts->{'brace-left-and-indent'};
5678 $rOpts_comma_arrow_breakpoints = $rOpts->{'comma-arrow-breakpoints'};
5679 $rOpts_break_at_old_ternary_breakpoints =
5680 $rOpts->{'break-at-old-ternary-breakpoints'};
5681 $rOpts_break_at_old_attribute_breakpoints =
5682 $rOpts->{'break-at-old-attribute-breakpoints'};
5683 $rOpts_break_at_old_comma_breakpoints =
5684 $rOpts->{'break-at-old-comma-breakpoints'};
5685 $rOpts_break_at_old_keyword_breakpoints =
5686 $rOpts->{'break-at-old-keyword-breakpoints'};
5687 $rOpts_break_at_old_logical_breakpoints =
5688 $rOpts->{'break-at-old-logical-breakpoints'};
5689 $rOpts_break_at_old_method_breakpoints =
5690 $rOpts->{'break-at-old-method-breakpoints'};
5691 $rOpts_closing_side_comment_else_flag =
5692 $rOpts->{'closing-side-comment-else-flag'};
5693 $rOpts_closing_side_comment_maximum_text =
5694 $rOpts->{'closing-side-comment-maximum-text'};
5695 $rOpts_continuation_indentation = $rOpts->{'continuation-indentation'};
5696 $rOpts_delete_old_whitespace = $rOpts->{'delete-old-whitespace'};
5697 $rOpts_fuzzy_line_length = $rOpts->{'fuzzy-line-length'};
5698 $rOpts_indent_columns = $rOpts->{'indent-columns'};
5699 $rOpts_line_up_parentheses = $rOpts->{'line-up-parentheses'};
5700 $rOpts_maximum_fields_per_table = $rOpts->{'maximum-fields-per-table'};
5701 $rOpts_maximum_line_length = $rOpts->{'maximum-line-length'};
5702 $rOpts_whitespace_cycle = $rOpts->{'whitespace-cycle'};
5703 $rOpts_one_line_block_semicolons = $rOpts->{'one-line-block-semicolons'};
5705 $rOpts_variable_maximum_line_length =
5706 $rOpts->{'variable-maximum-line-length'};
5707 $rOpts_short_concatenation_item_length =
5708 $rOpts->{'short-concatenation-item-length'};
5710 $rOpts_keep_old_blank_lines = $rOpts->{'keep-old-blank-lines'};
5711 $rOpts_ignore_old_breakpoints = $rOpts->{'ignore-old-breakpoints'};
5712 $rOpts_format_skipping = $rOpts->{'format-skipping'};
5713 $rOpts_space_function_paren = $rOpts->{'space-function-paren'};
5714 $rOpts_space_keyword_paren = $rOpts->{'space-keyword-paren'};
5715 $rOpts_keep_interior_semicolons = $rOpts->{'keep-interior-semicolons'};
5716 $rOpts_ignore_side_comment_lengths =
5717 $rOpts->{'ignore-side-comment-lengths'};
5719 # Note that both opening and closing tokens can access the opening
5720 # and closing flags of their container types.
5721 %opening_vertical_tightness = (
5722 '(' => $rOpts->{'paren-vertical-tightness'},
5723 '{' => $rOpts->{'brace-vertical-tightness'},
5724 '[' => $rOpts->{'square-bracket-vertical-tightness'},
5725 ')' => $rOpts->{'paren-vertical-tightness'},
5726 '}' => $rOpts->{'brace-vertical-tightness'},
5727 ']' => $rOpts->{'square-bracket-vertical-tightness'},
5730 %closing_vertical_tightness = (
5731 '(' => $rOpts->{'paren-vertical-tightness-closing'},
5732 '{' => $rOpts->{'brace-vertical-tightness-closing'},
5733 '[' => $rOpts->{'square-bracket-vertical-tightness-closing'},
5734 ')' => $rOpts->{'paren-vertical-tightness-closing'},
5735 '}' => $rOpts->{'brace-vertical-tightness-closing'},
5736 ']' => $rOpts->{'square-bracket-vertical-tightness-closing'},
5739 # assume flag for '>' same as ')' for closing qw quotes
5740 %closing_token_indentation = (
5741 ')' => $rOpts->{'closing-paren-indentation'},
5742 '}' => $rOpts->{'closing-brace-indentation'},
5743 ']' => $rOpts->{'closing-square-bracket-indentation'},
5744 '>' => $rOpts->{'closing-paren-indentation'},
5747 # flag indicating if any closing tokens are indented
5748 $some_closing_token_indentation =
5749 $rOpts->{'closing-paren-indentation'}
5750 || $rOpts->{'closing-brace-indentation'}
5751 || $rOpts->{'closing-square-bracket-indentation'}
5752 || $rOpts->{'indent-closing-brace'};
5754 %opening_token_right = (
5755 '(' => $rOpts->{'opening-paren-right'},
5756 '{' => $rOpts->{'opening-hash-brace-right'},
5757 '[' => $rOpts->{'opening-square-bracket-right'},
5760 %stack_opening_token = (
5761 '(' => $rOpts->{'stack-opening-paren'},
5762 '{' => $rOpts->{'stack-opening-hash-brace'},
5763 '[' => $rOpts->{'stack-opening-square-bracket'},
5766 %stack_closing_token = (
5767 ')' => $rOpts->{'stack-closing-paren'},
5768 '}' => $rOpts->{'stack-closing-hash-brace'},
5769 ']' => $rOpts->{'stack-closing-square-bracket'},
5771 $rOpts_stack_closing_block_brace = $rOpts->{'stack-closing-block-brace'};
5772 $rOpts_space_backslash_quote = $rOpts->{'space-backslash-quote'};
5778 # See if a pattern will compile. We have to use a string eval here,
5779 # but it should be safe because the pattern has been constructed
5782 eval "'##'=~/$pattern/";
5789 # Add keywords here which really should not be cuddled
5791 my @q = qw(if unless for foreach while);
5792 @no_cuddle{@q} = (1) x scalar(@q);
5795 sub prepare_cuddled_block_types {
5797 # the cuddled-else style, if used, is controlled by a hash that
5800 # Include keywords here which should not be cuddled
5802 my $cuddled_string = "";
5803 if ( $rOpts->{'cuddled-else'} ) {
5806 $cuddled_string = 'elsif else continue catch finally'
5807 unless ( $rOpts->{'cuddled-block-list-exclusive'} );
5809 # This is the old equivalent but more complex version
5810 # $cuddled_string = 'if-elsif-else unless-elsif-else -continue ';
5812 # Add users other blocks to be cuddled
5813 my $cuddled_block_list = $rOpts->{'cuddled-block-list'};
5814 if ($cuddled_block_list) {
5815 $cuddled_string .= " " . $cuddled_block_list;
5820 # If we have a cuddled string of the form
5821 # 'try-catch-finally'
5823 # we want to prepare a hash of the form
5825 # $rcuddled_block_types = {
5832 # use -dcbl to dump this hash
5834 # Multiple such strings are input as a space or comma separated list
5836 # If we get two lists with the same leading type, such as
5837 # -cbl = "-try-catch-finally -try-catch-otherwise"
5838 # then they will get merged as follows:
5839 # $rcuddled_block_types = {
5846 # This will allow either type of chain to be followed.
5848 $cuddled_string =~ s/,/ /g; # allow space or comma separated lists
5849 my @cuddled_strings = split /\s+/, $cuddled_string;
5851 $rcuddled_block_types = {};
5853 # process each dash-separated string...
5854 my $string_count = 0;
5855 foreach my $string (@cuddled_strings) {
5856 next unless $string;
5857 my @words = split /-+/, $string; # allow multiple dashes
5859 # we could look for and report possible errors here...
5860 next unless ( @words > 0 );
5862 # allow either '-continue' or *-continue' for arbitrary starting type
5865 # a single word without dashes is a secondary block type
5867 $start = shift @words;
5870 # always make an entry for the leading word. If none follow, this
5871 # will still prevent a wildcard from matching this word.
5872 if ( !defined( $rcuddled_block_types->{$start} ) ) {
5873 $rcuddled_block_types->{$start} = {};
5876 # The count gives the original word order in case we ever want it.
5879 foreach my $word (@words) {
5881 if ( $no_cuddle{$word} ) {
5883 "## Ignoring keyword '$word' in -cbl; does not seem right\n"
5888 $rcuddled_block_types->{$start}->{$word} =
5889 1; #"$string_count.$word_count";
5896 sub dump_cuddled_block_list {
5899 # ORIGINAL METHOD: Here is the format of the cuddled block type hash
5900 # which controls this routine
5901 # my $rcuddled_block_types = {
5912 # SIMPLFIED METHOD: the simplified method uses a wildcard for
5913 # the starting block type and puts all cuddled blocks together:
5914 # my $rcuddled_block_types = {
5923 # Both methods work, but the simplified method has proven to be adequate and
5926 my $cuddled_string = $rOpts->{'cuddled-block-list'};
5927 $cuddled_string = '' unless $cuddled_string;
5930 $flags .= "-ce" if ( $rOpts->{'cuddled-else'} );
5931 $flags .= " -cbl='$cuddled_string'";
5933 unless ( $rOpts->{'cuddled-else'} ) {
5934 $flags .= "\nNote: You must specify -ce to generate a cuddled hash";
5938 ------------------------------------------------------------------------
5939 Hash of cuddled block types prepared for a run with these parameters:
5941 ------------------------------------------------------------------------
5945 $fh->print( Dumper($rcuddled_block_types) );
5948 ------------------------------------------------------------------------
5953 sub make_static_block_comment_pattern {
5955 # create the pattern used to identify static block comments
5956 $static_block_comment_pattern = '^\s*##';
5958 # allow the user to change it
5959 if ( $rOpts->{'static-block-comment-prefix'} ) {
5960 my $prefix = $rOpts->{'static-block-comment-prefix'};
5961 $prefix =~ s/^\s*//;
5962 my $pattern = $prefix;
5964 # user may give leading caret to force matching left comments only
5965 if ( $prefix !~ /^\^#/ ) {
5966 if ( $prefix !~ /^#/ ) {
5968 "ERROR: the -sbcp prefix is '$prefix' but must begin with '#' or '^#'\n"
5971 $pattern = '^\s*' . $prefix;
5973 if ( bad_pattern($pattern) ) {
5975 "ERROR: the -sbc prefix '$prefix' causes the invalid regex '$pattern'\n"
5978 $static_block_comment_pattern = $pattern;
5983 sub make_format_skipping_pattern {
5984 my ( $opt_name, $default ) = @_;
5985 my $param = $rOpts->{$opt_name};
5986 unless ($param) { $param = $default }
5988 if ( $param !~ /^#/ ) {
5989 Die("ERROR: the $opt_name parameter '$param' must begin with '#'\n");
5991 my $pattern = '^' . $param . '\s';
5992 if ( bad_pattern($pattern) ) {
5994 "ERROR: the $opt_name parameter '$param' causes the invalid regex '$pattern'\n"
6000 sub make_closing_side_comment_list_pattern {
6002 # turn any input list into a regex for recognizing selected block types
6003 $closing_side_comment_list_pattern = '^\w+';
6004 if ( defined( $rOpts->{'closing-side-comment-list'} )
6005 && $rOpts->{'closing-side-comment-list'} )
6007 $closing_side_comment_list_pattern =
6008 make_block_pattern( '-cscl', $rOpts->{'closing-side-comment-list'} );
6013 sub make_bli_pattern {
6015 if ( defined( $rOpts->{'brace-left-and-indent-list'} )
6016 && $rOpts->{'brace-left-and-indent-list'} )
6018 $bli_list_string = $rOpts->{'brace-left-and-indent-list'};
6021 $bli_pattern = make_block_pattern( '-blil', $bli_list_string );
6025 sub make_keyword_group_list_pattern {
6027 # turn any input list into a regex for recognizing selected block types.
6028 # Here are the defaults:
6029 $keyword_group_list_pattern = '^(our|local|my|use|require|)$';
6030 $keyword_group_list_comment_pattern = '';
6031 if ( defined( $rOpts->{'keyword-group-blanks-list'} )
6032 && $rOpts->{'keyword-group-blanks-list'} )
6034 my @words = split /\s+/, $rOpts->{'keyword-group-blanks-list'};
6037 foreach my $word (@words) {
6038 if ( $word =~ /^(BC|SBC)$/ ) {
6039 push @comment_list, $word;
6040 if ( $word eq 'SBC' ) { push @comment_list, 'SBCX' }
6043 push @keyword_list, $word;
6046 $keyword_group_list_pattern =
6047 make_block_pattern( '-kgbl', $rOpts->{'keyword-group-blanks-list'} );
6048 $keyword_group_list_comment_pattern =
6049 make_block_pattern( '-kgbl', join( ' ', @comment_list ) );
6054 sub make_block_brace_vertical_tightness_pattern {
6056 # turn any input list into a regex for recognizing selected block types
6057 $block_brace_vertical_tightness_pattern =
6058 '^((if|else|elsif|unless|while|for|foreach|do|\w+:)$|sub)';
6059 if ( defined( $rOpts->{'block-brace-vertical-tightness-list'} )
6060 && $rOpts->{'block-brace-vertical-tightness-list'} )
6062 $block_brace_vertical_tightness_pattern =
6063 make_block_pattern( '-bbvtl',
6064 $rOpts->{'block-brace-vertical-tightness-list'} );
6069 sub make_blank_line_pattern {
6071 $blank_lines_before_closing_block_pattern = $SUB_PATTERN;
6072 my $key = 'blank-lines-before-closing-block-list';
6073 if ( defined( $rOpts->{$key} ) && $rOpts->{$key} ) {
6074 $blank_lines_before_closing_block_pattern =
6075 make_block_pattern( '-blbcl', $rOpts->{$key} );
6078 $blank_lines_after_opening_block_pattern = $SUB_PATTERN;
6079 $key = 'blank-lines-after-opening-block-list';
6080 if ( defined( $rOpts->{$key} ) && $rOpts->{$key} ) {
6081 $blank_lines_after_opening_block_pattern =
6082 make_block_pattern( '-blaol', $rOpts->{$key} );
6087 sub make_block_pattern {
6089 # given a string of block-type keywords, return a regex to match them
6090 # The only tricky part is that labels are indicated with a single ':'
6091 # and the 'sub' token text may have additional text after it (name of
6096 # input string: "if else elsif unless while for foreach do : sub";
6097 # pattern: '^((if|else|elsif|unless|while|for|foreach|do|\w+:)$|sub)';
6101 # To distinguish between anonymous subs and named subs, use 'sub' to
6102 # indicate a named sub, and 'asub' to indicate an anonymous sub
6104 my ( $abbrev, $string ) = @_;
6105 my @list = split_words($string);
6109 if ( $i eq '*' ) { my $pattern = '^.*'; return $pattern }
6112 if ( $i eq 'sub' ) {
6114 elsif ( $i eq 'asub' ) {
6116 elsif ( $i eq ';' ) {
6119 elsif ( $i eq '{' ) {
6122 elsif ( $i eq ':' ) {
6123 push @words, '\w+:';
6125 elsif ( $i =~ /^\w/ ) {
6129 Warn("unrecognized block type $i after $abbrev, ignoring\n");
6132 my $pattern = '(' . join( '|', @words ) . ')$';
6133 my $sub_patterns = "";
6134 if ( $seen{'sub'} ) {
6135 $sub_patterns .= '|' . $SUB_PATTERN;
6137 if ( $seen{'asub'} ) {
6138 $sub_patterns .= '|' . $ASUB_PATTERN;
6140 if ($sub_patterns) {
6141 $pattern = '(' . $pattern . $sub_patterns . ')';
6143 $pattern = '^' . $pattern;
6147 sub make_static_side_comment_pattern {
6149 # create the pattern used to identify static side comments
6150 $static_side_comment_pattern = '^##';
6152 # allow the user to change it
6153 if ( $rOpts->{'static-side-comment-prefix'} ) {
6154 my $prefix = $rOpts->{'static-side-comment-prefix'};
6155 $prefix =~ s/^\s*//;
6156 my $pattern = '^' . $prefix;
6157 if ( bad_pattern($pattern) ) {
6159 "ERROR: the -sscp prefix '$prefix' causes the invalid regex '$pattern'\n"
6162 $static_side_comment_pattern = $pattern;
6167 sub make_closing_side_comment_prefix {
6169 # Be sure we have a valid closing side comment prefix
6170 my $csc_prefix = $rOpts->{'closing-side-comment-prefix'};
6171 my $csc_prefix_pattern;
6172 if ( !defined($csc_prefix) ) {
6173 $csc_prefix = '## end';
6174 $csc_prefix_pattern = '^##\s+end';
6177 my $test_csc_prefix = $csc_prefix;
6178 if ( $test_csc_prefix !~ /^#/ ) {
6179 $test_csc_prefix = '#' . $test_csc_prefix;
6182 # make a regex to recognize the prefix
6183 my $test_csc_prefix_pattern = $test_csc_prefix;
6185 # escape any special characters
6186 $test_csc_prefix_pattern =~ s/([^#\s\w])/\\$1/g;
6188 $test_csc_prefix_pattern = '^' . $test_csc_prefix_pattern;
6190 # allow exact number of intermediate spaces to vary
6191 $test_csc_prefix_pattern =~ s/\s+/\\s\+/g;
6193 # make sure we have a good pattern
6194 # if we fail this we probably have an error in escaping
6197 if ( bad_pattern($test_csc_prefix_pattern) ) {
6199 # shouldn't happen..must have screwed up escaping, above
6200 report_definite_bug();
6202 "Program Error: the -cscp prefix '$csc_prefix' caused the invalid regex '$csc_prefix_pattern'\n"
6205 # just warn and keep going with defaults
6206 Warn("Please consider using a simpler -cscp prefix\n");
6207 Warn("Using default -cscp instead; please check output\n");
6210 $csc_prefix = $test_csc_prefix;
6211 $csc_prefix_pattern = $test_csc_prefix_pattern;
6214 $rOpts->{'closing-side-comment-prefix'} = $csc_prefix;
6215 $closing_side_comment_prefix_pattern = $csc_prefix_pattern;
6219 sub dump_want_left_space {
6223 These values are the main control of whitespace to the left of a token type;
6224 They may be altered with the -wls parameter.
6225 For a list of token types, use perltidy --dump-token-types (-dtt)
6226 1 means the token wants a space to its left
6227 -1 means the token does not want a space to its left
6228 ------------------------------------------------------------------------
6230 foreach my $key ( sort keys %want_left_space ) {
6231 print $fh "$key\t$want_left_space{$key}\n";
6236 sub dump_want_right_space {
6240 These values are the main control of whitespace to the right of a token type;
6241 They may be altered with the -wrs parameter.
6242 For a list of token types, use perltidy --dump-token-types (-dtt)
6243 1 means the token wants a space to its right
6244 -1 means the token does not want a space to its right
6245 ------------------------------------------------------------------------
6247 foreach my $key ( sort keys %want_right_space ) {
6248 print $fh "$key\t$want_right_space{$key}\n";
6253 { # begin is_essential_whitespace
6255 my %is_sort_grep_map;
6261 @q = qw(sort grep map);
6262 @is_sort_grep_map{@q} = (1) x scalar(@q);
6264 @q = qw(for foreach);
6265 @is_for_foreach{@q} = (1) x scalar(@q);
6269 sub is_essential_whitespace {
6271 # Essential whitespace means whitespace which cannot be safely deleted
6272 # without risking the introduction of a syntax error.
6273 # We are given three tokens and their types:
6274 # ($tokenl, $typel) is the token to the left of the space in question
6275 # ($tokenr, $typer) is the token to the right of the space in question
6276 # ($tokenll, $typell) is previous nonblank token to the left of $tokenl
6278 # This is a slow routine but is not needed too often except when -mangle
6281 # Note: This routine should almost never need to be changed. It is
6282 # for avoiding syntax problems rather than for formatting.
6283 my ( $tokenll, $typell, $tokenl, $typel, $tokenr, $typer ) = @_;
6287 # never combine two bare words or numbers
6288 # examples: and ::ok(1)
6290 # for bla::bla:: abc
6291 # example is "%overload:: and" in files Dumpvalue.pm or colonbug.pl
6292 # $input eq"quit" to make $inputeq"quit"
6293 # my $size=-s::SINK if $file; <==OK but we won't do it
6294 # don't join something like: for bla::bla:: abc
6295 # example is "%overload:: and" in files Dumpvalue.pm or colonbug.pl
6296 ( ( $tokenl =~ /([\'\w]|\:\:)$/ && $typel ne 'CORE::' )
6297 && ( $tokenr =~ /^([\'\w]|\:\:)/ ) )
6299 # do not combine a number with a concatenation dot
6300 # example: pom.caputo:
6301 # $vt100_compatible ? "\e[0;0H" : ('-' x 78 . "\n");
6302 || ( ( $typel eq 'n' ) && ( $tokenr eq '.' ) )
6303 || ( ( $typer eq 'n' ) && ( $tokenl eq '.' ) )
6305 # do not join a minus with a bare word, because you might form
6306 # a file test operator. Example from Complex.pm:
6307 # if (CORE::abs($z - i) < $eps); "z-i" would be taken as a file test.
6308 || ( ( $tokenl eq '-' ) && ( $tokenr =~ /^[_A-Za-z]$/ ) )
6310 # do not join a bare word with a minus, like between 'Send' and
6311 # '-recipients' here <<snippets/space3.in>>
6312 # my $msg = new Fax::Send
6313 # -recipients => $to,
6315 # This is the safest thing to do. If we had the token to the right of
6316 # the minus we could do a better check.
6317 || ( ( $tokenr eq '-' ) && ( $typel eq 'w' ) )
6319 # and something like this could become ambiguous without space
6321 # use constant III=>1;
6325 || ( ( $tokenl eq '-' )
6326 && ( $typer =~ /^[wC]$/ && $tokenr =~ /^[_A-Za-z]/ ) )
6328 # '= -' should not become =- or you will get a warning
6330 # || ($tokenr eq '-')
6332 # keep a space between a quote and a bareword to prevent the
6333 # bareword from becoming a quote modifier.
6334 || ( ( $typel eq 'Q' ) && ( $tokenr =~ /^[a-zA-Z_]/ ) )
6336 # keep a space between a token ending in '$' and any word;
6337 # this caused trouble: "die @$ if $@"
6338 || ( ( $typel eq 'i' && $tokenl =~ /\$$/ )
6339 && ( $tokenr =~ /^[a-zA-Z_]/ ) )
6341 # perl is very fussy about spaces before <<
6342 || ( $tokenr =~ /^\<\</ )
6344 # avoid combining tokens to create new meanings. Example:
6345 # $a+ +$b must not become $a++$b
6346 || ( $is_digraph{ $tokenl . $tokenr } )
6347 || ( $is_trigraph{ $tokenl . $tokenr } )
6349 # another example: do not combine these two &'s:
6350 # allow_options & &OPT_EXECCGI
6351 || ( $is_digraph{ $tokenl . substr( $tokenr, 0, 1 ) } )
6353 # don't combine $$ or $# with any alphanumeric
6354 # (testfile mangle.t with --mangle)
6355 || ( ( $tokenl =~ /^\$[\$\#]$/ ) && ( $tokenr =~ /^\w/ ) )
6357 # retain any space after possible filehandle
6358 # (testfiles prnterr1.t with --extrude and mangle.t with --mangle)
6359 || ( $typel eq 'Z' )
6361 # Perl is sensitive to whitespace after the + here:
6362 # $b = xvals $a + 0.1 * yvals $a;
6363 || ( $typell eq 'Z' && $typel =~ /^[\/\?\+\-\*]$/ )
6365 # keep paren separate in 'use Foo::Bar ()'
6369 && $tokenll eq 'use' )
6371 # keep any space between filehandle and paren:
6372 # file mangle.t with --mangle:
6373 || ( $typel eq 'Y' && $tokenr eq '(' )
6375 # retain any space after here doc operator ( hereerr.t)
6376 || ( $typel eq 'h' )
6378 # be careful with a space around ++ and --, to avoid ambiguity as to
6379 # which token it applies
6380 || ( ( $typer =~ /^(pp|mm)$/ ) && ( $tokenl !~ /^[\;\{\(\[]/ ) )
6381 || ( ( $typel =~ /^(\+\+|\-\-)$/ ) && ( $tokenr !~ /^[\;\}\)\]]/ ) )
6383 # need space after foreach my; for example, this will fail in
6384 # older versions of Perl:
6385 # foreach my$ft(@filetypes)...
6390 && $is_for_foreach{$tokenll}
6394 # must have space between grep and left paren; "grep(" will fail
6395 || ( $tokenr eq '(' && $is_sort_grep_map{$tokenl} )
6397 # don't stick numbers next to left parens, as in:
6398 #use Mail::Internet 1.28 (); (see Entity.pm, Head.pm, Test.pm)
6399 || ( ( $typel eq 'n' ) && ( $tokenr eq '(' ) )
6401 # We must be sure that a space between a ? and a quoted string
6402 # remains if the space before the ? remains. [Loca.pm, lockarea]
6404 # $b=join $comma ? ',' : ':', @_; # ok
6405 # $b=join $comma?',' : ':', @_; # ok!
6406 # $b=join $comma ?',' : ':', @_; # error!
6407 # Not really required:
6408 ## || ( ( $typel eq '?' ) && ( $typer eq 'Q' ) )
6410 # do not remove space between an '&' and a bare word because
6411 # it may turn into a function evaluation, like here
6412 # between '&' and 'O_ACCMODE', producing a syntax error [File.pm]
6413 # $opts{rdonly} = (($opts{mode} & O_ACCMODE) == O_RDONLY);
6414 || ( ( $typel eq '&' ) && ( $tokenr =~ /^[a-zA-Z_]/ ) )
6416 # space stacked labels (TODO: check if really necessary)
6417 || ( $typel eq 'J' && $typer eq 'J' )
6419 ; # the value of this long logic sequence is the result we want
6420 ##if ($typel eq 'j') {print STDERR "typel=$typel typer=$typer result='$result'\n"}
6426 my %secret_operators;
6427 my %is_leading_secret_token;
6431 # token lists for perl secret operators as compiled by Philippe Bruhat
6432 # at: https://metacpan.org/module/perlsecret
6433 %secret_operators = (
6434 'Goatse' => [qw#= ( ) =#], #=( )=
6435 'Venus1' => [qw#0 +#], # 0+
6436 'Venus2' => [qw#+ 0#], # +0
6437 'Enterprise' => [qw#) x ! !#], # ()x!!
6438 'Kite1' => [qw#~ ~ <>#], # ~~<>
6439 'Kite2' => [qw#~~ <>#], # ~~<>
6440 'Winking Fat Comma' => [ ( ',', '=>' ) ], # ,=>
6441 'Bang bang ' => [qw#! !#], # !!
6444 # The following operators and constants are not included because they
6445 # are normally kept tight by perltidy:
6449 # Make a lookup table indexed by the first token of each operator:
6450 # first token => [list, list, ...]
6451 foreach my $value ( values(%secret_operators) ) {
6452 my $tok = $value->[0];
6453 push @{ $is_leading_secret_token{$tok} }, $value;
6457 sub new_secret_operator_whitespace {
6459 my ( $rlong_array, $rwhitespace_flags ) = @_;
6461 # Loop over all tokens in this line
6462 my ( $token, $type );
6463 my $jmax = @{$rlong_array} - 1;
6464 foreach my $j ( 0 .. $jmax ) {
6466 $token = $rlong_array->[$j]->[_TOKEN_];
6467 $type = $rlong_array->[$j]->[_TYPE_];
6469 # Skip unless this token might start a secret operator
6470 next if ( $type eq 'b' );
6471 next unless ( $is_leading_secret_token{$token} );
6473 # Loop over all secret operators with this leading token
6474 foreach my $rpattern ( @{ $is_leading_secret_token{$token} } ) {
6476 foreach my $tok ( @{$rpattern} ) {
6481 && $rlong_array->[$jend]->[_TYPE_] eq 'b' );
6483 || $tok ne $rlong_array->[$jend]->[_TOKEN_] )
6492 # set flags to prevent spaces within this operator
6493 foreach my $jj ( $j + 1 .. $jend ) {
6494 $rwhitespace_flags->[$jj] = WS_NO;
6499 } ## End Loop over all operators
6500 } ## End loop over all tokens
6505 { # begin print_line_of_tokens
6507 my $rinput_token_array; # Current working array
6508 my $rinput_K_array; # Future working array
6511 my $guessed_indentation_level;
6513 # This should be a return variable from extract_token
6514 # These local token variables are stored by store_token_to_go:
6518 my $container_environment;
6520 my $in_continued_quote;
6522 my $no_internal_newlines;
6528 # routine to pull the jth token from the line of tokens
6530 my ( $self, $j ) = @_;
6532 my $rLL = $self->{rLL};
6533 $Ktoken_vars = $rinput_K_array->[$j];
6534 if ( !defined($Ktoken_vars) ) {
6536 # Shouldn't happen: an error here would be due to a recent program change
6537 Fault("undefined index K for j=$j");
6539 my $rtoken_vars = $rLL->[$Ktoken_vars];
6541 if ( $rtoken_vars->[_TOKEN_] ne $rLL->[$Ktoken_vars]->[_TOKEN_] ) {
6543 # Shouldn't happen: an error here would be due to a recent program change
6545 j=$j, K=$Ktoken_vars, '$rtoken_vars->[_TOKEN_]' ne '$rLL->[$Ktoken_vars]'
6549 #########################################################
6550 # these are now redundant and can eventually be eliminated
6552 $token = $rtoken_vars->[_TOKEN_];
6553 $type = $rtoken_vars->[_TYPE_];
6554 $block_type = $rtoken_vars->[_BLOCK_TYPE_];
6555 $container_type = $rtoken_vars->[_CONTAINER_TYPE_];
6556 $container_environment = $rtoken_vars->[_CONTAINER_ENVIRONMENT_];
6557 $type_sequence = $rtoken_vars->[_TYPE_SEQUENCE_];
6558 $level = $rtoken_vars->[_LEVEL_];
6559 $slevel = $rtoken_vars->[_SLEVEL_];
6560 $ci_level = $rtoken_vars->[_CI_LEVEL_];
6561 #########################################################
6569 sub save_current_token {
6572 $block_type, $ci_level,
6573 $container_environment, $container_type,
6574 $in_continued_quote, $level,
6575 $no_internal_newlines, $slevel,
6577 $type_sequence, $Ktoken_vars,
6582 sub restore_current_token {
6584 $block_type, $ci_level,
6585 $container_environment, $container_type,
6586 $in_continued_quote, $level,
6587 $no_internal_newlines, $slevel,
6589 $type_sequence, $Ktoken_vars,
6597 # Returns the length of a token, given:
6598 # $token=text of the token
6600 # $not_first_token = should be TRUE if this is not the first token of
6601 # the line. It might the index of this token in an array. It is
6602 # used to test for a side comment vs a block comment.
6603 # Note: Eventually this should be the only routine determining the
6604 # length of a token in this package.
6605 my ( $token, $type, $not_first_token ) = @_;
6606 my $token_length = length($token);
6608 # We mark lengths of side comments as just 1 if we are
6609 # ignoring their lengths when setting line breaks.
6611 if ( $rOpts_ignore_side_comment_lengths
6614 return $token_length;
6619 # return length of ith token in @{$rtokens}
6621 return token_length( $rinput_token_array->[$i]->[_TOKEN_],
6622 $rinput_token_array->[$i]->[_TYPE_], $i );
6625 # Routine to place the current token into the output stream.
6626 # Called once per output token.
6627 sub store_token_to_go {
6629 my ( $self, $side_comment_follows ) = @_;
6631 my $flag = $side_comment_follows ? 1 : $no_internal_newlines;
6634 $K_to_go[$max_index_to_go] = $Ktoken_vars;
6635 $tokens_to_go[$max_index_to_go] = $token;
6636 $types_to_go[$max_index_to_go] = $type;
6637 $nobreak_to_go[$max_index_to_go] = $flag;
6638 $old_breakpoint_to_go[$max_index_to_go] = 0;
6639 $forced_breakpoint_to_go[$max_index_to_go] = 0;
6640 $block_type_to_go[$max_index_to_go] = $block_type;
6641 $type_sequence_to_go[$max_index_to_go] = $type_sequence;
6642 $container_environment_to_go[$max_index_to_go] = $container_environment;
6643 $ci_levels_to_go[$max_index_to_go] = $ci_level;
6644 $mate_index_to_go[$max_index_to_go] = -1;
6645 $matching_token_to_go[$max_index_to_go] = '';
6646 $bond_strength_to_go[$max_index_to_go] = 0;
6648 # Note: negative levels are currently retained as a diagnostic so that
6649 # the 'final indentation level' is correctly reported for bad scripts.
6650 # But this means that every use of $level as an index must be checked.
6651 # If this becomes too much of a problem, we might give up and just clip
6653 ## $levels_to_go[$max_index_to_go] = ( $level > 0 ) ? $level : 0;
6654 $levels_to_go[$max_index_to_go] = $level;
6655 $nesting_depth_to_go[$max_index_to_go] = ( $slevel >= 0 ) ? $slevel : 0;
6657 # link the non-blank tokens
6658 my $iprev = $max_index_to_go - 1;
6659 $iprev-- if ( $iprev >= 0 && $types_to_go[$iprev] eq 'b' );
6660 $iprev_to_go[$max_index_to_go] = $iprev;
6661 $inext_to_go[$iprev] = $max_index_to_go
6662 if ( $iprev >= 0 && $type ne 'b' );
6663 $inext_to_go[$max_index_to_go] = $max_index_to_go + 1;
6665 $token_lengths_to_go[$max_index_to_go] =
6666 token_length( $token, $type, $max_index_to_go );
6668 # We keep a running sum of token lengths from the start of this batch:
6669 # summed_lengths_to_go[$i] = total length to just before token $i
6670 # summed_lengths_to_go[$i+1] = total length to just after token $i
6671 $summed_lengths_to_go[ $max_index_to_go + 1 ] =
6672 $summed_lengths_to_go[$max_index_to_go] +
6673 $token_lengths_to_go[$max_index_to_go];
6675 # Define the indentation that this token would have if it started
6676 # a new line. We have to do this now because we need to know this
6677 # when considering one-line blocks.
6678 set_leading_whitespace( $level, $ci_level, $in_continued_quote );
6680 # remember previous nonblank tokens seen
6681 if ( $type ne 'b' ) {
6682 $last_last_nonblank_index_to_go = $last_nonblank_index_to_go;
6683 $last_last_nonblank_type_to_go = $last_nonblank_type_to_go;
6684 $last_last_nonblank_token_to_go = $last_nonblank_token_to_go;
6685 $last_nonblank_index_to_go = $max_index_to_go;
6686 $last_nonblank_type_to_go = $type;
6687 $last_nonblank_token_to_go = $token;
6688 if ( $type eq ',' ) {
6689 $comma_count_in_batch++;
6693 FORMATTER_DEBUG_FLAG_STORE && do {
6694 my ( $a, $b, $c ) = caller();
6696 "STORE: from $a $c: storing token $token type $type lev=$level slev=$slevel at $max_index_to_go\n";
6701 sub insert_new_token_to_go {
6703 # insert a new token into the output stream. use same level as
6704 # previous token; assumes a character at max_index_to_go.
6705 my ( $self, @args ) = @_;
6706 save_current_token();
6707 ( $token, $type, $slevel, $no_internal_newlines ) = @args;
6709 if ( $max_index_to_go == UNDEFINED_INDEX ) {
6710 warning("code bug: bad call to insert_new_token_to_go\n");
6712 $level = $levels_to_go[$max_index_to_go];
6714 # FIXME: it seems to be necessary to use the next, rather than
6715 # previous, value of this variable when creating a new blank (align.t)
6716 #my $slevel = $nesting_depth_to_go[$max_index_to_go];
6717 $ci_level = $ci_levels_to_go[$max_index_to_go];
6718 $container_environment = $container_environment_to_go[$max_index_to_go];
6719 $in_continued_quote = 0;
6721 $type_sequence = "";
6723 # store an undef for the K value to catch unexpected usage
6724 # This routine is only called by add_closing_side_comments, and
6725 # eventually that call will be eliminated.
6726 $Ktoken_vars = undef;
6728 $self->store_token_to_go();
6729 restore_current_token();
6734 my ($rold_token_hash) = @_;
6735 my %new_token_hash =
6736 map { ( $_, $rold_token_hash->{$_} ) } keys %{$rold_token_hash};
6737 return \%new_token_hash;
6742 my @new = map { $_ } @{$rold};
6746 sub copy_token_as_type {
6747 my ( $rold_token, $type, $token ) = @_;
6748 if ( $type eq 'b' ) {
6749 $token = " " unless defined($token);
6751 elsif ( $type eq 'q' ) {
6752 $token = '' unless defined($token);
6754 elsif ( $type eq '->' ) {
6755 $token = '->' unless defined($token);
6757 elsif ( $type eq ';' ) {
6758 $token = ';' unless defined($token);
6762 "Programming error: copy_token_as has type $type but should be 'b' or 'q'"
6765 my $rnew_token = copy_array($rold_token);
6766 $rnew_token->[_TYPE_] = $type;
6767 $rnew_token->[_TOKEN_] = $token;
6768 $rnew_token->[_BLOCK_TYPE_] = '';
6769 $rnew_token->[_CONTAINER_TYPE_] = '';
6770 $rnew_token->[_CONTAINER_ENVIRONMENT_] = '';
6771 $rnew_token->[_TYPE_SEQUENCE_] = '';
6775 sub boolean_equals {
6776 my ( $val1, $val2 ) = @_;
6777 return ( $val1 && $val2 || !$val1 && !$val2 );
6780 sub print_line_of_tokens {
6782 my ( $self, $line_of_tokens ) = @_;
6784 # This routine is called once per input line to process all of
6785 # the tokens on that line. This is the first stage of
6788 # Full-line comments and blank lines may be processed immediately.
6790 # For normal lines of code, the tokens are stored one-by-one,
6791 # via calls to 'sub store_token_to_go', until a known line break
6792 # point is reached. Then, the batch of collected tokens is
6793 # passed along to 'sub output_line_to_go' for further
6794 # processing. This routine decides if there should be
6795 # whitespace between each pair of non-white tokens, so later
6796 # routines only need to decide on any additional line breaks.
6797 # Any whitespace is initially a single space character. Later,
6798 # the vertical aligner may expand that to be multiple space
6799 # characters if necessary for alignment.
6801 $input_line_number = $line_of_tokens->{_line_number};
6802 my $input_line = $line_of_tokens->{_line_text};
6803 my $CODE_type = $line_of_tokens->{_code_type};
6805 my $rK_range = $line_of_tokens->{_rK_range};
6806 my ( $K_first, $K_last ) = @{$rK_range};
6808 my $rLL = $self->{rLL};
6809 my $rbreak_container = $self->{rbreak_container};
6811 if ( !defined($K_first) ) {
6813 # Unexpected blank line..
6814 # Calling routine was supposed to handle this
6816 "Programming Error: Unexpected Blank Line in print_line_of_tokens. Ignoring"
6821 $no_internal_newlines = 1 - $rOpts_add_newlines;
6823 ( $K_first == $K_last && $rLL->[$K_first]->[_TYPE_] eq '#' );
6824 my $is_static_block_comment_without_leading_space =
6825 $CODE_type eq 'SBCX';
6826 $is_static_block_comment =
6827 $CODE_type eq 'SBC' || $is_static_block_comment_without_leading_space;
6828 my $is_hanging_side_comment = $CODE_type eq 'HSC';
6829 my $is_VERSION_statement = $CODE_type eq 'VER';
6830 if ($is_VERSION_statement) {
6831 $saw_VERSION_in_this_file = 1;
6832 $no_internal_newlines = 1;
6835 # Add interline blank if any
6836 my $last_old_nonblank_type = "b";
6837 my $first_new_nonblank_type = "b";
6838 my $first_new_nonblank_token = " ";
6839 if ( $max_index_to_go >= 0 ) {
6840 $last_old_nonblank_type = $types_to_go[$max_index_to_go];
6841 $first_new_nonblank_type = $rLL->[$K_first]->[_TYPE_];
6842 $first_new_nonblank_token = $rLL->[$K_first]->[_TOKEN_];
6844 && $types_to_go[$max_index_to_go] ne 'b'
6846 && $rLL->[ $K_first - 1 ]->[_TYPE_] eq 'b' )
6852 # Copy the tokens into local arrays
6853 $rinput_token_array = [];
6854 $rinput_K_array = [];
6855 $rinput_K_array = [ ( $K_first .. $K_last ) ];
6856 $rinput_token_array = [ map { $rLL->[$_] } @{$rinput_K_array} ];
6857 my $jmax = @{$rinput_K_array} - 1;
6859 $in_continued_quote = $starting_in_quote =
6860 $line_of_tokens->{_starting_in_quote};
6861 $in_quote = $line_of_tokens->{_ending_in_quote};
6862 $ending_in_quote = $in_quote;
6863 $guessed_indentation_level =
6864 $line_of_tokens->{_guessed_indentation_level};
6867 my $next_nonblank_token;
6868 my $next_nonblank_token_type;
6871 $container_type = "";
6872 $container_environment = "";
6873 $type_sequence = "";
6875 ######################################
6876 # Handle a block (full-line) comment..
6877 ######################################
6880 if ( $rOpts->{'delete-block-comments'} ) { return }
6882 if ( $rOpts->{'tee-block-comments'} ) {
6883 $file_writer_object->tee_on();
6886 destroy_one_line_block();
6887 $self->output_line_to_go();
6889 # output a blank line before block comments
6891 # unless we follow a blank or comment line
6892 $last_line_leading_type !~ /^[#b]$/
6895 && $rOpts->{'blanks-before-comments'}
6897 # if this is NOT an empty comment line
6898 && $rinput_token_array->[0]->[_TOKEN_] ne '#'
6900 # not after a short line ending in an opening token
6901 # because we already have space above this comment.
6902 # Note that the first comment in this if block, after
6903 # the 'if (', does not get a blank line because of this.
6904 && !$last_output_short_opening_token
6906 # never before static block comments
6907 && !$is_static_block_comment
6910 $self->flush(); # switching to new output stream
6911 $file_writer_object->write_blank_code_line();
6912 $last_line_leading_type = 'b';
6915 # TRIM COMMENTS -- This could be turned off as a option
6916 $rinput_token_array->[0]->[_TOKEN_] =~ s/\s*$//; # trim right end
6919 $rOpts->{'indent-block-comments'}
6920 && ( !$rOpts->{'indent-spaced-block-comments'}
6921 || $input_line =~ /^\s+/ )
6922 && !$is_static_block_comment_without_leading_space
6925 $self->extract_token(0);
6926 $self->store_token_to_go();
6927 $self->output_line_to_go();
6930 $self->flush(); # switching to new output stream
6931 $file_writer_object->write_code_line(
6932 $rinput_token_array->[0]->[_TOKEN_] . "\n" );
6933 $last_line_leading_type = '#';
6935 if ( $rOpts->{'tee-block-comments'} ) {
6936 $file_writer_object->tee_off();
6941 # TODO: Move to sub scan_comments
6942 # compare input/output indentation except for continuation lines
6943 # (because they have an unknown amount of initial blank space)
6944 # and lines which are quotes (because they may have been outdented)
6945 # Note: this test is placed here because we know the continuation flag
6946 # at this point, which allows us to avoid non-meaningful checks.
6947 my $structural_indentation_level = $rinput_token_array->[0]->[_LEVEL_];
6948 compare_indentation_levels( $guessed_indentation_level,
6949 $structural_indentation_level )
6950 unless ( $is_hanging_side_comment
6951 || $rinput_token_array->[0]->[_CI_LEVEL_] > 0
6952 || $guessed_indentation_level == 0
6953 && $rinput_token_array->[0]->[_TYPE_] eq 'Q' );
6955 ##########################
6956 # Handle indentation-only
6957 ##########################
6959 # NOTE: In previous versions we sent all qw lines out immediately here.
6960 # No longer doing this: also write a line which is entirely a 'qw' list
6961 # to allow stacking of opening and closing tokens. Note that interior
6962 # qw lines will still go out at the end of this routine.
6963 if ( $CODE_type eq 'IO' ) {
6965 my $line = $input_line;
6967 # delete side comments if requested with -io, but
6968 # we will not allow deleting of closing side comments with -io
6969 # because the coding would be more complex
6970 if ( $rOpts->{'delete-side-comments'}
6971 && $rinput_token_array->[$jmax]->[_TYPE_] eq '#' )
6975 foreach my $jj ( 0 .. $jmax - 1 ) {
6976 $line .= $rinput_token_array->[$jj]->[_TOKEN_];
6980 # Fix for rt #125506 Unexpected string formating
6981 # in which leading space of a terminal quote was removed
6983 $line =~ s/^\s+// unless ($in_continued_quote);
6985 $self->extract_token(0);
6989 $container_type = "";
6990 $container_environment = "";
6991 $type_sequence = "";
6992 $self->store_token_to_go();
6993 $self->output_line_to_go();
6997 ############################
6998 # Handle all other lines ...
6999 ############################
7001 #######################################################
7002 # FIXME: this should become unnecessary
7003 # making $j+2 valid simplifies coding
7005 copy_token_as_type( $rinput_token_array->[$jmax], 'b' );
7006 push @{$rinput_token_array}, $rnew_blank;
7007 push @{$rinput_token_array}, $rnew_blank;
7008 #######################################################
7010 # If we just saw the end of an elsif block, write nag message
7011 # if we do not see another elseif or an else.
7012 if ($looking_for_else) {
7014 unless ( $rinput_token_array->[0]->[_TOKEN_] =~ /^(elsif|else)$/ ) {
7015 write_logfile_entry("(No else block)\n");
7017 $looking_for_else = 0;
7020 # This is a good place to kill incomplete one-line blocks
7023 ( $semicolons_before_block_self_destruct == 0 )
7024 && ( $max_index_to_go >= 0 )
7025 && ( $last_old_nonblank_type eq ';' )
7026 && ( $first_new_nonblank_token ne '}' )
7029 # Patch for RT #98902. Honor request to break at old commas.
7030 || ( $rOpts_break_at_old_comma_breakpoints
7031 && $max_index_to_go >= 0
7032 && $last_old_nonblank_type eq ',' )
7035 $forced_breakpoint_to_go[$max_index_to_go] = 1
7036 if ($rOpts_break_at_old_comma_breakpoints);
7037 destroy_one_line_block();
7038 $self->output_line_to_go();
7041 # loop to process the tokens one-by-one
7045 # We do not want a leading blank if the previous batch just got output
7047 if ( $max_index_to_go < 0 && $rLL->[$K_first]->[_TYPE_] eq 'b' ) {
7051 foreach my $j ( $jmin .. $jmax ) {
7053 # pull out the local values for this token
7054 $self->extract_token($j);
7056 if ( $type eq '#' ) {
7058 # trim trailing whitespace
7059 # (there is no option at present to prevent this)
7063 $rOpts->{'delete-side-comments'}
7065 # delete closing side comments if necessary
7066 || ( $rOpts->{'delete-closing-side-comments'}
7067 && $token =~ /$closing_side_comment_prefix_pattern/o
7068 && $last_nonblank_block_type =~
7069 /$closing_side_comment_list_pattern/o )
7072 if ( $types_to_go[$max_index_to_go] eq 'b' ) {
7073 unstore_token_to_go();
7079 # If we are continuing after seeing a right curly brace, flush
7080 # buffer unless we see what we are looking for, as in
7082 if ( $rbrace_follower && $type ne 'b' ) {
7084 unless ( $rbrace_follower->{$token} ) {
7085 $self->output_line_to_go();
7087 $rbrace_follower = undef;
7091 ( $rinput_token_array->[ $j + 1 ]->[_TYPE_] eq 'b' )
7094 $next_nonblank_token = $rinput_token_array->[$j_next]->[_TOKEN_];
7095 $next_nonblank_token_type =
7096 $rinput_token_array->[$j_next]->[_TYPE_];
7098 ######################
7099 # MAYBE MOVE ELSEWHERE?
7100 ######################
7101 if ( $type eq 'Q' ) {
7102 note_embedded_tab() if ( $token =~ "\t" );
7104 # make note of something like '$var = s/xxx/yyy/;'
7105 # in case it should have been '$var =~ s/xxx/yyy/;'
7107 $token =~ /^(s|tr|y|m|\/)/
7108 && $last_nonblank_token =~ /^(=|==|!=)$/
7110 # preceded by simple scalar
7111 && $last_last_nonblank_type eq 'i'
7112 && $last_last_nonblank_token =~ /^\$/
7114 # followed by some kind of termination
7115 # (but give complaint if we can's see far enough ahead)
7116 && $next_nonblank_token =~ /^[; \)\}]$/
7118 # scalar is not declared
7120 $types_to_go[0] eq 'k'
7121 && $tokens_to_go[0] =~ /^(my|our|local)$/
7125 my $guess = substr( $last_nonblank_token, 0, 1 ) . '~';
7127 "Note: be sure you want '$last_nonblank_token' instead of '$guess' here\n"
7132 # Do not allow breaks which would promote a side comment to a
7133 # block comment. In order to allow a break before an opening
7134 # or closing BLOCK, followed by a side comment, those sections
7135 # of code will handle this flag separately.
7136 my $side_comment_follows = ( $next_nonblank_token_type eq '#' );
7137 my $is_opening_BLOCK =
7141 && $block_type ne 't' );
7142 my $is_closing_BLOCK =
7146 && $block_type ne 't' );
7148 if ( $side_comment_follows
7149 && !$is_opening_BLOCK
7150 && !$is_closing_BLOCK )
7152 $no_internal_newlines = 1;
7155 # We're only going to handle breaking for code BLOCKS at this
7156 # (top) level. Other indentation breaks will be handled by
7157 # sub scan_list, which is better suited to dealing with them.
7158 if ($is_opening_BLOCK) {
7160 # Tentatively output this token. This is required before
7161 # calling starting_one_line_block. We may have to unstore
7162 # it, though, if we have to break before it.
7163 $self->store_token_to_go($side_comment_follows);
7165 # Look ahead to see if we might form a one-line block..
7167 $self->starting_one_line_block( $j, $jmax, $level, $slevel,
7168 $ci_level, $rinput_token_array );
7169 clear_breakpoint_undo_stack();
7171 # to simplify the logic below, set a flag to indicate if
7172 # this opening brace is far from the keyword which introduces it
7173 my $keyword_on_same_line = 1;
7174 if ( ( $max_index_to_go >= 0 )
7175 && ( $last_nonblank_type eq ')' )
7176 && ( ( $slevel < $nesting_depth_to_go[0] ) || $too_long ) )
7178 $keyword_on_same_line = 0;
7181 # decide if user requested break before '{'
7184 # use -bl flag if not a sub block of any type
7185 $block_type !~ /^sub\b/
7186 ? $rOpts->{'opening-brace-on-new-line'}
7188 # use -sbl flag for a named sub block
7189 : $block_type !~ /$ASUB_PATTERN/
7190 ? $rOpts->{'opening-sub-brace-on-new-line'}
7192 # use -asbl flag for an anonymous sub block
7193 : $rOpts->{'opening-anonymous-sub-brace-on-new-line'};
7195 # Do not break if this token is welded to the left
7196 if ( weld_len_left( $type_sequence, $token ) ) {
7200 # Break before an opening '{' ...
7206 # and we were unable to start looking for a block,
7207 && $index_start_one_line_block == UNDEFINED_INDEX
7209 # or if it will not be on same line as its keyword, so that
7210 # it will be outdented (eval.t, overload.t), and the user
7211 # has not insisted on keeping it on the right
7212 || ( !$keyword_on_same_line
7213 && !$rOpts->{'opening-brace-always-on-right'} )
7218 # but only if allowed
7219 unless ($no_internal_newlines) {
7221 # since we already stored this token, we must unstore it
7222 $self->unstore_token_to_go();
7224 # then output the line
7225 $self->output_line_to_go();
7227 # and now store this token at the start of a new line
7228 $self->store_token_to_go($side_comment_follows);
7232 # Now update for side comment
7233 if ($side_comment_follows) { $no_internal_newlines = 1 }
7235 # now output this line
7236 unless ($no_internal_newlines) {
7237 $self->output_line_to_go();
7241 elsif ($is_closing_BLOCK) {
7243 # If there is a pending one-line block ..
7244 if ( $index_start_one_line_block != UNDEFINED_INDEX ) {
7246 # we have to terminate it if..
7249 # it is too long (final length may be different from
7250 # initial estimate). note: must allow 1 space for this
7252 excess_line_length( $index_start_one_line_block,
7253 $max_index_to_go ) >= 0
7255 # or if it has too many semicolons
7256 || ( $semicolons_before_block_self_destruct == 0
7257 && $last_nonblank_type ne ';' )
7260 destroy_one_line_block();
7264 # put a break before this closing curly brace if appropriate
7265 unless ( $no_internal_newlines
7266 || $index_start_one_line_block != UNDEFINED_INDEX )
7269 # write out everything before this closing curly brace
7270 $self->output_line_to_go();
7273 # Now update for side comment
7274 if ($side_comment_follows) { $no_internal_newlines = 1 }
7276 # store the closing curly brace
7277 $self->store_token_to_go();
7279 # ok, we just stored a closing curly brace. Often, but
7280 # not always, we want to end the line immediately.
7281 # So now we have to check for special cases.
7283 # if this '}' successfully ends a one-line block..
7284 my $is_one_line_block = 0;
7286 if ( $index_start_one_line_block != UNDEFINED_INDEX ) {
7288 # Remember the type of token just before the
7289 # opening brace. It would be more general to use
7290 # a stack, but this will work for one-line blocks.
7291 $is_one_line_block =
7292 $types_to_go[$index_start_one_line_block];
7294 # we have to actually make it by removing tentative
7295 # breaks that were set within it
7296 undo_forced_breakpoint_stack(0);
7297 set_nobreaks( $index_start_one_line_block,
7298 $max_index_to_go - 1 );
7300 # then re-initialize for the next one-line block
7301 destroy_one_line_block();
7303 # then decide if we want to break after the '}' ..
7304 # We will keep going to allow certain brace followers as in:
7305 # do { $ifclosed = 1; last } unless $losing;
7307 # But make a line break if the curly ends a
7308 # significant block:
7311 $is_block_without_semicolon{$block_type}
7313 # Follow users break point for
7314 # one line block types U & G, such as a 'try' block
7315 || $is_one_line_block =~ /^[UG]$/ && $j == $jmax
7318 # if needless semicolon follows we handle it later
7319 && $next_nonblank_token ne ';'
7322 $self->output_line_to_go()
7323 unless ($no_internal_newlines);
7327 # set string indicating what we need to look for brace follower
7329 if ( $block_type eq 'do' ) {
7330 $rbrace_follower = \%is_do_follower;
7332 elsif ( $block_type =~ /^(if|elsif|unless)$/ ) {
7333 $rbrace_follower = \%is_if_brace_follower;
7335 elsif ( $block_type eq 'else' ) {
7336 $rbrace_follower = \%is_else_brace_follower;
7339 # added eval for borris.t
7340 elsif ($is_sort_map_grep_eval{$block_type}
7341 || $is_one_line_block eq 'G' )
7343 $rbrace_follower = undef;
7348 elsif ( $block_type =~ /$ASUB_PATTERN/ ) {
7350 if ($is_one_line_block) {
7351 $rbrace_follower = \%is_anon_sub_1_brace_follower;
7354 $rbrace_follower = \%is_anon_sub_brace_follower;
7358 # None of the above: specify what can follow a closing
7359 # brace of a block which is not an
7360 # if/elsif/else/do/sort/map/grep/eval
7362 # 'Toolbar.pm', 'Menubar.pm', bless.t, '3rules.pl', 'break1.t
7364 $rbrace_follower = \%is_other_brace_follower;
7367 # See if an elsif block is followed by another elsif or else;
7369 if ( $block_type eq 'elsif' ) {
7371 if ( $next_nonblank_token_type eq 'b' ) { # end of line?
7372 $looking_for_else = 1; # ok, check on next line
7376 unless ( $next_nonblank_token =~ /^(elsif|else)$/ ) {
7377 write_logfile_entry("No else block :(\n");
7382 # keep going after certain block types (map,sort,grep,eval)
7383 # added eval for borris.t
7389 # if no more tokens, postpone decision until re-entring
7390 elsif ( ( $next_nonblank_token_type eq 'b' )
7391 && $rOpts_add_newlines )
7393 unless ($rbrace_follower) {
7394 $self->output_line_to_go()
7395 unless ($no_internal_newlines);
7399 elsif ($rbrace_follower) {
7401 unless ( $rbrace_follower->{$next_nonblank_token} ) {
7402 $self->output_line_to_go()
7403 unless ($no_internal_newlines);
7405 $rbrace_follower = undef;
7409 $self->output_line_to_go() unless ($no_internal_newlines);
7412 } # end treatment of closing block token
7415 elsif ( $type eq ';' ) {
7417 # kill one-line blocks with too many semicolons
7418 $semicolons_before_block_self_destruct--;
7420 ( $semicolons_before_block_self_destruct < 0 )
7421 || ( $semicolons_before_block_self_destruct == 0
7422 && $next_nonblank_token_type !~ /^[b\}]$/ )
7425 destroy_one_line_block();
7428 # Remove unnecessary semicolons, but not after bare
7429 # blocks, where it could be unsafe if the brace is
7433 $last_nonblank_token eq '}'
7435 $is_block_without_semicolon{
7436 $last_nonblank_block_type}
7437 || $last_nonblank_block_type =~ /$SUB_PATTERN/
7438 || $last_nonblank_block_type =~ /^\w+:$/ )
7440 || $last_nonblank_type eq ';'
7445 $rOpts->{'delete-semicolons'}
7447 # don't delete ; before a # because it would promote it
7448 # to a block comment
7449 && ( $next_nonblank_token_type ne '#' )
7452 note_deleted_semicolon();
7453 $self->output_line_to_go()
7454 unless ( $no_internal_newlines
7455 || $index_start_one_line_block != UNDEFINED_INDEX );
7459 write_logfile_entry("Extra ';'\n");
7462 $self->store_token_to_go();
7464 $self->output_line_to_go()
7465 unless ( $no_internal_newlines
7466 || ( $rOpts_keep_interior_semicolons && $j < $jmax )
7467 || ( $next_nonblank_token eq '}' ) );
7471 # handle here_doc target string
7472 elsif ( $type eq 'h' ) {
7474 # no newlines after seeing here-target
7475 $no_internal_newlines = 1;
7476 destroy_one_line_block();
7477 $self->store_token_to_go();
7480 # handle all other token types
7483 $self->store_token_to_go();
7486 # remember two previous nonblank OUTPUT tokens
7487 if ( $type ne '#' && $type ne 'b' ) {
7488 $last_last_nonblank_token = $last_nonblank_token;
7489 $last_last_nonblank_type = $last_nonblank_type;
7490 $last_nonblank_token = $token;
7491 $last_nonblank_type = $type;
7492 $last_nonblank_block_type = $block_type;
7495 # unset the continued-quote flag since it only applies to the
7496 # first token, and we want to resume normal formatting if
7497 # there are additional tokens on the line
7498 $in_continued_quote = 0;
7500 } # end of loop over all tokens in this 'line_of_tokens'
7502 # we have to flush ..
7505 # if there is a side comment
7506 ( ( $type eq '#' ) && !$rOpts->{'delete-side-comments'} )
7508 # if this line ends in a quote
7509 # NOTE: This is critically important for insuring that quoted lines
7510 # do not get processed by things like -sot and -sct
7513 # if this is a VERSION statement
7514 || $is_VERSION_statement
7516 # to keep a label at the end of a line
7519 # if we are instructed to keep all old line breaks
7520 || !$rOpts->{'delete-old-newlines'}
7523 destroy_one_line_block();
7524 $self->output_line_to_go();
7527 # mark old line breakpoints in current output stream
7528 if ( $max_index_to_go >= 0 && !$rOpts_ignore_old_breakpoints ) {
7529 my $jobp = $max_index_to_go;
7530 if ( $types_to_go[$max_index_to_go] eq 'b' && $max_index_to_go > 0 )
7534 $old_breakpoint_to_go[$jobp] = 1;
7537 } ## end sub print_line_of_tokens
7538 } ## end block print_line_of_tokens
7540 sub consecutive_nonblank_lines {
7541 return $file_writer_object->get_consecutive_nonblank_lines() +
7542 $vertical_aligner_object->get_cached_line_count();
7545 # sub output_line_to_go sends one logical line of tokens on down the
7546 # pipeline to the VerticalAligner package, breaking the line into continuation
7547 # lines as necessary. The line of tokens is ready to go in the "to_go"
7549 sub output_line_to_go {
7552 my $rLL = $self->{rLL};
7554 # debug stuff; this routine can be called from many points
7555 FORMATTER_DEBUG_FLAG_OUTPUT && do {
7556 my ( $a, $b, $c ) = caller;
7558 "OUTPUT: output_line_to_go called: $a $c $last_nonblank_type $last_nonblank_token, one_line=$index_start_one_line_block, tokens to write=$max_index_to_go\n"
7560 my $output_str = join "", @tokens_to_go[ 0 .. $max_index_to_go ];
7561 write_diagnostics("$output_str\n");
7564 # Do not end line in a weld
7565 # TODO: Move this fix into the routine?
7566 #my $jnb = $max_index_to_go;
7567 #if ( $jnb > 0 && $types_to_go[$jnb] eq 'b' ) { $jnb-- }
7568 return if ( weld_len_right_to_go($max_index_to_go) );
7570 # just set a tentative breakpoint if we might be in a one-line block
7571 if ( $index_start_one_line_block != UNDEFINED_INDEX ) {
7572 set_forced_breakpoint($max_index_to_go);
7576 ## my $cscw_block_comment;
7577 ## $cscw_block_comment = $self->add_closing_side_comment()
7578 ## if ( $rOpts->{'closing-side-comments'} && $max_index_to_go >= 0 );
7580 my $comma_arrow_count_contained = match_opening_and_closing_tokens();
7582 # tell the -lp option we are outputting a batch so it can close
7583 # any unfinished items in its stack
7586 # If this line ends in a code block brace, set breaks at any
7587 # previous closing code block braces to breakup a chain of code
7588 # blocks on one line. This is very rare but can happen for
7589 # user-defined subs. For example we might be looking at this:
7590 # BOOL { $server_data{uptime} > 0; } NUM { $server_data{load}; } STR {
7591 my $saw_good_break = 0; # flag to force breaks even if short line
7594 # looking for opening or closing block brace
7595 $block_type_to_go[$max_index_to_go]
7597 # but not one of these which are never duplicated on a line:
7598 # until|while|for|if|elsif|else
7599 && !$is_block_without_semicolon{ $block_type_to_go[$max_index_to_go] }
7602 my $lev = $nesting_depth_to_go[$max_index_to_go];
7604 # Walk backwards from the end and
7605 # set break at any closing block braces at the same level.
7606 # But quit if we are not in a chain of blocks.
7607 for ( my $i = $max_index_to_go - 1 ; $i >= 0 ; $i-- ) {
7608 last if ( $levels_to_go[$i] < $lev ); # stop at a lower level
7609 next if ( $levels_to_go[$i] > $lev ); # skip past higher level
7611 if ( $block_type_to_go[$i] ) {
7612 if ( $tokens_to_go[$i] eq '}' ) {
7613 set_forced_breakpoint($i);
7614 $saw_good_break = 1;
7618 # quit if we see anything besides words, function, blanks
7620 elsif ( $types_to_go[$i] !~ /^[\(\)Gwib]$/ ) { last }
7625 my $imax = $max_index_to_go;
7627 # trim any blank tokens
7628 if ( $max_index_to_go >= 0 ) {
7629 if ( $types_to_go[$imin] eq 'b' ) { $imin++ }
7630 if ( $types_to_go[$imax] eq 'b' ) { $imax-- }
7633 # anything left to write?
7634 if ( $imin <= $imax ) {
7636 # add a blank line before certain key types but not after a comment
7637 if ( $last_line_leading_type !~ /^[#]/ ) {
7639 my $leading_token = $tokens_to_go[$imin];
7640 my $leading_type = $types_to_go[$imin];
7642 # blank lines before subs except declarations and one-liners
7643 # MCONVERSION LOCATION - for sub tokenization change
7644 if ( $leading_token =~ /^(sub\s)/ && $leading_type eq 'i' ) {
7645 $want_blank = $rOpts->{'blank-lines-before-subs'}
7647 terminal_type( \@types_to_go, \@block_type_to_go, $imin,
7648 $imax ) !~ /^[\;\}]$/
7652 # break before all package declarations
7653 # MCONVERSION LOCATION - for tokenizaton change
7654 elsif ($leading_token =~ /^(package\s)/
7655 && $leading_type eq 'i' )
7657 $want_blank = $rOpts->{'blank-lines-before-packages'};
7660 # break before certain key blocks except one-liners
7661 if ( $leading_token =~ /^(BEGIN|END)$/ && $leading_type eq 'k' ) {
7662 $want_blank = $rOpts->{'blank-lines-before-subs'}
7664 terminal_type( \@types_to_go, \@block_type_to_go, $imin,
7669 # Break before certain block types if we haven't had a
7670 # break at this level for a while. This is the
7671 # difficult decision..
7672 elsif ($leading_type eq 'k'
7673 && $last_line_leading_type ne 'b'
7674 && $leading_token =~ /^(unless|if|while|until|for|foreach)$/ )
7676 my $lc = $nonblank_lines_at_depth[$last_line_leading_level];
7677 if ( !defined($lc) ) { $lc = 0 }
7679 # patch for RT #128216: no blank line inserted at a level change
7680 if ( $levels_to_go[$imin] != $last_line_leading_level ) {
7685 $rOpts->{'blanks-before-blocks'}
7686 && $lc >= $rOpts->{'long-block-line-count'}
7687 && consecutive_nonblank_lines() >=
7688 $rOpts->{'long-block-line-count'}
7690 terminal_type( \@types_to_go, \@block_type_to_go, $imin,
7695 # Check for blank lines wanted before a closing brace
7696 if ( $leading_token eq '}' ) {
7697 if ( $rOpts->{'blank-lines-before-closing-block'}
7698 && $block_type_to_go[$imin]
7699 && $block_type_to_go[$imin] =~
7700 /$blank_lines_before_closing_block_pattern/ )
7702 my $nblanks = $rOpts->{'blank-lines-before-closing-block'};
7703 if ( $nblanks > $want_blank ) {
7704 $want_blank = $nblanks;
7711 # future: send blank line down normal path to VerticalAligner
7712 Perl::Tidy::VerticalAligner::flush();
7713 $file_writer_object->require_blank_code_lines($want_blank);
7717 # update blank line variables and count number of consecutive
7718 # non-blank, non-comment lines at this level
7719 $last_last_line_leading_level = $last_line_leading_level;
7720 $last_line_leading_level = $levels_to_go[$imin];
7721 if ( $last_line_leading_level < 0 ) { $last_line_leading_level = 0 }
7722 $last_line_leading_type = $types_to_go[$imin];
7723 if ( $last_line_leading_level == $last_last_line_leading_level
7724 && $last_line_leading_type ne 'b'
7725 && $last_line_leading_type ne '#'
7726 && defined( $nonblank_lines_at_depth[$last_line_leading_level] ) )
7728 $nonblank_lines_at_depth[$last_line_leading_level]++;
7731 $nonblank_lines_at_depth[$last_line_leading_level] = 1;
7734 FORMATTER_DEBUG_FLAG_FLUSH && do {
7735 my ( $package, $file, $line ) = caller;
7737 "FLUSH: flushing from $package $file $line, types= $types_to_go[$imin] to $types_to_go[$imax]\n";
7740 # add a couple of extra terminal blank tokens
7743 # set all forced breakpoints for good list formatting
7744 my $is_long_line = excess_line_length( $imin, $max_index_to_go ) > 0;
7746 my $old_line_count_in_batch =
7747 $self->get_old_line_count( $K_to_go[0], $K_to_go[$max_index_to_go] );
7751 || $old_line_count_in_batch > 1
7753 # must always call scan_list() with unbalanced batches because it
7754 # is maintaining some stacks
7755 || is_unbalanced_batch()
7757 # call scan_list if we might want to break at commas
7759 $comma_count_in_batch
7760 && ( $rOpts_maximum_fields_per_table > 0
7761 || $rOpts_comma_arrow_breakpoints == 0 )
7764 # call scan_list if user may want to break open some one-line
7766 || ( $comma_arrow_count_contained
7767 && $rOpts_comma_arrow_breakpoints != 3 )
7770 ## This caused problems in one version of perl for unknown reasons:
7771 ## $saw_good_break ||= scan_list();
7772 my $sgb = scan_list();
7773 $saw_good_break ||= $sgb;
7776 # let $ri_first and $ri_last be references to lists of
7777 # first and last tokens of line fragments to output..
7778 my ( $ri_first, $ri_last );
7780 # write a single line if..
7783 # we aren't allowed to add any newlines
7784 !$rOpts_add_newlines
7786 # or, we don't already have an interior breakpoint
7787 # and we didn't see a good breakpoint
7789 !$forced_breakpoint_count
7792 # and this line is 'short'
7797 @{$ri_first} = ($imin);
7798 @{$ri_last} = ($imax);
7801 # otherwise use multiple lines
7804 ( $ri_first, $ri_last, my $colon_count ) =
7805 set_continuation_breaks($saw_good_break);
7807 break_all_chain_tokens( $ri_first, $ri_last );
7809 break_equals( $ri_first, $ri_last );
7811 # now we do a correction step to clean this up a bit
7812 # (The only time we would not do this is for debugging)
7813 if ( $rOpts->{'recombine'} ) {
7814 ( $ri_first, $ri_last ) =
7815 recombine_breakpoints( $ri_first, $ri_last );
7818 insert_final_breaks( $ri_first, $ri_last ) if $colon_count;
7821 # do corrector step if -lp option is used
7823 if ($rOpts_line_up_parentheses) {
7824 $do_not_pad = correct_lp_indentation( $ri_first, $ri_last );
7826 $self->unmask_phantom_semicolons( $ri_first, $ri_last );
7827 if ( $rOpts_one_line_block_semicolons == 0 ) {
7828 $self->delete_one_line_semicolons( $ri_first, $ri_last );
7830 $self->send_lines_to_vertical_aligner( $ri_first, $ri_last,
7833 # Insert any requested blank lines after an opening brace. We have to
7834 # skip back before any side comment to find the terminal token
7836 for ( $iterm = $imax ; $iterm >= $imin ; $iterm-- ) {
7837 next if $types_to_go[$iterm] eq '#';
7838 next if $types_to_go[$iterm] eq 'b';
7842 # write requested number of blank lines after an opening block brace
7843 if ( $iterm >= $imin && $types_to_go[$iterm] eq '{' ) {
7844 if ( $rOpts->{'blank-lines-after-opening-block'}
7845 && $block_type_to_go[$iterm]
7846 && $block_type_to_go[$iterm] =~
7847 /$blank_lines_after_opening_block_pattern/ )
7849 my $nblanks = $rOpts->{'blank-lines-after-opening-block'};
7850 Perl::Tidy::VerticalAligner::flush();
7851 $file_writer_object->require_blank_code_lines($nblanks);
7856 prepare_for_new_input_lines();
7858 ## # output any new -cscw block comment
7859 ## if ($cscw_block_comment) {
7861 ## $file_writer_object->write_code_line( $cscw_block_comment . "\n" );
7866 sub note_added_semicolon {
7867 my ($line_number) = @_;
7868 $last_added_semicolon_at = $line_number;
7869 if ( $added_semicolon_count == 0 ) {
7870 $first_added_semicolon_at = $last_added_semicolon_at;
7872 $added_semicolon_count++;
7873 write_logfile_entry("Added ';' here\n");
7877 sub note_deleted_semicolon {
7878 $last_deleted_semicolon_at = $input_line_number;
7879 if ( $deleted_semicolon_count == 0 ) {
7880 $first_deleted_semicolon_at = $last_deleted_semicolon_at;
7882 $deleted_semicolon_count++;
7883 write_logfile_entry("Deleted unnecessary ';'\n"); # i hope ;)
7887 sub note_embedded_tab {
7888 $embedded_tab_count++;
7889 $last_embedded_tab_at = $input_line_number;
7890 if ( !$first_embedded_tab_at ) {
7891 $first_embedded_tab_at = $last_embedded_tab_at;
7894 if ( $embedded_tab_count <= MAX_NAG_MESSAGES ) {
7895 write_logfile_entry("Embedded tabs in quote or pattern\n");
7900 sub starting_one_line_block {
7902 # after seeing an opening curly brace, look for the closing brace
7903 # and see if the entire block will fit on a line. This routine is
7904 # not always right because it uses the old whitespace, so a check
7905 # is made later (at the closing brace) to make sure we really
7906 # have a one-line block. We have to do this preliminary check,
7907 # though, because otherwise we would always break at a semicolon
7908 # within a one-line block if the block contains multiple statements.
7910 my ( $self, $j, $jmax, $level, $slevel, $ci_level, $rtoken_array ) = @_;
7911 my $rbreak_container = $self->{rbreak_container};
7913 my $jmax_check = @{$rtoken_array};
7914 if ( $jmax_check < $jmax ) {
7915 Fault("jmax=$jmax > $jmax_check");
7918 # kill any current block - we can only go 1 deep
7919 destroy_one_line_block();
7922 # 1=distance from start of block to opening brace exceeds line length
7927 # shouldn't happen: there must have been a prior call to
7928 # store_token_to_go to put the opening brace in the output stream
7929 if ( $max_index_to_go < 0 ) {
7930 Fault("program bug: store_token_to_go called incorrectly\n");
7933 # return if block should be broken
7934 my $type_sequence = $rtoken_array->[$j]->[_TYPE_SEQUENCE_];
7935 if ( $rbreak_container->{$type_sequence} ) {
7939 my $block_type = $rtoken_array->[$j]->[_BLOCK_TYPE_];
7941 # find the starting keyword for this block (such as 'if', 'else', ...)
7943 if ( $block_type =~ /^[\{\}\;\:]$/ || $block_type =~ /^package/ ) {
7944 $i_start = $max_index_to_go;
7947 # the previous nonblank token should start these block types
7948 elsif (( $last_last_nonblank_token_to_go eq $block_type )
7949 || ( $block_type =~ /^sub\b/ )
7950 || $block_type =~ /\(\)/ )
7952 $i_start = $last_last_nonblank_index_to_go;
7954 # For signatures and extended syntax ...
7955 # If this brace follows a parenthesized list, we should look back to
7956 # find the keyword before the opening paren because otherwise we might
7957 # form a one line block which stays intack, and cause the parenthesized
7958 # expression to break open. That looks bad. However, actually
7959 # searching for the opening paren is slow and tedius.
7960 # The actual keyword is often at the start of a line, but might not be.
7961 # For example, we might have an anonymous sub with signature list
7962 # following a =>. It is safe to mark the start anywhere before the
7963 # opening paren, so we just go back to the prevoious break (or start of
7964 # the line) if that is before the opening paren. The minor downside is
7965 # that we may very occasionally break open a block unnecessarily.
7966 if ( $tokens_to_go[$i_start] eq ')' ) {
7967 $i_start = $index_max_forced_break + 1;
7968 if ( $types_to_go[$i_start] eq 'b' ) { $i_start++; }
7969 my $lev = $levels_to_go[$i_start];
7970 if ( $lev > $level ) { return 0 }
7974 elsif ( $last_last_nonblank_token_to_go eq ')' ) {
7976 # For something like "if (xxx) {", the keyword "if" will be
7977 # just after the most recent break. This will be 0 unless
7978 # we have just killed a one-line block and are starting another.
7980 # Note: cannot use inext_index_to_go[] here because that array
7981 # is still being constructed.
7982 $i_start = $index_max_forced_break + 1;
7983 if ( $types_to_go[$i_start] eq 'b' ) {
7987 # Patch to avoid breaking short blocks defined with extended_syntax:
7988 # Strip off any trailing () which was added in the parser to mark
7989 # the opening keyword. For example, in the following
7990 # create( TypeFoo $e) {$bubba}
7991 # the blocktype would be marked as create()
7992 my $stripped_block_type = $block_type;
7993 $stripped_block_type =~ s/\(\)$//;
7995 unless ( $tokens_to_go[$i_start] eq $stripped_block_type ) {
8000 # patch for SWITCH/CASE to retain one-line case/when blocks
8001 elsif ( $block_type eq 'case' || $block_type eq 'when' ) {
8003 # Note: cannot use inext_index_to_go[] here because that array
8004 # is still being constructed.
8005 $i_start = $index_max_forced_break + 1;
8006 if ( $types_to_go[$i_start] eq 'b' ) {
8009 unless ( $tokens_to_go[$i_start] eq $block_type ) {
8018 my $pos = total_line_length( $i_start, $max_index_to_go ) - 1;
8020 # see if length is too long to even start
8021 if ( $pos > maximum_line_length($i_start) ) {
8025 foreach my $i ( $j + 1 .. $jmax ) {
8027 # old whitespace could be arbitrarily large, so don't use it
8028 if ( $rtoken_array->[$i]->[_TYPE_] eq 'b' ) { $pos += 1 }
8029 else { $pos += rtoken_length($i) }
8031 # Return false result if we exceed the maximum line length,
8032 if ( $pos > maximum_line_length($i_start) ) {
8036 # or encounter another opening brace before finding the closing brace.
8037 elsif ($rtoken_array->[$i]->[_TOKEN_] eq '{'
8038 && $rtoken_array->[$i]->[_TYPE_] eq '{'
8039 && $rtoken_array->[$i]->[_BLOCK_TYPE_] )
8044 # if we find our closing brace..
8045 elsif ($rtoken_array->[$i]->[_TOKEN_] eq '}'
8046 && $rtoken_array->[$i]->[_TYPE_] eq '}'
8047 && $rtoken_array->[$i]->[_BLOCK_TYPE_] )
8050 # be sure any trailing comment also fits on the line
8052 ( $rtoken_array->[ $i + 1 ]->[_TYPE_] eq 'b' ) ? $i + 2 : $i + 1;
8054 # Patch for one-line sort/map/grep/eval blocks with side comments:
8055 # We will ignore the side comment length for sort/map/grep/eval
8056 # because this can lead to statements which change every time
8057 # perltidy is run. Here is an example from Denis Moskowitz which
8058 # oscillates between these two states without this patch:
8061 ## grep { $_->foo ne 'bar' } # asdfa asdf asdf asdf asdf asdf asdf asdf asdf asdf asdf
8066 ## } # asdfa asdf asdf asdf asdf asdf asdf asdf asdf asdf asdf
8070 # When the first line is input it gets broken apart by the main
8071 # line break logic in sub print_line_of_tokens.
8072 # When the second line is input it gets recombined by
8073 # print_line_of_tokens and passed to the output routines. The
8074 # output routines (set_continuation_breaks) do not break it apart
8075 # because the bond strengths are set to the highest possible value
8076 # for grep/map/eval/sort blocks, so the first version gets output.
8077 # It would be possible to fix this by changing bond strengths,
8078 # but they are high to prevent errors in older versions of perl.
8080 if ( $rtoken_array->[$i_nonblank]->[_TYPE_] eq '#'
8081 && !$is_sort_map_grep{$block_type} )
8084 $pos += rtoken_length($i_nonblank);
8086 if ( $i_nonblank > $i + 1 ) {
8088 # source whitespace could be anything, assume
8089 # at least one space before the hash on output
8090 if ( $rtoken_array->[ $i + 1 ]->[_TYPE_] eq 'b' ) {
8093 else { $pos += rtoken_length( $i + 1 ) }
8096 if ( $pos >= maximum_line_length($i_start) ) {
8101 # ok, it's a one-line block
8102 create_one_line_block( $i_start, 20 );
8106 # just keep going for other characters
8111 # Allow certain types of new one-line blocks to form by joining
8112 # input lines. These can be safely done, but for other block types,
8113 # we keep old one-line blocks but do not form new ones. It is not
8114 # always a good idea to make as many one-line blocks as possible,
8115 # so other types are not done. The user can always use -mangle.
8116 if ( $is_sort_map_grep_eval{$block_type} ) {
8117 create_one_line_block( $i_start, 1 );
8122 sub unstore_token_to_go {
8124 # remove most recent token from output stream
8126 if ( $max_index_to_go > 0 ) {
8130 $max_index_to_go = UNDEFINED_INDEX;
8135 sub want_blank_line {
8138 $file_writer_object->want_blank_line();
8142 sub write_unindented_line {
8143 my ( $self, $line ) = @_;
8145 $file_writer_object->write_line($line);
8151 # Undo continuation indentation in certain sequences
8152 # For example, we can undo continuation indentation in sort/map/grep chains
8153 # my $dat1 = pack( "n*",
8154 # map { $_, $lookup->{$_} }
8155 # sort { $a <=> $b }
8156 # grep { $lookup->{$_} ne $default } keys %$lookup );
8157 # To align the map/sort/grep keywords like this:
8158 # my $dat1 = pack( "n*",
8159 # map { $_, $lookup->{$_} }
8160 # sort { $a <=> $b }
8161 # grep { $lookup->{$_} ne $default } keys %$lookup );
8162 my ( $ri_first, $ri_last ) = @_;
8163 my ( $line_1, $line_2, $lev_last );
8164 my $this_line_is_semicolon_terminated;
8165 my $max_line = @{$ri_first} - 1;
8167 # looking at each line of this batch..
8168 # We are looking at leading tokens and looking for a sequence
8169 # all at the same level and higher level than enclosing lines.
8170 foreach my $line ( 0 .. $max_line ) {
8172 my $ibeg = $ri_first->[$line];
8173 my $lev = $levels_to_go[$ibeg];
8176 # if we have started a chain..
8179 # see if it continues..
8180 if ( $lev == $lev_last ) {
8181 if ( $types_to_go[$ibeg] eq 'k'
8182 && $is_sort_map_grep{ $tokens_to_go[$ibeg] } )
8185 # chain continues...
8186 # check for chain ending at end of a statement
8187 if ( $line == $max_line ) {
8189 # see of this line ends a statement
8190 my $iend = $ri_last->[$line];
8191 $this_line_is_semicolon_terminated =
8192 $types_to_go[$iend] eq ';'
8194 # with possible side comment
8195 || ( $types_to_go[$iend] eq '#'
8196 && $iend - $ibeg >= 2
8197 && $types_to_go[ $iend - 2 ] eq ';'
8198 && $types_to_go[ $iend - 1 ] eq 'b' );
8200 $line_2 = $line if ($this_line_is_semicolon_terminated);
8208 elsif ( $lev < $lev_last ) {
8210 # chain ends with previous line
8211 $line_2 = $line - 1;
8213 elsif ( $lev > $lev_last ) {
8219 # undo the continuation indentation if a chain ends
8220 if ( defined($line_2) && defined($line_1) ) {
8221 my $continuation_line_count = $line_2 - $line_1 + 1;
8222 @ci_levels_to_go[ @{$ri_first}[ $line_1 .. $line_2 ] ] =
8223 (0) x ($continuation_line_count)
8224 if ( $continuation_line_count >= 0 );
8225 @leading_spaces_to_go[ @{$ri_first}[ $line_1 .. $line_2 ] ]
8226 = @reduced_spaces_to_go[ @{$ri_first}
8227 [ $line_1 .. $line_2 ] ];
8232 # not in a chain yet..
8235 # look for start of a new sort/map/grep chain
8236 if ( $lev > $lev_last ) {
8237 if ( $types_to_go[$ibeg] eq 'k'
8238 && $is_sort_map_grep{ $tokens_to_go[$ibeg] } )
8252 # If there is a single, long parameter within parens, like this:
8254 # $self->command( "/msg "
8256 # . " You said $1, but did you know that it's square was "
8257 # . $1 * $1 . " ?" );
8259 # we can remove the continuation indentation of the 2nd and higher lines
8260 # to achieve this effect, which is more pleasing:
8262 # $self->command("/msg "
8264 # . " You said $1, but did you know that it's square was "
8265 # . $1 * $1 . " ?");
8267 my ( $line_open, $i_start, $closing_index, $ri_first, $ri_last ) = @_;
8268 my $max_line = @{$ri_first} - 1;
8270 # must be multiple lines
8271 return unless $max_line > $line_open;
8273 my $lev_start = $levels_to_go[$i_start];
8274 my $ci_start_plus = 1 + $ci_levels_to_go[$i_start];
8276 # see if all additional lines in this container have continuation
8279 my $line_1 = 1 + $line_open;
8280 for ( $n = $line_1 ; $n <= $max_line ; ++$n ) {
8281 my $ibeg = $ri_first->[$n];
8282 my $iend = $ri_last->[$n];
8283 if ( $ibeg eq $closing_index ) { $n--; last }
8284 return if ( $lev_start != $levels_to_go[$ibeg] );
8285 return if ( $ci_start_plus != $ci_levels_to_go[$ibeg] );
8286 last if ( $closing_index <= $iend );
8289 # we can reduce the indentation of all continuation lines
8290 my $continuation_line_count = $n - $line_open;
8291 @ci_levels_to_go[ @{$ri_first}[ $line_1 .. $n ] ] =
8292 (0) x ($continuation_line_count);
8293 @leading_spaces_to_go[ @{$ri_first}[ $line_1 .. $n ] ] =
8294 @reduced_spaces_to_go[ @{$ri_first}[ $line_1 .. $n ] ];
8300 # insert $pad_spaces before token number $ipad
8301 my ( $ipad, $pad_spaces ) = @_;
8302 if ( $pad_spaces > 0 ) {
8303 $tokens_to_go[$ipad] = ' ' x $pad_spaces . $tokens_to_go[$ipad];
8305 elsif ( $pad_spaces == -1 && $tokens_to_go[$ipad] eq ' ' ) {
8306 $tokens_to_go[$ipad] = "";
8314 $token_lengths_to_go[$ipad] += $pad_spaces;
8315 foreach my $i ( $ipad .. $max_index_to_go ) {
8316 $summed_lengths_to_go[ $i + 1 ] += $pad_spaces;
8326 my @q = qw( + - * / );
8327 @is_math_op{@q} = (1) x scalar(@q);
8330 sub set_logical_padding {
8332 # Look at a batch of lines and see if extra padding can improve the
8333 # alignment when there are certain leading operators. Here is an
8334 # example, in which some extra space is introduced before
8335 # '( $year' to make it line up with the subsequent lines:
8337 # if ( ( $Year < 1601 )
8338 # || ( $Year > 2899 )
8339 # || ( $EndYear < 1601 )
8340 # || ( $EndYear > 2899 ) )
8342 # &Error_OutOfRange;
8345 my ( $ri_first, $ri_last ) = @_;
8346 my $max_line = @{$ri_first} - 1;
8348 # FIXME: move these declarations below
8349 my ( $ibeg, $ibeg_next, $ibegm, $iend, $iendm, $ipad, $pad_spaces,
8350 $tok_next, $type_next, $has_leading_op_next, $has_leading_op );
8352 # looking at each line of this batch..
8353 foreach my $line ( 0 .. $max_line - 1 ) {
8355 # see if the next line begins with a logical operator
8356 $ibeg = $ri_first->[$line];
8357 $iend = $ri_last->[$line];
8358 $ibeg_next = $ri_first->[ $line + 1 ];
8359 $tok_next = $tokens_to_go[$ibeg_next];
8360 $type_next = $types_to_go[$ibeg_next];
8362 $has_leading_op_next = ( $tok_next =~ /^\w/ )
8363 ? $is_chain_operator{$tok_next} # + - * / : ? && ||
8364 : $is_chain_operator{$type_next}; # and, or
8366 next unless ($has_leading_op_next);
8368 # next line must not be at lesser depth
8370 if ( $nesting_depth_to_go[$ibeg] >
8371 $nesting_depth_to_go[$ibeg_next] );
8373 # identify the token in this line to be padded on the left
8376 # handle lines at same depth...
8377 if ( $nesting_depth_to_go[$ibeg] ==
8378 $nesting_depth_to_go[$ibeg_next] )
8381 # if this is not first line of the batch ...
8384 # and we have leading operator..
8385 next if $has_leading_op;
8387 # Introduce padding if..
8388 # 1. the previous line is at lesser depth, or
8389 # 2. the previous line ends in an assignment
8390 # 3. the previous line ends in a 'return'
8391 # 4. the previous line ends in a comma
8392 # Example 1: previous line at lesser depth
8393 # if ( ( $Year < 1601 ) # <- we are here but
8394 # || ( $Year > 2899 ) # list has not yet
8395 # || ( $EndYear < 1601 ) # collapsed vertically
8396 # || ( $EndYear > 2899 ) )
8399 # Example 2: previous line ending in assignment:
8401 # $year % 4 ? 0 # <- We are here
8406 # Example 3: previous line ending in comma:
8413 # be sure levels agree (do not indent after an indented 'if')
8415 if ( $levels_to_go[$ibeg] ne $levels_to_go[$ibeg_next] );
8417 # allow padding on first line after a comma but only if:
8418 # (1) this is line 2 and
8419 # (2) there are at more than three lines and
8420 # (3) lines 3 and 4 have the same leading operator
8421 # These rules try to prevent padding within a long
8422 # comma-separated list.
8424 if ( $types_to_go[$iendm] eq ','
8428 my $ibeg_next_next = $ri_first->[ $line + 2 ];
8429 my $tok_next_next = $tokens_to_go[$ibeg_next_next];
8430 $ok_comma = $tok_next_next eq $tok_next;
8435 $is_assignment{ $types_to_go[$iendm] }
8437 || ( $nesting_depth_to_go[$ibegm] <
8438 $nesting_depth_to_go[$ibeg] )
8439 || ( $types_to_go[$iendm] eq 'k'
8440 && $tokens_to_go[$iendm] eq 'return' )
8443 # we will add padding before the first token
8447 # for first line of the batch..
8450 # WARNING: Never indent if first line is starting in a
8451 # continued quote, which would change the quote.
8452 next if $starting_in_quote;
8454 # if this is text after closing '}'
8455 # then look for an interior token to pad
8456 if ( $types_to_go[$ibeg] eq '}' ) {
8460 # otherwise, we might pad if it looks really good
8463 # we might pad token $ibeg, so be sure that it
8464 # is at the same depth as the next line.
8466 if ( $nesting_depth_to_go[$ibeg] !=
8467 $nesting_depth_to_go[$ibeg_next] );
8469 # We can pad on line 1 of a statement if at least 3
8470 # lines will be aligned. Otherwise, it
8471 # can look very confusing.
8473 # We have to be careful not to pad if there are too few
8474 # lines. The current rule is:
8475 # (1) in general we require at least 3 consecutive lines
8476 # with the same leading chain operator token,
8477 # (2) but an exception is that we only require two lines
8478 # with leading colons if there are no more lines. For example,
8479 # the first $i in the following snippet would get padding
8480 # by the second rule:
8482 # $i == 1 ? ( "First", "Color" )
8483 # : $i == 2 ? ( "Then", "Rarity" )
8484 # : ( "Then", "Name" );
8486 if ( $max_line > 1 ) {
8487 my $leading_token = $tokens_to_go[$ibeg_next];
8490 # never indent line 1 of a '.' series because
8491 # previous line is most likely at same level.
8492 # TODO: we should also look at the leasing_spaces
8493 # of the last output line and skip if it is same
8495 next if ( $leading_token eq '.' );
8498 foreach my $l ( 2 .. 3 ) {
8499 last if ( $line + $l > $max_line );
8500 my $ibeg_next_next = $ri_first->[ $line + $l ];
8501 if ( $tokens_to_go[$ibeg_next_next] ne
8509 next if ($tokens_differ);
8510 next if ( $count < 3 && $leading_token ne ':' );
8520 # find interior token to pad if necessary
8521 if ( !defined($ipad) ) {
8523 for ( my $i = $ibeg ; ( $i < $iend ) && !$ipad ; $i++ ) {
8525 # find any unclosed container
8527 unless ( $type_sequence_to_go[$i]
8528 && $mate_index_to_go[$i] > $iend );
8530 # find next nonblank token to pad
8531 $ipad = $inext_to_go[$i];
8532 last if ( $ipad > $iend );
8537 # We cannot pad the first leading token of a file because
8538 # it could cause a bug in which the starting indentation
8539 # level is guessed incorrectly each time the code is run
8540 # though perltidy, thus causing the code to march off to
8541 # the right. For example, the following snippet would have
8544 ## ov_method mycan( $package, '(""' ), $package
8545 ## or ov_method mycan( $package, '(0+' ), $package
8546 ## or ov_method mycan( $package, '(bool' ), $package
8547 ## or ov_method mycan( $package, '(nomethod' ), $package;
8549 # If this snippet is within a block this won't happen
8550 # unless the user just processes the snippet alone within
8551 # an editor. In that case either the user will see and
8552 # fix the problem or it will be corrected next time the
8553 # entire file is processed with perltidy.
8554 next if ( $ipad == 0 && $peak_batch_size <= 1 );
8556 ## THIS PATCH REMOVES THE FOLLOWING POOR PADDING (math.t) with -pbp, BUT
8557 ## IT DID MORE HARM THAN GOOD
8559 ## $font->{'loca'}->{'glyphs'}[$x]->read->{'xMin'} * 1000
8562 ##? # do not put leading padding for just 2 lines of math
8563 ##? if ( $ipad == $ibeg
8565 ##? && $levels_to_go[$ipad] > $levels_to_go[ $ipad - 1 ]
8566 ##? && $is_math_op{$type_next}
8567 ##? && $line + 2 <= $max_line )
8569 ##? my $ibeg_next_next = $ri_first->[ $line + 2 ];
8570 ##? my $type_next_next = $types_to_go[$ibeg_next_next];
8571 ##? next if !$is_math_op{$type_next_next};
8574 # next line must not be at greater depth
8575 my $iend_next = $ri_last->[ $line + 1 ];
8577 if ( $nesting_depth_to_go[ $iend_next + 1 ] >
8578 $nesting_depth_to_go[$ipad] );
8580 # lines must be somewhat similar to be padded..
8581 my $inext_next = $inext_to_go[$ibeg_next];
8582 my $type = $types_to_go[$ipad];
8583 my $type_next = $types_to_go[ $ipad + 1 ];
8585 # see if there are multiple continuation lines
8586 my $logical_continuation_lines = 1;
8587 if ( $line + 2 <= $max_line ) {
8588 my $leading_token = $tokens_to_go[$ibeg_next];
8589 my $ibeg_next_next = $ri_first->[ $line + 2 ];
8590 if ( $tokens_to_go[$ibeg_next_next] eq $leading_token
8591 && $nesting_depth_to_go[$ibeg_next] eq
8592 $nesting_depth_to_go[$ibeg_next_next] )
8594 $logical_continuation_lines++;
8598 # see if leading types match
8599 my $types_match = $types_to_go[$inext_next] eq $type;
8600 my $matches_without_bang;
8602 # if first line has leading ! then compare the following token
8603 if ( !$types_match && $type eq '!' ) {
8604 $types_match = $matches_without_bang =
8605 $types_to_go[$inext_next] eq $types_to_go[ $ipad + 1 ];
8610 # either we have multiple continuation lines to follow
8611 # and we are not padding the first token
8612 ( $logical_continuation_lines > 1 && $ipad > 0 )
8620 # and keywords must match if keyword
8623 && $tokens_to_go[$ipad] ne $tokens_to_go[$inext_next]
8629 #----------------------begin special checks--------------
8632 # A check is needed before we can make the pad.
8633 # If we are in a list with some long items, we want each
8634 # item to stand out. So in the following example, the
8635 # first line beginning with '$casefold->' would look good
8636 # padded to align with the next line, but then it
8637 # would be indented more than the last line, so we
8641 # $casefold->{code} eq '0041'
8642 # && $casefold->{status} eq 'C'
8643 # && $casefold->{mapping} eq '0061',
8648 # It would be faster, and almost as good, to use a comma
8649 # count, and not pad if comma_count > 1 and the previous
8650 # line did not end with a comma.
8654 my $ibg = $ri_first->[ $line + 1 ];
8655 my $depth = $nesting_depth_to_go[ $ibg + 1 ];
8657 # just use simplified formula for leading spaces to avoid
8658 # needless sub calls
8659 my $lsp = $levels_to_go[$ibg] + $ci_levels_to_go[$ibg];
8661 # look at each line beyond the next ..
8663 foreach my $ltest ( $line + 2 .. $max_line ) {
8665 my $ibg = $ri_first->[$l];
8667 # quit looking at the end of this container
8669 if ( $nesting_depth_to_go[ $ibg + 1 ] < $depth )
8670 || ( $nesting_depth_to_go[$ibg] < $depth );
8672 # cannot do the pad if a later line would be
8674 if ( $levels_to_go[$ibg] + $ci_levels_to_go[$ibg] < $lsp ) {
8680 # don't pad if we end in a broken list
8681 if ( $l == $max_line ) {
8682 my $i2 = $ri_last->[$l];
8683 if ( $types_to_go[$i2] eq '#' ) {
8684 my $i1 = $ri_first->[$l];
8687 terminal_type( \@types_to_go, \@block_type_to_go,
8694 # a minus may introduce a quoted variable, and we will
8695 # add the pad only if this line begins with a bare word,
8696 # such as for the word 'Button' here:
8698 # Button => "Print letter \"~$_\"",
8699 # -command => [ sub { print "$_[0]\n" }, $_ ],
8700 # -accelerator => "Meta+$_"
8703 # On the other hand, if 'Button' is quoted, it looks best
8706 # 'Button' => "Print letter \"~$_\"",
8707 # -command => [ sub { print "$_[0]\n" }, $_ ],
8708 # -accelerator => "Meta+$_"
8710 if ( $types_to_go[$ibeg_next] eq 'm' ) {
8711 $ok_to_pad = 0 if $types_to_go[$ibeg] eq 'Q';
8714 next unless $ok_to_pad;
8716 #----------------------end special check---------------
8718 my $length_1 = total_line_length( $ibeg, $ipad - 1 );
8719 my $length_2 = total_line_length( $ibeg_next, $inext_next - 1 );
8720 $pad_spaces = $length_2 - $length_1;
8722 # If the first line has a leading ! and the second does
8723 # not, then remove one space to try to align the next
8724 # leading characters, which are often the same. For example:
8726 # || $ts == $self->Holder
8727 # || $self->Holder->Type eq "Arena" )
8729 # This usually helps readability, but if there are subsequent
8730 # ! operators things will still get messed up. For example:
8732 # if ( !exists $Net::DNS::typesbyname{$qtype}
8733 # && exists $Net::DNS::classesbyname{$qtype}
8734 # && !exists $Net::DNS::classesbyname{$qclass}
8735 # && exists $Net::DNS::typesbyname{$qclass} )
8736 # We can't fix that.
8737 if ($matches_without_bang) { $pad_spaces-- }
8739 # make sure this won't change if -lp is used
8740 my $indentation_1 = $leading_spaces_to_go[$ibeg];
8741 if ( ref($indentation_1) ) {
8742 if ( $indentation_1->get_recoverable_spaces() == 0 ) {
8743 my $indentation_2 = $leading_spaces_to_go[$ibeg_next];
8744 unless ( $indentation_2->get_recoverable_spaces() == 0 )
8751 # we might be able to handle a pad of -1 by removing a blank
8753 if ( $pad_spaces < 0 ) {
8755 if ( $pad_spaces == -1 ) {
8756 if ( $ipad > $ibeg && $types_to_go[ $ipad - 1 ] eq 'b' )
8758 pad_token( $ipad - 1, $pad_spaces );
8764 # now apply any padding for alignment
8765 if ( $ipad >= 0 && $pad_spaces ) {
8767 my $length_t = total_line_length( $ibeg, $iend );
8768 if ( $pad_spaces + $length_t <= maximum_line_length($ibeg) )
8770 pad_token( $ipad, $pad_spaces );
8778 $has_leading_op = $has_leading_op_next;
8779 } # end of loop over lines
8784 sub correct_lp_indentation {
8786 # When the -lp option is used, we need to make a last pass through
8787 # each line to correct the indentation positions in case they differ
8788 # from the predictions. This is necessary because perltidy uses a
8789 # predictor/corrector method for aligning with opening parens. The
8790 # predictor is usually good, but sometimes stumbles. The corrector
8791 # tries to patch things up once the actual opening paren locations
8793 my ( $ri_first, $ri_last ) = @_;
8796 # Note on flag '$do_not_pad':
8797 # We want to avoid a situation like this, where the aligner inserts
8798 # whitespace before the '=' to align it with a previous '=', because
8799 # otherwise the parens might become mis-aligned in a situation like
8800 # this, where the '=' has become aligned with the previous line,
8801 # pushing the opening '(' forward beyond where we want it.
8803 # $mkFloor::currentRoom = '';
8804 # $mkFloor::c_entry = $c->Entry(
8806 # -relief => 'sunken',
8810 # We leave it to the aligner to decide how to do this.
8812 # first remove continuation indentation if appropriate
8813 my $max_line = @{$ri_first} - 1;
8815 # looking at each line of this batch..
8816 my ( $ibeg, $iend );
8817 foreach my $line ( 0 .. $max_line ) {
8818 $ibeg = $ri_first->[$line];
8819 $iend = $ri_last->[$line];
8821 # looking at each token in this output line..
8822 foreach my $i ( $ibeg .. $iend ) {
8824 # How many space characters to place before this token
8825 # for special alignment. Actual padding is done in the
8828 # looking for next unvisited indentation item
8829 my $indentation = $leading_spaces_to_go[$i];
8830 if ( !$indentation->get_marked() ) {
8831 $indentation->set_marked(1);
8833 # looking for indentation item for which we are aligning
8834 # with parens, braces, and brackets
8835 next unless ( $indentation->get_align_paren() );
8837 # skip closed container on this line
8839 my $im = max( $ibeg, $iprev_to_go[$i] );
8840 if ( $type_sequence_to_go[$im]
8841 && $mate_index_to_go[$im] <= $iend )
8847 if ( $line == 1 && $i == $ibeg ) {
8851 # Ok, let's see what the error is and try to fix it
8853 my $predicted_pos = $indentation->get_spaces();
8856 # token is mid-line - use length to previous token
8857 $actual_pos = total_line_length( $ibeg, $i - 1 );
8859 # for mid-line token, we must check to see if all
8860 # additional lines have continuation indentation,
8861 # and remove it if so. Otherwise, we do not get
8863 my $closing_index = $indentation->get_closed();
8864 if ( $closing_index > $iend ) {
8865 my $ibeg_next = $ri_first->[ $line + 1 ];
8866 if ( $ci_levels_to_go[$ibeg_next] > 0 ) {
8867 undo_lp_ci( $line, $i, $closing_index, $ri_first,
8872 elsif ( $line > 0 ) {
8874 # handle case where token starts a new line;
8875 # use length of previous line
8876 my $ibegm = $ri_first->[ $line - 1 ];
8877 my $iendm = $ri_last->[ $line - 1 ];
8878 $actual_pos = total_line_length( $ibegm, $iendm );
8882 if ( $types_to_go[ $iendm + 1 ] eq 'b' );
8886 # token is first character of first line of batch
8887 $actual_pos = $predicted_pos;
8890 my $move_right = $actual_pos - $predicted_pos;
8892 # done if no error to correct (gnu2.t)
8893 if ( $move_right == 0 ) {
8894 $indentation->set_recoverable_spaces($move_right);
8898 # if we have not seen closure for this indentation in
8899 # this batch, we can only pass on a request to the
8901 my $closing_index = $indentation->get_closed();
8903 if ( $closing_index < 0 ) {
8904 $indentation->set_recoverable_spaces($move_right);
8908 # If necessary, look ahead to see if there is really any
8909 # leading whitespace dependent on this whitespace, and
8910 # also find the longest line using this whitespace.
8911 # Since it is always safe to move left if there are no
8912 # dependents, we only need to do this if we may have
8913 # dependent nodes or need to move right.
8915 my $right_margin = 0;
8916 my $have_child = $indentation->get_have_child();
8918 my %saw_indentation;
8920 $saw_indentation{$indentation} = $indentation;
8922 if ( $have_child || $move_right > 0 ) {
8925 if ( $i == $ibeg ) {
8926 $max_length = total_line_length( $ibeg, $iend );
8929 # look ahead at the rest of the lines of this batch..
8930 foreach my $line_t ( $line + 1 .. $max_line ) {
8931 my $ibeg_t = $ri_first->[$line_t];
8932 my $iend_t = $ri_last->[$line_t];
8933 last if ( $closing_index <= $ibeg_t );
8935 # remember all different indentation objects
8936 my $indentation_t = $leading_spaces_to_go[$ibeg_t];
8937 $saw_indentation{$indentation_t} = $indentation_t;
8940 # remember longest line in the group
8941 my $length_t = total_line_length( $ibeg_t, $iend_t );
8942 if ( $length_t > $max_length ) {
8943 $max_length = $length_t;
8946 $right_margin = maximum_line_length($ibeg) - $max_length;
8947 if ( $right_margin < 0 ) { $right_margin = 0 }
8950 my $first_line_comma_count =
8951 grep { $_ eq ',' } @types_to_go[ $ibeg .. $iend ];
8952 my $comma_count = $indentation->get_comma_count();
8953 my $arrow_count = $indentation->get_arrow_count();
8955 # This is a simple approximate test for vertical alignment:
8956 # if we broke just after an opening paren, brace, bracket,
8957 # and there are 2 or more commas in the first line,
8958 # and there are no '=>'s,
8959 # then we are probably vertically aligned. We could set
8960 # an exact flag in sub scan_list, but this is good
8962 my $indentation_count = keys %saw_indentation;
8963 my $is_vertically_aligned =
8965 && $first_line_comma_count > 1
8966 && $indentation_count == 1
8967 && ( $arrow_count == 0 || $arrow_count == $line_count ) );
8969 # Make the move if possible ..
8972 # we can always move left
8975 # but we should only move right if we are sure it will
8976 # not spoil vertical alignment
8977 || ( $comma_count == 0 )
8978 || ( $comma_count > 0 && !$is_vertically_aligned )
8982 ( $move_right <= $right_margin )
8986 foreach ( keys %saw_indentation ) {
8987 $saw_indentation{$_}
8988 ->permanently_decrease_available_spaces( -$move );
8992 # Otherwise, record what we want and the vertical aligner
8993 # will try to recover it.
8995 $indentation->set_recoverable_spaces($move_right);
9003 # flush is called to output any tokens in the pipeline, so that
9004 # an alternate source of lines can be written in the correct order
9008 destroy_one_line_block();
9009 $self->output_line_to_go();
9010 Perl::Tidy::VerticalAligner::flush();
9014 sub reset_block_text_accumulator {
9016 # save text after 'if' and 'elsif' to append after 'else'
9017 if ($accumulating_text_for_block) {
9019 if ( $accumulating_text_for_block =~ /^(if|elsif)$/ ) {
9020 push @{$rleading_block_if_elsif_text}, $leading_block_text;
9023 $accumulating_text_for_block = "";
9024 $leading_block_text = "";
9025 $leading_block_text_level = 0;
9026 $leading_block_text_length_exceeded = 0;
9027 $leading_block_text_line_number = 0;
9028 $leading_block_text_line_length = 0;
9032 sub set_block_text_accumulator {
9034 $accumulating_text_for_block = $tokens_to_go[$i];
9035 if ( $accumulating_text_for_block !~ /^els/ ) {
9036 $rleading_block_if_elsif_text = [];
9038 $leading_block_text = "";
9039 $leading_block_text_level = $levels_to_go[$i];
9040 $leading_block_text_line_number = get_output_line_number();
9041 $leading_block_text_length_exceeded = 0;
9043 # this will contain the column number of the last character
9044 # of the closing side comment
9045 $leading_block_text_line_length =
9046 length($csc_last_label) +
9047 length($accumulating_text_for_block) +
9048 length( $rOpts->{'closing-side-comment-prefix'} ) +
9049 $leading_block_text_level * $rOpts_indent_columns + 3;
9053 sub accumulate_block_text {
9056 # accumulate leading text for -csc, ignoring any side comments
9057 if ( $accumulating_text_for_block
9058 && !$leading_block_text_length_exceeded
9059 && $types_to_go[$i] ne '#' )
9062 my $added_length = $token_lengths_to_go[$i];
9063 $added_length += 1 if $i == 0;
9064 my $new_line_length = $leading_block_text_line_length + $added_length;
9066 # we can add this text if we don't exceed some limits..
9069 # we must not have already exceeded the text length limit
9070 length($leading_block_text) <
9071 $rOpts_closing_side_comment_maximum_text
9074 # the new total line length must be below the line length limit
9075 # or the new length must be below the text length limit
9076 # (ie, we may allow one token to exceed the text length limit)
9079 maximum_line_length_for_level($leading_block_text_level)
9081 || length($leading_block_text) + $added_length <
9082 $rOpts_closing_side_comment_maximum_text
9085 # UNLESS: we are adding a closing paren before the brace we seek.
9086 # This is an attempt to avoid situations where the ... to be
9087 # added are longer than the omitted right paren, as in:
9089 # foreach my $item (@a_rather_long_variable_name_here) {
9091 # } ## end foreach my $item (@a_rather_long_variable_name_here...
9094 $tokens_to_go[$i] eq ')'
9097 $i + 1 <= $max_index_to_go
9098 && $block_type_to_go[ $i + 1 ] eq
9099 $accumulating_text_for_block
9101 || ( $i + 2 <= $max_index_to_go
9102 && $block_type_to_go[ $i + 2 ] eq
9103 $accumulating_text_for_block )
9109 # add an extra space at each newline
9110 if ( $i == 0 ) { $leading_block_text .= ' ' }
9112 # add the token text
9113 $leading_block_text .= $tokens_to_go[$i];
9114 $leading_block_text_line_length = $new_line_length;
9117 # show that text was truncated if necessary
9118 elsif ( $types_to_go[$i] ne 'b' ) {
9119 $leading_block_text_length_exceeded = 1;
9120 $leading_block_text .= '...';
9127 my %is_if_elsif_else_unless_while_until_for_foreach;
9131 # These block types may have text between the keyword and opening
9132 # curly. Note: 'else' does not, but must be included to allow trailing
9133 # if/elsif text to be appended.
9134 # patch for SWITCH/CASE: added 'case' and 'when'
9136 qw(if elsif else unless while until for foreach case when catch);
9137 @is_if_elsif_else_unless_while_until_for_foreach{@q} =
9141 sub accumulate_csc_text {
9143 # called once per output buffer when -csc is used. Accumulates
9144 # the text placed after certain closing block braces.
9145 # Defines and returns the following for this buffer:
9147 my $block_leading_text = ""; # the leading text of the last '}'
9148 my $rblock_leading_if_elsif_text;
9149 my $i_block_leading_text =
9150 -1; # index of token owning block_leading_text
9151 my $block_line_count = 100; # how many lines the block spans
9152 my $terminal_type = 'b'; # type of last nonblank token
9153 my $i_terminal = 0; # index of last nonblank token
9154 my $terminal_block_type = "";
9156 # update most recent statement label
9157 $csc_last_label = "" unless ($csc_last_label);
9158 if ( $types_to_go[0] eq 'J' ) { $csc_last_label = $tokens_to_go[0] }
9159 my $block_label = $csc_last_label;
9161 # Loop over all tokens of this batch
9162 for my $i ( 0 .. $max_index_to_go ) {
9163 my $type = $types_to_go[$i];
9164 my $block_type = $block_type_to_go[$i];
9165 my $token = $tokens_to_go[$i];
9167 # remember last nonblank token type
9168 if ( $type ne '#' && $type ne 'b' ) {
9169 $terminal_type = $type;
9170 $terminal_block_type = $block_type;
9174 my $type_sequence = $type_sequence_to_go[$i];
9175 if ( $block_type && $type_sequence ) {
9177 if ( $token eq '}' ) {
9179 # restore any leading text saved when we entered this block
9180 if ( defined( $block_leading_text{$type_sequence} ) ) {
9181 ( $block_leading_text, $rblock_leading_if_elsif_text )
9182 = @{ $block_leading_text{$type_sequence} };
9183 $i_block_leading_text = $i;
9184 delete $block_leading_text{$type_sequence};
9185 $rleading_block_if_elsif_text =
9186 $rblock_leading_if_elsif_text;
9189 if ( defined( $csc_block_label{$type_sequence} ) ) {
9190 $block_label = $csc_block_label{$type_sequence};
9191 delete $csc_block_label{$type_sequence};
9194 # if we run into a '}' then we probably started accumulating
9195 # at something like a trailing 'if' clause..no harm done.
9196 if ( $accumulating_text_for_block
9197 && $levels_to_go[$i] <= $leading_block_text_level )
9199 my $lev = $levels_to_go[$i];
9200 reset_block_text_accumulator();
9203 if ( defined( $block_opening_line_number{$type_sequence} ) )
9205 my $output_line_number = get_output_line_number();
9207 $output_line_number -
9208 $block_opening_line_number{$type_sequence} + 1;
9209 delete $block_opening_line_number{$type_sequence};
9213 # Error: block opening line undefined for this line..
9214 # This shouldn't be possible, but it is not a
9215 # significant problem.
9219 elsif ( $token eq '{' ) {
9221 my $line_number = get_output_line_number();
9222 $block_opening_line_number{$type_sequence} = $line_number;
9224 # set a label for this block, except for
9225 # a bare block which already has the label
9226 # A label can only be used on the next {
9227 if ( $block_type =~ /:$/ ) { $csc_last_label = "" }
9228 $csc_block_label{$type_sequence} = $csc_last_label;
9229 $csc_last_label = "";
9231 if ( $accumulating_text_for_block
9232 && $levels_to_go[$i] == $leading_block_text_level )
9235 if ( $accumulating_text_for_block eq $block_type ) {
9237 # save any leading text before we enter this block
9238 $block_leading_text{$type_sequence} = [
9239 $leading_block_text,
9240 $rleading_block_if_elsif_text
9242 $block_opening_line_number{$type_sequence} =
9243 $leading_block_text_line_number;
9244 reset_block_text_accumulator();
9248 # shouldn't happen, but not a serious error.
9249 # We were accumulating -csc text for block type
9250 # $accumulating_text_for_block and unexpectedly
9251 # encountered a '{' for block type $block_type.
9258 && $csc_new_statement_ok
9259 && $is_if_elsif_else_unless_while_until_for_foreach{$token}
9260 && $token =~ /$closing_side_comment_list_pattern/o )
9262 set_block_text_accumulator($i);
9266 # note: ignoring type 'q' because of tricks being played
9267 # with 'q' for hanging side comments
9268 if ( $type ne 'b' && $type ne '#' && $type ne 'q' ) {
9269 $csc_new_statement_ok =
9270 ( $block_type || $type eq 'J' || $type eq ';' );
9273 && $accumulating_text_for_block
9274 && $levels_to_go[$i] == $leading_block_text_level )
9276 reset_block_text_accumulator();
9279 accumulate_block_text($i);
9284 # Treat an 'else' block specially by adding preceding 'if' and
9285 # 'elsif' text. Otherwise, the 'end else' is not helpful,
9286 # especially for cuddled-else formatting.
9287 if ( $terminal_block_type =~ /^els/ && $rblock_leading_if_elsif_text ) {
9288 $block_leading_text =
9289 make_else_csc_text( $i_terminal, $terminal_block_type,
9290 $block_leading_text, $rblock_leading_if_elsif_text );
9293 # if this line ends in a label then remember it for the next pass
9294 $csc_last_label = "";
9295 if ( $terminal_type eq 'J' ) {
9296 $csc_last_label = $tokens_to_go[$i_terminal];
9299 return ( $terminal_type, $i_terminal, $i_block_leading_text,
9300 $block_leading_text, $block_line_count, $block_label );
9304 sub make_else_csc_text {
9306 # create additional -csc text for an 'else' and optionally 'elsif',
9307 # depending on the value of switch
9308 # $rOpts_closing_side_comment_else_flag:
9310 # = 0 add 'if' text to trailing else
9311 # = 1 same as 0 plus:
9312 # add 'if' to 'elsif's if can fit in line length
9313 # add last 'elsif' to trailing else if can fit in one line
9314 # = 2 same as 1 but do not check if exceed line length
9316 # $rif_elsif_text = a reference to a list of all previous closing
9317 # side comments created for this if block
9319 my ( $i_terminal, $block_type, $block_leading_text, $rif_elsif_text ) = @_;
9320 my $csc_text = $block_leading_text;
9322 if ( $block_type eq 'elsif'
9323 && $rOpts_closing_side_comment_else_flag == 0 )
9328 my $count = @{$rif_elsif_text};
9329 return $csc_text unless ($count);
9331 my $if_text = '[ if' . $rif_elsif_text->[0];
9333 # always show the leading 'if' text on 'else'
9334 if ( $block_type eq 'else' ) {
9335 $csc_text .= $if_text;
9339 if ( $rOpts_closing_side_comment_else_flag == 0 ) {
9343 my $last_elsif_text = "";
9345 $last_elsif_text = ' [elsif' . $rif_elsif_text->[ $count - 1 ];
9346 if ( $count > 2 ) { $last_elsif_text = ' [...' . $last_elsif_text; }
9349 # tentatively append one more item
9350 my $saved_text = $csc_text;
9351 if ( $block_type eq 'else' ) {
9352 $csc_text .= $last_elsif_text;
9355 $csc_text .= ' ' . $if_text;
9358 # all done if no length checks requested
9359 if ( $rOpts_closing_side_comment_else_flag == 2 ) {
9363 # undo it if line length exceeded
9366 length($block_type) +
9367 length( $rOpts->{'closing-side-comment-prefix'} ) +
9368 $levels_to_go[$i_terminal] * $rOpts_indent_columns + 3;
9369 if ( $length > maximum_line_length_for_level($leading_block_text_level) ) {
9370 $csc_text = $saved_text;
9375 { # sub balance_csc_text
9390 sub balance_csc_text {
9392 # Append characters to balance a closing side comment so that editors
9393 # such as vim can correctly jump through code.
9395 # input = ## end foreach my $foo ( sort { $b ...
9396 # output = ## end foreach my $foo ( sort { $b ...})
9398 # NOTE: This routine does not currently filter out structures within
9399 # quoted text because the bounce algorithms in text editors do not
9400 # necessarily do this either (a version of vim was checked and
9403 # Some complex examples which will cause trouble for some editors:
9404 # while ( $mask_string =~ /\{[^{]*?\}/g ) {
9405 # if ( $mask_str =~ /\}\s*els[^\{\}]+\{$/ ) {
9406 # if ( $1 eq '{' ) {
9407 # test file test1/braces.pl has many such examples.
9411 # loop to examine characters one-by-one, RIGHT to LEFT and
9412 # build a balancing ending, LEFT to RIGHT.
9413 for ( my $pos = length($csc) - 1 ; $pos >= 0 ; $pos-- ) {
9415 my $char = substr( $csc, $pos, 1 );
9417 # ignore everything except structural characters
9418 next unless ( $matching_char{$char} );
9420 # pop most recently appended character
9421 my $top = chop($csc);
9423 # push it back plus the mate to the newest character
9424 # unless they balance each other.
9425 $csc = $csc . $top . $matching_char{$char} unless $top eq $char;
9428 # return the balanced string
9433 sub add_closing_side_comment {
9437 # add closing side comments after closing block braces if -csc used
9438 my $cscw_block_comment;
9440 #---------------------------------------------------------------
9441 # Step 1: loop through all tokens of this line to accumulate
9442 # the text needed to create the closing side comments. Also see
9443 # how the line ends.
9444 #---------------------------------------------------------------
9446 my ( $terminal_type, $i_terminal, $i_block_leading_text,
9447 $block_leading_text, $block_line_count, $block_label )
9448 = accumulate_csc_text();
9450 #---------------------------------------------------------------
9451 # Step 2: make the closing side comment if this ends a block
9452 #---------------------------------------------------------------
9453 my $have_side_comment = $types_to_go[$max_index_to_go] eq '#';
9455 # if this line might end in a block closure..
9457 $terminal_type eq '}'
9462 # the block is long enough
9463 ( $block_line_count >= $rOpts->{'closing-side-comment-interval'} )
9465 # or there is an existing comment to check
9466 || ( $have_side_comment
9467 && $rOpts->{'closing-side-comment-warnings'} )
9470 # .. and if this is one of the types of interest
9471 && $block_type_to_go[$i_terminal] =~
9472 /$closing_side_comment_list_pattern/o
9474 # .. but not an anonymous sub
9475 # These are not normally of interest, and their closing braces are
9476 # often followed by commas or semicolons anyway. This also avoids
9477 # possible erratic output due to line numbering inconsistencies
9478 # in the cases where their closing braces terminate a line.
9479 && $block_type_to_go[$i_terminal] ne 'sub'
9481 # ..and the corresponding opening brace must is not in this batch
9482 # (because we do not need to tag one-line blocks, although this
9483 # should also be caught with a positive -csci value)
9484 && $mate_index_to_go[$i_terminal] < 0
9489 # this is the last token (line doesn't have a side comment)
9492 # or the old side comment is a closing side comment
9493 || $tokens_to_go[$max_index_to_go] =~
9494 /$closing_side_comment_prefix_pattern/o
9499 # then make the closing side comment text
9500 if ($block_label) { $block_label .= " " }
9502 "$rOpts->{'closing-side-comment-prefix'} $block_label$block_type_to_go[$i_terminal]";
9504 # append any extra descriptive text collected above
9505 if ( $i_block_leading_text == $i_terminal ) {
9506 $token .= $block_leading_text;
9509 $token = balance_csc_text($token)
9510 if $rOpts->{'closing-side-comments-balanced'};
9512 $token =~ s/\s*$//; # trim any trailing whitespace
9514 # handle case of existing closing side comment
9515 if ($have_side_comment) {
9517 # warn if requested and tokens differ significantly
9518 if ( $rOpts->{'closing-side-comment-warnings'} ) {
9519 my $old_csc = $tokens_to_go[$max_index_to_go];
9520 my $new_csc = $token;
9521 $new_csc =~ s/\s+//g; # trim all whitespace
9522 $old_csc =~ s/\s+//g; # trim all whitespace
9523 $new_csc =~ s/[\]\)\}\s]*$//; # trim trailing structures
9524 $old_csc =~ s/[\]\)\}\s]*$//; # trim trailing structures
9525 $new_csc =~ s/(\.\.\.)$//; # trim trailing '...'
9526 my $new_trailing_dots = $1;
9527 $old_csc =~ s/(\.\.\.)\s*$//; # trim trailing '...'
9529 # Patch to handle multiple closing side comments at
9530 # else and elsif's. These have become too complicated
9531 # to check, so if we see an indication of
9532 # '[ if' or '[ # elsif', then assume they were made
9534 if ( $block_type_to_go[$i_terminal] eq 'else' ) {
9535 if ( $old_csc =~ /\[\s*elsif/ ) { $old_csc = $new_csc }
9537 elsif ( $block_type_to_go[$i_terminal] eq 'elsif' ) {
9538 if ( $old_csc =~ /\[\s*if/ ) { $old_csc = $new_csc }
9541 # if old comment is contained in new comment,
9542 # only compare the common part.
9543 if ( length($new_csc) > length($old_csc) ) {
9544 $new_csc = substr( $new_csc, 0, length($old_csc) );
9547 # if the new comment is shorter and has been limited,
9548 # only compare the common part.
9549 if ( length($new_csc) < length($old_csc)
9550 && $new_trailing_dots )
9552 $old_csc = substr( $old_csc, 0, length($new_csc) );
9555 # any remaining difference?
9556 if ( $new_csc ne $old_csc ) {
9558 # just leave the old comment if we are below the threshold
9559 # for creating side comments
9560 if ( $block_line_count <
9561 $rOpts->{'closing-side-comment-interval'} )
9566 # otherwise we'll make a note of it
9570 "perltidy -cscw replaced: $tokens_to_go[$max_index_to_go]\n"
9573 # save the old side comment in a new trailing block
9576 if ( $rOpts->{'timestamp'} ) {
9577 my ( $day, $month, $year ) = (localtime)[ 3, 4, 5 ];
9580 $timestamp = "$year-$month-$day";
9582 $cscw_block_comment =
9583 "## perltidy -cscw $timestamp: $tokens_to_go[$max_index_to_go]";
9584 ## "## perltidy -cscw $year-$month-$day: $tokens_to_go[$max_index_to_go]";
9589 # No differences.. we can safely delete old comment if we
9590 # are below the threshold
9591 if ( $block_line_count <
9592 $rOpts->{'closing-side-comment-interval'} )
9595 $self->unstore_token_to_go()
9596 if ( $types_to_go[$max_index_to_go] eq '#' );
9597 $self->unstore_token_to_go()
9598 if ( $types_to_go[$max_index_to_go] eq 'b' );
9603 # switch to the new csc (unless we deleted it!)
9604 $tokens_to_go[$max_index_to_go] = $token if $token;
9607 # handle case of NO existing closing side comment
9610 # Remove any existing blank and add another below.
9611 # This is a tricky point. A side comment needs to have the same level
9612 # as the preceding closing brace or else the line will not get the right
9613 # indentation. So even if we have a blank, we are going to replace it.
9614 if ( $types_to_go[$max_index_to_go] eq 'b' ) {
9615 unstore_token_to_go();
9618 # insert the new side comment into the output token stream
9620 my $block_type = '';
9621 my $type_sequence = '';
9622 my $container_environment =
9623 $container_environment_to_go[$max_index_to_go];
9624 my $level = $levels_to_go[$max_index_to_go];
9625 my $slevel = $nesting_depth_to_go[$max_index_to_go];
9626 my $no_internal_newlines = 0;
9628 my $ci_level = $ci_levels_to_go[$max_index_to_go];
9629 my $in_continued_quote = 0;
9631 # insert a blank token
9632 $self->insert_new_token_to_go( ' ', 'b', $slevel,
9633 $no_internal_newlines );
9635 # then the side comment
9636 $self->insert_new_token_to_go( $token, $type, $slevel,
9637 $no_internal_newlines );
9640 return $cscw_block_comment;
9643 sub previous_nonblank_token {
9647 return "" if ( $im < 0 );
9648 if ( $types_to_go[$im] eq 'b' ) { $im--; }
9649 return "" if ( $im < 0 );
9650 $name = $tokens_to_go[$im];
9652 # prepend any sub name to an isolated -> to avoid unwanted alignments
9653 # [test case is test8/penco.pl]
9654 if ( $name eq '->' ) {
9656 if ( $im >= 0 && $types_to_go[$im] ne 'b' ) {
9657 $name = $tokens_to_go[$im] . $name;
9663 sub send_lines_to_vertical_aligner {
9665 my ( $self, $ri_first, $ri_last, $do_not_pad ) = @_;
9667 my $valign_batch_number = $self->increment_valign_batch_count();
9669 my $cscw_block_comment;
9670 if ( $rOpts->{'closing-side-comments'} && $max_index_to_go >= 0 ) {
9671 $cscw_block_comment = $self->add_closing_side_comment();
9673 # Add or update any closing side comment
9674 if ( $types_to_go[$max_index_to_go] eq '#' ) {
9675 $ri_last->[-1] = $max_index_to_go;
9679 my $rindentation_list = [0]; # ref to indentations for each line
9681 # define the array @matching_token_to_go for the output tokens
9682 # which will be non-blank for each special token (such as =>)
9683 # for which alignment is required.
9684 set_vertical_alignment_markers( $ri_first, $ri_last );
9686 # flush if necessary to avoid unwanted alignment
9688 if ( @{$ri_first} > 1 ) {
9690 # flush before a long if statement
9691 if ( $types_to_go[0] eq 'k' && $tokens_to_go[0] =~ /^(if|unless)$/ ) {
9696 Perl::Tidy::VerticalAligner::flush();
9699 undo_ci( $ri_first, $ri_last );
9701 set_logical_padding( $ri_first, $ri_last );
9703 # loop to prepare each line for shipment
9704 my $n_last_line = @{$ri_first} - 1;
9706 for my $n ( 0 .. $n_last_line ) {
9707 my $ibeg = $ri_first->[$n];
9708 my $iend = $ri_last->[$n];
9710 my ( $rtokens, $rfields, $rpatterns ) =
9711 make_alignment_patterns( $ibeg, $iend );
9713 # Set flag to show how much level changes between this line
9714 # and the next line, if we have it.
9716 if ( $n < $n_last_line ) {
9717 my $ibegp = $ri_first->[ $n + 1 ];
9718 $ljump = $levels_to_go[$ibegp] - $levels_to_go[$iend];
9721 my ( $indentation, $lev, $level_end, $terminal_type,
9722 $is_semicolon_terminated, $is_outdented_line )
9723 = $self->set_adjusted_indentation( $ibeg, $iend, $rfields, $rpatterns,
9724 $ri_first, $ri_last, $rindentation_list, $ljump );
9726 # we will allow outdenting of long lines..
9727 my $outdent_long_lines = (
9729 # which are long quotes, if allowed
9730 ( $types_to_go[$ibeg] eq 'Q' && $rOpts->{'outdent-long-quotes'} )
9732 # which are long block comments, if allowed
9734 $types_to_go[$ibeg] eq '#'
9735 && $rOpts->{'outdent-long-comments'}
9737 # but not if this is a static block comment
9738 && !$is_static_block_comment
9743 $nesting_depth_to_go[ $iend + 1 ] - $nesting_depth_to_go[$ibeg];
9745 my $rvertical_tightness_flags =
9746 set_vertical_tightness_flags( $n, $n_last_line, $ibeg, $iend,
9747 $ri_first, $ri_last );
9749 # flush an outdented line to avoid any unwanted vertical alignment
9750 Perl::Tidy::VerticalAligner::flush() if ($is_outdented_line);
9752 # Set a flag at the final ':' of a ternary chain to request
9753 # vertical alignment of the final term. Here is a
9754 # slightly complex example:
9756 # $self->{_text} = (
9758 # : $type eq 'item' ? "the $section entry"
9759 # : "the section on $section"
9763 # ? ( $section ? ' in ' : '' ) . "the $page$page_ext manpage"
9764 # : ' elsewhere in this document'
9767 my $is_terminal_ternary = 0;
9768 if ( $tokens_to_go[$ibeg] eq ':'
9769 || $n > 0 && $tokens_to_go[ $ri_last->[ $n - 1 ] ] eq ':' )
9771 my $last_leading_type = ":";
9773 my $iprev = $ri_first->[ $n - 1 ];
9774 $last_leading_type = $types_to_go[$iprev];
9776 if ( $terminal_type ne ';'
9777 && $n_last_line > $n
9778 && $level_end == $lev )
9780 my $inext = $ri_first->[ $n + 1 ];
9781 $level_end = $levels_to_go[$inext];
9782 $terminal_type = $types_to_go[$inext];
9785 $is_terminal_ternary = $last_leading_type eq ':'
9786 && ( ( $terminal_type eq ';' && $level_end <= $lev )
9787 || ( $terminal_type ne ':' && $level_end < $lev ) )
9789 # the terminal term must not contain any ternary terms, as in
9791 # $Is_MSWin32 ? ".\\echo$$"
9792 # : $Is_MacOS ? ":echo$$"
9793 # : ( $Is_NetWare ? "echo$$" : "./echo$$" )
9795 && !grep { /^[\?\:]$/ } @types_to_go[ $ibeg + 1 .. $iend ];
9798 # send this new line down the pipe
9799 my $forced_breakpoint = $forced_breakpoint_to_go[$iend];
9801 my $rvalign_hash = {};
9802 $rvalign_hash->{level} = $lev;
9803 $rvalign_hash->{level_end} = $level_end;
9804 $rvalign_hash->{indentation} = $indentation;
9805 $rvalign_hash->{is_forced_break} =
9806 $forced_breakpoint_to_go[$iend] || $in_comma_list;
9807 $rvalign_hash->{outdent_long_lines} = $outdent_long_lines;
9808 $rvalign_hash->{is_terminal_ternary} = $is_terminal_ternary;
9809 $rvalign_hash->{is_terminal_statement} = $is_semicolon_terminated;
9810 $rvalign_hash->{do_not_pad} = $do_not_pad;
9811 $rvalign_hash->{rvertical_tightness_flags} = $rvertical_tightness_flags;
9812 $rvalign_hash->{level_jump} = $level_jump;
9814 $rvalign_hash->{valign_batch_number} = $valign_batch_number;
9816 Perl::Tidy::VerticalAligner::valign_input( $rvalign_hash, $rfields,
9817 $rtokens, $rpatterns );
9820 $tokens_to_go[$iend] eq ',' && $forced_breakpoint_to_go[$iend];
9822 # flush an outdented line to avoid any unwanted vertical alignment
9823 Perl::Tidy::VerticalAligner::flush() if ($is_outdented_line);
9827 # Set flag indicating if this line ends in an opening
9828 # token and is very short, so that a blank line is not
9829 # needed if the subsequent line is a comment.
9830 # Examples of what we are looking for:
9836 $last_output_short_opening_token
9838 # line ends in opening token
9839 = $types_to_go[$iend] =~ /^[\{\(\[L]$/
9843 # line has either single opening token
9846 # or is a single token followed by opening token.
9847 # Note that sub identifiers have blanks like 'sub doit'
9848 || ( $iend - $ibeg <= 2 && $tokens_to_go[$ibeg] !~ /\s+/ )
9851 # and limit total to 10 character widths
9852 && token_sequence_length( $ibeg, $iend ) <= 10;
9854 } # end of loop to output each line
9856 # remember indentation of lines containing opening containers for
9857 # later use by sub set_adjusted_indentation
9858 save_opening_indentation( $ri_first, $ri_last, $rindentation_list );
9860 # output any new -cscw block comment
9861 if ($cscw_block_comment) {
9862 Perl::Tidy::VerticalAligner::flush();
9863 $file_writer_object->write_code_line( $cscw_block_comment . "\n" );
9868 { # begin make_alignment_patterns
9875 # map related block names into a common name to
9888 # map certain keywords to the same 'if' class to align
9889 # long if/elsif sequences. [elsif.pl]
9895 'default' => 'given',
9898 # treat an 'undef' similar to numbers and quotes
9903 sub make_alignment_patterns {
9905 # Here we do some important preliminary work for the
9906 # vertical aligner. We create three arrays for one
9907 # output line. These arrays contain strings that can
9908 # be tested by the vertical aligner to see if
9909 # consecutive lines can be aligned vertically.
9911 # The three arrays are indexed on the vertical
9912 # alignment fields and are:
9913 # @tokens - a list of any vertical alignment tokens for this line.
9914 # These are tokens, such as '=' '&&' '#' etc which
9915 # we want to might align vertically. These are
9916 # decorated with various information such as
9917 # nesting depth to prevent unwanted vertical
9918 # alignment matches.
9919 # @fields - the actual text of the line between the vertical alignment
9921 # @patterns - a modified list of token types, one for each alignment
9922 # field. These should normally each match before alignment is
9923 # allowed, even when the alignment tokens match.
9924 my ( $ibeg, $iend ) = @_;
9928 my $i_start = $ibeg;
9931 my @container_name = ("");
9932 my @multiple_comma_arrows = (undef);
9934 my $j = 0; # field index
9937 for my $i ( $ibeg .. $iend ) {
9939 # Keep track of containers balanced on this line only.
9940 # These are used below to prevent unwanted cross-line alignments.
9941 # Unbalanced containers already avoid aligning across
9942 # container boundaries.
9943 my $tok = $tokens_to_go[$i];
9944 if ( $tok =~ /^[\(\{\[]/ ) { #'(' ) {
9946 # if container is balanced on this line...
9947 my $i_mate = $mate_index_to_go[$i];
9948 if ( $i_mate > $i && $i_mate <= $iend ) {
9950 my $seqno = $type_sequence_to_go[$i];
9951 my $count = comma_arrow_count($seqno);
9952 $multiple_comma_arrows[$depth] = $count && $count > 1;
9954 # Append the previous token name to make the container name
9955 # more unique. This name will also be given to any commas
9956 # within this container, and it helps avoid undesirable
9957 # alignments of different types of containers.
9959 # Containers beginning with { and [ are given those names
9960 # for uniqueness. That way commas in different containers
9961 # will not match. Here is an example of what this prevents:
9963 # b => { b1 => 4, b2 => 5 },
9964 # Here is another example of what we avoid by labeling the
9966 # is_d( [ $a, $a ], [ $b, $c ] );
9967 # is_d( { foo => $a, bar => $a }, { foo => $b, bar => $c } );
9968 # is_d( [ \$a, \$a ], [ \$b, \$c ] );
9971 if ( $tok eq '(' ) {
9972 $name = previous_nonblank_token($i);
9975 $container_name[$depth] = "+" . $name;
9977 # Make the container name even more unique if necessary.
9978 # If we are not vertically aligning this opening paren,
9979 # append a character count to avoid bad alignment because
9980 # it usually looks bad to align commas within containers
9981 # for which the opening parens do not align. Here
9982 # is an example very BAD alignment of commas (because
9983 # the atan2 functions are not all aligned):
9985 # $X * $RTYSQP1 * atan2( $X, $RTYSQP1 ) +
9986 # $Y * $RTXSQP1 * atan2( $Y, $RTXSQP1 ) -
9987 # $X * atan2( $X, 1 ) -
9988 # $Y * atan2( $Y, 1 );
9990 # On the other hand, it is usually okay to align commas if
9991 # opening parens align, such as:
9992 # glVertex3d( $cx + $s * $xs, $cy, $z );
9993 # glVertex3d( $cx, $cy + $s * $ys, $z );
9994 # glVertex3d( $cx - $s * $xs, $cy, $z );
9995 # glVertex3d( $cx, $cy - $s * $ys, $z );
9997 # To distinguish between these situations, we will
9998 # append the length of the line from the previous matching
9999 # token, or beginning of line, to the function name. This
10000 # will allow the vertical aligner to reject undesirable
10003 # if we are not aligning on this paren...
10004 if ( $matching_token_to_go[$i] eq '' ) {
10006 # Sum length from previous alignment, or start of line.
10008 ( $i_start == $ibeg )
10009 ? total_line_length( $i_start, $i - 1 )
10010 : token_sequence_length( $i_start, $i - 1 );
10012 # tack length onto the container name to make unique
10013 $container_name[$depth] .= "-" . $len;
10017 elsif ( $tokens_to_go[$i] =~ /^[\)\}\]]/ ) {
10018 $depth-- if $depth > 0;
10021 # if we find a new synchronization token, we are done with
10023 if ( $i > $i_start && $matching_token_to_go[$i] ne '' ) {
10025 my $tok = my $raw_tok = $matching_token_to_go[$i];
10027 # map similar items
10028 if ( $tok eq '!~' ) { $tok = '=~' }
10030 # make separators in different nesting depths unique
10031 # by appending the nesting depth digit.
10032 if ( $raw_tok ne '#' ) {
10033 $tok .= "$nesting_depth_to_go[$i]";
10036 # also decorate commas with any container name to avoid
10037 # unwanted cross-line alignments.
10038 if ( $raw_tok eq ',' || $raw_tok eq '=>' ) {
10039 if ( $container_name[$depth] ) {
10040 $tok .= $container_name[$depth];
10044 # Patch to avoid aligning leading and trailing if, unless.
10045 # Mark trailing if, unless statements with container names.
10046 # This makes them different from leading if, unless which
10047 # are not so marked at present. If we ever need to name
10048 # them too, we could use ci to distinguish them.
10049 # Example problem to avoid:
10050 # return ( 2, "DBERROR" )
10051 # if ( $retval == 2 );
10052 # if ( scalar @_ ) {
10053 # my ( $a, $b, $c, $d, $e, $f ) = @_;
10055 if ( $raw_tok eq '(' ) {
10056 my $ci = $ci_levels_to_go[$ibeg];
10057 if ( $container_name[$depth] =~ /^\+(if|unless)/
10060 $tok .= $container_name[$depth];
10064 # Decorate block braces with block types to avoid
10065 # unwanted alignments such as the following:
10066 # foreach ( @{$routput_array} ) { $fh->print($_) }
10067 # eval { $fh->close() };
10068 if ( $raw_tok eq '{' && $block_type_to_go[$i] ) {
10069 my $block_type = $block_type_to_go[$i];
10071 # map certain related block types to allow
10072 # else blocks to align
10073 $block_type = $block_type_map{$block_type}
10074 if ( defined( $block_type_map{$block_type} ) );
10076 # remove sub names to allow one-line sub braces to align
10077 # regardless of name
10078 #if ( $block_type =~ /^sub / ) { $block_type = 'sub' }
10079 if ( $block_type =~ /$SUB_PATTERN/ ) { $block_type = 'sub' }
10081 # allow all control-type blocks to align
10082 if ( $block_type =~ /^[A-Z]+$/ ) { $block_type = 'BEGIN' }
10084 $tok .= $block_type;
10087 # concatenate the text of the consecutive tokens to form
10090 join( '', @tokens_to_go[ $i_start .. $i - 1 ] ) );
10092 # store the alignment token for this field
10093 push( @tokens, $tok );
10095 # get ready for the next batch
10098 $patterns[$j] = "";
10101 # continue accumulating tokens
10102 # handle non-keywords..
10103 if ( $types_to_go[$i] ne 'k' ) {
10104 my $type = $types_to_go[$i];
10106 # Mark most things before arrows as a quote to
10107 # get them to line up. Testfile: mixed.pl.
10108 if ( ( $i < $iend - 1 ) && ( $type =~ /^[wnC]$/ ) ) {
10109 my $next_type = $types_to_go[ $i + 1 ];
10110 my $i_next_nonblank =
10111 ( ( $next_type eq 'b' ) ? $i + 2 : $i + 1 );
10113 if ( $types_to_go[$i_next_nonblank] eq '=>' ) {
10116 # Patch to ignore leading minus before words,
10117 # by changing pattern 'mQ' into just 'Q',
10118 # so that we can align things like this:
10119 # Button => "Print letter \"~$_\"",
10120 # -command => [ sub { print "$_[0]\n" }, $_ ],
10121 if ( $patterns[$j] eq 'm' ) { $patterns[$j] = "" }
10125 # Convert a bareword within braces into a quote for matching. This will
10126 # allow alignment of expressions like this:
10127 # local ( $SIG{'INT'} ) = IGNORE;
10128 # local ( $SIG{ALRM} ) = 'POSTMAN';
10132 && $types_to_go[ $i - 1 ] eq 'L'
10133 && $types_to_go[ $i + 1 ] eq 'R' )
10138 # patch to make numbers and quotes align
10139 if ( $type eq 'n' ) { $type = 'Q' }
10141 # patch to ignore any ! in patterns
10142 if ( $type eq '!' ) { $type = '' }
10144 $patterns[$j] .= $type;
10147 # for keywords we have to use the actual text
10150 my $tok = $tokens_to_go[$i];
10152 # but map certain keywords to a common string to allow
10154 $tok = $keyword_map{$tok}
10155 if ( defined( $keyword_map{$tok} ) );
10156 $patterns[$j] .= $tok;
10160 # done with this line .. join text of tokens to make the last field
10161 push( @fields, join( '', @tokens_to_go[ $i_start .. $iend ] ) );
10162 return ( \@tokens, \@fields, \@patterns );
10165 } # end make_alignment_patterns
10167 { # begin unmatched_indexes
10169 # closure to keep track of unbalanced containers.
10170 # arrays shared by the routines in this block:
10171 my @unmatched_opening_indexes_in_this_batch;
10172 my @unmatched_closing_indexes_in_this_batch;
10173 my %comma_arrow_count;
10175 sub is_unbalanced_batch {
10176 return @unmatched_opening_indexes_in_this_batch +
10177 @unmatched_closing_indexes_in_this_batch;
10180 sub comma_arrow_count {
10182 return $comma_arrow_count{$seqno};
10185 sub match_opening_and_closing_tokens {
10187 # Match up indexes of opening and closing braces, etc, in this batch.
10188 # This has to be done after all tokens are stored because unstoring
10189 # of tokens would otherwise cause trouble.
10191 @unmatched_opening_indexes_in_this_batch = ();
10192 @unmatched_closing_indexes_in_this_batch = ();
10193 %comma_arrow_count = ();
10194 my $comma_arrow_count_contained = 0;
10196 foreach my $i ( 0 .. $max_index_to_go ) {
10197 if ( $type_sequence_to_go[$i] ) {
10198 my $token = $tokens_to_go[$i];
10199 if ( $token =~ /^[\(\[\{\?]$/ ) {
10200 push @unmatched_opening_indexes_in_this_batch, $i;
10202 elsif ( $token =~ /^[\)\]\}\:]$/ ) {
10204 my $i_mate = pop @unmatched_opening_indexes_in_this_batch;
10205 if ( defined($i_mate) && $i_mate >= 0 ) {
10206 if ( $type_sequence_to_go[$i_mate] ==
10207 $type_sequence_to_go[$i] )
10209 $mate_index_to_go[$i] = $i_mate;
10210 $mate_index_to_go[$i_mate] = $i;
10211 my $seqno = $type_sequence_to_go[$i];
10212 if ( $comma_arrow_count{$seqno} ) {
10213 $comma_arrow_count_contained +=
10214 $comma_arrow_count{$seqno};
10218 push @unmatched_opening_indexes_in_this_batch,
10220 push @unmatched_closing_indexes_in_this_batch, $i;
10224 push @unmatched_closing_indexes_in_this_batch, $i;
10228 elsif ( $tokens_to_go[$i] eq '=>' ) {
10229 if (@unmatched_opening_indexes_in_this_batch) {
10230 my $j = $unmatched_opening_indexes_in_this_batch[-1];
10231 my $seqno = $type_sequence_to_go[$j];
10232 $comma_arrow_count{$seqno}++;
10236 return $comma_arrow_count_contained;
10239 sub save_opening_indentation {
10241 # This should be called after each batch of tokens is output. It
10242 # saves indentations of lines of all unmatched opening tokens.
10243 # These will be used by sub get_opening_indentation.
10245 my ( $ri_first, $ri_last, $rindentation_list ) = @_;
10247 # we no longer need indentations of any saved indentations which
10248 # are unmatched closing tokens in this batch, because we will
10249 # never encounter them again. So we can delete them to keep
10250 # the hash size down.
10251 foreach (@unmatched_closing_indexes_in_this_batch) {
10252 my $seqno = $type_sequence_to_go[$_];
10253 delete $saved_opening_indentation{$seqno};
10256 # we need to save indentations of any unmatched opening tokens
10257 # in this batch because we may need them in a subsequent batch.
10258 foreach (@unmatched_opening_indexes_in_this_batch) {
10259 my $seqno = $type_sequence_to_go[$_];
10260 $saved_opening_indentation{$seqno} = [
10261 lookup_opening_indentation(
10262 $_, $ri_first, $ri_last, $rindentation_list
10268 } # end unmatched_indexes
10270 sub get_opening_indentation {
10272 # get the indentation of the line which output the opening token
10273 # corresponding to a given closing token in the current output batch.
10276 # $i_closing - index in this line of a closing token ')' '}' or ']'
10278 # $ri_first - reference to list of the first index $i for each output
10279 # line in this batch
10280 # $ri_last - reference to list of the last index $i for each output line
10282 # $rindentation_list - reference to a list containing the indentation
10283 # used for each line.
10286 # -the indentation of the line which contained the opening token
10287 # which matches the token at index $i_opening
10288 # -and its offset (number of columns) from the start of the line
10290 my ( $i_closing, $ri_first, $ri_last, $rindentation_list ) = @_;
10292 # first, see if the opening token is in the current batch
10293 my $i_opening = $mate_index_to_go[$i_closing];
10294 my ( $indent, $offset, $is_leading, $exists );
10296 if ( $i_opening >= 0 ) {
10298 # it is..look up the indentation
10299 ( $indent, $offset, $is_leading ) =
10300 lookup_opening_indentation( $i_opening, $ri_first, $ri_last,
10301 $rindentation_list );
10304 # if not, it should have been stored in the hash by a previous batch
10306 my $seqno = $type_sequence_to_go[$i_closing];
10308 if ( $saved_opening_indentation{$seqno} ) {
10309 ( $indent, $offset, $is_leading ) =
10310 @{ $saved_opening_indentation{$seqno} };
10313 # some kind of serious error
10314 # (example is badfile.t)
10323 # if no sequence number it must be an unbalanced container
10331 return ( $indent, $offset, $is_leading, $exists );
10334 sub lookup_opening_indentation {
10336 # get the indentation of the line in the current output batch
10337 # which output a selected opening token
10340 # $i_opening - index of an opening token in the current output batch
10341 # whose line indentation we need
10342 # $ri_first - reference to list of the first index $i for each output
10343 # line in this batch
10344 # $ri_last - reference to list of the last index $i for each output line
10346 # $rindentation_list - reference to a list containing the indentation
10347 # used for each line. (NOTE: the first slot in
10348 # this list is the last returned line number, and this is
10349 # followed by the list of indentations).
10352 # -the indentation of the line which contained token $i_opening
10353 # -and its offset (number of columns) from the start of the line
10355 my ( $i_opening, $ri_start, $ri_last, $rindentation_list ) = @_;
10357 my $nline = $rindentation_list->[0]; # line number of previous lookup
10359 # reset line location if necessary
10360 $nline = 0 if ( $i_opening < $ri_start->[$nline] );
10362 # find the correct line
10363 unless ( $i_opening > $ri_last->[-1] ) {
10364 while ( $i_opening > $ri_last->[$nline] ) { $nline++; }
10367 # error - token index is out of bounds - shouldn't happen
10370 "non-fatal program bug in lookup_opening_indentation - index out of range\n"
10372 report_definite_bug();
10373 $nline = $#{$ri_last};
10376 $rindentation_list->[0] =
10377 $nline; # save line number to start looking next call
10378 my $ibeg = $ri_start->[$nline];
10379 my $offset = token_sequence_length( $ibeg, $i_opening ) - 1;
10380 my $is_leading = ( $ibeg == $i_opening );
10381 return ( $rindentation_list->[ $nline + 1 ], $offset, $is_leading );
10385 my %is_if_elsif_else_unless_while_until_for_foreach;
10389 # These block types may have text between the keyword and opening
10390 # curly. Note: 'else' does not, but must be included to allow trailing
10391 # if/elsif text to be appended.
10392 # patch for SWITCH/CASE: added 'case' and 'when'
10393 my @q = qw(if elsif else unless while until for foreach case when);
10394 @is_if_elsif_else_unless_while_until_for_foreach{@q} =
10398 sub set_adjusted_indentation {
10400 # This routine has the final say regarding the actual indentation of
10401 # a line. It starts with the basic indentation which has been
10402 # defined for the leading token, and then takes into account any
10403 # options that the user has set regarding special indenting and
10407 $self, $ibeg, $iend,
10408 $rfields, $rpatterns, $ri_first,
10409 $ri_last, $rindentation_list, $level_jump
10412 my $rLL = $self->{rLL};
10414 # we need to know the last token of this line
10415 my ( $terminal_type, $i_terminal ) =
10416 terminal_type( \@types_to_go, \@block_type_to_go, $ibeg, $iend );
10418 my $is_outdented_line = 0;
10420 my $is_semicolon_terminated = $terminal_type eq ';'
10421 && $nesting_depth_to_go[$iend] < $nesting_depth_to_go[$ibeg];
10423 # NOTE: A future improvement would be to make it semicolon terminated
10424 # even if it does not have a semicolon but is followed by a closing
10425 # block brace. This would undo ci even for something like the
10426 # following, in which the final paren does not have a semicolon because
10427 # it is a possible weld location:
10429 # if ($BOLD_MATH) {
10431 # $labels, $comment,
10432 # join( '', '<B>', &make_math( $mode, '', '', $_ ), '</B>' )
10437 # MOJO: Set a flag if this lines begins with ')->'
10438 my $leading_paren_arrow = (
10439 $types_to_go[$ibeg] eq '}'
10440 && $tokens_to_go[$ibeg] eq ')'
10442 ( $ibeg < $i_terminal && $types_to_go[ $ibeg + 1 ] eq '->' )
10443 || ( $ibeg < $i_terminal - 1
10444 && $types_to_go[ $ibeg + 1 ] eq 'b'
10445 && $types_to_go[ $ibeg + 2 ] eq '->' )
10449 ##########################################################
10450 # Section 1: set a flag and a default indentation
10452 # Most lines are indented according to the initial token.
10453 # But it is common to outdent to the level just after the
10454 # terminal token in certain cases...
10455 # adjust_indentation flag:
10456 # 0 - do not adjust
10458 # 2 - vertically align with opening token
10460 ##########################################################
10461 my $adjust_indentation = 0;
10462 my $default_adjust_indentation = $adjust_indentation;
10465 $opening_indentation, $opening_offset,
10466 $is_leading, $opening_exists
10469 my $type_beg = $types_to_go[$ibeg];
10470 my $token_beg = $tokens_to_go[$ibeg];
10471 my $K_beg = $K_to_go[$ibeg];
10472 my $ibeg_weld_fix = $ibeg;
10474 # QW PATCH 2 (Testing)
10475 # At an isolated closing token of a qw quote which is welded to
10476 # a following closing token, we will locally change its type to
10477 # be the same as its token. This will allow formatting to be the
10478 # same as for an ordinary closing token.
10480 # For -lp formatting se use $ibeg_weld_fix to get around the problem
10481 # that with -lp type formatting the opening and closing tokens to not
10482 # have sequence numbers.
10483 if ( $type_beg eq 'q' && $token_beg =~ /^[\)\}\]\>]/ ) {
10484 my $K_next_nonblank = $self->K_next_code($K_beg);
10485 if ( defined($K_next_nonblank) ) {
10486 my $type_sequence = $rLL->[$K_next_nonblank]->[_TYPE_SEQUENCE_];
10487 my $token = $rLL->[$K_next_nonblank]->[_TOKEN_];
10488 my $welded = weld_len_left( $type_sequence, $token );
10490 $ibeg_weld_fix = $ibeg + ( $K_next_nonblank - $K_beg );
10491 $type_beg = ')'; ##$token_beg;
10496 # if we are at a closing token of some type..
10497 if ( $type_beg =~ /^[\)\}\]R]$/ ) {
10499 # get the indentation of the line containing the corresponding
10502 $opening_indentation, $opening_offset,
10503 $is_leading, $opening_exists
10505 = get_opening_indentation( $ibeg_weld_fix, $ri_first, $ri_last,
10506 $rindentation_list );
10508 # First set the default behavior:
10511 # default behavior is to outdent closing lines
10512 # of the form: "); }; ]; )->xxx;"
10513 $is_semicolon_terminated
10515 # and 'cuddled parens' of the form: ")->pack("
10516 # Bug fix for RT #123749]: the types here were
10517 # incorrectly '(' and ')'. Corrected to be '{' and '}'
10519 $terminal_type eq '{'
10520 && $type_beg eq '}'
10521 && ( $nesting_depth_to_go[$iend] + 1 ==
10522 $nesting_depth_to_go[$ibeg] )
10525 # remove continuation indentation for any line like
10527 # or without ending '{' and unbalanced, such as
10528 # such as '}->{$operator}'
10532 && ( $types_to_go[$iend] eq '{'
10533 || $levels_to_go[$iend] < $levels_to_go[$ibeg] )
10536 # and when the next line is at a lower indentation level
10537 # PATCH: and only if the style allows undoing continuation
10538 # for all closing token types. We should really wait until
10539 # the indentation of the next line is known and then make
10540 # a decision, but that would require another pass.
10541 || ( $level_jump < 0 && !$some_closing_token_indentation )
10543 # Patch for -wn=2, multiple welded closing tokens
10544 || ( $i_terminal > $ibeg
10545 && $types_to_go[$iend] =~ /^[\)\}\]R]$/ )
10549 $adjust_indentation = 1;
10552 # outdent something like '),'
10554 $terminal_type eq ','
10556 # Removed this constraint for -wn
10557 # OLD: allow just one character before the comma
10558 # && $i_terminal == $ibeg + 1
10560 # require LIST environment; otherwise, we may outdent too much -
10561 # this can happen in calls without parentheses (overload.t);
10562 && $container_environment_to_go[$i_terminal] eq 'LIST'
10565 $adjust_indentation = 1;
10568 # undo continuation indentation of a terminal closing token if
10569 # it is the last token before a level decrease. This will allow
10570 # a closing token to line up with its opening counterpart, and
10571 # avoids a indentation jump larger than 1 level.
10572 if ( $types_to_go[$i_terminal] =~ /^[\}\]\)R]$/
10573 && $i_terminal == $ibeg
10574 && defined($K_beg) )
10576 my $K_next_nonblank = $self->K_next_code($K_beg);
10577 if ( defined($K_next_nonblank) ) {
10578 my $lev = $rLL->[$K_beg]->[_LEVEL_];
10579 my $level_next = $rLL->[$K_next_nonblank]->[_LEVEL_];
10580 $adjust_indentation = 1 if ( $level_next < $lev );
10583 # Patch for RT #96101, in which closing brace of anonymous subs
10584 # was not outdented. We should look ahead and see if there is
10585 # a level decrease at the next token (i.e., a closing token),
10586 # but right now we do not have that information. For now
10587 # we see if we are in a list, and this works well.
10588 # See test files 'sub*.t' for good test cases.
10589 if ( $block_type_to_go[$ibeg] =~ /$ASUB_PATTERN/
10590 && $container_environment_to_go[$i_terminal] eq 'LIST'
10591 && !$rOpts->{'indent-closing-brace'} )
10594 $opening_indentation, $opening_offset,
10595 $is_leading, $opening_exists
10597 = get_opening_indentation( $ibeg, $ri_first, $ri_last,
10598 $rindentation_list );
10599 my $indentation = $leading_spaces_to_go[$ibeg];
10600 if ( defined($opening_indentation)
10601 && get_spaces($indentation) >
10602 get_spaces($opening_indentation) )
10604 $adjust_indentation = 1;
10609 # YVES patch 1 of 2:
10610 # Undo ci of line with leading closing eval brace,
10611 # but not beyond the indention of the line with
10612 # the opening brace.
10613 if ( $block_type_to_go[$ibeg] eq 'eval'
10614 && !$rOpts->{'line-up-parentheses'}
10615 && !$rOpts->{'indent-closing-brace'} )
10618 $opening_indentation, $opening_offset,
10619 $is_leading, $opening_exists
10621 = get_opening_indentation( $ibeg, $ri_first, $ri_last,
10622 $rindentation_list );
10623 my $indentation = $leading_spaces_to_go[$ibeg];
10624 if ( defined($opening_indentation)
10625 && get_spaces($indentation) >
10626 get_spaces($opening_indentation) )
10628 $adjust_indentation = 1;
10632 $default_adjust_indentation = $adjust_indentation;
10634 # Now modify default behavior according to user request:
10635 # handle option to indent non-blocks of the form ); }; ];
10636 # But don't do special indentation to something like ')->pack('
10637 if ( !$block_type_to_go[$ibeg] ) {
10638 my $cti = $closing_token_indentation{ $tokens_to_go[$ibeg] };
10640 if ( $i_terminal <= $ibeg + 1
10641 || $is_semicolon_terminated )
10643 $adjust_indentation = 2;
10646 $adjust_indentation = 0;
10649 elsif ( $cti == 2 ) {
10650 if ($is_semicolon_terminated) {
10651 $adjust_indentation = 3;
10654 $adjust_indentation = 0;
10657 elsif ( $cti == 3 ) {
10658 $adjust_indentation = 3;
10662 # handle option to indent blocks
10665 $rOpts->{'indent-closing-brace'}
10667 $i_terminal == $ibeg # isolated terminal '}'
10668 || $is_semicolon_terminated
10672 $adjust_indentation = 3;
10677 # if at ');', '};', '>;', and '];' of a terminal qw quote
10678 elsif ($rpatterns->[0] =~ /^qb*;$/
10679 && $rfields->[0] =~ /^([\)\}\]\>]);$/ )
10681 if ( $closing_token_indentation{$1} == 0 ) {
10682 $adjust_indentation = 1;
10685 $adjust_indentation = 3;
10689 # if line begins with a ':', align it with any
10690 # previous line leading with corresponding ?
10691 elsif ( $types_to_go[$ibeg] eq ':' ) {
10693 $opening_indentation, $opening_offset,
10694 $is_leading, $opening_exists
10696 = get_opening_indentation( $ibeg, $ri_first, $ri_last,
10697 $rindentation_list );
10698 if ($is_leading) { $adjust_indentation = 2; }
10701 ##########################################################
10702 # Section 2: set indentation according to flag set above
10704 # Select the indentation object to define leading
10705 # whitespace. If we are outdenting something like '} } );'
10706 # then we want to use one level below the last token
10707 # ($i_terminal) in order to get it to fully outdent through
10709 ##########################################################
10712 my $level_end = $levels_to_go[$iend];
10714 if ( $adjust_indentation == 0 ) {
10715 $indentation = $leading_spaces_to_go[$ibeg];
10716 $lev = $levels_to_go[$ibeg];
10718 elsif ( $adjust_indentation == 1 ) {
10720 # Change the indentation to be that of a different token on the line
10721 # Previously, the indentation of the terminal token was used:
10723 # $indentation = $reduced_spaces_to_go[$i_terminal];
10724 # $lev = $levels_to_go[$i_terminal];
10726 # Generalization for MOJO:
10727 # Use the lowest level indentation of the tokens on the line.
10728 # For example, here we can use the indentation of the ending ';':
10729 # } until ($selection > 0 and $selection < 10); # ok to use ';'
10730 # But this will not outdent if we use the terminal indentation:
10731 # )->then( sub { # use indentation of the ->, not the {
10732 # Warning: reduced_spaces_to_go[] may be a reference, do not
10733 # do numerical checks with it
10736 $indentation = $reduced_spaces_to_go[$i_ind];
10737 $lev = $levels_to_go[$i_ind];
10738 while ( $i_ind < $i_terminal ) {
10740 if ( $levels_to_go[$i_ind] < $lev ) {
10741 $indentation = $reduced_spaces_to_go[$i_ind];
10742 $lev = $levels_to_go[$i_ind];
10747 # handle indented closing token which aligns with opening token
10748 elsif ( $adjust_indentation == 2 ) {
10750 # handle option to align closing token with opening token
10751 $lev = $levels_to_go[$ibeg];
10753 # calculate spaces needed to align with opening token
10755 get_spaces($opening_indentation) + $opening_offset;
10757 # Indent less than the previous line.
10759 # Problem: For -lp we don't exactly know what it was if there
10760 # were recoverable spaces sent to the aligner. A good solution
10761 # would be to force a flush of the vertical alignment buffer, so
10762 # that we would know. For now, this rule is used for -lp:
10764 # When the last line did not start with a closing token we will
10765 # be optimistic that the aligner will recover everything wanted.
10767 # This rule will prevent us from breaking a hierarchy of closing
10768 # tokens, and in a worst case will leave a closing paren too far
10769 # indented, but this is better than frequently leaving it not
10771 my $last_spaces = get_spaces($last_indentation_written);
10772 if ( $last_leading_token !~ /^[\}\]\)]$/ ) {
10774 get_recoverable_spaces($last_indentation_written);
10777 # reset the indentation to the new space count if it works
10778 # only options are all or none: nothing in-between looks good
10779 $lev = $levels_to_go[$ibeg];
10780 if ( $space_count < $last_spaces ) {
10781 if ($rOpts_line_up_parentheses) {
10782 my $lev = $levels_to_go[$ibeg];
10784 new_lp_indentation_item( $space_count, $lev, 0, 0, 0 );
10787 $indentation = $space_count;
10791 # revert to default if it doesn't work
10793 $space_count = leading_spaces_to_go($ibeg);
10794 if ( $default_adjust_indentation == 0 ) {
10795 $indentation = $leading_spaces_to_go[$ibeg];
10797 elsif ( $default_adjust_indentation == 1 ) {
10798 $indentation = $reduced_spaces_to_go[$i_terminal];
10799 $lev = $levels_to_go[$i_terminal];
10804 # Full indentaion of closing tokens (-icb and -icp or -cti=2)
10807 # handle -icb (indented closing code block braces)
10808 # Updated method for indented block braces: indent one full level if
10809 # there is no continuation indentation. This will occur for major
10810 # structures such as sub, if, else, but not for things like map
10813 # Note: only code blocks without continuation indentation are
10814 # handled here (if, else, unless, ..). In the following snippet,
10815 # the terminal brace of the sort block will have continuation
10816 # indentation as shown so it will not be handled by the coding
10817 # here. We would have to undo the continuation indentation to do
10818 # this, but it probably looks ok as is. This is a possible future
10819 # update for semicolon terminated lines.
10821 # if ($sortby eq 'date' or $sortby eq 'size') {
10823 # $file_data{$a}{$sortby} <=> $file_data{$b}{$sortby}
10828 if ( $block_type_to_go[$ibeg]
10829 && $ci_levels_to_go[$i_terminal] == 0 )
10831 my $spaces = get_spaces( $leading_spaces_to_go[$i_terminal] );
10832 $indentation = $spaces + $rOpts_indent_columns;
10834 # NOTE: for -lp we could create a new indentation object, but
10835 # there is probably no need to do it
10838 # handle -icp and any -icb block braces which fall through above
10839 # test such as the 'sort' block mentioned above.
10842 # There are currently two ways to handle -icp...
10843 # One way is to use the indentation of the previous line:
10844 # $indentation = $last_indentation_written;
10846 # The other way is to use the indentation that the previous line
10847 # would have had if it hadn't been adjusted:
10848 $indentation = $last_unadjusted_indentation;
10850 # Current method: use the minimum of the two. This avoids
10851 # inconsistent indentation.
10852 if ( get_spaces($last_indentation_written) <
10853 get_spaces($indentation) )
10855 $indentation = $last_indentation_written;
10859 # use previous indentation but use own level
10860 # to cause list to be flushed properly
10861 $lev = $levels_to_go[$ibeg];
10864 # remember indentation except for multi-line quotes, which get
10866 unless ( $ibeg == 0 && $starting_in_quote ) {
10867 $last_indentation_written = $indentation;
10868 $last_unadjusted_indentation = $leading_spaces_to_go[$ibeg];
10869 $last_leading_token = $tokens_to_go[$ibeg];
10872 # be sure lines with leading closing tokens are not outdented more
10873 # than the line which contained the corresponding opening token.
10875 #############################################################
10876 # updated per bug report in alex_bug.pl: we must not
10877 # mess with the indentation of closing logical braces so
10878 # we must treat something like '} else {' as if it were
10879 # an isolated brace my $is_isolated_block_brace = (
10880 # $iend == $ibeg ) && $block_type_to_go[$ibeg];
10881 #############################################################
10882 my $is_isolated_block_brace = $block_type_to_go[$ibeg]
10883 && ( $iend == $ibeg
10884 || $is_if_elsif_else_unless_while_until_for_foreach{
10885 $block_type_to_go[$ibeg]
10888 # only do this for a ':; which is aligned with its leading '?'
10889 my $is_unaligned_colon = $types_to_go[$ibeg] eq ':' && !$is_leading;
10892 defined($opening_indentation)
10893 && !$leading_paren_arrow # MOJO
10894 && !$is_isolated_block_brace
10895 && !$is_unaligned_colon
10898 if ( get_spaces($opening_indentation) > get_spaces($indentation) ) {
10899 $indentation = $opening_indentation;
10903 # remember the indentation of each line of this batch
10904 push @{$rindentation_list}, $indentation;
10906 # outdent lines with certain leading tokens...
10909 # must be first word of this batch
10915 # certain leading keywords if requested
10917 $rOpts->{'outdent-keywords'}
10918 && $types_to_go[$ibeg] eq 'k'
10919 && $outdent_keyword{ $tokens_to_go[$ibeg] }
10922 # or labels if requested
10923 || ( $rOpts->{'outdent-labels'} && $types_to_go[$ibeg] eq 'J' )
10925 # or static block comments if requested
10926 || ( $types_to_go[$ibeg] eq '#'
10927 && $rOpts->{'outdent-static-block-comments'}
10928 && $is_static_block_comment )
10933 my $space_count = leading_spaces_to_go($ibeg);
10934 if ( $space_count > 0 ) {
10935 $space_count -= $rOpts_continuation_indentation;
10936 $is_outdented_line = 1;
10937 if ( $space_count < 0 ) { $space_count = 0 }
10939 # do not promote a spaced static block comment to non-spaced;
10940 # this is not normally necessary but could be for some
10941 # unusual user inputs (such as -ci = -i)
10942 if ( $types_to_go[$ibeg] eq '#' && $space_count == 0 ) {
10946 if ($rOpts_line_up_parentheses) {
10948 new_lp_indentation_item( $space_count, $lev, 0, 0, 0 );
10951 $indentation = $space_count;
10956 return ( $indentation, $lev, $level_end, $terminal_type,
10957 $is_semicolon_terminated, $is_outdented_line );
10961 sub set_vertical_tightness_flags {
10963 my ( $n, $n_last_line, $ibeg, $iend, $ri_first, $ri_last ) = @_;
10965 # Define vertical tightness controls for the nth line of a batch.
10966 # We create an array of parameters which tell the vertical aligner
10967 # if we should combine this line with the next line to achieve the
10968 # desired vertical tightness. The array of parameters contains:
10970 # [0] type: 1=opening non-block 2=closing non-block
10971 # 3=opening block brace 4=closing block brace
10973 # [1] flag: if opening: 1=no multiple steps, 2=multiple steps ok
10974 # if closing: spaces of padding to use
10975 # [2] sequence number of container
10976 # [3] valid flag: do not append if this flag is false. Will be
10977 # true if appropriate -vt flag is set. Otherwise, Will be
10978 # made true only for 2 line container in parens with -lp
10980 # These flags are used by sub set_leading_whitespace in
10981 # the vertical aligner
10983 my $rvertical_tightness_flags = [ 0, 0, 0, 0, 0, 0 ];
10985 #--------------------------------------------------------------
10986 # Vertical Tightness Flags Section 1:
10987 # Handle Lines 1 .. n-1 but not the last line
10988 # For non-BLOCK tokens, we will need to examine the next line
10989 # too, so we won't consider the last line.
10990 #--------------------------------------------------------------
10991 if ( $n < $n_last_line ) {
10993 #--------------------------------------------------------------
10994 # Vertical Tightness Flags Section 1a:
10995 # Look for Type 1, last token of this line is a non-block opening token
10996 #--------------------------------------------------------------
10997 my $ibeg_next = $ri_first->[ $n + 1 ];
10998 my $token_end = $tokens_to_go[$iend];
10999 my $iend_next = $ri_last->[ $n + 1 ];
11001 $type_sequence_to_go[$iend]
11002 && !$block_type_to_go[$iend]
11003 && $is_opening_token{$token_end}
11005 $opening_vertical_tightness{$token_end} > 0
11007 # allow 2-line method call to be closed up
11008 || ( $rOpts_line_up_parentheses
11009 && $token_end eq '('
11011 && $types_to_go[ $iend - 1 ] ne 'b' )
11016 # avoid multiple jumps in nesting depth in one line if
11018 my $ovt = $opening_vertical_tightness{$token_end};
11019 my $iend_next = $ri_last->[ $n + 1 ];
11022 && ( $nesting_depth_to_go[ $iend_next + 1 ] !=
11023 $nesting_depth_to_go[$ibeg_next] )
11027 # If -vt flag has not been set, mark this as invalid
11028 # and aligner will validate it if it sees the closing paren
11030 my $valid_flag = $ovt;
11031 @{$rvertical_tightness_flags} =
11032 ( 1, $ovt, $type_sequence_to_go[$iend], $valid_flag );
11036 #--------------------------------------------------------------
11037 # Vertical Tightness Flags Section 1b:
11038 # Look for Type 2, first token of next line is a non-block closing
11039 # token .. and be sure this line does not have a side comment
11040 #--------------------------------------------------------------
11041 my $token_next = $tokens_to_go[$ibeg_next];
11042 if ( $type_sequence_to_go[$ibeg_next]
11043 && !$block_type_to_go[$ibeg_next]
11044 && $is_closing_token{$token_next}
11045 && $types_to_go[$iend] !~ '#' ) # for safety, shouldn't happen!
11047 my $ovt = $opening_vertical_tightness{$token_next};
11048 my $cvt = $closing_vertical_tightness{$token_next};
11051 # never append a trailing line like )->pack(
11052 # because it will throw off later alignment
11054 $nesting_depth_to_go[$ibeg_next] ==
11055 $nesting_depth_to_go[ $iend_next + 1 ] + 1
11060 $container_environment_to_go[$ibeg_next] ne 'LIST'
11064 # allow closing up 2-line method calls
11065 || ( $rOpts_line_up_parentheses
11066 && $token_next eq ')' )
11073 # decide which trailing closing tokens to append..
11075 if ( $cvt == 2 || $iend_next == $ibeg_next ) { $ok = 1 }
11077 my $str = join( '',
11078 @types_to_go[ $ibeg_next + 1 .. $ibeg_next + 2 ] );
11080 # append closing token if followed by comment or ';'
11081 if ( $str =~ /^b?[#;]/ ) { $ok = 1 }
11085 my $valid_flag = $cvt;
11086 @{$rvertical_tightness_flags} = (
11088 $tightness{$token_next} == 2 ? 0 : 1,
11089 $type_sequence_to_go[$ibeg_next], $valid_flag,
11095 #--------------------------------------------------------------
11096 # Vertical Tightness Flags Section 1c:
11097 # Implement the Opening Token Right flag (Type 2)..
11098 # If requested, move an isolated trailing opening token to the end of
11099 # the previous line which ended in a comma. We could do this
11100 # in sub recombine_breakpoints but that would cause problems
11101 # with -lp formatting. The problem is that indentation will
11102 # quickly move far to the right in nested expressions. By
11103 # doing it after indentation has been set, we avoid changes
11104 # to the indentation. Actual movement of the token takes place
11105 # in sub valign_output_step_B.
11106 #--------------------------------------------------------------
11108 $opening_token_right{ $tokens_to_go[$ibeg_next] }
11110 # previous line is not opening
11111 # (use -sot to combine with it)
11112 && !$is_opening_token{$token_end}
11114 # previous line ended in one of these
11115 # (add other cases if necessary; '=>' and '.' are not necessary
11116 && !$block_type_to_go[$ibeg_next]
11118 # this is a line with just an opening token
11119 && ( $iend_next == $ibeg_next
11120 || $iend_next == $ibeg_next + 2
11121 && $types_to_go[$iend_next] eq '#' )
11123 # looks bad if we align vertically with the wrong container
11124 && $tokens_to_go[$ibeg] ne $tokens_to_go[$ibeg_next]
11127 my $valid_flag = 1;
11128 my $spaces = ( $types_to_go[ $ibeg_next - 1 ] eq 'b' ) ? 1 : 0;
11129 @{$rvertical_tightness_flags} =
11130 ( 2, $spaces, $type_sequence_to_go[$ibeg_next], $valid_flag, );
11133 #--------------------------------------------------------------
11134 # Vertical Tightness Flags Section 1d:
11135 # Stacking of opening and closing tokens (Type 2)
11136 #--------------------------------------------------------------
11138 my $token_beg_next = $tokens_to_go[$ibeg_next];
11140 # patch to make something like 'qw(' behave like an opening paren
11142 if ( $types_to_go[$ibeg_next] eq 'q' ) {
11143 if ( $token_beg_next =~ /^qw\s*([\[\(\{])$/ ) {
11144 $token_beg_next = $1;
11148 if ( $is_closing_token{$token_end}
11149 && $is_closing_token{$token_beg_next} )
11151 $stackable = $stack_closing_token{$token_beg_next}
11152 unless ( $block_type_to_go[$ibeg_next] )
11153 ; # shouldn't happen; just checking
11155 elsif ($is_opening_token{$token_end}
11156 && $is_opening_token{$token_beg_next} )
11158 $stackable = $stack_opening_token{$token_beg_next}
11159 unless ( $block_type_to_go[$ibeg_next] )
11160 ; # shouldn't happen; just checking
11165 my $is_semicolon_terminated;
11166 if ( $n + 1 == $n_last_line ) {
11167 my ( $terminal_type, $i_terminal ) = terminal_type(
11168 \@types_to_go, \@block_type_to_go,
11169 $ibeg_next, $iend_next
11171 $is_semicolon_terminated = $terminal_type eq ';'
11172 && $nesting_depth_to_go[$iend_next] <
11173 $nesting_depth_to_go[$ibeg_next];
11176 # this must be a line with just an opening token
11177 # or end in a semicolon
11179 $is_semicolon_terminated
11180 || ( $iend_next == $ibeg_next
11181 || $iend_next == $ibeg_next + 2
11182 && $types_to_go[$iend_next] eq '#' )
11185 my $valid_flag = 1;
11186 my $spaces = ( $types_to_go[ $ibeg_next - 1 ] eq 'b' ) ? 1 : 0;
11187 @{$rvertical_tightness_flags} =
11188 ( 2, $spaces, $type_sequence_to_go[$ibeg_next], $valid_flag,
11194 #--------------------------------------------------------------
11195 # Vertical Tightness Flags Section 2:
11196 # Handle type 3, opening block braces on last line of the batch
11197 # Check for a last line with isolated opening BLOCK curly
11198 #--------------------------------------------------------------
11199 elsif ($rOpts_block_brace_vertical_tightness
11201 && $types_to_go[$iend] eq '{'
11202 && $block_type_to_go[$iend] =~
11203 /$block_brace_vertical_tightness_pattern/o )
11205 @{$rvertical_tightness_flags} =
11206 ( 3, $rOpts_block_brace_vertical_tightness, 0, 1 );
11209 #--------------------------------------------------------------
11210 # Vertical Tightness Flags Section 3:
11211 # Handle type 4, a closing block brace on the last line of the batch Check
11212 # for a last line with isolated closing BLOCK curly
11213 #--------------------------------------------------------------
11214 elsif ($rOpts_stack_closing_block_brace
11216 && $block_type_to_go[$iend]
11217 && $types_to_go[$iend] eq '}' )
11219 my $spaces = $rOpts_block_brace_tightness == 2 ? 0 : 1;
11220 @{$rvertical_tightness_flags} =
11221 ( 4, $spaces, $type_sequence_to_go[$iend], 1 );
11224 # pack in the sequence numbers of the ends of this line
11225 $rvertical_tightness_flags->[4] = get_seqno($ibeg);
11226 $rvertical_tightness_flags->[5] = get_seqno($iend);
11227 return $rvertical_tightness_flags;
11232 # get opening and closing sequence numbers of a token for the vertical
11233 # aligner. Assign qw quotes a value to allow qw opening and closing tokens
11234 # to be treated somewhat like opening and closing tokens for stacking
11235 # tokens by the vertical aligner.
11237 my $seqno = $type_sequence_to_go[$ii];
11238 if ( $types_to_go[$ii] eq 'q' ) {
11241 $seqno = $SEQ_QW if ( $tokens_to_go[$ii] =~ /^qw\s*[\(\{\[]/ );
11244 if ( !$ending_in_quote ) {
11245 $seqno = $SEQ_QW if ( $tokens_to_go[$ii] =~ /[\)\}\]]$/ );
11253 my %is_vertical_alignment_type;
11254 my %is_vertical_alignment_keyword;
11255 my %is_terminal_alignment_type;
11261 # Replaced =~ and // in the list. // had been removed in RT 119588
11263 = **= += *= &= <<= &&= -= /= |= >>= ||= //= .= %= ^= x=
11264 { ? : => && || ~~ !~~ =~ !~ //
11266 @is_vertical_alignment_type{@q} = (1) x scalar(@q);
11268 # only align these at end of line
11270 @is_terminal_alignment_type{@q} = (1) x scalar(@q);
11272 # eq and ne were removed from this list to improve alignment chances
11273 @q = qw(if unless and or err for foreach while until);
11274 @is_vertical_alignment_keyword{@q} = (1) x scalar(@q);
11277 sub set_vertical_alignment_markers {
11279 # This routine takes the first step toward vertical alignment of the
11280 # lines of output text. It looks for certain tokens which can serve as
11281 # vertical alignment markers (such as an '=').
11283 # Method: We look at each token $i in this output batch and set
11284 # $matching_token_to_go[$i] equal to those tokens at which we would
11285 # accept vertical alignment.
11287 my ( $ri_first, $ri_last ) = @_;
11289 # nothing to do if we aren't allowed to change whitespace
11290 if ( !$rOpts_add_whitespace ) {
11291 for my $i ( 0 .. $max_index_to_go ) {
11292 $matching_token_to_go[$i] = '';
11297 # remember the index of last nonblank token before any sidecomment
11298 my $i_terminal = $max_index_to_go;
11299 if ( $types_to_go[$i_terminal] eq '#' ) {
11300 if ( $i_terminal > 0 && $types_to_go[ --$i_terminal ] eq 'b' ) {
11301 if ( $i_terminal > 0 ) { --$i_terminal }
11305 # look at each line of this batch..
11306 my $last_vertical_alignment_before_index;
11307 my $vert_last_nonblank_type;
11308 my $vert_last_nonblank_token;
11309 my $vert_last_nonblank_block_type;
11310 my $max_line = @{$ri_first} - 1;
11312 foreach my $line ( 0 .. $max_line ) {
11313 my $ibeg = $ri_first->[$line];
11314 my $iend = $ri_last->[$line];
11315 $last_vertical_alignment_before_index = -1;
11316 $vert_last_nonblank_type = '';
11317 $vert_last_nonblank_token = '';
11318 $vert_last_nonblank_block_type = '';
11320 # look at each token in this output line..
11322 foreach my $i ( $ibeg .. $iend ) {
11323 my $alignment_type = '';
11324 my $type = $types_to_go[$i];
11325 my $block_type = $block_type_to_go[$i];
11326 my $token = $tokens_to_go[$i];
11328 # check for flag indicating that we should not align
11330 if ( $matching_token_to_go[$i] ) {
11331 $matching_token_to_go[$i] = '';
11335 #--------------------------------------------------------
11336 # First see if we want to align BEFORE this token
11337 #--------------------------------------------------------
11339 # The first possible token that we can align before
11340 # is index 2 because: 1) it doesn't normally make sense to
11341 # align before the first token and 2) the second
11342 # token must be a blank if we are to align before
11344 if ( $i < $ibeg + 2 ) { }
11346 # must follow a blank token
11347 elsif ( $types_to_go[ $i - 1 ] ne 'b' ) { }
11349 # align a side comment --
11350 elsif ( $type eq '#' ) {
11354 # it is a static side comment
11356 $rOpts->{'static-side-comments'}
11357 && $token =~ /$static_side_comment_pattern/o
11360 # or a closing side comment
11361 || ( $vert_last_nonblank_block_type
11363 /$closing_side_comment_prefix_pattern/o )
11366 $alignment_type = $type;
11367 } ## Example of a static side comment
11370 # otherwise, do not align two in a row to create a
11372 elsif ( $last_vertical_alignment_before_index == $i - 2 ) { }
11374 # align before one of these keywords
11375 # (within a line, since $i>1)
11376 elsif ( $type eq 'k' ) {
11378 # /^(if|unless|and|or|eq|ne)$/
11379 if ( $is_vertical_alignment_keyword{$token} ) {
11380 $alignment_type = $token;
11384 # align before one of these types..
11385 # Note: add '.' after new vertical aligner is operational
11386 elsif ( $is_vertical_alignment_type{$type} ) {
11387 $alignment_type = $token;
11389 # Do not align a terminal token. Although it might
11390 # occasionally look ok to do this, this has been found to be
11391 # a good general rule. The main problems are:
11392 # (1) that the terminal token (such as an = or :) might get
11393 # moved far to the right where it is hard to see because
11394 # nothing follows it, and
11395 # (2) doing so may prevent other good alignments.
11396 # Current exceptions are && and ||
11397 if ( $i == $iend || $i >= $i_terminal ) {
11398 $alignment_type = ""
11399 unless ( $is_terminal_alignment_type{$type} );
11402 # Do not align leading ': (' or '. ('. This would prevent
11403 # alignment in something like the following:
11405 # ( $input_line_number < 10 ) ? " "
11406 # : ( $input_line_number < 100 ) ? " "
11410 # ( $case_matters ? $accessor : " lc($accessor) " )
11411 # . ( $yesno ? " eq " : " ne " )
11412 if ( $i == $ibeg + 2
11413 && $types_to_go[$ibeg] =~ /^[\.\:]$/
11414 && $types_to_go[ $i - 1 ] eq 'b' )
11416 $alignment_type = "";
11419 # For a paren after keyword, only align something like this:
11421 # elsif ( $b ) { &b }
11422 if ( $token eq '(' && $vert_last_nonblank_type eq 'k' ) {
11423 $alignment_type = ""
11424 unless $vert_last_nonblank_token =~
11425 /^(if|unless|elsif)$/;
11428 # be sure the alignment tokens are unique
11429 # This didn't work well: reason not determined
11430 # if ($token ne $type) {$alignment_type .= $type}
11433 # NOTE: This is deactivated because it causes the previous
11434 # if/elsif alignment to fail
11435 #elsif ( $type eq '}' && $token eq '}' && $block_type_to_go[$i])
11436 #{ $alignment_type = $type; }
11438 if ($alignment_type) {
11439 $last_vertical_alignment_before_index = $i;
11442 #--------------------------------------------------------
11443 # Next see if we want to align AFTER the previous nonblank
11444 #--------------------------------------------------------
11446 # We want to line up ',' and interior ';' tokens, with the added
11447 # space AFTER these tokens. (Note: interior ';' is included
11448 # because it may occur in short blocks).
11451 # we haven't already set it
11454 # and its not the first token of the line
11457 # and it follows a blank
11458 && $types_to_go[ $i - 1 ] eq 'b'
11460 # and previous token IS one of these:
11461 && ( $vert_last_nonblank_type =~ /^[\,\;]$/ )
11463 # and it's NOT one of these
11464 && ( $type !~ /^[b\#\)\]\}]$/ )
11466 # then go ahead and align
11470 $alignment_type = $vert_last_nonblank_type;
11473 #--------------------------------------------------------
11474 # patch for =~ operator. We only align this if it
11475 # is the first operator in a line, and the line is a simple
11476 # statement. Aligning them within a statement
11477 # interferes could interfere with other good alignments.
11478 #--------------------------------------------------------
11479 if ( $alignment_type eq '=~' ) {
11480 my $terminal_type = $types_to_go[$i_terminal];
11481 if ( $count > 0 || $max_line > 0 || $terminal_type ne ';' )
11483 $alignment_type = "";
11487 #--------------------------------------------------------
11488 # then store the value
11489 #--------------------------------------------------------
11490 $matching_token_to_go[$i] = $alignment_type;
11491 $count++ if ($alignment_type);
11492 if ( $type ne 'b' ) {
11493 $vert_last_nonblank_type = $type;
11494 $vert_last_nonblank_token = $token;
11495 $vert_last_nonblank_block_type = $block_type;
11503 sub terminal_type {
11505 # returns type of last token on this line (terminal token), as follows:
11506 # returns # for a full-line comment
11507 # returns ' ' for a blank line
11508 # otherwise returns final token type
11510 my ( $rtype, $rblock_type, $ibeg, $iend ) = @_;
11512 # check for full-line comment..
11513 if ( $rtype->[$ibeg] eq '#' ) {
11514 return wantarray ? ( $rtype->[$ibeg], $ibeg ) : $rtype->[$ibeg];
11518 # start at end and walk backwards..
11519 for ( my $i = $iend ; $i >= $ibeg ; $i-- ) {
11521 # skip past any side comment and blanks
11522 next if ( $rtype->[$i] eq 'b' );
11523 next if ( $rtype->[$i] eq '#' );
11525 # found it..make sure it is a BLOCK termination,
11526 # but hide a terminal } after sort/grep/map because it is not
11527 # necessarily the end of the line. (terminal.t)
11528 my $terminal_type = $rtype->[$i];
11530 $terminal_type eq '}'
11531 && ( !$rblock_type->[$i]
11532 || ( $is_sort_map_grep_eval_do{ $rblock_type->[$i] } ) )
11535 $terminal_type = 'b';
11537 return wantarray ? ( $terminal_type, $i ) : $terminal_type;
11541 return wantarray ? ( ' ', $ibeg ) : ' ';
11545 { # set_bond_strengths
11547 my %is_good_keyword_breakpoint;
11548 my %is_lt_gt_le_ge;
11550 my %binary_bond_strength;
11557 sub bias_table_key {
11558 my ( $type, $token ) = @_;
11559 my $bias_table_key = $type;
11560 if ( $type eq 'k' ) {
11561 $bias_table_key = $token;
11562 if ( $token eq 'err' ) { $bias_table_key = 'or' }
11564 return $bias_table_key;
11567 sub initialize_bond_strength_hashes {
11570 @q = qw(if unless while until for foreach);
11571 @is_good_keyword_breakpoint{@q} = (1) x scalar(@q);
11573 @q = qw(lt gt le ge);
11574 @is_lt_gt_le_ge{@q} = (1) x scalar(@q);
11576 # The decision about where to break a line depends upon a "bond
11577 # strength" between tokens. The LOWER the bond strength, the MORE
11578 # likely a break. A bond strength may be any value but to simplify
11579 # things there are several pre-defined strength levels:
11581 # NO_BREAK => 10000;
11582 # VERY_STRONG => 100;
11586 # VERY_WEAK => 0.55;
11588 # The strength values are based on trial-and-error, and need to be
11589 # tweaked occasionally to get desired results. Some comments:
11591 # 1. Only relative strengths are important. small differences
11592 # in strengths can make big formatting differences.
11593 # 2. Each indentation level adds one unit of bond strength.
11594 # 3. A value of NO_BREAK makes an unbreakable bond
11595 # 4. A value of VERY_WEAK is the strength of a ','
11596 # 5. Values below NOMINAL are considered ok break points.
11597 # 6. Values above NOMINAL are considered poor break points.
11599 # The bond strengths should roughly follow precedence order where
11600 # possible. If you make changes, please check the results very
11601 # carefully on a variety of scripts. Testing with the -extrude
11602 # options is particularly helpful in exercising all of the rules.
11604 # Wherever possible, bond strengths are defined in the following
11605 # tables. There are two main stages to setting bond strengths and
11606 # two types of tables:
11608 # The first stage involves looking at each token individually and
11609 # defining left and right bond strengths, according to if we want
11610 # to break to the left or right side, and how good a break point it
11611 # is. For example tokens like =, ||, && make good break points and
11612 # will have low strengths, but one might want to break on either
11613 # side to put them at the end of one line or beginning of the next.
11615 # The second stage involves looking at certain pairs of tokens and
11616 # defining a bond strength for that particular pair. This second
11617 # stage has priority.
11619 #---------------------------------------------------------------
11620 # Bond Strength BEGIN Section 1.
11621 # Set left and right bond strengths of individual tokens.
11622 #---------------------------------------------------------------
11624 # NOTE: NO_BREAK's set in this section first are HINTS which will
11625 # probably not be honored. Essential NO_BREAKS's should be set in
11626 # BEGIN Section 2 or hardwired in the NO_BREAK coding near the end
11627 # of this subroutine.
11629 # Note that we are setting defaults in this section. The user
11630 # cannot change bond strengths but can cause the left and right
11631 # bond strengths of any token type to be swapped through the use of
11632 # the -wba and -wbb flags. In this way the user can determine if a
11633 # breakpoint token should appear at the end of one line or the
11634 # beginning of the next line.
11636 # The hash keys in this section are token types, plus the text of
11637 # certain keywords like 'or', 'and'.
11639 # no break around possible filehandle
11640 $left_bond_strength{'Z'} = NO_BREAK;
11641 $right_bond_strength{'Z'} = NO_BREAK;
11643 # never put a bare word on a new line:
11644 # example print (STDERR, "bla"); will fail with break after (
11645 $left_bond_strength{'w'} = NO_BREAK;
11647 # blanks always have infinite strength to force breaks after
11649 $right_bond_strength{'b'} = NO_BREAK;
11651 # try not to break on exponentation
11652 @q = qw# ** .. ... <=> #;
11653 @left_bond_strength{@q} = (STRONG) x scalar(@q);
11654 @right_bond_strength{@q} = (STRONG) x scalar(@q);
11656 # The comma-arrow has very low precedence but not a good break point
11657 $left_bond_strength{'=>'} = NO_BREAK;
11658 $right_bond_strength{'=>'} = NOMINAL;
11660 # ok to break after label
11661 $left_bond_strength{'J'} = NO_BREAK;
11662 $right_bond_strength{'J'} = NOMINAL;
11663 $left_bond_strength{'j'} = STRONG;
11664 $right_bond_strength{'j'} = STRONG;
11665 $left_bond_strength{'A'} = STRONG;
11666 $right_bond_strength{'A'} = STRONG;
11668 $left_bond_strength{'->'} = STRONG;
11669 $right_bond_strength{'->'} = VERY_STRONG;
11671 $left_bond_strength{'CORE::'} = NOMINAL;
11672 $right_bond_strength{'CORE::'} = NO_BREAK;
11674 # breaking AFTER modulus operator is ok:
11676 @left_bond_strength{@q} = (STRONG) x scalar(@q);
11677 @right_bond_strength{@q} =
11678 ( 0.1 * NOMINAL + 0.9 * STRONG ) x scalar(@q);
11680 # Break AFTER math operators * and /
11682 @left_bond_strength{@q} = (STRONG) x scalar(@q);
11683 @right_bond_strength{@q} = (NOMINAL) x scalar(@q);
11685 # Break AFTER weakest math operators + and -
11686 # Make them weaker than * but a bit stronger than '.'
11688 @left_bond_strength{@q} = (STRONG) x scalar(@q);
11689 @right_bond_strength{@q} =
11690 ( 0.91 * NOMINAL + 0.09 * WEAK ) x scalar(@q);
11692 # breaking BEFORE these is just ok:
11694 @right_bond_strength{@q} = (STRONG) x scalar(@q);
11695 @left_bond_strength{@q} = (NOMINAL) x scalar(@q);
11697 # breaking before the string concatenation operator seems best
11698 # because it can be hard to see at the end of a line
11699 $right_bond_strength{'.'} = STRONG;
11700 $left_bond_strength{'.'} = 0.9 * NOMINAL + 0.1 * WEAK;
11702 @q = qw< } ] ) R >;
11703 @left_bond_strength{@q} = (STRONG) x scalar(@q);
11704 @right_bond_strength{@q} = (NOMINAL) x scalar(@q);
11706 # make these a little weaker than nominal so that they get
11707 # favored for end-of-line characters
11708 @q = qw< != == =~ !~ ~~ !~~ >;
11709 @left_bond_strength{@q} = (STRONG) x scalar(@q);
11710 @right_bond_strength{@q} =
11711 ( 0.9 * NOMINAL + 0.1 * WEAK ) x scalar(@q);
11713 # break AFTER these
11714 @q = qw# < > | & >= <= #;
11715 @left_bond_strength{@q} = (VERY_STRONG) x scalar(@q);
11716 @right_bond_strength{@q} =
11717 ( 0.8 * NOMINAL + 0.2 * WEAK ) x scalar(@q);
11719 # breaking either before or after a quote is ok
11720 # but bias for breaking before a quote
11721 $left_bond_strength{'Q'} = NOMINAL;
11722 $right_bond_strength{'Q'} = NOMINAL + 0.02;
11723 $left_bond_strength{'q'} = NOMINAL;
11724 $right_bond_strength{'q'} = NOMINAL;
11726 # starting a line with a keyword is usually ok
11727 $left_bond_strength{'k'} = NOMINAL;
11729 # we usually want to bond a keyword strongly to what immediately
11730 # follows, rather than leaving it stranded at the end of a line
11731 $right_bond_strength{'k'} = STRONG;
11733 $left_bond_strength{'G'} = NOMINAL;
11734 $right_bond_strength{'G'} = STRONG;
11736 # assignment operators
11738 = **= += *= &= <<= &&=
11739 -= /= |= >>= ||= //=
11744 # Default is to break AFTER various assignment operators
11745 @left_bond_strength{@q} = (STRONG) x scalar(@q);
11746 @right_bond_strength{@q} =
11747 ( 0.4 * WEAK + 0.6 * VERY_WEAK ) x scalar(@q);
11749 # Default is to break BEFORE '&&' and '||' and '//'
11750 # set strength of '||' to same as '=' so that chains like
11751 # $a = $b || $c || $d will break before the first '||'
11752 $right_bond_strength{'||'} = NOMINAL;
11753 $left_bond_strength{'||'} = $right_bond_strength{'='};
11755 # same thing for '//'
11756 $right_bond_strength{'//'} = NOMINAL;
11757 $left_bond_strength{'//'} = $right_bond_strength{'='};
11759 # set strength of && a little higher than ||
11760 $right_bond_strength{'&&'} = NOMINAL;
11761 $left_bond_strength{'&&'} = $left_bond_strength{'||'} + 0.1;
11763 $left_bond_strength{';'} = VERY_STRONG;
11764 $right_bond_strength{';'} = VERY_WEAK;
11765 $left_bond_strength{'f'} = VERY_STRONG;
11767 # make right strength of for ';' a little less than '='
11768 # to make for contents break after the ';' to avoid this:
11769 # for ( $j = $number_of_fields - 1 ; $j < $item_count ; $j +=
11770 # $number_of_fields )
11771 # and make it weaker than ',' and 'and' too
11772 $right_bond_strength{'f'} = VERY_WEAK - 0.03;
11774 # The strengths of ?/: should be somewhere between
11775 # an '=' and a quote (NOMINAL),
11776 # make strength of ':' slightly less than '?' to help
11777 # break long chains of ? : after the colons
11778 $left_bond_strength{':'} = 0.4 * WEAK + 0.6 * NOMINAL;
11779 $right_bond_strength{':'} = NO_BREAK;
11780 $left_bond_strength{'?'} = $left_bond_strength{':'} + 0.01;
11781 $right_bond_strength{'?'} = NO_BREAK;
11783 $left_bond_strength{','} = VERY_STRONG;
11784 $right_bond_strength{','} = VERY_WEAK;
11786 # remaining digraphs and trigraphs not defined above
11787 @q = qw( :: <> ++ --);
11788 @left_bond_strength{@q} = (WEAK) x scalar(@q);
11789 @right_bond_strength{@q} = (STRONG) x scalar(@q);
11791 # Set bond strengths of certain keywords
11792 # make 'or', 'err', 'and' slightly weaker than a ','
11793 $left_bond_strength{'and'} = VERY_WEAK - 0.01;
11794 $left_bond_strength{'or'} = VERY_WEAK - 0.02;
11795 $left_bond_strength{'err'} = VERY_WEAK - 0.02;
11796 $left_bond_strength{'xor'} = NOMINAL;
11797 $right_bond_strength{'and'} = NOMINAL;
11798 $right_bond_strength{'or'} = NOMINAL;
11799 $right_bond_strength{'err'} = NOMINAL;
11800 $right_bond_strength{'xor'} = STRONG;
11802 #---------------------------------------------------------------
11803 # Bond Strength BEGIN Section 2.
11804 # Set binary rules for bond strengths between certain token types.
11805 #---------------------------------------------------------------
11807 # We have a little problem making tables which apply to the
11808 # container tokens. Here is a list of container tokens and
11811 # type tokens // meaning
11812 # { {, [, ( // indent
11813 # } }, ], ) // outdent
11814 # [ [ // left non-structural [ (enclosing an array index)
11815 # ] ] // right non-structural square bracket
11816 # ( ( // left non-structural paren
11817 # ) ) // right non-structural paren
11818 # L { // left non-structural curly brace (enclosing a key)
11819 # R } // right non-structural curly brace
11821 # Some rules apply to token types and some to just the token
11822 # itself. We solve the problem by combining type and token into a
11823 # new hash key for the container types.
11825 # If a rule applies to a token 'type' then we need to make rules
11826 # for each of these 'type.token' combinations:
11837 # If a rule applies to a token then we need to make rules for
11838 # these 'type.token' combinations:
11847 # allow long lines before final { in an if statement, as in:
11852 # Otherwise, the line before the { tends to be too short.
11854 $binary_bond_strength{'))'}{'{{'} = VERY_WEAK + 0.03;
11855 $binary_bond_strength{'(('}{'{{'} = NOMINAL;
11857 # break on something like '} (', but keep this stronger than a ','
11858 # example is in 'howe.pl'
11859 $binary_bond_strength{'R}'}{'(('} = 0.8 * VERY_WEAK + 0.2 * WEAK;
11860 $binary_bond_strength{'}}'}{'(('} = 0.8 * VERY_WEAK + 0.2 * WEAK;
11862 # keep matrix and hash indices together
11863 # but make them a little below STRONG to allow breaking open
11864 # something like {'some-word'}{'some-very-long-word'} at the }{
11866 $binary_bond_strength{']]'}{'[['} = 0.9 * STRONG + 0.1 * NOMINAL;
11867 $binary_bond_strength{']]'}{'L{'} = 0.9 * STRONG + 0.1 * NOMINAL;
11868 $binary_bond_strength{'R}'}{'[['} = 0.9 * STRONG + 0.1 * NOMINAL;
11869 $binary_bond_strength{'R}'}{'L{'} = 0.9 * STRONG + 0.1 * NOMINAL;
11871 # increase strength to the point where a break in the following
11872 # will be after the opening paren rather than at the arrow:
11874 $binary_bond_strength{'i'}{'->'} = 1.45 * STRONG;
11876 $binary_bond_strength{'))'}{'->'} = 0.1 * STRONG + 0.9 * NOMINAL;
11877 $binary_bond_strength{']]'}{'->'} = 0.1 * STRONG + 0.9 * NOMINAL;
11878 $binary_bond_strength{'})'}{'->'} = 0.1 * STRONG + 0.9 * NOMINAL;
11879 $binary_bond_strength{'}]'}{'->'} = 0.1 * STRONG + 0.9 * NOMINAL;
11880 $binary_bond_strength{'}}'}{'->'} = 0.1 * STRONG + 0.9 * NOMINAL;
11881 $binary_bond_strength{'R}'}{'->'} = 0.1 * STRONG + 0.9 * NOMINAL;
11883 $binary_bond_strength{'))'}{'[['} = 0.2 * STRONG + 0.8 * NOMINAL;
11884 $binary_bond_strength{'})'}{'[['} = 0.2 * STRONG + 0.8 * NOMINAL;
11885 $binary_bond_strength{'))'}{'{['} = 0.2 * STRONG + 0.8 * NOMINAL;
11886 $binary_bond_strength{'})'}{'{['} = 0.2 * STRONG + 0.8 * NOMINAL;
11888 #---------------------------------------------------------------
11889 # Binary NO_BREAK rules
11890 #---------------------------------------------------------------
11892 # use strict requires that bare word and => not be separated
11893 $binary_bond_strength{'C'}{'=>'} = NO_BREAK;
11894 $binary_bond_strength{'U'}{'=>'} = NO_BREAK;
11896 # Never break between a bareword and a following paren because
11897 # perl may give an error. For example, if a break is placed
11898 # between 'to_filehandle' and its '(' the following line will
11899 # give a syntax error [Carp.pm]: my( $no) =fileno(
11900 # to_filehandle( $in)) ;
11901 $binary_bond_strength{'C'}{'(('} = NO_BREAK;
11902 $binary_bond_strength{'C'}{'{('} = NO_BREAK;
11903 $binary_bond_strength{'U'}{'(('} = NO_BREAK;
11904 $binary_bond_strength{'U'}{'{('} = NO_BREAK;
11906 # use strict requires that bare word within braces not start new
11908 $binary_bond_strength{'L{'}{'w'} = NO_BREAK;
11910 $binary_bond_strength{'w'}{'R}'} = NO_BREAK;
11912 # use strict requires that bare word and => not be separated
11913 $binary_bond_strength{'w'}{'=>'} = NO_BREAK;
11915 # use strict does not allow separating type info from trailing { }
11916 # testfile is readmail.pl
11917 $binary_bond_strength{'t'}{'L{'} = NO_BREAK;
11918 $binary_bond_strength{'i'}{'L{'} = NO_BREAK;
11920 # As a defensive measure, do not break between a '(' and a
11921 # filehandle. In some cases, this can cause an error. For
11922 # example, the following program works:
11929 # But this program fails:
11937 # This is normally only a problem with the 'extrude' option
11938 $binary_bond_strength{'(('}{'Y'} = NO_BREAK;
11939 $binary_bond_strength{'{('}{'Y'} = NO_BREAK;
11941 # never break between sub name and opening paren
11942 $binary_bond_strength{'w'}{'(('} = NO_BREAK;
11943 $binary_bond_strength{'w'}{'{('} = NO_BREAK;
11945 # keep '}' together with ';'
11946 $binary_bond_strength{'}}'}{';'} = NO_BREAK;
11948 # Breaking before a ++ can cause perl to guess wrong. For
11949 # example the following line will cause a syntax error
11950 # with -extrude if we break between '$i' and '++' [fixstyle2]
11951 # print( ( $i++ & 1 ) ? $_ : ( $change{$_} || $_ ) );
11952 $nobreak_lhs{'++'} = NO_BREAK;
11954 # Do not break before a possible file handle
11955 $nobreak_lhs{'Z'} = NO_BREAK;
11957 # use strict hates bare words on any new line. For
11958 # example, a break before the underscore here provokes the
11959 # wrath of use strict:
11960 # if ( -r $fn && ( -s _ || $AllowZeroFilesize)) {
11961 $nobreak_rhs{'F'} = NO_BREAK;
11962 $nobreak_rhs{'CORE::'} = NO_BREAK;
11964 #---------------------------------------------------------------
11965 # Bond Strength BEGIN Section 3.
11966 # Define tables and values for applying a small bias to the above
11968 #---------------------------------------------------------------
11969 # Adding a small 'bias' to strengths is a simple way to make a line
11970 # break at the first of a sequence of identical terms. For
11971 # example, to force long string of conditional operators to break
11972 # with each line ending in a ':', we can add a small number to the
11973 # bond strength of each ':' (colon.t)
11974 @bias_tokens = qw( : && || f and or . ); # tokens which get bias
11975 $delta_bias = 0.0001; # a very small strength level
11978 } ## end sub initialize_bond_strength_hashes
11980 sub set_bond_strengths {
11982 # patch-its always ok to break at end of line
11983 $nobreak_to_go[$max_index_to_go] = 0;
11985 # we start a new set of bias values for each line
11987 @bias{@bias_tokens} = (0) x scalar(@bias_tokens);
11988 my $code_bias = -.01; # bias for closing block braces
11993 my $last_nonblank_type = $type;
11994 my $last_nonblank_token = $token;
11995 my $list_str = $left_bond_strength{'?'};
11997 my ( $block_type, $i_next, $i_next_nonblank, $next_nonblank_token,
11998 $next_nonblank_type, $next_token, $next_type, $total_nesting_depth,
12001 # main loop to compute bond strengths between each pair of tokens
12002 foreach my $i ( 0 .. $max_index_to_go ) {
12003 $last_type = $type;
12004 if ( $type ne 'b' ) {
12005 $last_nonblank_type = $type;
12006 $last_nonblank_token = $token;
12008 $type = $types_to_go[$i];
12010 # strength on both sides of a blank is the same
12011 if ( $type eq 'b' && $last_type ne 'b' ) {
12012 $bond_strength_to_go[$i] = $bond_strength_to_go[ $i - 1 ];
12016 $token = $tokens_to_go[$i];
12017 $block_type = $block_type_to_go[$i];
12019 $next_type = $types_to_go[$i_next];
12020 $next_token = $tokens_to_go[$i_next];
12021 $total_nesting_depth = $nesting_depth_to_go[$i_next];
12022 $i_next_nonblank = ( ( $next_type eq 'b' ) ? $i + 2 : $i + 1 );
12023 $next_nonblank_type = $types_to_go[$i_next_nonblank];
12024 $next_nonblank_token = $tokens_to_go[$i_next_nonblank];
12026 # We are computing the strength of the bond between the current
12027 # token and the NEXT token.
12029 #---------------------------------------------------------------
12030 # Bond Strength Section 1:
12031 # First Approximation.
12032 # Use minimum of individual left and right tabulated bond
12034 #---------------------------------------------------------------
12035 my $bsr = $right_bond_strength{$type};
12036 my $bsl = $left_bond_strength{$next_nonblank_type};
12038 # define right bond strengths of certain keywords
12039 if ( $type eq 'k' && defined( $right_bond_strength{$token} ) ) {
12040 $bsr = $right_bond_strength{$token};
12042 elsif ( $token eq 'ne' or $token eq 'eq' ) {
12046 # set terminal bond strength to the nominal value
12047 # this will cause good preceding breaks to be retained
12048 if ( $i_next_nonblank > $max_index_to_go ) {
12052 # define right bond strengths of certain keywords
12053 if ( $next_nonblank_type eq 'k'
12054 && defined( $left_bond_strength{$next_nonblank_token} ) )
12056 $bsl = $left_bond_strength{$next_nonblank_token};
12058 elsif ($next_nonblank_token eq 'ne'
12059 or $next_nonblank_token eq 'eq' )
12063 elsif ( $is_lt_gt_le_ge{$next_nonblank_token} ) {
12064 $bsl = 0.9 * NOMINAL + 0.1 * STRONG;
12067 # Use the minimum of the left and right strengths. Note: it might
12068 # seem that we would want to keep a NO_BREAK if either token has
12069 # this value. This didn't work, for example because in an arrow
12070 # list, it prevents the comma from separating from the following
12071 # bare word (which is probably quoted by its arrow). So necessary
12072 # NO_BREAK's have to be handled as special cases in the final
12074 if ( !defined($bsr) ) { $bsr = VERY_STRONG }
12075 if ( !defined($bsl) ) { $bsl = VERY_STRONG }
12076 my $bond_str = ( $bsr < $bsl ) ? $bsr : $bsl;
12077 my $bond_str_1 = $bond_str;
12079 #---------------------------------------------------------------
12080 # Bond Strength Section 2:
12081 # Apply hardwired rules..
12082 #---------------------------------------------------------------
12084 # Patch to put terminal or clauses on a new line: Weaken the bond
12085 # at an || followed by die or similar keyword to make the terminal
12086 # or clause fall on a new line, like this:
12088 # my $class = shift
12089 # || die "Cannot add broadcast: No class identifier found";
12091 # Otherwise the break will be at the previous '=' since the || and
12092 # = have the same starting strength and the or is biased, like
12096 # shift || die "Cannot add broadcast: No class identifier found";
12098 # In any case if the user places a break at either the = or the ||
12099 # it should remain there.
12100 if ( $type eq '||' || $type eq 'k' && $token eq 'or' ) {
12101 if ( $next_nonblank_token =~ /^(die|confess|croak|warn)$/ ) {
12102 if ( $want_break_before{$token} && $i > 0 ) {
12103 $bond_strength_to_go[ $i - 1 ] -= $delta_bias;
12106 $bond_str -= $delta_bias;
12111 # good to break after end of code blocks
12112 if ( $type eq '}' && $block_type && $next_nonblank_type ne ';' ) {
12114 $bond_str = 0.5 * WEAK + 0.5 * VERY_WEAK + $code_bias;
12115 $code_bias += $delta_bias;
12118 if ( $type eq 'k' ) {
12120 # allow certain control keywords to stand out
12121 if ( $next_nonblank_type eq 'k'
12122 && $is_last_next_redo_return{$token} )
12124 $bond_str = 0.45 * WEAK + 0.55 * VERY_WEAK;
12127 # Don't break after keyword my. This is a quick fix for a
12128 # rare problem with perl. An example is this line from file
12131 # foreach my $question( Debian::DebConf::ConfigDb::gettree(
12132 # $this->{'question'} ) )
12134 if ( $token eq 'my' ) {
12135 $bond_str = NO_BREAK;
12140 # good to break before 'if', 'unless', etc
12141 if ( $is_if_brace_follower{$next_nonblank_token} ) {
12142 $bond_str = VERY_WEAK;
12145 if ( $next_nonblank_type eq 'k' && $type ne 'CORE::' ) {
12147 # FIXME: needs more testing
12148 if ( $is_keyword_returning_list{$next_nonblank_token} ) {
12149 $bond_str = $list_str if ( $bond_str > $list_str );
12152 # keywords like 'unless', 'if', etc, within statements
12154 if ( $is_good_keyword_breakpoint{$next_nonblank_token} ) {
12155 $bond_str = VERY_WEAK / 1.05;
12159 # try not to break before a comma-arrow
12160 elsif ( $next_nonblank_type eq '=>' ) {
12161 if ( $bond_str < STRONG ) { $bond_str = STRONG }
12164 #---------------------------------------------------------------
12165 # Additional hardwired NOBREAK rules
12166 #---------------------------------------------------------------
12168 # map1.t -- correct for a quirk in perl
12170 && $next_nonblank_type eq 'i'
12171 && $last_nonblank_type eq 'k'
12172 && $is_sort_map_grep{$last_nonblank_token} )
12174 # /^(sort|map|grep)$/ )
12176 $bond_str = NO_BREAK;
12179 # extrude.t: do not break before paren at:
12181 if ( $last_nonblank_type eq 'F' && $next_nonblank_token eq '(' ) {
12182 $bond_str = NO_BREAK;
12185 # in older version of perl, use strict can cause problems with
12186 # breaks before bare words following opening parens. For example,
12187 # this will fail under older versions if a break is made between
12188 # '(' and 'MAIL': use strict; open( MAIL, "a long filename or
12189 # command"); close MAIL;
12190 if ( $type eq '{' ) {
12192 if ( $token eq '(' && $next_nonblank_type eq 'w' ) {
12194 # but it's fine to break if the word is followed by a '=>'
12195 # or if it is obviously a sub call
12196 my $i_next_next_nonblank = $i_next_nonblank + 1;
12197 my $next_next_type = $types_to_go[$i_next_next_nonblank];
12198 if ( $next_next_type eq 'b'
12199 && $i_next_nonblank < $max_index_to_go )
12201 $i_next_next_nonblank++;
12202 $next_next_type = $types_to_go[$i_next_next_nonblank];
12205 # We'll check for an old breakpoint and keep a leading
12206 # bareword if it was that way in the input file.
12207 # Presumably it was ok that way. For example, the
12208 # following would remain unchanged:
12211 # January, February, March, April,
12212 # May, June, July, August,
12213 # September, October, November, December,
12216 # This should be sufficient:
12218 !$old_breakpoint_to_go[$i]
12219 && ( $next_next_type eq ','
12220 || $next_next_type eq '}' )
12223 $bond_str = NO_BREAK;
12228 # Do not break between a possible filehandle and a ? or / and do
12229 # not introduce a break after it if there is no blank
12231 elsif ( $type eq 'Z' ) {
12236 # if there is no blank and we do not want one. Examples:
12237 # print $x++ # do not break after $x
12238 # print HTML"HELLO" # break ok after HTML
12241 && defined( $want_left_space{$next_type} )
12242 && $want_left_space{$next_type} == WS_NO
12245 # or we might be followed by the start of a quote
12246 || $next_nonblank_type =~ /^[\/\?]$/
12249 $bond_str = NO_BREAK;
12253 # Breaking before a ? before a quote can cause trouble if
12254 # they are not separated by a blank.
12255 # Example: a syntax error occurs if you break before the ? here
12256 # my$logic=join$all?' && ':' || ',@regexps;
12257 # From: Professional_Perl_Programming_Code/multifind.pl
12258 if ( $next_nonblank_type eq '?' ) {
12259 $bond_str = NO_BREAK
12260 if ( $types_to_go[ $i_next_nonblank + 1 ] eq 'Q' );
12263 # Breaking before a . followed by a number
12264 # can cause trouble if there is no intervening space
12265 # Example: a syntax error occurs if you break before the .2 here
12266 # $str .= pack($endian.2, ensurrogate($ord));
12267 # From: perl58/Unicode.pm
12268 elsif ( $next_nonblank_type eq '.' ) {
12269 $bond_str = NO_BREAK
12270 if ( $types_to_go[ $i_next_nonblank + 1 ] eq 'n' );
12273 my $bond_str_2 = $bond_str;
12275 #---------------------------------------------------------------
12276 # End of hardwired rules
12277 #---------------------------------------------------------------
12279 #---------------------------------------------------------------
12280 # Bond Strength Section 3:
12281 # Apply table rules. These have priority over the above
12283 #---------------------------------------------------------------
12285 my $tabulated_bond_str;
12287 my $rtype = $next_nonblank_type;
12288 if ( $token =~ /^[\(\[\{\)\]\}]/ ) { $ltype = $type . $token }
12289 if ( $next_nonblank_token =~ /^[\(\[\{\)\]\}]/ ) {
12290 $rtype = $next_nonblank_type . $next_nonblank_token;
12293 if ( $binary_bond_strength{$ltype}{$rtype} ) {
12294 $bond_str = $binary_bond_strength{$ltype}{$rtype};
12295 $tabulated_bond_str = $bond_str;
12298 if ( $nobreak_rhs{$ltype} || $nobreak_lhs{$rtype} ) {
12299 $bond_str = NO_BREAK;
12300 $tabulated_bond_str = $bond_str;
12302 my $bond_str_3 = $bond_str;
12304 # If the hardwired rules conflict with the tabulated bond
12305 # strength then there is an inconsistency that should be fixed
12306 FORMATTER_DEBUG_FLAG_BOND_TABLES
12307 && $tabulated_bond_str
12309 && $bond_str_1 != $bond_str_2
12310 && $bond_str_2 != $tabulated_bond_str
12313 "BOND_TABLES: ltype=$ltype rtype=$rtype $bond_str_1->$bond_str_2->$bond_str_3\n";
12316 #-----------------------------------------------------------------
12317 # Bond Strength Section 4:
12318 # Modify strengths of certain tokens which often occur in sequence
12319 # by adding a small bias to each one in turn so that the breaks
12320 # occur from left to right.
12322 # Note that we only changing strengths by small amounts here,
12323 # and usually increasing, so we should not be altering any NO_BREAKs.
12324 # Other routines which check for NO_BREAKs will use a tolerance
12325 # of one to avoid any problem.
12326 #-----------------------------------------------------------------
12328 # The bias tables use special keys
12329 my $left_key = bias_table_key( $type, $token );
12331 bias_table_key( $next_nonblank_type, $next_nonblank_token );
12333 # add any bias set by sub scan_list at old comma break points.
12334 if ( $type eq ',' ) { $bond_str += $bond_strength_to_go[$i] }
12337 elsif ( defined( $bias{$left_key} ) ) {
12338 if ( !$want_break_before{$left_key} ) {
12339 $bias{$left_key} += $delta_bias;
12340 $bond_str += $bias{$left_key};
12345 if ( defined( $bias{$right_key} ) ) {
12346 if ( $want_break_before{$right_key} ) {
12348 # for leading '.' align all but 'short' quotes; the idea
12349 # is to not place something like "\n" on a single line.
12350 if ( $right_key eq '.' ) {
12352 $last_nonblank_type eq '.'
12355 $rOpts_short_concatenation_item_length )
12356 && ( !$is_closing_token{$token} )
12359 $bias{$right_key} += $delta_bias;
12363 $bias{$right_key} += $delta_bias;
12365 $bond_str += $bias{$right_key};
12368 my $bond_str_4 = $bond_str;
12370 #---------------------------------------------------------------
12371 # Bond Strength Section 5:
12372 # Fifth Approximation.
12373 # Take nesting depth into account by adding the nesting depth
12374 # to the bond strength.
12375 #---------------------------------------------------------------
12378 if ( defined($bond_str) && !$nobreak_to_go[$i] ) {
12379 if ( $total_nesting_depth > 0 ) {
12380 $strength = $bond_str + $total_nesting_depth;
12383 $strength = $bond_str;
12387 $strength = NO_BREAK;
12390 #---------------------------------------------------------------
12391 # Bond Strength Section 6:
12392 # Sixth Approximation. Welds.
12393 #---------------------------------------------------------------
12395 # Do not allow a break within welds,
12396 if ( weld_len_right_to_go($i) ) { $strength = NO_BREAK }
12398 # But encourage breaking after opening welded tokens
12399 elsif ( weld_len_left_to_go($i) && $is_opening_token{$token} ) {
12403 # always break after side comment
12404 if ( $type eq '#' ) { $strength = 0 }
12406 $bond_strength_to_go[$i] = $strength;
12408 FORMATTER_DEBUG_FLAG_BOND && do {
12409 my $str = substr( $token, 0, 15 );
12410 $str .= ' ' x ( 16 - length($str) );
12412 "BOND: i=$i $str $type $next_nonblank_type depth=$total_nesting_depth strength=$bond_str_1 -> $bond_str_2 -> $bond_str_3 -> $bond_str_4 $bond_str -> $strength \n";
12416 } ## end sub set_bond_strengths
12419 sub pad_array_to_go {
12421 # to simplify coding in scan_list and set_bond_strengths, it helps
12422 # to create some extra blank tokens at the end of the arrays
12423 $tokens_to_go[ $max_index_to_go + 1 ] = '';
12424 $tokens_to_go[ $max_index_to_go + 2 ] = '';
12425 $types_to_go[ $max_index_to_go + 1 ] = 'b';
12426 $types_to_go[ $max_index_to_go + 2 ] = 'b';
12427 $nesting_depth_to_go[ $max_index_to_go + 1 ] =
12428 $nesting_depth_to_go[$max_index_to_go];
12431 if ( $is_closing_type{ $types_to_go[$max_index_to_go] } ) {
12432 if ( $nesting_depth_to_go[$max_index_to_go] <= 0 ) {
12434 # shouldn't happen:
12435 unless ( get_saw_brace_error() ) {
12437 "Program bug in scan_list: hit nesting error which should have been caught\n"
12439 report_definite_bug();
12443 $nesting_depth_to_go[ $max_index_to_go + 1 ] -= 1;
12448 elsif ( $is_opening_type{ $types_to_go[$max_index_to_go] } ) {
12449 $nesting_depth_to_go[ $max_index_to_go + 1 ] += 1;
12454 { # begin scan_list
12457 $block_type, $current_depth,
12459 $i_last_nonblank_token, $last_colon_sequence_number,
12460 $last_nonblank_token, $last_nonblank_type,
12461 $last_nonblank_block_type, $last_old_breakpoint_count,
12462 $minimum_depth, $next_nonblank_block_type,
12463 $next_nonblank_token, $next_nonblank_type,
12464 $old_breakpoint_count, $starting_breakpoint_count,
12465 $starting_depth, $token,
12466 $type, $type_sequence,
12470 @breakpoint_stack, @breakpoint_undo_stack,
12471 @comma_index, @container_type,
12472 @identifier_count_stack, @index_before_arrow,
12473 @interrupted_list, @item_count_stack,
12474 @last_comma_index, @last_dot_index,
12475 @last_nonblank_type, @old_breakpoint_count_stack,
12476 @opening_structure_index_stack, @rfor_semicolon_list,
12477 @has_old_logical_breakpoints, @rand_or_list,
12481 # routine to define essential variables when we go 'up' to
12483 sub check_for_new_minimum_depth {
12485 if ( $depth < $minimum_depth ) {
12487 $minimum_depth = $depth;
12489 # these arrays need not retain values between calls
12490 $breakpoint_stack[$depth] = $starting_breakpoint_count;
12491 $container_type[$depth] = "";
12492 $identifier_count_stack[$depth] = 0;
12493 $index_before_arrow[$depth] = -1;
12494 $interrupted_list[$depth] = 1;
12495 $item_count_stack[$depth] = 0;
12496 $last_nonblank_type[$depth] = "";
12497 $opening_structure_index_stack[$depth] = -1;
12499 $breakpoint_undo_stack[$depth] = undef;
12500 $comma_index[$depth] = undef;
12501 $last_comma_index[$depth] = undef;
12502 $last_dot_index[$depth] = undef;
12503 $old_breakpoint_count_stack[$depth] = undef;
12504 $has_old_logical_breakpoints[$depth] = 0;
12505 $rand_or_list[$depth] = [];
12506 $rfor_semicolon_list[$depth] = [];
12507 $i_equals[$depth] = -1;
12509 # these arrays must retain values between calls
12510 if ( !defined( $has_broken_sublist[$depth] ) ) {
12511 $dont_align[$depth] = 0;
12512 $has_broken_sublist[$depth] = 0;
12513 $want_comma_break[$depth] = 0;
12519 # routine to decide which commas to break at within a container;
12521 # $bp_count = number of comma breakpoints set
12522 # $do_not_break_apart = a flag indicating if container need not
12524 sub set_comma_breakpoints {
12528 my $do_not_break_apart = 0;
12531 if ( $item_count_stack[$dd] ) {
12533 # handle commas not in containers...
12534 if ( $dont_align[$dd] ) {
12535 do_uncontained_comma_breaks($dd);
12538 # handle commas within containers...
12540 my $fbc = $forced_breakpoint_count;
12542 # always open comma lists not preceded by keywords,
12543 # barewords, identifiers (that is, anything that doesn't
12544 # look like a function call)
12545 my $must_break_open = $last_nonblank_type[$dd] !~ /^[kwiU]$/;
12547 set_comma_breakpoints_do(
12549 $opening_structure_index_stack[$dd],
12551 $item_count_stack[$dd],
12552 $identifier_count_stack[$dd],
12554 $next_nonblank_type,
12555 $container_type[$dd],
12556 $interrupted_list[$dd],
12557 \$do_not_break_apart,
12560 $bp_count = $forced_breakpoint_count - $fbc;
12561 $do_not_break_apart = 0 if $must_break_open;
12564 return ( $bp_count, $do_not_break_apart );
12567 sub do_uncontained_comma_breaks {
12569 # Handle commas not in containers...
12570 # This is a catch-all routine for commas that we
12571 # don't know what to do with because the don't fall
12572 # within containers. We will bias the bond strength
12573 # to break at commas which ended lines in the input
12574 # file. This usually works better than just trying
12575 # to put as many items on a line as possible. A
12576 # downside is that if the input file is garbage it
12577 # won't work very well. However, the user can always
12578 # prevent following the old breakpoints with the
12582 my $old_comma_break_count = 0;
12583 foreach my $ii ( @{ $comma_index[$dd] } ) {
12584 if ( $old_breakpoint_to_go[$ii] ) {
12585 $old_comma_break_count++;
12586 $bond_strength_to_go[$ii] = $bias;
12588 # reduce bias magnitude to force breaks in order
12593 # Also put a break before the first comma if
12594 # (1) there was a break there in the input, and
12595 # (2) there was exactly one old break before the first comma break
12596 # (3) OLD: there are multiple old comma breaks
12597 # (3) NEW: there are one or more old comma breaks (see return example)
12599 # For example, we will follow the user and break after
12600 # 'print' in this snippet:
12602 # "conformability (Not the same dimension)\n",
12603 # "\t", $have, " is ", text_unit($hu), "\n",
12604 # "\t", $want, " is ", text_unit($wu), "\n",
12607 # Another example, just one comma, where we will break after
12610 # $x * cos($a) - $y * sin($a),
12611 # $x * sin($a) + $y * cos($a);
12613 # Breaking a print statement:
12615 # ( $? & 127 ) ? " (SIG#" . ( $? & 127 ) . ")" : "",
12616 # ( $? & 128 ) ? " -- core dumped" : "", "\n";
12618 # But we will not force a break after the opening paren here
12619 # (causes a blinker):
12620 # $heap->{stream}->set_output_filter(
12621 # poe::filter::reference->new('myotherfreezer') ),
12624 my $i_first_comma = $comma_index[$dd]->[0];
12625 if ( $old_breakpoint_to_go[$i_first_comma] ) {
12626 my $level_comma = $levels_to_go[$i_first_comma];
12629 for ( my $ii = $i_first_comma - 1 ; $ii >= 0 ; $ii -= 1 ) {
12630 if ( $old_breakpoint_to_go[$ii] ) {
12632 last if ( $obp_count > 1 );
12634 if ( $levels_to_go[$ii] == $level_comma );
12638 # Changed rule from multiple old commas to just one here:
12639 if ( $ibreak >= 0 && $obp_count == 1 && $old_comma_break_count > 0 )
12641 # Do not to break before an opening token because
12642 # it can lead to "blinkers".
12643 my $ibreakm = $ibreak;
12644 $ibreakm-- if ( $types_to_go[$ibreakm] eq 'b' );
12645 if ( $ibreakm >= 0 && $types_to_go[$ibreakm] !~ /^[\(\{\[L]$/ )
12647 set_forced_breakpoint($ibreak);
12654 my %is_logical_container;
12657 my @q = qw# if elsif unless while and or err not && | || ? : ! #;
12658 @is_logical_container{@q} = (1) x scalar(@q);
12661 sub set_for_semicolon_breakpoints {
12663 foreach ( @{ $rfor_semicolon_list[$dd] } ) {
12664 set_forced_breakpoint($_);
12669 sub set_logical_breakpoints {
12672 $item_count_stack[$dd] == 0
12673 && $is_logical_container{ $container_type[$dd] }
12675 || $has_old_logical_breakpoints[$dd]
12679 # Look for breaks in this order:
12682 foreach my $i ( 0 .. 3 ) {
12683 if ( $rand_or_list[$dd][$i] ) {
12684 foreach ( @{ $rand_or_list[$dd][$i] } ) {
12685 set_forced_breakpoint($_);
12688 # break at any 'if' and 'unless' too
12689 foreach ( @{ $rand_or_list[$dd][4] } ) {
12690 set_forced_breakpoint($_);
12692 $rand_or_list[$dd] = [];
12700 sub is_unbreakable_container {
12702 # never break a container of one of these types
12703 # because bad things can happen (map1.t)
12705 return $is_sort_map_grep{ $container_type[$dd] };
12710 # This routine is responsible for setting line breaks for all lists,
12711 # so that hierarchical structure can be displayed and so that list
12712 # items can be vertically aligned. The output of this routine is
12713 # stored in the array @forced_breakpoint_to_go, which is used to set
12714 # final breakpoints.
12716 $starting_depth = $nesting_depth_to_go[0];
12719 $current_depth = $starting_depth;
12721 $last_colon_sequence_number = -1;
12722 $last_nonblank_token = ';';
12723 $last_nonblank_type = ';';
12724 $last_nonblank_block_type = ' ';
12725 $last_old_breakpoint_count = 0;
12726 $minimum_depth = $current_depth + 1; # forces update in check below
12727 $old_breakpoint_count = 0;
12728 $starting_breakpoint_count = $forced_breakpoint_count;
12731 $type_sequence = '';
12733 my $total_depth_variation = 0;
12734 my $i_old_assignment_break;
12735 my $depth_last = $starting_depth;
12737 check_for_new_minimum_depth($current_depth);
12739 my $is_long_line = excess_line_length( 0, $max_index_to_go ) > 0;
12740 my $want_previous_breakpoint = -1;
12742 my $saw_good_breakpoint;
12743 my $i_line_end = -1;
12744 my $i_line_start = -1;
12746 # loop over all tokens in this batch
12747 while ( ++$i <= $max_index_to_go ) {
12748 if ( $type ne 'b' ) {
12749 $i_last_nonblank_token = $i - 1;
12750 $last_nonblank_type = $type;
12751 $last_nonblank_token = $token;
12752 $last_nonblank_block_type = $block_type;
12753 } ## end if ( $type ne 'b' )
12754 $type = $types_to_go[$i];
12755 $block_type = $block_type_to_go[$i];
12756 $token = $tokens_to_go[$i];
12757 $type_sequence = $type_sequence_to_go[$i];
12758 my $next_type = $types_to_go[ $i + 1 ];
12759 my $next_token = $tokens_to_go[ $i + 1 ];
12760 my $i_next_nonblank = ( ( $next_type eq 'b' ) ? $i + 2 : $i + 1 );
12761 $next_nonblank_type = $types_to_go[$i_next_nonblank];
12762 $next_nonblank_token = $tokens_to_go[$i_next_nonblank];
12763 $next_nonblank_block_type = $block_type_to_go[$i_next_nonblank];
12765 # set break if flag was set
12766 if ( $want_previous_breakpoint >= 0 ) {
12767 set_forced_breakpoint($want_previous_breakpoint);
12768 $want_previous_breakpoint = -1;
12771 $last_old_breakpoint_count = $old_breakpoint_count;
12772 if ( $old_breakpoint_to_go[$i] ) {
12774 $i_line_start = $i_next_nonblank;
12776 $old_breakpoint_count++;
12778 # Break before certain keywords if user broke there and
12779 # this is a 'safe' break point. The idea is to retain
12780 # any preferred breaks for sequential list operations,
12781 # like a schwartzian transform.
12782 if ($rOpts_break_at_old_keyword_breakpoints) {
12784 $next_nonblank_type eq 'k'
12785 && $is_keyword_returning_list{$next_nonblank_token}
12786 && ( $type =~ /^[=\)\]\}Riw]$/
12788 && $is_keyword_returning_list{$token} )
12792 # we actually have to set this break next time through
12793 # the loop because if we are at a closing token (such
12794 # as '}') which forms a one-line block, this break might
12796 $want_previous_breakpoint = $i;
12797 } ## end if ( $next_nonblank_type...)
12798 } ## end if ($rOpts_break_at_old_keyword_breakpoints)
12800 # Break before attributes if user broke there
12801 if ($rOpts_break_at_old_attribute_breakpoints) {
12802 if ( $next_nonblank_type eq 'A' ) {
12803 $want_previous_breakpoint = $i;
12807 # remember an = break as possible good break point
12808 if ( $is_assignment{$type} ) {
12809 $i_old_assignment_break = $i;
12811 elsif ( $is_assignment{$next_nonblank_type} ) {
12812 $i_old_assignment_break = $i_next_nonblank;
12814 } ## end if ( $old_breakpoint_to_go...)
12816 next if ( $type eq 'b' );
12817 $depth = $nesting_depth_to_go[ $i + 1 ];
12819 $total_depth_variation += abs( $depth - $depth_last );
12820 $depth_last = $depth;
12822 # safety check - be sure we always break after a comment
12823 # Shouldn't happen .. an error here probably means that the
12824 # nobreak flag did not get turned off correctly during
12826 if ( $type eq '#' ) {
12827 if ( $i != $max_index_to_go ) {
12829 "Non-fatal program bug: backup logic needed to break after a comment\n"
12831 report_definite_bug();
12832 $nobreak_to_go[$i] = 0;
12833 set_forced_breakpoint($i);
12834 } ## end if ( $i != $max_index_to_go)
12835 } ## end if ( $type eq '#' )
12837 # Force breakpoints at certain tokens in long lines.
12838 # Note that such breakpoints will be undone later if these tokens
12839 # are fully contained within parens on a line.
12842 # break before a keyword within a line
12846 # if one of these keywords:
12847 && $token =~ /^(if|unless|while|until|for)$/
12849 # but do not break at something like '1 while'
12850 && ( $last_nonblank_type ne 'n' || $i > 2 )
12852 # and let keywords follow a closing 'do' brace
12853 && $last_nonblank_block_type ne 'do'
12858 # or container is broken (by side-comment, etc)
12859 || ( $next_nonblank_token eq '('
12860 && $mate_index_to_go[$i_next_nonblank] < $i )
12864 set_forced_breakpoint( $i - 1 );
12865 } ## end if ( $type eq 'k' && $i...)
12867 # remember locations of -> if this is a pre-broken method chain
12868 if ( $type eq '->' ) {
12869 if ($rOpts_break_at_old_method_breakpoints) {
12871 # Case 1: look for lines with leading pointers
12872 if ( $i == $i_line_start ) {
12873 set_forced_breakpoint( $i - 1 );
12876 # Case 2: look for cuddled pointer calls
12879 # look for old lines with leading ')->' or ') ->'
12880 # and, when found, force a break before the
12881 # opening paren and after the previous closing paren.
12883 $types_to_go[$i_line_start] eq '}'
12884 && ( $i == $i_line_start + 1
12885 || $i == $i_line_start + 2
12886 && $types_to_go[ $i - 1 ] eq 'b' )
12889 set_forced_breakpoint( $i_line_start - 1 );
12890 set_forced_breakpoint(
12891 $mate_index_to_go[$i_line_start] );
12895 } ## end if ( $type eq '->' )
12897 # remember locations of '||' and '&&' for possible breaks if we
12898 # decide this is a long logical expression.
12899 elsif ( $type eq '||' ) {
12900 push @{ $rand_or_list[$depth][2] }, $i;
12901 ++$has_old_logical_breakpoints[$depth]
12902 if ( ( $i == $i_line_start || $i == $i_line_end )
12903 && $rOpts_break_at_old_logical_breakpoints );
12904 } ## end elsif ( $type eq '||' )
12905 elsif ( $type eq '&&' ) {
12906 push @{ $rand_or_list[$depth][3] }, $i;
12907 ++$has_old_logical_breakpoints[$depth]
12908 if ( ( $i == $i_line_start || $i == $i_line_end )
12909 && $rOpts_break_at_old_logical_breakpoints );
12910 } ## end elsif ( $type eq '&&' )
12911 elsif ( $type eq 'f' ) {
12912 push @{ $rfor_semicolon_list[$depth] }, $i;
12914 elsif ( $type eq 'k' ) {
12915 if ( $token eq 'and' ) {
12916 push @{ $rand_or_list[$depth][1] }, $i;
12917 ++$has_old_logical_breakpoints[$depth]
12918 if ( ( $i == $i_line_start || $i == $i_line_end )
12919 && $rOpts_break_at_old_logical_breakpoints );
12920 } ## end if ( $token eq 'and' )
12922 # break immediately at 'or's which are probably not in a logical
12923 # block -- but we will break in logical breaks below so that
12924 # they do not add to the forced_breakpoint_count
12925 elsif ( $token eq 'or' ) {
12926 push @{ $rand_or_list[$depth][0] }, $i;
12927 ++$has_old_logical_breakpoints[$depth]
12928 if ( ( $i == $i_line_start || $i == $i_line_end )
12929 && $rOpts_break_at_old_logical_breakpoints );
12930 if ( $is_logical_container{ $container_type[$depth] } ) {
12933 if ($is_long_line) { set_forced_breakpoint($i) }
12934 elsif ( ( $i == $i_line_start || $i == $i_line_end )
12935 && $rOpts_break_at_old_logical_breakpoints )
12937 $saw_good_breakpoint = 1;
12939 } ## end else [ if ( $is_logical_container...)]
12940 } ## end elsif ( $token eq 'or' )
12941 elsif ( $token eq 'if' || $token eq 'unless' ) {
12942 push @{ $rand_or_list[$depth][4] }, $i;
12943 if ( ( $i == $i_line_start || $i == $i_line_end )
12944 && $rOpts_break_at_old_logical_breakpoints )
12946 set_forced_breakpoint($i);
12948 } ## end elsif ( $token eq 'if' ||...)
12949 } ## end elsif ( $type eq 'k' )
12950 elsif ( $is_assignment{$type} ) {
12951 $i_equals[$depth] = $i;
12954 if ($type_sequence) {
12956 # handle any postponed closing breakpoints
12957 if ( $token =~ /^[\)\]\}\:]$/ ) {
12958 if ( $type eq ':' ) {
12959 $last_colon_sequence_number = $type_sequence;
12961 # retain break at a ':' line break
12962 if ( ( $i == $i_line_start || $i == $i_line_end )
12963 && $rOpts_break_at_old_ternary_breakpoints )
12966 set_forced_breakpoint($i);
12968 # break at previous '='
12969 if ( $i_equals[$depth] > 0 ) {
12970 set_forced_breakpoint( $i_equals[$depth] );
12971 $i_equals[$depth] = -1;
12973 } ## end if ( ( $i == $i_line_start...))
12974 } ## end if ( $type eq ':' )
12975 if ( defined( $postponed_breakpoint{$type_sequence} ) ) {
12976 my $inc = ( $type eq ':' ) ? 0 : 1;
12977 set_forced_breakpoint( $i - $inc );
12978 delete $postponed_breakpoint{$type_sequence};
12980 } ## end if ( $token =~ /^[\)\]\}\:]$/[{[(])
12982 # set breaks at ?/: if they will get separated (and are
12983 # not a ?/: chain), or if the '?' is at the end of the
12985 elsif ( $token eq '?' ) {
12986 my $i_colon = $mate_index_to_go[$i];
12988 $i_colon <= 0 # the ':' is not in this batch
12989 || $i == 0 # this '?' is the first token of the line
12991 $max_index_to_go # or this '?' is the last token
12995 # don't break at a '?' if preceded by ':' on
12996 # this line of previous ?/: pair on this line.
12997 # This is an attempt to preserve a chain of ?/:
12998 # expressions (elsif2.t). And don't break if
12999 # this has a side comment.
13000 set_forced_breakpoint($i)
13002 $type_sequence == (
13003 $last_colon_sequence_number +
13004 TYPE_SEQUENCE_INCREMENT
13006 || $tokens_to_go[$max_index_to_go] eq '#'
13008 set_closing_breakpoint($i);
13009 } ## end if ( $i_colon <= 0 ||...)
13010 } ## end elsif ( $token eq '?' )
13011 } ## end if ($type_sequence)
13013 #print "LISTX sees: i=$i type=$type tok=$token block=$block_type depth=$depth\n";
13015 #------------------------------------------------------------
13016 # Handle Increasing Depth..
13018 # prepare for a new list when depth increases
13019 # token $i is a '(','{', or '['
13020 #------------------------------------------------------------
13021 if ( $depth > $current_depth ) {
13023 $breakpoint_stack[$depth] = $forced_breakpoint_count;
13024 $breakpoint_undo_stack[$depth] = $forced_breakpoint_undo_count;
13025 $has_broken_sublist[$depth] = 0;
13026 $identifier_count_stack[$depth] = 0;
13027 $index_before_arrow[$depth] = -1;
13028 $interrupted_list[$depth] = 0;
13029 $item_count_stack[$depth] = 0;
13030 $last_comma_index[$depth] = undef;
13031 $last_dot_index[$depth] = undef;
13032 $last_nonblank_type[$depth] = $last_nonblank_type;
13033 $old_breakpoint_count_stack[$depth] = $old_breakpoint_count;
13034 $opening_structure_index_stack[$depth] = $i;
13035 $rand_or_list[$depth] = [];
13036 $rfor_semicolon_list[$depth] = [];
13037 $i_equals[$depth] = -1;
13038 $want_comma_break[$depth] = 0;
13039 $container_type[$depth] =
13040 ( $last_nonblank_type =~ /^(k|=>|&&|\|\||\?|\:|\.)$/ )
13041 ? $last_nonblank_token
13043 $has_old_logical_breakpoints[$depth] = 0;
13045 # if line ends here then signal closing token to break
13046 if ( $next_nonblank_type eq 'b' || $next_nonblank_type eq '#' )
13048 set_closing_breakpoint($i);
13051 # Not all lists of values should be vertically aligned..
13052 $dont_align[$depth] =
13054 # code BLOCKS are handled at a higher level
13055 ( $block_type ne "" )
13057 # certain paren lists
13058 || ( $type eq '(' ) && (
13060 # it does not usually look good to align a list of
13061 # identifiers in a parameter list, as in:
13062 # my($var1, $var2, ...)
13063 # (This test should probably be refined, for now I'm just
13064 # testing for any keyword)
13065 ( $last_nonblank_type eq 'k' )
13067 # a trailing '(' usually indicates a non-list
13068 || ( $next_nonblank_type eq '(' )
13071 # patch to outdent opening brace of long if/for/..
13072 # statements (like this one). See similar coding in
13073 # set_continuation breaks. We have also catch it here for
13074 # short line fragments which otherwise will not go through
13075 # set_continuation_breaks.
13079 # if we have the ')' but not its '(' in this batch..
13080 && ( $last_nonblank_token eq ')' )
13081 && $mate_index_to_go[$i_last_nonblank_token] < 0
13083 # and user wants brace to left
13084 && !$rOpts->{'opening-brace-always-on-right'}
13086 && ( $type eq '{' ) # should be true
13087 && ( $token eq '{' ) # should be true
13090 set_forced_breakpoint( $i - 1 );
13091 } ## end if ( $block_type && ( ...))
13092 } ## end if ( $depth > $current_depth)
13094 #------------------------------------------------------------
13095 # Handle Decreasing Depth..
13097 # finish off any old list when depth decreases
13098 # token $i is a ')','}', or ']'
13099 #------------------------------------------------------------
13100 elsif ( $depth < $current_depth ) {
13102 check_for_new_minimum_depth($depth);
13104 # force all outer logical containers to break after we see on
13106 $has_old_logical_breakpoints[$depth] ||=
13107 $has_old_logical_breakpoints[$current_depth];
13109 # Patch to break between ') {' if the paren list is broken.
13110 # There is similar logic in set_continuation_breaks for
13111 # non-broken lists.
13113 && $next_nonblank_block_type
13114 && $interrupted_list[$current_depth]
13115 && $next_nonblank_type eq '{'
13116 && !$rOpts->{'opening-brace-always-on-right'} )
13118 set_forced_breakpoint($i);
13119 } ## end if ( $token eq ')' && ...
13121 #print "LISTY sees: i=$i type=$type tok=$token block=$block_type depth=$depth next=$next_nonblank_type next_block=$next_nonblank_block_type inter=$interrupted_list[$current_depth]\n";
13123 # set breaks at commas if necessary
13124 my ( $bp_count, $do_not_break_apart ) =
13125 set_comma_breakpoints($current_depth);
13127 my $i_opening = $opening_structure_index_stack[$current_depth];
13128 my $saw_opening_structure = ( $i_opening >= 0 );
13130 # this term is long if we had to break at interior commas..
13131 my $is_long_term = $bp_count > 0;
13133 # If this is a short container with one or more comma arrows,
13134 # then we will mark it as a long term to open it if requested.
13135 # $rOpts_comma_arrow_breakpoints =
13136 # 0 - open only if comma precedes closing brace
13137 # 1 - stable: except for one line blocks
13138 # 2 - try to form 1 line blocks
13140 # 4 - always open up if vt=0
13141 # 5 - stable: even for one line blocks if vt=0
13142 if ( !$is_long_term
13143 && $tokens_to_go[$i_opening] =~ /^[\(\{\[]$/
13144 && $index_before_arrow[ $depth + 1 ] > 0
13145 && !$opening_vertical_tightness{ $tokens_to_go[$i_opening] }
13148 $is_long_term = $rOpts_comma_arrow_breakpoints == 4
13149 || ( $rOpts_comma_arrow_breakpoints == 0
13150 && $last_nonblank_token eq ',' )
13151 || ( $rOpts_comma_arrow_breakpoints == 5
13152 && $old_breakpoint_to_go[$i_opening] );
13153 } ## end if ( !$is_long_term &&...)
13155 # mark term as long if the length between opening and closing
13156 # parens exceeds allowed line length
13157 if ( !$is_long_term && $saw_opening_structure ) {
13158 my $i_opening_minus = find_token_starting_list($i_opening);
13160 # Note: we have to allow for one extra space after a
13161 # closing token so that we do not strand a comma or
13162 # semicolon, hence the '>=' here (oneline.t)
13163 # Note: we ignore left weld lengths here for best results
13165 excess_line_length( $i_opening_minus, $i, 1 ) >= 0;
13166 } ## end if ( !$is_long_term &&...)
13168 # We've set breaks after all comma-arrows. Now we have to
13169 # undo them if this can be a one-line block
13170 # (the only breakpoints set will be due to comma-arrows)
13173 # user doesn't require breaking after all comma-arrows
13174 ( $rOpts_comma_arrow_breakpoints != 0 )
13175 && ( $rOpts_comma_arrow_breakpoints != 4 )
13177 # and if the opening structure is in this batch
13178 && $saw_opening_structure
13180 # and either on the same old line
13182 $old_breakpoint_count_stack[$current_depth] ==
13183 $last_old_breakpoint_count
13185 # or user wants to form long blocks with arrows
13186 || $rOpts_comma_arrow_breakpoints == 2
13189 # and we made some breakpoints between the opening and closing
13190 && ( $breakpoint_undo_stack[$current_depth] <
13191 $forced_breakpoint_undo_count )
13193 # and this block is short enough to fit on one line
13194 # Note: use < because need 1 more space for possible comma
13199 undo_forced_breakpoint_stack(
13200 $breakpoint_undo_stack[$current_depth] );
13201 } ## end if ( ( $rOpts_comma_arrow_breakpoints...))
13203 # now see if we have any comma breakpoints left
13204 my $has_comma_breakpoints =
13205 ( $breakpoint_stack[$current_depth] !=
13206 $forced_breakpoint_count );
13208 # update broken-sublist flag of the outer container
13209 $has_broken_sublist[$depth] =
13210 $has_broken_sublist[$depth]
13211 || $has_broken_sublist[$current_depth]
13213 || $has_comma_breakpoints;
13215 # Having come to the closing ')', '}', or ']', now we have to decide if we
13216 # should 'open up' the structure by placing breaks at the opening and
13217 # closing containers. This is a tricky decision. Here are some of the
13218 # basic considerations:
13220 # -If this is a BLOCK container, then any breakpoints will have already
13221 # been set (and according to user preferences), so we need do nothing here.
13223 # -If we have a comma-separated list for which we can align the list items,
13224 # then we need to do so because otherwise the vertical aligner cannot
13225 # currently do the alignment.
13227 # -If this container does itself contain a container which has been broken
13228 # open, then it should be broken open to properly show the structure.
13230 # -If there is nothing to align, and no other reason to break apart,
13231 # then do not do it.
13233 # We will not break open the parens of a long but 'simple' logical expression.
13236 # This is an example of a simple logical expression and its formatting:
13238 # if ( $bigwasteofspace1 && $bigwasteofspace2
13239 # || $bigwasteofspace3 && $bigwasteofspace4 )
13241 # Most people would prefer this than the 'spacey' version:
13244 # $bigwasteofspace1 && $bigwasteofspace2
13245 # || $bigwasteofspace3 && $bigwasteofspace4
13248 # To illustrate the rules for breaking logical expressions, consider:
13252 # and ( exists $ids_excl_uc{$id_uc}
13253 # or grep $id_uc =~ /$_/, @ids_excl_uc ))
13255 # This is on the verge of being difficult to read. The current default is to
13256 # open it up like this:
13261 # and ( exists $ids_excl_uc{$id_uc}
13262 # or grep $id_uc =~ /$_/, @ids_excl_uc )
13265 # This is a compromise which tries to avoid being too dense and to spacey.
13266 # A more spaced version would be:
13272 # exists $ids_excl_uc{$id_uc}
13273 # or grep $id_uc =~ /$_/, @ids_excl_uc
13277 # Some people might prefer the spacey version -- an option could be added. The
13278 # innermost expression contains a long block '( exists $ids_... ')'.
13280 # Here is how the logic goes: We will force a break at the 'or' that the
13281 # innermost expression contains, but we will not break apart its opening and
13282 # closing containers because (1) it contains no multi-line sub-containers itself,
13283 # and (2) there is no alignment to be gained by breaking it open like this
13286 # exists $ids_excl_uc{$id_uc}
13287 # or grep $id_uc =~ /$_/, @ids_excl_uc
13290 # (although this looks perfectly ok and might be good for long expressions). The
13291 # outer 'if' container, though, contains a broken sub-container, so it will be
13292 # broken open to avoid too much density. Also, since it contains no 'or's, there
13293 # will be a forced break at its 'and'.
13295 # set some flags telling something about this container..
13296 my $is_simple_logical_expression = 0;
13297 if ( $item_count_stack[$current_depth] == 0
13298 && $saw_opening_structure
13299 && $tokens_to_go[$i_opening] eq '('
13300 && $is_logical_container{ $container_type[$current_depth] }
13304 # This seems to be a simple logical expression with
13305 # no existing breakpoints. Set a flag to prevent
13307 if ( !$has_comma_breakpoints ) {
13308 $is_simple_logical_expression = 1;
13311 # This seems to be a simple logical expression with
13312 # breakpoints (broken sublists, for example). Break
13313 # at all 'or's and '||'s.
13315 set_logical_breakpoints($current_depth);
13317 } ## end if ( $item_count_stack...)
13320 && @{ $rfor_semicolon_list[$current_depth] } )
13322 set_for_semicolon_breakpoints($current_depth);
13324 # open up a long 'for' or 'foreach' container to allow
13325 # leading term alignment unless -lp is used.
13326 $has_comma_breakpoints = 1
13327 unless $rOpts_line_up_parentheses;
13328 } ## end if ( $is_long_term && ...)
13332 # breaks for code BLOCKS are handled at a higher level
13335 # we do not need to break at the top level of an 'if'
13337 && !$is_simple_logical_expression
13339 ## modification to keep ': (' containers vertically tight;
13340 ## but probably better to let user set -vt=1 to avoid
13341 ## inconsistency with other paren types
13342 ## && ($container_type[$current_depth] ne ':')
13344 # otherwise, we require one of these reasons for breaking:
13347 # - this term has forced line breaks
13348 $has_comma_breakpoints
13350 # - the opening container is separated from this batch
13351 # for some reason (comment, blank line, code block)
13352 # - this is a non-paren container spanning multiple lines
13353 || !$saw_opening_structure
13355 # - this is a long block contained in another breakable
13358 && $container_environment_to_go[$i_opening] ne
13364 # For -lp option, we must put a breakpoint before
13365 # the token which has been identified as starting
13366 # this indentation level. This is necessary for
13367 # proper alignment.
13368 if ( $rOpts_line_up_parentheses && $saw_opening_structure )
13370 my $item = $leading_spaces_to_go[ $i_opening + 1 ];
13371 if ( $i_opening + 1 < $max_index_to_go
13372 && $types_to_go[ $i_opening + 1 ] eq 'b' )
13374 $item = $leading_spaces_to_go[ $i_opening + 2 ];
13376 if ( defined($item) ) {
13377 my $i_start_2 = $item->get_starting_index();
13379 defined($i_start_2)
13381 # we are breaking after an opening brace, paren,
13382 # so don't break before it too
13383 && $i_start_2 ne $i_opening
13387 # Only break for breakpoints at the same
13388 # indentation level as the opening paren
13389 my $test1 = $nesting_depth_to_go[$i_opening];
13390 my $test2 = $nesting_depth_to_go[$i_start_2];
13391 if ( $test2 == $test1 ) {
13392 set_forced_breakpoint( $i_start_2 - 1 );
13394 } ## end if ( defined($i_start_2...))
13395 } ## end if ( defined($item) )
13396 } ## end if ( $rOpts_line_up_parentheses...)
13398 # break after opening structure.
13399 # note: break before closing structure will be automatic
13400 if ( $minimum_depth <= $current_depth ) {
13402 set_forced_breakpoint($i_opening)
13403 unless ( $do_not_break_apart
13404 || is_unbreakable_container($current_depth) );
13406 # break at ',' of lower depth level before opening token
13407 if ( $last_comma_index[$depth] ) {
13408 set_forced_breakpoint( $last_comma_index[$depth] );
13411 # break at '.' of lower depth level before opening token
13412 if ( $last_dot_index[$depth] ) {
13413 set_forced_breakpoint( $last_dot_index[$depth] );
13416 # break before opening structure if preceded by another
13417 # closing structure and a comma. This is normally
13418 # done by the previous closing brace, but not
13419 # if it was a one-line block.
13420 if ( $i_opening > 2 ) {
13422 ( $types_to_go[ $i_opening - 1 ] eq 'b' )
13426 if ( $types_to_go[$i_prev] eq ','
13427 && $types_to_go[ $i_prev - 1 ] =~ /^[\)\}]$/ )
13429 set_forced_breakpoint($i_prev);
13432 # also break before something like ':(' or '?('
13435 $types_to_go[$i_prev] =~ /^([k\:\?]|&&|\|\|)$/ )
13437 my $token_prev = $tokens_to_go[$i_prev];
13438 if ( $want_break_before{$token_prev} ) {
13439 set_forced_breakpoint($i_prev);
13441 } ## end elsif ( $types_to_go[$i_prev...])
13442 } ## end if ( $i_opening > 2 )
13443 } ## end if ( $minimum_depth <=...)
13445 # break after comma following closing structure
13446 if ( $next_type eq ',' ) {
13447 set_forced_breakpoint( $i + 1 );
13450 # break before an '=' following closing structure
13452 $is_assignment{$next_nonblank_type}
13453 && ( $breakpoint_stack[$current_depth] !=
13454 $forced_breakpoint_count )
13457 set_forced_breakpoint($i);
13458 } ## end if ( $is_assignment{$next_nonblank_type...})
13460 # break at any comma before the opening structure Added
13461 # for -lp, but seems to be good in general. It isn't
13462 # obvious how far back to look; the '5' below seems to
13463 # work well and will catch the comma in something like
13464 # push @list, myfunc( $param, $param, ..
13466 my $icomma = $last_comma_index[$depth];
13467 if ( defined($icomma) && ( $i_opening - $icomma ) < 5 ) {
13468 unless ( $forced_breakpoint_to_go[$icomma] ) {
13469 set_forced_breakpoint($icomma);
13472 } # end logic to open up a container
13474 # Break open a logical container open if it was already open
13475 elsif ($is_simple_logical_expression
13476 && $has_old_logical_breakpoints[$current_depth] )
13478 set_logical_breakpoints($current_depth);
13481 # Handle long container which does not get opened up
13482 elsif ($is_long_term) {
13484 # must set fake breakpoint to alert outer containers that
13486 set_fake_breakpoint();
13487 } ## end elsif ($is_long_term)
13489 } ## end elsif ( $depth < $current_depth)
13491 #------------------------------------------------------------
13492 # Handle this token
13493 #------------------------------------------------------------
13495 $current_depth = $depth;
13497 # handle comma-arrow
13498 if ( $type eq '=>' ) {
13499 next if ( $last_nonblank_type eq '=>' );
13500 next if $rOpts_break_at_old_comma_breakpoints;
13501 next if $rOpts_comma_arrow_breakpoints == 3;
13502 $want_comma_break[$depth] = 1;
13503 $index_before_arrow[$depth] = $i_last_nonblank_token;
13505 } ## end if ( $type eq '=>' )
13507 elsif ( $type eq '.' ) {
13508 $last_dot_index[$depth] = $i;
13511 # Turn off alignment if we are sure that this is not a list
13512 # environment. To be safe, we will do this if we see certain
13513 # non-list tokens, such as ';', and also the environment is
13514 # not a list. Note that '=' could be in any of the = operators
13515 # (lextest.t). We can't just use the reported environment
13516 # because it can be incorrect in some cases.
13517 elsif ( ( $type =~ /^[\;\<\>\~]$/ || $is_assignment{$type} )
13518 && $container_environment_to_go[$i] ne 'LIST' )
13520 $dont_align[$depth] = 1;
13521 $want_comma_break[$depth] = 0;
13522 $index_before_arrow[$depth] = -1;
13523 } ## end elsif ( ( $type =~ /^[\;\<\>\~]$/...))
13525 # now just handle any commas
13526 next unless ( $type eq ',' );
13528 $last_dot_index[$depth] = undef;
13529 $last_comma_index[$depth] = $i;
13531 # break here if this comma follows a '=>'
13532 # but not if there is a side comment after the comma
13533 if ( $want_comma_break[$depth] ) {
13535 if ( $next_nonblank_type =~ /^[\)\}\]R]$/ ) {
13536 if ($rOpts_comma_arrow_breakpoints) {
13537 $want_comma_break[$depth] = 0;
13542 set_forced_breakpoint($i) unless ( $next_nonblank_type eq '#' );
13544 # break before the previous token if it looks safe
13545 # Example of something that we will not try to break before:
13546 # DBI::SQL_SMALLINT() => $ado_consts->{adSmallInt},
13547 # Also we don't want to break at a binary operator (like +):
13551 # $y - $R, -fill => 'black',
13553 my $ibreak = $index_before_arrow[$depth] - 1;
13555 && $tokens_to_go[ $ibreak + 1 ] !~ /^[\)\}\]]$/ )
13557 if ( $tokens_to_go[$ibreak] eq '-' ) { $ibreak-- }
13558 if ( $types_to_go[$ibreak] eq 'b' ) { $ibreak-- }
13559 if ( $types_to_go[$ibreak] =~ /^[,wiZCUG\(\{\[]$/ ) {
13561 # don't break pointer calls, such as the following:
13562 # File::Spec->curdir => 1,
13563 # (This is tokenized as adjacent 'w' tokens)
13564 ##if ( $tokens_to_go[ $ibreak + 1 ] !~ /^->/ ) {
13566 # And don't break before a comma, as in the following:
13567 # ( LONGER_THAN,=> 1,
13568 # EIGHTY_CHARACTERS,=> 2,
13569 # CAUSES_FORMATTING,=> 3,
13572 # This example is for -tso but should be general rule
13573 if ( $tokens_to_go[ $ibreak + 1 ] ne '->'
13574 && $tokens_to_go[ $ibreak + 1 ] ne ',' )
13576 set_forced_breakpoint($ibreak);
13578 } ## end if ( $types_to_go[$ibreak...])
13579 } ## end if ( $ibreak > 0 && $tokens_to_go...)
13581 $want_comma_break[$depth] = 0;
13582 $index_before_arrow[$depth] = -1;
13584 # handle list which mixes '=>'s and ','s:
13585 # treat any list items so far as an interrupted list
13586 $interrupted_list[$depth] = 1;
13588 } ## end if ( $want_comma_break...)
13590 # break after all commas above starting depth
13591 if ( $depth < $starting_depth && !$dont_align[$depth] ) {
13592 set_forced_breakpoint($i) unless ( $next_nonblank_type eq '#' );
13596 # add this comma to the list..
13597 my $item_count = $item_count_stack[$depth];
13598 if ( $item_count == 0 ) {
13600 # but do not form a list with no opening structure
13603 # open INFILE_COPY, ">$input_file_copy"
13604 # or die ("very long message");
13606 if ( ( $opening_structure_index_stack[$depth] < 0 )
13607 && $container_environment_to_go[$i] eq 'BLOCK' )
13609 $dont_align[$depth] = 1;
13611 } ## end if ( $item_count == 0 )
13613 $comma_index[$depth][$item_count] = $i;
13614 ++$item_count_stack[$depth];
13615 if ( $last_nonblank_type =~ /^[iR\]]$/ ) {
13616 $identifier_count_stack[$depth]++;
13618 } ## end while ( ++$i <= $max_index_to_go)
13620 #-------------------------------------------
13621 # end of loop over all tokens in this batch
13622 #-------------------------------------------
13624 # set breaks for any unfinished lists ..
13625 for ( my $dd = $current_depth ; $dd >= $minimum_depth ; $dd-- ) {
13627 $interrupted_list[$dd] = 1;
13628 $has_broken_sublist[$dd] = 1 if ( $dd < $current_depth );
13629 set_comma_breakpoints($dd);
13630 set_logical_breakpoints($dd)
13631 if ( $has_old_logical_breakpoints[$dd] );
13632 set_for_semicolon_breakpoints($dd);
13634 # break open container...
13635 my $i_opening = $opening_structure_index_stack[$dd];
13636 set_forced_breakpoint($i_opening)
13638 is_unbreakable_container($dd)
13640 # Avoid a break which would place an isolated ' or "
13643 && $i_opening >= $max_index_to_go - 2
13644 && $token =~ /^['"]$/ )
13646 } ## end for ( my $dd = $current_depth...)
13648 # Return a flag indicating if the input file had some good breakpoints.
13649 # This flag will be used to force a break in a line shorter than the
13650 # allowed line length.
13651 if ( $has_old_logical_breakpoints[$current_depth] ) {
13652 $saw_good_breakpoint = 1;
13655 # A complex line with one break at an = has a good breakpoint.
13656 # This is not complex ($total_depth_variation=0):
13660 # This is complex ($total_depth_variation=6):
13662 # (is_boundp("a", 'self-insert') && is_boundp("b", 'self-insert'));
13663 elsif ($i_old_assignment_break
13664 && $total_depth_variation > 4
13665 && $old_breakpoint_count == 1 )
13667 $saw_good_breakpoint = 1;
13668 } ## end elsif ( $i_old_assignment_break...)
13670 return $saw_good_breakpoint;
13671 } ## end sub scan_list
13674 sub find_token_starting_list {
13676 # When testing to see if a block will fit on one line, some
13677 # previous token(s) may also need to be on the line; particularly
13678 # if this is a sub call. So we will look back at least one
13679 # token. NOTE: This isn't perfect, but not critical, because
13680 # if we mis-identify a block, it will be wrapped and therefore
13681 # fixed the next time it is formatted.
13682 my $i_opening_paren = shift;
13683 my $i_opening_minus = $i_opening_paren;
13684 my $im1 = $i_opening_paren - 1;
13685 my $im2 = $i_opening_paren - 2;
13686 my $im3 = $i_opening_paren - 3;
13687 my $typem1 = $types_to_go[$im1];
13688 my $typem2 = $im2 >= 0 ? $types_to_go[$im2] : 'b';
13690 if ( $typem1 eq ',' || ( $typem1 eq 'b' && $typem2 eq ',' ) ) {
13691 $i_opening_minus = $i_opening_paren;
13693 elsif ( $tokens_to_go[$i_opening_paren] eq '(' ) {
13694 $i_opening_minus = $im1 if $im1 >= 0;
13696 # walk back to improve length estimate
13697 for ( my $j = $im1 ; $j >= 0 ; $j-- ) {
13698 last if ( $types_to_go[$j] =~ /^[\(\[\{L\}\]\)Rb,]$/ );
13699 $i_opening_minus = $j;
13701 if ( $types_to_go[$i_opening_minus] eq 'b' ) { $i_opening_minus++ }
13703 elsif ( $typem1 eq 'k' ) { $i_opening_minus = $im1 }
13704 elsif ( $typem1 eq 'b' && $im2 >= 0 && $types_to_go[$im2] eq 'k' ) {
13705 $i_opening_minus = $im2;
13707 return $i_opening_minus;
13710 { # begin set_comma_breakpoints_do
13712 my %is_keyword_with_special_leading_term;
13716 # These keywords have prototypes which allow a special leading item
13717 # followed by a list
13719 qw(formline grep kill map printf sprintf push chmod join pack unshift);
13720 @is_keyword_with_special_leading_term{@q} = (1) x scalar(@q);
13723 sub set_comma_breakpoints_do {
13725 # Given a list with some commas, set breakpoints at some of the
13726 # commas, if necessary, to make it easy to read. This list is
13729 $depth, $i_opening_paren, $i_closing_paren,
13730 $item_count, $identifier_count, $rcomma_index,
13731 $next_nonblank_type, $list_type, $interrupted,
13732 $rdo_not_break_apart, $must_break_open,
13735 # nothing to do if no commas seen
13736 return if ( $item_count < 1 );
13737 my $i_first_comma = $rcomma_index->[0];
13738 my $i_true_last_comma = $rcomma_index->[ $item_count - 1 ];
13739 my $i_last_comma = $i_true_last_comma;
13740 if ( $i_last_comma >= $max_index_to_go ) {
13741 $i_last_comma = $rcomma_index->[ --$item_count - 1 ];
13742 return if ( $item_count < 1 );
13745 #---------------------------------------------------------------
13746 # find lengths of all items in the list to calculate page layout
13747 #---------------------------------------------------------------
13748 my $comma_count = $item_count;
13754 my @max_length = ( 0, 0 );
13755 my $first_term_length;
13756 my $i = $i_opening_paren;
13759 foreach my $j ( 0 .. $comma_count - 1 ) {
13760 $is_odd = 1 - $is_odd;
13761 $i_prev_plus = $i + 1;
13762 $i = $rcomma_index->[$j];
13765 ( $types_to_go[ $i - 1 ] eq 'b' ) ? $i - 2 : $i - 1;
13767 ( $types_to_go[$i_prev_plus] eq 'b' )
13770 push @i_term_begin, $i_term_begin;
13771 push @i_term_end, $i_term_end;
13772 push @i_term_comma, $i;
13774 # note: currently adding 2 to all lengths (for comma and space)
13776 2 + token_sequence_length( $i_term_begin, $i_term_end );
13777 push @item_lengths, $length;
13780 $first_term_length = $length;
13784 if ( $length > $max_length[$is_odd] ) {
13785 $max_length[$is_odd] = $length;
13790 # now we have to make a distinction between the comma count and item
13791 # count, because the item count will be one greater than the comma
13792 # count if the last item is not terminated with a comma
13794 ( $types_to_go[ $i_last_comma + 1 ] eq 'b' )
13795 ? $i_last_comma + 1
13798 ( $types_to_go[ $i_closing_paren - 1 ] eq 'b' )
13799 ? $i_closing_paren - 2
13800 : $i_closing_paren - 1;
13801 my $i_effective_last_comma = $i_last_comma;
13803 my $last_item_length = token_sequence_length( $i_b + 1, $i_e );
13805 if ( $last_item_length > 0 ) {
13807 # add 2 to length because other lengths include a comma and a blank
13808 $last_item_length += 2;
13809 push @item_lengths, $last_item_length;
13810 push @i_term_begin, $i_b + 1;
13811 push @i_term_end, $i_e;
13812 push @i_term_comma, undef;
13814 my $i_odd = $item_count % 2;
13816 if ( $last_item_length > $max_length[$i_odd] ) {
13817 $max_length[$i_odd] = $last_item_length;
13821 $i_effective_last_comma = $i_e + 1;
13823 if ( $types_to_go[ $i_b + 1 ] =~ /^[iR\]]$/ ) {
13824 $identifier_count++;
13828 #---------------------------------------------------------------
13829 # End of length calculations
13830 #---------------------------------------------------------------
13832 #---------------------------------------------------------------
13833 # Compound List Rule 1:
13834 # Break at (almost) every comma for a list containing a broken
13835 # sublist. This has higher priority than the Interrupted List
13837 #---------------------------------------------------------------
13838 if ( $has_broken_sublist[$depth] ) {
13840 # Break at every comma except for a comma between two
13841 # simple, small terms. This prevents long vertical
13842 # columns of, say, just 0's.
13843 my $small_length = 10; # 2 + actual maximum length wanted
13845 # We'll insert a break in long runs of small terms to
13846 # allow alignment in uniform tables.
13847 my $skipped_count = 0;
13848 my $columns = table_columns_available($i_first_comma);
13849 my $fields = int( $columns / $small_length );
13850 if ( $rOpts_maximum_fields_per_table
13851 && $fields > $rOpts_maximum_fields_per_table )
13853 $fields = $rOpts_maximum_fields_per_table;
13855 my $max_skipped_count = $fields - 1;
13857 my $is_simple_last_term = 0;
13858 my $is_simple_next_term = 0;
13859 foreach my $j ( 0 .. $item_count ) {
13860 $is_simple_last_term = $is_simple_next_term;
13861 $is_simple_next_term = 0;
13862 if ( $j < $item_count
13863 && $i_term_end[$j] == $i_term_begin[$j]
13864 && $item_lengths[$j] <= $small_length )
13866 $is_simple_next_term = 1;
13869 if ( $is_simple_last_term
13870 && $is_simple_next_term
13871 && $skipped_count < $max_skipped_count )
13876 $skipped_count = 0;
13877 my $i = $i_term_comma[ $j - 1 ];
13878 last unless defined $i;
13879 set_forced_breakpoint($i);
13883 # always break at the last comma if this list is
13884 # interrupted; we wouldn't want to leave a terminal '{', for
13886 if ($interrupted) { set_forced_breakpoint($i_true_last_comma) }
13890 #my ( $a, $b, $c ) = caller();
13891 #print "LISTX: in set_list $a $c interrupt=$interrupted count=$item_count
13892 #i_first = $i_first_comma i_last=$i_last_comma max=$max_index_to_go\n";
13893 #print "depth=$depth has_broken=$has_broken_sublist[$depth] is_multi=$is_multiline opening_paren=($i_opening_paren) \n";
13895 #---------------------------------------------------------------
13896 # Interrupted List Rule:
13897 # A list is forced to use old breakpoints if it was interrupted
13898 # by side comments or blank lines, or requested by user.
13899 #---------------------------------------------------------------
13900 if ( $rOpts_break_at_old_comma_breakpoints
13902 || $i_opening_paren < 0 )
13904 copy_old_breakpoints( $i_first_comma, $i_true_last_comma );
13908 #---------------------------------------------------------------
13909 # Looks like a list of items. We have to look at it and size it up.
13910 #---------------------------------------------------------------
13912 my $opening_token = $tokens_to_go[$i_opening_paren];
13913 my $opening_environment =
13914 $container_environment_to_go[$i_opening_paren];
13916 #-------------------------------------------------------------------
13917 # Return if this will fit on one line
13918 #-------------------------------------------------------------------
13920 my $i_opening_minus = find_token_starting_list($i_opening_paren);
13922 unless excess_line_length( $i_opening_minus, $i_closing_paren ) > 0;
13924 #-------------------------------------------------------------------
13925 # Now we know that this block spans multiple lines; we have to set
13926 # at least one breakpoint -- real or fake -- as a signal to break
13927 # open any outer containers.
13928 #-------------------------------------------------------------------
13929 set_fake_breakpoint();
13931 # be sure we do not extend beyond the current list length
13932 if ( $i_effective_last_comma >= $max_index_to_go ) {
13933 $i_effective_last_comma = $max_index_to_go - 1;
13936 # Set a flag indicating if we need to break open to keep -lp
13937 # items aligned. This is necessary if any of the list terms
13938 # exceeds the available space after the '('.
13939 my $need_lp_break_open = $must_break_open;
13940 if ( $rOpts_line_up_parentheses && !$must_break_open ) {
13941 my $columns_if_unbroken =
13942 maximum_line_length($i_opening_minus) -
13943 total_line_length( $i_opening_minus, $i_opening_paren );
13944 $need_lp_break_open =
13945 ( $max_length[0] > $columns_if_unbroken )
13946 || ( $max_length[1] > $columns_if_unbroken )
13947 || ( $first_term_length > $columns_if_unbroken );
13950 # Specify if the list must have an even number of fields or not.
13951 # It is generally safest to assume an even number, because the
13952 # list items might be a hash list. But if we can be sure that
13953 # it is not a hash, then we can allow an odd number for more
13955 my $odd_or_even = 2; # 1 = odd field count ok, 2 = want even count
13957 if ( $identifier_count >= $item_count - 1
13958 || $is_assignment{$next_nonblank_type}
13959 || ( $list_type && $list_type ne '=>' && $list_type !~ /^[\:\?]$/ )
13965 # do we have a long first term which should be
13966 # left on a line by itself?
13967 my $use_separate_first_term = (
13968 $odd_or_even == 1 # only if we can use 1 field/line
13969 && $item_count > 3 # need several items
13970 && $first_term_length >
13971 2 * $max_length[0] - 2 # need long first term
13972 && $first_term_length >
13973 2 * $max_length[1] - 2 # need long first term
13976 # or do we know from the type of list that the first term should
13978 if ( !$use_separate_first_term ) {
13979 if ( $is_keyword_with_special_leading_term{$list_type} ) {
13980 $use_separate_first_term = 1;
13982 # should the container be broken open?
13983 if ( $item_count < 3 ) {
13984 if ( $i_first_comma - $i_opening_paren < 4 ) {
13985 ${$rdo_not_break_apart} = 1;
13988 elsif ($first_term_length < 20
13989 && $i_first_comma - $i_opening_paren < 4 )
13991 my $columns = table_columns_available($i_first_comma);
13992 if ( $first_term_length < $columns ) {
13993 ${$rdo_not_break_apart} = 1;
14000 if ($use_separate_first_term) {
14002 # ..set a break and update starting values
14003 $use_separate_first_term = 1;
14004 set_forced_breakpoint($i_first_comma);
14005 $i_opening_paren = $i_first_comma;
14006 $i_first_comma = $rcomma_index->[1];
14008 return if $comma_count == 1;
14009 shift @item_lengths;
14010 shift @i_term_begin;
14012 shift @i_term_comma;
14015 # if not, update the metrics to include the first term
14017 if ( $first_term_length > $max_length[0] ) {
14018 $max_length[0] = $first_term_length;
14022 # Field width parameters
14023 my $pair_width = ( $max_length[0] + $max_length[1] );
14025 ( $max_length[0] > $max_length[1] ) ? $max_length[0] : $max_length[1];
14027 # Number of free columns across the page width for laying out tables
14028 my $columns = table_columns_available($i_first_comma);
14030 # Estimated maximum number of fields which fit this space
14031 # This will be our first guess
14032 my $number_of_fields_max =
14033 maximum_number_of_fields( $columns, $odd_or_even, $max_width,
14035 my $number_of_fields = $number_of_fields_max;
14037 # Find the best-looking number of fields
14038 # and make this our second guess if possible
14039 my ( $number_of_fields_best, $ri_ragged_break_list,
14040 $new_identifier_count )
14041 = study_list_complexity( \@i_term_begin, \@i_term_end, \@item_lengths,
14044 if ( $number_of_fields_best != 0
14045 && $number_of_fields_best < $number_of_fields_max )
14047 $number_of_fields = $number_of_fields_best;
14050 # ----------------------------------------------------------------------
14051 # If we are crowded and the -lp option is being used, try to
14052 # undo some indentation
14053 # ----------------------------------------------------------------------
14055 $rOpts_line_up_parentheses
14057 $number_of_fields == 0
14058 || ( $number_of_fields == 1
14059 && $number_of_fields != $number_of_fields_best )
14063 my $available_spaces = get_available_spaces_to_go($i_first_comma);
14064 if ( $available_spaces > 0 ) {
14066 my $spaces_wanted = $max_width - $columns; # for 1 field
14068 if ( $number_of_fields_best == 0 ) {
14069 $number_of_fields_best =
14070 get_maximum_fields_wanted( \@item_lengths );
14073 if ( $number_of_fields_best != 1 ) {
14074 my $spaces_wanted_2 =
14075 1 + $pair_width - $columns; # for 2 fields
14076 if ( $available_spaces > $spaces_wanted_2 ) {
14077 $spaces_wanted = $spaces_wanted_2;
14081 if ( $spaces_wanted > 0 ) {
14082 my $deleted_spaces =
14083 reduce_lp_indentation( $i_first_comma, $spaces_wanted );
14086 if ( $deleted_spaces > 0 ) {
14087 $columns = table_columns_available($i_first_comma);
14088 $number_of_fields_max =
14089 maximum_number_of_fields( $columns, $odd_or_even,
14090 $max_width, $pair_width );
14091 $number_of_fields = $number_of_fields_max;
14093 if ( $number_of_fields_best == 1
14094 && $number_of_fields >= 1 )
14096 $number_of_fields = $number_of_fields_best;
14103 # try for one column if two won't work
14104 if ( $number_of_fields <= 0 ) {
14105 $number_of_fields = int( $columns / $max_width );
14108 # The user can place an upper bound on the number of fields,
14109 # which can be useful for doing maintenance on tables
14110 if ( $rOpts_maximum_fields_per_table
14111 && $number_of_fields > $rOpts_maximum_fields_per_table )
14113 $number_of_fields = $rOpts_maximum_fields_per_table;
14116 # How many columns (characters) and lines would this container take
14117 # if no additional whitespace were added?
14118 my $packed_columns = token_sequence_length( $i_opening_paren + 1,
14119 $i_effective_last_comma + 1 );
14120 if ( $columns <= 0 ) { $columns = 1 } # avoid divide by zero
14121 my $packed_lines = 1 + int( $packed_columns / $columns );
14123 # are we an item contained in an outer list?
14124 my $in_hierarchical_list = $next_nonblank_type =~ /^[\}\,]$/;
14126 if ( $number_of_fields <= 0 ) {
14128 # #---------------------------------------------------------------
14129 # # We're in trouble. We can't find a single field width that works.
14130 # # There is no simple answer here; we may have a single long list
14132 # #---------------------------------------------------------------
14134 # In many cases, it may be best to not force a break if there is just one
14135 # comma, because the standard continuation break logic will do a better
14138 # In the common case that all but one of the terms can fit
14139 # on a single line, it may look better not to break open the
14140 # containing parens. Consider, for example
14144 # sort { $color_value{$::a} <=> $color_value{$::b}; }
14147 # which will look like this with the container broken:
14151 # sort { $color_value{$::a} <=> $color_value{$::b}; } keys %colors
14154 # Here is an example of this rule for a long last term:
14156 # log_message( 0, 256, 128,
14157 # "Number of routes in adj-RIB-in to be considered: $peercount" );
14159 # And here is an example with a long first term:
14162 # "%2d wallclock secs (%$f usr %$f sys + %$f cusr %$f csys = %$f CPU)",
14163 # $r, $pu, $ps, $cu, $cs, $tt
14165 # if $style eq 'all';
14167 my $i_last_comma = $rcomma_index->[ $comma_count - 1 ];
14168 my $long_last_term = excess_line_length( 0, $i_last_comma ) <= 0;
14169 my $long_first_term =
14170 excess_line_length( $i_first_comma + 1, $max_index_to_go ) <= 0;
14172 # break at every comma ...
14175 # if requested by user or is best looking
14176 $number_of_fields_best == 1
14178 # or if this is a sublist of a larger list
14179 || $in_hierarchical_list
14181 # or if multiple commas and we don't have a long first or last
14183 || ( $comma_count > 1
14184 && !( $long_last_term || $long_first_term ) )
14187 foreach ( 0 .. $comma_count - 1 ) {
14188 set_forced_breakpoint( $rcomma_index->[$_] );
14191 elsif ($long_last_term) {
14193 set_forced_breakpoint($i_last_comma);
14194 ${$rdo_not_break_apart} = 1 unless $must_break_open;
14196 elsif ($long_first_term) {
14198 set_forced_breakpoint($i_first_comma);
14202 # let breaks be defined by default bond strength logic
14207 # --------------------------------------------------------
14208 # We have a tentative field count that seems to work.
14209 # How many lines will this require?
14210 # --------------------------------------------------------
14211 my $formatted_lines = $item_count / ($number_of_fields);
14212 if ( $formatted_lines != int $formatted_lines ) {
14213 $formatted_lines = 1 + int $formatted_lines;
14216 # So far we've been trying to fill out to the right margin. But
14217 # compact tables are easier to read, so let's see if we can use fewer
14218 # fields without increasing the number of lines.
14219 $number_of_fields =
14220 compactify_table( $item_count, $number_of_fields, $formatted_lines,
14223 # How many spaces across the page will we fill?
14224 my $columns_per_line =
14225 ( int $number_of_fields / 2 ) * $pair_width +
14226 ( $number_of_fields % 2 ) * $max_width;
14228 my $formatted_columns;
14230 if ( $number_of_fields > 1 ) {
14231 $formatted_columns =
14232 ( $pair_width * ( int( $item_count / 2 ) ) +
14233 ( $item_count % 2 ) * $max_width );
14236 $formatted_columns = $max_width * $item_count;
14238 if ( $formatted_columns < $packed_columns ) {
14239 $formatted_columns = $packed_columns;
14242 my $unused_columns = $formatted_columns - $packed_columns;
14244 # set some empirical parameters to help decide if we should try to
14245 # align; high sparsity does not look good, especially with few lines
14246 my $sparsity = ($unused_columns) / ($formatted_columns);
14247 my $max_allowed_sparsity =
14248 ( $item_count < 3 ) ? 0.1
14249 : ( $packed_lines == 1 ) ? 0.15
14250 : ( $packed_lines == 2 ) ? 0.4
14253 # Begin check for shortcut methods, which avoid treating a list
14254 # as a table for relatively small parenthesized lists. These
14255 # are usually easier to read if not formatted as tables.
14257 $packed_lines <= 2 # probably can fit in 2 lines
14258 && $item_count < 9 # doesn't have too many items
14259 && $opening_environment eq 'BLOCK' # not a sub-container
14260 && $opening_token eq '(' # is paren list
14264 # Shortcut method 1: for -lp and just one comma:
14265 # This is a no-brainer, just break at the comma.
14267 $rOpts_line_up_parentheses # -lp
14268 && $item_count == 2 # two items, one comma
14269 && !$must_break_open
14272 my $i_break = $rcomma_index->[0];
14273 set_forced_breakpoint($i_break);
14274 ${$rdo_not_break_apart} = 1;
14275 set_non_alignment_flags( $comma_count, $rcomma_index );
14280 # method 2 is for most small ragged lists which might look
14281 # best if not displayed as a table.
14283 ( $number_of_fields == 2 && $item_count == 3 )
14285 $new_identifier_count > 0 # isn't all quotes
14286 && $sparsity > 0.15
14287 ) # would be fairly spaced gaps if aligned
14291 my $break_count = set_ragged_breakpoints( \@i_term_comma,
14292 $ri_ragged_break_list );
14293 ++$break_count if ($use_separate_first_term);
14295 # NOTE: we should really use the true break count here,
14296 # which can be greater if there are large terms and
14297 # little space, but usually this will work well enough.
14298 unless ($must_break_open) {
14300 if ( $break_count <= 1 ) {
14301 ${$rdo_not_break_apart} = 1;
14303 elsif ( $rOpts_line_up_parentheses && !$need_lp_break_open )
14305 ${$rdo_not_break_apart} = 1;
14308 set_non_alignment_flags( $comma_count, $rcomma_index );
14312 } # end shortcut methods
14316 FORMATTER_DEBUG_FLAG_SPARSE && do {
14318 "SPARSE:cols=$columns commas=$comma_count items:$item_count ids=$identifier_count pairwidth=$pair_width fields=$number_of_fields lines packed: $packed_lines packed_cols=$packed_columns fmtd:$formatted_lines cols /line:$columns_per_line unused:$unused_columns fmtd:$formatted_columns sparsity=$sparsity allow=$max_allowed_sparsity\n";
14322 #---------------------------------------------------------------
14323 # Compound List Rule 2:
14324 # If this list is too long for one line, and it is an item of a
14325 # larger list, then we must format it, regardless of sparsity
14326 # (ian.t). One reason that we have to do this is to trigger
14327 # Compound List Rule 1, above, which causes breaks at all commas of
14328 # all outer lists. In this way, the structure will be properly
14330 #---------------------------------------------------------------
14332 # Decide if this list is too long for one line unless broken
14333 my $total_columns = table_columns_available($i_opening_paren);
14334 my $too_long = $packed_columns > $total_columns;
14336 # For a paren list, include the length of the token just before the
14337 # '(' because this is likely a sub call, and we would have to
14338 # include the sub name on the same line as the list. This is still
14339 # imprecise, but not too bad. (steve.t)
14340 if ( !$too_long && $i_opening_paren > 0 && $opening_token eq '(' ) {
14342 $too_long = excess_line_length( $i_opening_minus,
14343 $i_effective_last_comma + 1 ) > 0;
14346 # FIXME: For an item after a '=>', try to include the length of the
14347 # thing before the '=>'. This is crude and should be improved by
14348 # actually looking back token by token.
14349 if ( !$too_long && $i_opening_paren > 0 && $list_type eq '=>' ) {
14350 my $i_opening_minus = $i_opening_paren - 4;
14351 if ( $i_opening_minus >= 0 ) {
14352 $too_long = excess_line_length( $i_opening_minus,
14353 $i_effective_last_comma + 1 ) > 0;
14357 # Always break lists contained in '[' and '{' if too long for 1 line,
14358 # and always break lists which are too long and part of a more complex
14360 my $must_break_open_container = $must_break_open
14362 && ( $in_hierarchical_list || $opening_token ne '(' ) );
14364 #print "LISTX: next=$next_nonblank_type avail cols=$columns packed=$packed_columns must format = $must_break_open_container too-long=$too_long opening=$opening_token list_type=$list_type formatted_lines=$formatted_lines packed=$packed_lines max_sparsity= $max_allowed_sparsity sparsity=$sparsity \n";
14366 #---------------------------------------------------------------
14367 # The main decision:
14368 # Now decide if we will align the data into aligned columns. Do not
14369 # attempt to align columns if this is a tiny table or it would be
14370 # too spaced. It seems that the more packed lines we have, the
14371 # sparser the list that can be allowed and still look ok.
14372 #---------------------------------------------------------------
14374 if ( ( $formatted_lines < 3 && $packed_lines < $formatted_lines )
14375 || ( $formatted_lines < 2 )
14376 || ( $unused_columns > $max_allowed_sparsity * $formatted_columns )
14380 #---------------------------------------------------------------
14381 # too sparse: would look ugly if aligned in a table;
14382 #---------------------------------------------------------------
14384 # use old breakpoints if this is a 'big' list
14385 # FIXME: goal is to improve set_ragged_breakpoints so that
14386 # this is not necessary.
14387 if ( $packed_lines > 2 && $item_count > 10 ) {
14388 write_logfile_entry("List sparse: using old breakpoints\n");
14389 copy_old_breakpoints( $i_first_comma, $i_last_comma );
14392 # let the continuation logic handle it if 2 lines
14395 my $break_count = set_ragged_breakpoints( \@i_term_comma,
14396 $ri_ragged_break_list );
14397 ++$break_count if ($use_separate_first_term);
14399 unless ($must_break_open_container) {
14400 if ( $break_count <= 1 ) {
14401 ${$rdo_not_break_apart} = 1;
14403 elsif ( $rOpts_line_up_parentheses && !$need_lp_break_open )
14405 ${$rdo_not_break_apart} = 1;
14408 set_non_alignment_flags( $comma_count, $rcomma_index );
14413 #---------------------------------------------------------------
14414 # go ahead and format as a table
14415 #---------------------------------------------------------------
14416 write_logfile_entry(
14417 "List: auto formatting with $number_of_fields fields/row\n");
14419 my $j_first_break =
14420 $use_separate_first_term ? $number_of_fields : $number_of_fields - 1;
14423 my $j = $j_first_break ;
14424 $j < $comma_count ;
14425 $j += $number_of_fields
14428 my $i = $rcomma_index->[$j];
14429 set_forced_breakpoint($i);
14435 sub set_non_alignment_flags {
14437 # set flag which indicates that these commas should not be
14439 my ( $comma_count, $rcomma_index ) = @_;
14440 foreach ( 0 .. $comma_count - 1 ) {
14441 $matching_token_to_go[ $rcomma_index->[$_] ] = 1;
14446 sub study_list_complexity {
14448 # Look for complex tables which should be formatted with one term per line.
14449 # Returns the following:
14451 # \@i_ragged_break_list = list of good breakpoints to avoid lines
14452 # which are hard to read
14453 # $number_of_fields_best = suggested number of fields based on
14454 # complexity; = 0 if any number may be used.
14456 my ( $ri_term_begin, $ri_term_end, $ritem_lengths, $max_width ) = @_;
14457 my $item_count = @{$ri_term_begin};
14458 my $complex_item_count = 0;
14459 my $number_of_fields_best = $rOpts_maximum_fields_per_table;
14460 my $i_max = @{$ritem_lengths} - 1;
14461 ##my @item_complexity;
14463 my $i_last_last_break = -3;
14464 my $i_last_break = -2;
14465 my @i_ragged_break_list;
14467 my $definitely_complex = 30;
14468 my $definitely_simple = 12;
14469 my $quote_count = 0;
14471 for my $i ( 0 .. $i_max ) {
14472 my $ib = $ri_term_begin->[$i];
14473 my $ie = $ri_term_end->[$i];
14475 # define complexity: start with the actual term length
14476 my $weighted_length = ( $ritem_lengths->[$i] - 2 );
14478 ##TBD: join types here and check for variations
14479 ##my $str=join "", @tokens_to_go[$ib..$ie];
14482 if ( $types_to_go[$ib] =~ /^[qQ]$/ ) {
14486 elsif ( $types_to_go[$ib] =~ /^[w\-]$/ ) {
14490 if ( $ib eq $ie ) {
14491 if ( $is_quote && $tokens_to_go[$ib] =~ /\s/ ) {
14492 $complex_item_count++;
14493 $weighted_length *= 2;
14499 if ( grep { $_ eq 'b' } @types_to_go[ $ib .. $ie ] ) {
14500 $complex_item_count++;
14501 $weighted_length *= 2;
14503 if ( grep { $_ eq '..' } @types_to_go[ $ib .. $ie ] ) {
14504 $weighted_length += 4;
14508 # add weight for extra tokens.
14509 $weighted_length += 2 * ( $ie - $ib );
14511 ## my $BUB = join '', @tokens_to_go[$ib..$ie];
14512 ## print "# COMPLEXITY:$weighted_length $BUB\n";
14514 ##push @item_complexity, $weighted_length;
14516 # now mark a ragged break after this item it if it is 'long and
14518 if ( $weighted_length >= $definitely_complex ) {
14520 # if we broke after the previous term
14521 # then break before it too
14522 if ( $i_last_break == $i - 1
14524 && $i_last_last_break != $i - 2 )
14527 ## FIXME: don't strand a small term
14528 pop @i_ragged_break_list;
14529 push @i_ragged_break_list, $i - 2;
14530 push @i_ragged_break_list, $i - 1;
14533 push @i_ragged_break_list, $i;
14534 $i_last_last_break = $i_last_break;
14535 $i_last_break = $i;
14538 # don't break before a small last term -- it will
14539 # not look good on a line by itself.
14540 elsif ($i == $i_max
14541 && $i_last_break == $i - 1
14542 && $weighted_length <= $definitely_simple )
14544 pop @i_ragged_break_list;
14548 my $identifier_count = $i_max + 1 - $quote_count;
14550 # Need more tuning here..
14551 if ( $max_width > 12
14552 && $complex_item_count > $item_count / 2
14553 && $number_of_fields_best != 2 )
14555 $number_of_fields_best = 1;
14558 return ( $number_of_fields_best, \@i_ragged_break_list, $identifier_count );
14561 sub get_maximum_fields_wanted {
14563 # Not all tables look good with more than one field of items.
14564 # This routine looks at a table and decides if it should be
14565 # formatted with just one field or not.
14566 # This coding is still under development.
14567 my ($ritem_lengths) = @_;
14569 my $number_of_fields_best = 0;
14571 # For just a few items, we tentatively assume just 1 field.
14572 my $item_count = @{$ritem_lengths};
14573 if ( $item_count <= 5 ) {
14574 $number_of_fields_best = 1;
14577 # For larger tables, look at it both ways and see what looks best
14581 my @max_length = ( 0, 0 );
14582 my @last_length_2 = ( undef, undef );
14583 my @first_length_2 = ( undef, undef );
14584 my $last_length = undef;
14585 my $total_variation_1 = 0;
14586 my $total_variation_2 = 0;
14587 my @total_variation_2 = ( 0, 0 );
14589 foreach my $j ( 0 .. $item_count - 1 ) {
14591 $is_odd = 1 - $is_odd;
14592 my $length = $ritem_lengths->[$j];
14593 if ( $length > $max_length[$is_odd] ) {
14594 $max_length[$is_odd] = $length;
14597 if ( defined($last_length) ) {
14598 my $dl = abs( $length - $last_length );
14599 $total_variation_1 += $dl;
14601 $last_length = $length;
14603 my $ll = $last_length_2[$is_odd];
14604 if ( defined($ll) ) {
14605 my $dl = abs( $length - $ll );
14606 $total_variation_2[$is_odd] += $dl;
14609 $first_length_2[$is_odd] = $length;
14611 $last_length_2[$is_odd] = $length;
14613 $total_variation_2 = $total_variation_2[0] + $total_variation_2[1];
14615 my $factor = ( $item_count > 10 ) ? 1 : ( $item_count > 5 ) ? 0.75 : 0;
14616 unless ( $total_variation_2 < $factor * $total_variation_1 ) {
14617 $number_of_fields_best = 1;
14620 return ($number_of_fields_best);
14623 sub table_columns_available {
14624 my $i_first_comma = shift;
14626 maximum_line_length($i_first_comma) -
14627 leading_spaces_to_go($i_first_comma);
14629 # Patch: the vertical formatter does not line up lines whose lengths
14630 # exactly equal the available line length because of allowances
14631 # that must be made for side comments. Therefore, the number of
14632 # available columns is reduced by 1 character.
14637 sub maximum_number_of_fields {
14639 # how many fields will fit in the available space?
14640 my ( $columns, $odd_or_even, $max_width, $pair_width ) = @_;
14641 my $max_pairs = int( $columns / $pair_width );
14642 my $number_of_fields = $max_pairs * 2;
14643 if ( $odd_or_even == 1
14644 && $max_pairs * $pair_width + $max_width <= $columns )
14646 $number_of_fields++;
14648 return $number_of_fields;
14651 sub compactify_table {
14653 # given a table with a certain number of fields and a certain number
14654 # of lines, see if reducing the number of fields will make it look
14656 my ( $item_count, $number_of_fields, $formatted_lines, $odd_or_even ) = @_;
14657 if ( $number_of_fields >= $odd_or_even * 2 && $formatted_lines > 0 ) {
14661 $min_fields = $number_of_fields ;
14662 $min_fields >= $odd_or_even
14663 && $min_fields * $formatted_lines >= $item_count ;
14664 $min_fields -= $odd_or_even
14667 $number_of_fields = $min_fields;
14670 return $number_of_fields;
14673 sub set_ragged_breakpoints {
14675 # Set breakpoints in a list that cannot be formatted nicely as a
14677 my ( $ri_term_comma, $ri_ragged_break_list ) = @_;
14679 my $break_count = 0;
14680 foreach ( @{$ri_ragged_break_list} ) {
14681 my $j = $ri_term_comma->[$_];
14683 set_forced_breakpoint($j);
14687 return $break_count;
14690 sub copy_old_breakpoints {
14691 my ( $i_first_comma, $i_last_comma ) = @_;
14692 for my $i ( $i_first_comma .. $i_last_comma ) {
14693 if ( $old_breakpoint_to_go[$i] ) {
14694 set_forced_breakpoint($i);
14701 my ( $i, $j ) = @_;
14702 if ( $i >= 0 && $i <= $j && $j <= $max_index_to_go ) {
14704 FORMATTER_DEBUG_FLAG_NOBREAK && do {
14705 my ( $a, $b, $c ) = caller();
14707 "NOBREAK: forced_breakpoint $forced_breakpoint_count from $a $c with i=$i max=$max_index_to_go type=$types_to_go[$i]\n";
14710 @nobreak_to_go[ $i .. $j ] = (1) x ( $j - $i + 1 );
14713 # shouldn't happen; non-critical error
14715 FORMATTER_DEBUG_FLAG_NOBREAK && do {
14716 my ( $a, $b, $c ) = caller();
14718 "NOBREAK ERROR: from $a $c with i=$i j=$j max=$max_index_to_go\n";
14724 sub set_fake_breakpoint {
14726 # Just bump up the breakpoint count as a signal that there are breaks.
14727 # This is useful if we have breaks but may want to postpone deciding where
14729 $forced_breakpoint_count++;
14733 sub set_forced_breakpoint {
14736 return unless defined $i && $i >= 0;
14738 # no breaks between welded tokens
14739 return if ( weld_len_right_to_go($i) );
14741 # when called with certain tokens, use bond strengths to decide
14742 # if we break before or after it
14743 my $token = $tokens_to_go[$i];
14745 if ( $token =~ /^([\=\.\,\:\?]|and|or|xor|&&|\|\|)$/ ) {
14746 if ( $want_break_before{$token} && $i >= 0 ) { $i-- }
14749 # breaks are forced before 'if' and 'unless'
14750 elsif ( $is_if_unless{$token} ) { $i-- }
14752 if ( $i >= 0 && $i <= $max_index_to_go ) {
14753 my $i_nonblank = ( $types_to_go[$i] ne 'b' ) ? $i : $i - 1;
14755 FORMATTER_DEBUG_FLAG_FORCE && do {
14756 my ( $a, $b, $c ) = caller();
14758 "FORCE $forced_breakpoint_count from $a $c with i=$i_nonblank max=$max_index_to_go tok=$tokens_to_go[$i_nonblank] type=$types_to_go[$i_nonblank] nobr=$nobreak_to_go[$i_nonblank]\n";
14761 if ( $i_nonblank >= 0 && $nobreak_to_go[$i_nonblank] == 0 ) {
14762 $forced_breakpoint_to_go[$i_nonblank] = 1;
14764 if ( $i_nonblank > $index_max_forced_break ) {
14765 $index_max_forced_break = $i_nonblank;
14767 $forced_breakpoint_count++;
14768 $forced_breakpoint_undo_stack[ $forced_breakpoint_undo_count++ ] =
14771 # if we break at an opening container..break at the closing
14772 if ( $tokens_to_go[$i_nonblank] =~ /^[\{\[\(\?]$/ ) {
14773 set_closing_breakpoint($i_nonblank);
14780 sub clear_breakpoint_undo_stack {
14781 $forced_breakpoint_undo_count = 0;
14785 sub undo_forced_breakpoint_stack {
14787 my $i_start = shift;
14788 if ( $i_start < 0 ) {
14790 my ( $a, $b, $c ) = caller();
14792 "Program Bug: undo_forced_breakpoint_stack from $a $c has i=$i_start "
14796 while ( $forced_breakpoint_undo_count > $i_start ) {
14798 $forced_breakpoint_undo_stack[ --$forced_breakpoint_undo_count ];
14799 if ( $i >= 0 && $i <= $max_index_to_go ) {
14800 $forced_breakpoint_to_go[$i] = 0;
14801 $forced_breakpoint_count--;
14803 FORMATTER_DEBUG_FLAG_UNDOBP && do {
14804 my ( $a, $b, $c ) = caller();
14806 "UNDOBP: undo forced_breakpoint i=$i $forced_breakpoint_undo_count from $a $c max=$max_index_to_go\n";
14810 # shouldn't happen, but not a critical error
14812 FORMATTER_DEBUG_FLAG_UNDOBP && do {
14813 my ( $a, $b, $c ) = caller();
14815 "Program Bug: undo_forced_breakpoint from $a $c has i=$i but max=$max_index_to_go";
14822 { # begin recombine_breakpoints
14834 @is_amp_amp{@q} = (1) x scalar(@q);
14837 @is_ternary{@q} = (1) x scalar(@q);
14839 @q = qw( + - * / );
14840 @is_math_op{@q} = (1) x scalar(@q);
14843 @is_plus_minus{@q} = (1) x scalar(@q);
14846 @is_mult_div{@q} = (1) x scalar(@q);
14849 sub DUMP_BREAKPOINTS {
14851 # Debug routine to dump current breakpoints...not normally called
14852 # We are given indexes to the current lines:
14853 # $ri_beg = ref to array of BEGinning indexes of each line
14854 # $ri_end = ref to array of ENDing indexes of each line
14855 my ( $ri_beg, $ri_end, $msg ) = @_;
14856 print STDERR "----Dumping breakpoints from: $msg----\n";
14857 for my $n ( 0 .. @{$ri_end} - 1 ) {
14858 my $ibeg = $ri_beg->[$n];
14859 my $iend = $ri_end->[$n];
14861 foreach my $i ( $ibeg .. $iend ) {
14862 $text .= $tokens_to_go[$i];
14864 print STDERR "$n ($ibeg:$iend) $text\n";
14866 print STDERR "----\n";
14870 sub delete_one_line_semicolons {
14872 my ( $self, $ri_beg, $ri_end ) = @_;
14873 my $rLL = $self->{rLL};
14874 my $K_opening_container = $self->{K_opening_container};
14876 # Walk down the lines of this batch and delete any semicolons
14877 # terminating one-line blocks;
14878 my $nmax = @{$ri_end} - 1;
14880 foreach my $n ( 0 .. $nmax ) {
14881 my $i_beg = $ri_beg->[$n];
14882 my $i_e = $ri_end->[$n];
14883 my $K_beg = $K_to_go[$i_beg];
14884 my $K_e = $K_to_go[$i_e];
14886 my $type_end = $rLL->[$K_end]->[_TYPE_];
14887 if ( $type_end eq '#' ) {
14888 $K_end = $self->K_previous_nonblank($K_end);
14889 if ( defined($K_end) ) { $type_end = $rLL->[$K_end]->[_TYPE_]; }
14892 # we are looking for a line ending in closing brace
14894 unless ( $type_end eq '}' && $rLL->[$K_end]->[_TOKEN_] eq '}' );
14896 # ...and preceded by a semicolon on the same line
14897 my $K_semicolon = $self->K_previous_nonblank($K_end);
14898 my $i_semicolon = $i_beg + ( $K_semicolon - $K_beg );
14899 next if ( $i_semicolon <= $i_beg );
14900 next unless ( $rLL->[$K_semicolon]->[_TYPE_] eq ';' );
14902 # safety check - shouldn't happen
14903 if ( $types_to_go[$i_semicolon] ne ';' ) {
14904 Fault("unexpected type looking for semicolon, ignoring");
14908 # ... with the corresponding opening brace on the same line
14909 my $type_sequence = $rLL->[$K_end]->[_TYPE_SEQUENCE_];
14910 my $K_opening = $K_opening_container->{$type_sequence};
14911 my $i_opening = $i_beg + ( $K_opening - $K_beg );
14912 next if ( $i_opening < $i_beg );
14914 # ... and only one semicolon between these braces
14915 my $semicolon_count = 0;
14916 foreach my $K ( $K_opening + 1 .. $K_semicolon - 1 ) {
14917 if ( $rLL->[$K]->[_TYPE_] eq ';' ) {
14918 $semicolon_count++;
14922 next if ($semicolon_count);
14924 # ...ok, then make the semicolon invisible
14925 $tokens_to_go[$i_semicolon] = "";
14930 sub unmask_phantom_semicolons {
14932 my ( $self, $ri_beg, $ri_end ) = @_;
14934 # Walk down the lines of this batch and unmask any invisible line-ending
14935 # semicolons. They were placed by sub respace_tokens but we only now
14936 # know if we actually need them.
14938 my $nmax = @{$ri_end} - 1;
14939 foreach my $n ( 0 .. $nmax ) {
14941 my $i = $ri_end->[$n];
14942 if ( $types_to_go[$i] eq ';' && $tokens_to_go[$i] eq '' ) {
14944 $tokens_to_go[$i] = $want_left_space{';'} == WS_NO ? ';' : ' ;';
14946 my $line_number = 1 + $self->get_old_line_index( $K_to_go[$i] );
14947 note_added_semicolon($line_number);
14953 sub recombine_breakpoints {
14955 # sub set_continuation_breaks is very liberal in setting line breaks
14956 # for long lines, always setting breaks at good breakpoints, even
14957 # when that creates small lines. Sometimes small line fragments
14958 # are produced which would look better if they were combined.
14959 # That's the task of this routine.
14961 # We are given indexes to the current lines:
14962 # $ri_beg = ref to array of BEGinning indexes of each line
14963 # $ri_end = ref to array of ENDing indexes of each line
14964 my ( $ri_beg, $ri_end ) = @_;
14966 # Make a list of all good joining tokens between the lines
14969 my $nmax = @{$ri_end} - 1;
14970 for my $n ( 1 .. $nmax ) {
14971 my $ibeg_1 = $ri_beg->[ $n - 1 ];
14972 my $iend_1 = $ri_end->[ $n - 1 ];
14973 my $iend_2 = $ri_end->[$n];
14974 my $ibeg_2 = $ri_beg->[$n];
14976 my ( $itok, $itokp, $itokm );
14978 foreach my $itest ( $iend_1, $ibeg_2 ) {
14979 my $type = $types_to_go[$itest];
14980 if ( $is_math_op{$type}
14981 || $is_amp_amp{$type}
14982 || $is_assignment{$type}
14988 $joint[$n] = [$itok];
14991 my $more_to_do = 1;
14993 # We keep looping over all of the lines of this batch
14994 # until there are no more possible recombinations
14995 my $nmax_last = @{$ri_end};
14997 while ($more_to_do) {
15000 my $nmax = @{$ri_end} - 1;
15002 # Safety check for infinite loop
15003 unless ( $nmax < $nmax_last ) {
15005 # Shouldn't happen because splice below decreases nmax on each
15007 Fault("Program bug-infinite loop in recombine breakpoints\n");
15009 $nmax_last = $nmax;
15011 my $skip_Section_3;
15012 my $leading_amp_count = 0;
15013 my $this_line_is_semicolon_terminated;
15015 # loop over all remaining lines in this batch
15016 for my $iter ( 1 .. $nmax ) {
15018 # alternating sweep direction gives symmetric results
15019 # for recombining lines which exceed the line length
15020 # such as eval {{{{.... }}}}
15022 if ($reverse) { $n = 1 + $nmax - $iter; }
15023 else { $n = $iter }
15025 #----------------------------------------------------------
15026 # If we join the current pair of lines,
15027 # line $n-1 will become the left part of the joined line
15028 # line $n will become the right part of the joined line
15030 # Here are Indexes of the endpoint tokens of the two lines:
15032 # -----line $n-1--- | -----line $n-----
15033 # $ibeg_1 $iend_1 | $ibeg_2 $iend_2
15036 # We want to decide if we should remove the line break
15037 # between the tokens at $iend_1 and $ibeg_2
15039 # We will apply a number of ad-hoc tests to see if joining
15040 # here will look ok. The code will just issue a 'next'
15041 # command if the join doesn't look good. If we get through
15042 # the gauntlet of tests, the lines will be recombined.
15043 #----------------------------------------------------------
15045 # beginning and ending tokens of the lines we are working on
15046 my $ibeg_1 = $ri_beg->[ $n - 1 ];
15047 my $iend_1 = $ri_end->[ $n - 1 ];
15048 my $iend_2 = $ri_end->[$n];
15049 my $ibeg_2 = $ri_beg->[$n];
15050 my $ibeg_nmax = $ri_beg->[$nmax];
15052 # combined line cannot be too long
15053 my $excess = excess_line_length( $ibeg_1, $iend_2, 1, 1 );
15054 next if ( $excess > 0 );
15056 my $type_iend_1 = $types_to_go[$iend_1];
15057 my $type_iend_2 = $types_to_go[$iend_2];
15058 my $type_ibeg_1 = $types_to_go[$ibeg_1];
15059 my $type_ibeg_2 = $types_to_go[$ibeg_2];
15061 # terminal token of line 2 if any side comment is ignored:
15062 my $iend_2t = $iend_2;
15063 my $type_iend_2t = $type_iend_2;
15065 # some beginning indexes of other lines, which may not exist
15066 my $ibeg_0 = $n > 1 ? $ri_beg->[ $n - 2 ] : -1;
15067 my $ibeg_3 = $n < $nmax ? $ri_beg->[ $n + 1 ] : -1;
15068 my $ibeg_4 = $n + 2 <= $nmax ? $ri_beg->[ $n + 2 ] : -1;
15072 #my $depth_increase=( $nesting_depth_to_go[$ibeg_2] -
15073 # $nesting_depth_to_go[$ibeg_1] );
15075 FORMATTER_DEBUG_FLAG_RECOMBINE && do {
15077 "RECOMBINE: n=$n imid=$iend_1 if=$ibeg_1 type=$type_ibeg_1 =$tokens_to_go[$ibeg_1] next_type=$type_ibeg_2 next_tok=$tokens_to_go[$ibeg_2]\n";
15080 # If line $n is the last line, we set some flags and
15081 # do any special checks for it
15082 if ( $n == $nmax ) {
15084 # a terminal '{' should stay where it is
15085 # unless preceded by a fat comma
15086 next if ( $type_ibeg_2 eq '{' && $type_iend_1 ne '=>' );
15088 if ( $type_iend_2 eq '#'
15089 && $iend_2 - $ibeg_2 >= 2
15090 && $types_to_go[ $iend_2 - 1 ] eq 'b' )
15092 $iend_2t = $iend_2 - 2;
15093 $type_iend_2t = $types_to_go[$iend_2t];
15096 $this_line_is_semicolon_terminated = $type_iend_2t eq ';';
15099 #----------------------------------------------------------
15100 # Recombine Section 0:
15101 # Examine the special token joining this line pair, if any.
15102 # Put as many tests in this section to avoid duplicate code and
15103 # to make formatting independent of whether breaks are to the
15104 # left or right of an operator.
15105 #----------------------------------------------------------
15107 my ($itok) = @{ $joint[$n] };
15110 # FIXME: Patch - may not be necessary
15112 $type_iend_1 eq 'b'
15117 $type_iend_2 eq 'b'
15122 my $type = $types_to_go[$itok];
15124 if ( $type eq ':' ) {
15126 # do not join at a colon unless it disobeys the break request
15127 if ( $itok eq $iend_1 ) {
15128 next unless $want_break_before{$type};
15131 $leading_amp_count++;
15132 next if $want_break_before{$type};
15136 # handle math operators + - * /
15137 elsif ( $is_math_op{$type} ) {
15139 # Combine these lines if this line is a single
15140 # number, or if it is a short term with same
15141 # operator as the previous line. For example, in
15142 # the following code we will combine all of the
15143 # short terms $A, $B, $C, $D, $E, $F, together
15144 # instead of leaving them one per line:
15146 # $A * $B * $C * $D * $E * $F *
15147 # ( 2. * $eps * $sigma * $area ) *
15148 # ( 1. / $tcold**3 - 1. / $thot**3 );
15150 # This can be important in math-intensive code.
15154 my $itokp = min( $inext_to_go[$itok], $iend_2 );
15155 my $itokpp = min( $inext_to_go[$itokp], $iend_2 );
15156 my $itokm = max( $iprev_to_go[$itok], $ibeg_1 );
15157 my $itokmm = max( $iprev_to_go[$itokm], $ibeg_1 );
15159 # check for a number on the right
15160 if ( $types_to_go[$itokp] eq 'n' ) {
15162 # ok if nothing else on right
15163 if ( $itokp == $iend_2 ) {
15168 # look one more token to right..
15169 # okay if math operator or some termination
15171 ( ( $itokpp == $iend_2 )
15172 && $is_math_op{ $types_to_go[$itokpp] } )
15173 || $types_to_go[$itokpp] =~ /^[#,;]$/;
15177 # check for a number on the left
15178 if ( !$good_combo && $types_to_go[$itokm] eq 'n' ) {
15180 # okay if nothing else to left
15181 if ( $itokm == $ibeg_1 ) {
15185 # otherwise look one more token to left
15188 # okay if math operator, comma, or assignment
15189 $good_combo = ( $itokmm == $ibeg_1 )
15190 && ( $is_math_op{ $types_to_go[$itokmm] }
15191 || $types_to_go[$itokmm] =~ /^[,]$/
15192 || $is_assignment{ $types_to_go[$itokmm] }
15197 # look for a single short token either side of the
15199 if ( !$good_combo ) {
15201 # Slight adjustment factor to make results
15202 # independent of break before or after operator in
15203 # long summed lists. (An operator and a space make
15205 my $two = ( $itok eq $iend_1 ) ? 2 : 0;
15209 # numbers or id's on both sides of this joint
15210 $types_to_go[$itokp] =~ /^[in]$/
15211 && $types_to_go[$itokm] =~ /^[in]$/
15213 # one of the two lines must be short:
15216 # no more than 2 nonblank tokens right of
15221 && token_sequence_length( $itokp, $iend_2 )
15223 $rOpts_short_concatenation_item_length
15226 # no more than 2 nonblank tokens left of
15231 && token_sequence_length( $ibeg_1, $itokm )
15233 $rOpts_short_concatenation_item_length
15238 # keep pure terms; don't mix +- with */
15240 $is_plus_minus{$type}
15241 && ( $is_mult_div{ $types_to_go[$itokmm] }
15242 || $is_mult_div{ $types_to_go[$itokpp] } )
15245 $is_mult_div{$type}
15246 && ( $is_plus_minus{ $types_to_go[$itokmm] }
15247 || $is_plus_minus{ $types_to_go[$itokpp] } )
15253 # it is also good to combine if we can reduce to 2 lines
15254 if ( !$good_combo ) {
15256 # index on other line where same token would be in a
15259 ( $itok == $iend_1 ) ? $iend_2 : $ibeg_1;
15264 && $types_to_go[$iother] ne $type;
15267 next unless ($good_combo);
15271 elsif ( $is_amp_amp{$type} ) {
15275 elsif ( $is_assignment{$type} ) {
15277 } ## end assignment
15280 #----------------------------------------------------------
15281 # Recombine Section 1:
15282 # Join welded nested containers immediately
15283 #----------------------------------------------------------
15284 if ( weld_len_right_to_go($iend_1)
15285 || weld_len_left_to_go($ibeg_2) )
15289 # Old coding alternated sweep direction: no longer needed
15290 # $reverse = 1 - $reverse;
15295 #----------------------------------------------------------
15296 # Recombine Section 2:
15297 # Examine token at $iend_1 (right end of first line of pair)
15298 #----------------------------------------------------------
15300 # an isolated '}' may join with a ';' terminated segment
15301 if ( $type_iend_1 eq '}' ) {
15303 # Check for cases where combining a semicolon terminated
15304 # statement with a previous isolated closing paren will
15305 # allow the combined line to be outdented. This is
15306 # generally a good move. For example, we can join up
15307 # the last two lines here:
15309 # $dev, $ino, $mode, $nlink, $uid, $gid, $rdev,
15310 # $size, $atime, $mtime, $ctime, $blksize, $blocks
15316 # $dev, $ino, $mode, $nlink, $uid, $gid, $rdev,
15317 # $size, $atime, $mtime, $ctime, $blksize, $blocks
15320 # which makes the parens line up.
15322 # Another example, from Joe Matarazzo, probably looks best
15323 # with the 'or' clause appended to the trailing paren:
15324 # $self->some_method(
15327 # ) or die "Some_method didn't work";
15329 # But we do not want to do this for something like the -lp
15330 # option where the paren is not outdentable because the
15331 # trailing clause will be far to the right.
15333 # The logic here is synchronized with the logic in sub
15334 # sub set_adjusted_indentation, which actually does
15337 $skip_Section_3 ||= $this_line_is_semicolon_terminated
15339 # only one token on last line
15340 && $ibeg_1 == $iend_1
15342 # must be structural paren
15343 && $tokens_to_go[$iend_1] eq ')'
15345 # style must allow outdenting,
15346 && !$closing_token_indentation{')'}
15348 # only leading '&&', '||', and ':' if no others seen
15349 # (but note: our count made below could be wrong
15350 # due to intervening comments)
15351 && ( $leading_amp_count == 0
15352 || $type_ibeg_2 !~ /^(:|\&\&|\|\|)$/ )
15354 # but leading colons probably line up with a
15355 # previous colon or question (count could be wrong).
15356 && $type_ibeg_2 ne ':'
15358 # only one step in depth allowed. this line must not
15359 # begin with a ')' itself.
15360 && ( $nesting_depth_to_go[$iend_1] ==
15361 $nesting_depth_to_go[$iend_2] + 1 );
15363 # YVES patch 2 of 2:
15364 # Allow cuddled eval chains, like this:
15371 # This patch works together with a patch in
15372 # setting adjusted indentation (where the closing eval
15373 # brace is outdented if possible).
15374 # The problem is that an 'eval' block has continuation
15375 # indentation and it looks better to undo it in some
15376 # cases. If we do not use this patch we would get:
15384 # The alternative, for uncuddled style, is to create
15385 # a patch in set_adjusted_indentation which undoes
15386 # the indentation of a leading line like 'or do {'.
15387 # This doesn't work well with -icb through
15389 $block_type_to_go[$iend_1] eq 'eval'
15390 && !$rOpts->{'line-up-parentheses'}
15391 && !$rOpts->{'indent-closing-brace'}
15392 && $tokens_to_go[$iend_2] eq '{'
15394 ( $type_ibeg_2 =~ /^(|\&\&|\|\|)$/ )
15395 || ( $type_ibeg_2 eq 'k'
15396 && $is_and_or{ $tokens_to_go[$ibeg_2] } )
15397 || $is_if_unless{ $tokens_to_go[$ibeg_2] }
15401 $skip_Section_3 ||= 1;
15408 # handle '.' and '?' specially below
15409 || ( $type_ibeg_2 =~ /^[\.\?]$/ )
15413 elsif ( $type_iend_1 eq '{' ) {
15416 # honor breaks at opening brace
15417 # Added to prevent recombining something like this:
15418 # } || eval { package main;
15419 next if $forced_breakpoint_to_go[$iend_1];
15422 # do not recombine lines with ending &&, ||,
15423 elsif ( $is_amp_amp{$type_iend_1} ) {
15424 next unless $want_break_before{$type_iend_1};
15427 # Identify and recombine a broken ?/: chain
15428 elsif ( $type_iend_1 eq '?' ) {
15430 # Do not recombine different levels
15432 if ( $levels_to_go[$ibeg_1] ne $levels_to_go[$ibeg_2] );
15434 # do not recombine unless next line ends in :
15435 next unless $type_iend_2 eq ':';
15438 # for lines ending in a comma...
15439 elsif ( $type_iend_1 eq ',' ) {
15441 # Do not recombine at comma which is following the
15443 # TODO: might be best to make a special flag
15444 next if ( $old_breakpoint_to_go[$iend_1] );
15446 # an isolated '},' may join with an identifier + ';'
15447 # this is useful for the class of a 'bless' statement (bless.t)
15448 if ( $type_ibeg_1 eq '}'
15449 && $type_ibeg_2 eq 'i' )
15452 unless ( ( $ibeg_1 == ( $iend_1 - 1 ) )
15453 && ( $iend_2 == ( $ibeg_2 + 1 ) )
15454 && $this_line_is_semicolon_terminated );
15456 # override breakpoint
15457 $forced_breakpoint_to_go[$iend_1] = 0;
15463 # do not recombine after a comma unless this will leave
15465 next unless ( $n + 1 >= $nmax );
15467 # do not recombine if there is a change in indentation depth
15470 $levels_to_go[$iend_1] != $levels_to_go[$iend_2] );
15472 # do not recombine a "complex expression" after a
15473 # comma. "complex" means no parens.
15475 foreach my $ii ( $ibeg_2 .. $iend_2 ) {
15476 if ( $tokens_to_go[$ii] eq '(' ) {
15481 next if $saw_paren;
15486 elsif ( $type_iend_1 eq '(' ) {
15488 # No longer doing this
15491 elsif ( $type_iend_1 eq ')' ) {
15493 # No longer doing this
15496 # keep a terminal for-semicolon
15497 elsif ( $type_iend_1 eq 'f' ) {
15501 # if '=' at end of line ...
15502 elsif ( $is_assignment{$type_iend_1} ) {
15504 # keep break after = if it was in input stream
15505 # this helps prevent 'blinkers'
15506 next if $old_breakpoint_to_go[$iend_1]
15508 # don't strand an isolated '='
15509 && $iend_1 != $ibeg_1;
15511 my $is_short_quote =
15512 ( $type_ibeg_2 eq 'Q'
15513 && $ibeg_2 == $iend_2
15514 && token_sequence_length( $ibeg_2, $ibeg_2 ) <
15515 $rOpts_short_concatenation_item_length );
15517 ( $type_ibeg_1 eq '?'
15518 && ( $ibeg_3 >= 0 && $types_to_go[$ibeg_3] eq ':' ) );
15520 # always join an isolated '=', a short quote, or if this
15521 # will put ?/: at start of adjacent lines
15522 if ( $ibeg_1 != $iend_1
15523 && !$is_short_quote
15530 # unless we can reduce this to two lines
15533 # or three lines, the last with a leading semicolon
15534 || ( $nmax == $n + 2
15535 && $types_to_go[$ibeg_nmax] eq ';' )
15537 # or the next line ends with a here doc
15538 || $type_iend_2 eq 'h'
15540 # or the next line ends in an open paren or brace
15541 # and the break hasn't been forced [dima.t]
15542 || ( !$forced_breakpoint_to_go[$iend_1]
15543 && $type_iend_2 eq '{' )
15546 # do not recombine if the two lines might align well
15547 # this is a very approximate test for this
15550 # RT#127633 - the leading tokens are not operators
15551 ( $type_ibeg_2 ne $tokens_to_go[$ibeg_2] )
15553 # or they are different
15555 && $type_ibeg_2 ne $types_to_go[$ibeg_3] )
15561 # Recombine if we can make two lines
15564 # -lp users often prefer this:
15565 # my $title = function($env, $env, $sysarea,
15566 # "bubba Borrower Entry");
15567 # so we will recombine if -lp is used we have
15569 && ( !$rOpts_line_up_parentheses
15570 || $type_iend_2 ne ',' )
15574 # otherwise, scan the rhs line up to last token for
15575 # complexity. Note that we are not counting the last
15576 # token in case it is an opening paren.
15578 my $depth = $nesting_depth_to_go[$ibeg_2];
15579 foreach my $i ( $ibeg_2 + 1 .. $iend_2 - 1 ) {
15580 if ( $nesting_depth_to_go[$i] != $depth ) {
15582 last if ( $tv > 1 );
15584 $depth = $nesting_depth_to_go[$i];
15587 # ok to recombine if no level changes before last token
15590 # otherwise, do not recombine if more than two
15592 next if ( $tv > 1 );
15594 # check total complexity of the two adjacent lines
15595 # that will occur if we do this join
15598 ? $ri_end->[ $n + 1 ]
15600 foreach my $i ( $iend_2 .. $istop ) {
15601 if ( $nesting_depth_to_go[$i] != $depth ) {
15603 last if ( $tv > 2 );
15605 $depth = $nesting_depth_to_go[$i];
15608 # do not recombine if total is more than 2 level changes
15609 next if ( $tv > 2 );
15614 unless ( $tokens_to_go[$ibeg_2] =~ /^[\{\(\[]$/ ) {
15615 $forced_breakpoint_to_go[$iend_1] = 0;
15620 elsif ( $type_iend_1 eq 'k' ) {
15622 # make major control keywords stand out
15627 #/^(last|next|redo|return)$/
15628 $is_last_next_redo_return{ $tokens_to_go[$iend_1] }
15630 # but only if followed by multiple lines
15634 if ( $is_and_or{ $tokens_to_go[$iend_1] } ) {
15636 unless $want_break_before{ $tokens_to_go[$iend_1] };
15640 #----------------------------------------------------------
15641 # Recombine Section 3:
15642 # Examine token at $ibeg_2 (left end of second line of pair)
15643 #----------------------------------------------------------
15645 # join lines identified above as capable of
15646 # causing an outdented line with leading closing paren
15647 # Note that we are skipping the rest of this section
15648 # and the rest of the loop to do the join
15649 if ($skip_Section_3) {
15650 $forced_breakpoint_to_go[$iend_1] = 0;
15655 # handle lines with leading &&, ||
15656 elsif ( $is_amp_amp{$type_ibeg_2} ) {
15658 $leading_amp_count++;
15660 # ok to recombine if it follows a ? or :
15661 # and is followed by an open paren..
15663 ( $is_ternary{$type_ibeg_1}
15664 && $tokens_to_go[$iend_2] eq '(' )
15666 # or is followed by a ? or : at same depth
15668 # We are looking for something like this. We can
15669 # recombine the && line with the line above to make the
15670 # structure more clear:
15672 # exists $G->{Attr}->{V}
15673 # && exists $G->{Attr}->{V}->{$u}
15674 # ? %{ $G->{Attr}->{V}->{$u} }
15677 # We should probably leave something like this alone:
15679 # exists $G->{Attr}->{E}
15680 # && exists $G->{Attr}->{E}->{$u}
15681 # && exists $G->{Attr}->{E}->{$u}->{$v}
15682 # ? %{ $G->{Attr}->{E}->{$u}->{$v} }
15684 # so that we either have all of the &&'s (or ||'s)
15685 # on one line, as in the first example, or break at
15686 # each one as in the second example. However, it
15687 # sometimes makes things worse to check for this because
15688 # it prevents multiple recombinations. So this is not done.
15690 && $is_ternary{ $types_to_go[$ibeg_3] }
15691 && $nesting_depth_to_go[$ibeg_3] ==
15692 $nesting_depth_to_go[$ibeg_2] );
15694 next if !$ok && $want_break_before{$type_ibeg_2};
15695 $forced_breakpoint_to_go[$iend_1] = 0;
15697 # tweak the bond strength to give this joint priority
15702 # Identify and recombine a broken ?/: chain
15703 elsif ( $type_ibeg_2 eq '?' ) {
15705 # Do not recombine different levels
15706 my $lev = $levels_to_go[$ibeg_2];
15707 next if ( $lev ne $levels_to_go[$ibeg_1] );
15709 # Do not recombine a '?' if either next line or
15710 # previous line does not start with a ':'. The reasons
15711 # are that (1) no alignment of the ? will be possible
15712 # and (2) the expression is somewhat complex, so the
15713 # '?' is harder to see in the interior of the line.
15714 my $follows_colon = $ibeg_1 >= 0 && $type_ibeg_1 eq ':';
15715 my $precedes_colon =
15716 $ibeg_3 >= 0 && $types_to_go[$ibeg_3] eq ':';
15717 next unless ( $follows_colon || $precedes_colon );
15719 # we will always combining a ? line following a : line
15720 if ( !$follows_colon ) {
15722 # ...otherwise recombine only if it looks like a chain.
15723 # we will just look at a few nearby lines to see if
15724 # this looks like a chain.
15725 my $local_count = 0;
15726 foreach my $ii ( $ibeg_0, $ibeg_1, $ibeg_3, $ibeg_4 ) {
15729 && $types_to_go[$ii] eq ':'
15730 && $levels_to_go[$ii] == $lev;
15732 next unless ( $local_count > 1 );
15734 $forced_breakpoint_to_go[$iend_1] = 0;
15737 # do not recombine lines with leading '.'
15738 elsif ( $type_ibeg_2 eq '.' ) {
15739 my $i_next_nonblank = min( $inext_to_go[$ibeg_2], $iend_2 );
15743 # ... unless there is just one and we can reduce
15744 # this to two lines if we do. For example, this
15748 # '($dummy, $pat) = &get_next_tex_cmd;' . '$args .= $pat;'
15750 # looks better than this:
15751 # $bodyA .= '($dummy, $pat) = &get_next_tex_cmd;'
15752 # . '$args .= $pat;'
15757 && $type_ibeg_1 ne $type_ibeg_2
15760 # ... or this would strand a short quote , like this
15761 # . "some long quote"
15764 || ( $types_to_go[$i_next_nonblank] eq 'Q'
15765 && $i_next_nonblank >= $iend_2 - 1
15766 && $token_lengths_to_go[$i_next_nonblank] <
15767 $rOpts_short_concatenation_item_length )
15771 # handle leading keyword..
15772 elsif ( $type_ibeg_2 eq 'k' ) {
15774 # handle leading "or"
15775 if ( $tokens_to_go[$ibeg_2] eq 'or' ) {
15778 $this_line_is_semicolon_terminated
15781 # following 'if' or 'unless' or 'or'
15782 $type_ibeg_1 eq 'k'
15783 && $is_if_unless{ $tokens_to_go[$ibeg_1] }
15785 # important: only combine a very simple or
15786 # statement because the step below may have
15787 # combined a trailing 'and' with this or,
15788 # and we do not want to then combine
15789 # everything together
15790 && ( $iend_2 - $ibeg_2 <= 7 )
15795 $forced_breakpoint_to_go[$iend_1] = 0
15796 unless $old_breakpoint_to_go[$iend_1];
15799 # handle leading 'and'
15800 elsif ( $tokens_to_go[$ibeg_2] eq 'and' ) {
15802 # Decide if we will combine a single terminal 'and'
15803 # after an 'if' or 'unless'.
15805 # This looks best with the 'and' on the same
15806 # line as the 'if':
15809 # if $seconds and $nu < 2;
15811 # But this looks better as shown:
15814 # if !$this->{Parents}{$_}
15815 # or $this->{Parents}{$_} eq $_;
15819 $this_line_is_semicolon_terminated
15822 # following 'if' or 'unless' or 'or'
15823 $type_ibeg_1 eq 'k'
15824 && ( $is_if_unless{ $tokens_to_go[$ibeg_1] }
15825 || $tokens_to_go[$ibeg_1] eq 'or' )
15830 # handle leading "if" and "unless"
15831 elsif ( $is_if_unless{ $tokens_to_go[$ibeg_2] } ) {
15833 # FIXME: This is still experimental..may not be too useful
15836 $this_line_is_semicolon_terminated
15838 # previous line begins with 'and' or 'or'
15839 && $type_ibeg_1 eq 'k'
15840 && $is_and_or{ $tokens_to_go[$ibeg_1] }
15845 # handle all other leading keywords
15848 # keywords look best at start of lines,
15849 # but combine things like "1 while"
15850 unless ( $is_assignment{$type_iend_1} ) {
15852 if ( ( $type_iend_1 ne 'k' )
15853 && ( $tokens_to_go[$ibeg_2] ne 'while' ) );
15858 # similar treatment of && and || as above for 'and' and 'or':
15859 # NOTE: This block of code is currently bypassed because
15860 # of a previous block but is retained for possible future use.
15861 elsif ( $is_amp_amp{$type_ibeg_2} ) {
15863 # maybe looking at something like:
15864 # unless $TEXTONLY || $item =~ m%</?(hr>|p>|a|img)%i;
15868 $this_line_is_semicolon_terminated
15870 # previous line begins with an 'if' or 'unless' keyword
15871 && $type_ibeg_1 eq 'k'
15872 && $is_if_unless{ $tokens_to_go[$ibeg_1] }
15877 # handle line with leading = or similar
15878 elsif ( $is_assignment{$type_ibeg_2} ) {
15879 next unless ( $n == 1 || $n == $nmax );
15880 next if $old_breakpoint_to_go[$iend_1];
15884 # unless we can reduce this to two lines
15887 # or three lines, the last with a leading semicolon
15888 || ( $nmax == 3 && $types_to_go[$ibeg_nmax] eq ';' )
15890 # or the next line ends with a here doc
15891 || $type_iend_2 eq 'h'
15893 # or this is a short line ending in ;
15894 || ( $n == $nmax && $this_line_is_semicolon_terminated )
15896 $forced_breakpoint_to_go[$iend_1] = 0;
15899 #----------------------------------------------------------
15900 # Recombine Section 4:
15901 # Combine the lines if we arrive here and it is possible
15902 #----------------------------------------------------------
15904 # honor hard breakpoints
15905 next if ( $forced_breakpoint_to_go[$iend_1] > 0 );
15907 my $bs = $bond_strength_to_go[$iend_1] + $bs_tweak;
15909 # Require a few extra spaces before recombining lines if we are
15910 # at an old breakpoint unless this is a simple list or terminal
15911 # line. The goal is to avoid oscillating between two
15912 # quasi-stable end states. For example this snippet caused
15916 ## TText => "[" . ( join ',', map { "\"$_\"" } split "\n", $_ ) . "]"
15920 if ( $old_breakpoint_to_go[$iend_1]
15921 && !$this_line_is_semicolon_terminated
15924 && $type_iend_2 ne ',' );
15926 # do not recombine if we would skip in indentation levels
15927 if ( $n < $nmax ) {
15928 my $if_next = $ri_beg->[ $n + 1 ];
15931 $levels_to_go[$ibeg_1] < $levels_to_go[$ibeg_2]
15932 && $levels_to_go[$ibeg_2] < $levels_to_go[$if_next]
15934 # but an isolated 'if (' is undesirable
15937 && $iend_1 - $ibeg_1 <= 2
15938 && $type_ibeg_1 eq 'k'
15939 && $tokens_to_go[$ibeg_1] eq 'if'
15940 && $tokens_to_go[$iend_1] ne '('
15946 next if ( $bs >= NO_BREAK - 1 );
15948 # remember the pair with the greatest bond strength
15955 if ( $bs > $bs_best ) {
15962 # recombine the pair with the greatest bond strength
15964 splice @{$ri_beg}, $n_best, 1;
15965 splice @{$ri_end}, $n_best - 1, 1;
15966 splice @joint, $n_best, 1;
15968 # keep going if we are still making progress
15972 return ( $ri_beg, $ri_end );
15974 } # end recombine_breakpoints
15976 sub break_all_chain_tokens {
15978 # scan the current breakpoints looking for breaks at certain "chain
15979 # operators" (. : && || + etc) which often occur repeatedly in a long
15980 # statement. If we see a break at any one, break at all similar tokens
15981 # within the same container.
15983 my ( $ri_left, $ri_right ) = @_;
15985 my %saw_chain_type;
15986 my %left_chain_type;
15987 my %right_chain_type;
15988 my %interior_chain_type;
15989 my $nmax = @{$ri_right} - 1;
15991 # scan the left and right end tokens of all lines
15993 for my $n ( 0 .. $nmax ) {
15994 my $il = $ri_left->[$n];
15995 my $ir = $ri_right->[$n];
15996 my $typel = $types_to_go[$il];
15997 my $typer = $types_to_go[$ir];
15998 $typel = '+' if ( $typel eq '-' ); # treat + and - the same
15999 $typer = '+' if ( $typer eq '-' );
16000 $typel = '*' if ( $typel eq '/' ); # treat * and / the same
16001 $typer = '*' if ( $typer eq '/' );
16002 my $tokenl = $tokens_to_go[$il];
16003 my $tokenr = $tokens_to_go[$ir];
16005 if ( $is_chain_operator{$tokenl} && $want_break_before{$typel} ) {
16006 next if ( $typel eq '?' );
16007 push @{ $left_chain_type{$typel} }, $il;
16008 $saw_chain_type{$typel} = 1;
16011 if ( $is_chain_operator{$tokenr} && !$want_break_before{$typer} ) {
16012 next if ( $typer eq '?' );
16013 push @{ $right_chain_type{$typer} }, $ir;
16014 $saw_chain_type{$typer} = 1;
16018 return unless $count;
16020 # now look for any interior tokens of the same types
16022 for my $n ( 0 .. $nmax ) {
16023 my $il = $ri_left->[$n];
16024 my $ir = $ri_right->[$n];
16025 foreach my $i ( $il + 1 .. $ir - 1 ) {
16026 my $type = $types_to_go[$i];
16027 $type = '+' if ( $type eq '-' );
16028 $type = '*' if ( $type eq '/' );
16029 if ( $saw_chain_type{$type} ) {
16030 push @{ $interior_chain_type{$type} }, $i;
16035 return unless $count;
16037 # now make a list of all new break points
16040 # loop over all chain types
16041 foreach my $type ( keys %saw_chain_type ) {
16043 # quit if just ONE continuation line with leading . For example--
16044 # print LATEXFILE '\framebox{\parbox[c][' . $h . '][t]{' . $w . '}{'
16046 last if ( $nmax == 1 && $type =~ /^[\.\+]$/ );
16048 # loop over all interior chain tokens
16049 foreach my $itest ( @{ $interior_chain_type{$type} } ) {
16051 # loop over all left end tokens of same type
16052 if ( $left_chain_type{$type} ) {
16053 next if $nobreak_to_go[ $itest - 1 ];
16054 foreach my $i ( @{ $left_chain_type{$type} } ) {
16055 next unless in_same_container( $i, $itest );
16056 push @insert_list, $itest - 1;
16058 # Break at matching ? if this : is at a different level.
16059 # For example, the ? before $THRf_DEAD in the following
16060 # should get a break if its : gets a break.
16063 # ( $_ & 1 ) ? ( $_ & 4 ) ? $THRf_DEAD : $THRf_ZOMBIE
16064 # : ( $_ & 4 ) ? $THRf_R_DETACHED
16065 # : $THRf_R_JOINABLE;
16067 && $levels_to_go[$i] != $levels_to_go[$itest] )
16069 my $i_question = $mate_index_to_go[$itest];
16070 if ( $i_question > 0 ) {
16071 push @insert_list, $i_question - 1;
16078 # loop over all right end tokens of same type
16079 if ( $right_chain_type{$type} ) {
16080 next if $nobreak_to_go[$itest];
16081 foreach my $i ( @{ $right_chain_type{$type} } ) {
16082 next unless in_same_container( $i, $itest );
16083 push @insert_list, $itest;
16085 # break at matching ? if this : is at a different level
16087 && $levels_to_go[$i] != $levels_to_go[$itest] )
16089 my $i_question = $mate_index_to_go[$itest];
16090 if ( $i_question >= 0 ) {
16091 push @insert_list, $i_question;
16100 # insert any new break points
16101 if (@insert_list) {
16102 insert_additional_breaks( \@insert_list, $ri_left, $ri_right );
16109 # Look for assignment operators that could use a breakpoint.
16110 # For example, in the following snippet
16112 # $HOME = $ENV{HOME}
16115 # || die "no home directory for user $<";
16117 # we could break at the = to get this, which is a little nicer:
16122 # || die "no home directory for user $<";
16124 # The logic here follows the logic in set_logical_padding, which
16125 # will add the padding in the second line to improve alignment.
16127 my ( $ri_left, $ri_right ) = @_;
16128 my $nmax = @{$ri_right} - 1;
16129 return unless ( $nmax >= 2 );
16131 # scan the left ends of first two lines
16134 for my $n ( 1 .. 2 ) {
16135 my $il = $ri_left->[$n];
16136 my $typel = $types_to_go[$il];
16137 my $tokenl = $tokens_to_go[$il];
16139 my $has_leading_op = ( $tokenl =~ /^\w/ )
16140 ? $is_chain_operator{$tokenl} # + - * / : ? && ||
16141 : $is_chain_operator{$typel}; # and, or
16142 return unless ($has_leading_op);
16145 unless ( $tokenl eq $tokbeg
16146 && $nesting_depth_to_go[$il] eq $depth_beg );
16149 $depth_beg = $nesting_depth_to_go[$il];
16152 # now look for any interior tokens of the same types
16153 my $il = $ri_left->[0];
16154 my $ir = $ri_right->[0];
16156 # now make a list of all new break points
16158 for ( my $i = $ir - 1 ; $i > $il ; $i-- ) {
16159 my $type = $types_to_go[$i];
16160 if ( $is_assignment{$type}
16161 && $nesting_depth_to_go[$i] eq $depth_beg )
16163 if ( $want_break_before{$type} ) {
16164 push @insert_list, $i - 1;
16167 push @insert_list, $i;
16172 # Break after a 'return' followed by a chain of operators
16173 # return ( $^O !~ /win32|dos/i )
16174 # && ( $^O ne 'VMS' )
16175 # && ( $^O ne 'OS2' )
16176 # && ( $^O ne 'MacOS' );
16179 # ( $^O !~ /win32|dos/i )
16180 # && ( $^O ne 'VMS' )
16181 # && ( $^O ne 'OS2' )
16182 # && ( $^O ne 'MacOS' );
16184 if ( $types_to_go[$i] eq 'k'
16185 && $tokens_to_go[$i] eq 'return'
16187 && $nesting_depth_to_go[$i] eq $depth_beg )
16189 push @insert_list, $i;
16192 return unless (@insert_list);
16194 # One final check...
16195 # scan second and third lines and be sure there are no assignments
16196 # we want to avoid breaking at an = to make something like this:
16198 # $html_icons{"$type-$state"}
16199 # or $icon = $html_icons{$type}
16200 # or $icon = $html_icons{$state} )
16201 for my $n ( 1 .. 2 ) {
16202 my $il = $ri_left->[$n];
16203 my $ir = $ri_right->[$n];
16204 foreach my $i ( $il + 1 .. $ir ) {
16205 my $type = $types_to_go[$i];
16207 if ( $is_assignment{$type}
16208 && $nesting_depth_to_go[$i] eq $depth_beg );
16212 # ok, insert any new break point
16213 if (@insert_list) {
16214 insert_additional_breaks( \@insert_list, $ri_left, $ri_right );
16219 sub insert_final_breaks {
16221 my ( $ri_left, $ri_right ) = @_;
16223 my $nmax = @{$ri_right} - 1;
16225 # scan the left and right end tokens of all lines
16227 my $i_first_colon = -1;
16228 for my $n ( 0 .. $nmax ) {
16229 my $il = $ri_left->[$n];
16230 my $ir = $ri_right->[$n];
16231 my $typel = $types_to_go[$il];
16232 my $typer = $types_to_go[$ir];
16233 return if ( $typel eq '?' );
16234 return if ( $typer eq '?' );
16235 if ( $typel eq ':' ) { $i_first_colon = $il; last; }
16236 elsif ( $typer eq ':' ) { $i_first_colon = $ir; last; }
16239 # For long ternary chains,
16240 # if the first : we see has its # ? is in the interior
16241 # of a preceding line, then see if there are any good
16242 # breakpoints before the ?.
16243 if ( $i_first_colon > 0 ) {
16244 my $i_question = $mate_index_to_go[$i_first_colon];
16245 if ( $i_question > 0 ) {
16247 for ( my $ii = $i_question - 1 ; $ii >= 0 ; $ii -= 1 ) {
16248 my $token = $tokens_to_go[$ii];
16249 my $type = $types_to_go[$ii];
16251 # For now, a good break is either a comma or,
16252 # in a long chain, a 'return'.
16253 # Patch for RT #126633: added the $nmax>1 check to avoid
16254 # breaking after a return for a simple ternary. For longer
16255 # chains the break after return allows vertical alignment, so
16256 # it is still done. So perltidy -wba='?' will not break
16257 # immediately after the return in the following statement:
16259 # return 0 ? 'aaaaaaaaaaaaaaaaaaaaa' :
16260 # 'bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb';
16265 || $type eq 'k' && ( $nmax > 1 && $token eq 'return' )
16267 && in_same_container( $ii, $i_question )
16270 push @insert_list, $ii;
16274 ## # For now, a good break is either a comma or a 'return'.
16275 ## if ( ( $type eq ',' || $type eq 'k' && $token eq 'return' )
16276 ## && in_same_container( $ii, $i_question ) )
16278 ## push @insert_list, $ii;
16283 # insert any new break points
16284 if (@insert_list) {
16285 insert_additional_breaks( \@insert_list, $ri_left, $ri_right );
16292 sub in_same_container {
16294 # check to see if tokens at i1 and i2 are in the
16295 # same container, and not separated by a comma, ? or :
16296 # FIXME: this can be written more efficiently now
16297 my ( $i1, $i2 ) = @_;
16298 my $type = $types_to_go[$i1];
16299 my $depth = $nesting_depth_to_go[$i1];
16300 return unless ( $nesting_depth_to_go[$i2] == $depth );
16301 if ( $i2 < $i1 ) { ( $i1, $i2 ) = ( $i2, $i1 ) }
16303 ###########################################################
16304 # This is potentially a very slow routine and not critical.
16305 # For safety just give up for large differences.
16306 # See test file 'infinite_loop.txt'
16307 # TODO: replace this loop with a data structure
16308 ###########################################################
16309 return if ( $i2 - $i1 > 200 );
16311 foreach my $i ( $i1 + 1 .. $i2 - 1 ) {
16312 next if ( $nesting_depth_to_go[$i] > $depth );
16313 return if ( $nesting_depth_to_go[$i] < $depth );
16315 my $tok = $tokens_to_go[$i];
16316 $tok = ',' if $tok eq '=>'; # treat => same as ,
16318 # Example: we would not want to break at any of these .'s
16319 # : "<A HREF=\"#item_" . htmlify( 0, $s2 ) . "\">$str</A>"
16320 if ( $type ne ':' ) {
16321 return if ( $tok =~ /^[\,\:\?]$/ ) || $tok eq '||' || $tok eq 'or';
16324 return if ( $tok =~ /^[\,]$/ );
16330 sub set_continuation_breaks {
16332 # Define an array of indexes for inserting newline characters to
16333 # keep the line lengths below the maximum desired length. There is
16334 # an implied break after the last token, so it need not be included.
16337 # This routine is part of series of routines which adjust line
16338 # lengths. It is only called if a statement is longer than the
16339 # maximum line length, or if a preliminary scanning located
16340 # desirable break points. Sub scan_list has already looked at
16341 # these tokens and set breakpoints (in array
16342 # $forced_breakpoint_to_go[$i]) where it wants breaks (for example
16343 # after commas, after opening parens, and before closing parens).
16344 # This routine will honor these breakpoints and also add additional
16345 # breakpoints as necessary to keep the line length below the maximum
16346 # requested. It bases its decision on where the 'bond strength' is
16349 # Output: returns references to the arrays:
16352 # which contain the indexes $i of the first and last tokens on each
16355 # In addition, the array:
16356 # $forced_breakpoint_to_go[$i]
16357 # may be updated to be =1 for any index $i after which there must be
16358 # a break. This signals later routines not to undo the breakpoint.
16360 my $saw_good_break = shift;
16361 my @i_first = (); # the first index to output
16362 my @i_last = (); # the last index to output
16363 my @i_colon_breaks = (); # needed to decide if we have to break at ?'s
16364 if ( $types_to_go[0] eq ':' ) { push @i_colon_breaks, 0 }
16366 set_bond_strengths();
16369 my $imax = $max_index_to_go;
16370 if ( $types_to_go[$imin] eq 'b' ) { $imin++ }
16371 if ( $types_to_go[$imax] eq 'b' ) { $imax-- }
16372 my $i_begin = $imin; # index for starting next iteration
16374 my $leading_spaces = leading_spaces_to_go($imin);
16375 my $line_count = 0;
16376 my $last_break_strength = NO_BREAK;
16377 my $i_last_break = -1;
16378 my $max_bias = 0.001;
16379 my $tiny_bias = 0.0001;
16380 my $leading_alignment_token = "";
16381 my $leading_alignment_type = "";
16383 # see if any ?/:'s are in order
16384 my $colons_in_order = 1;
16386 my @colon_list = grep { /^[\?\:]$/ } @types_to_go[ 0 .. $max_index_to_go ];
16387 my $colon_count = @colon_list;
16388 foreach (@colon_list) {
16389 if ( $_ eq $last_tok ) { $colons_in_order = 0; last }
16393 # This is a sufficient but not necessary condition for colon chain
16394 my $is_colon_chain = ( $colons_in_order && @colon_list > 2 );
16396 #-------------------------------------------------------
16397 # BEGINNING of main loop to set continuation breakpoints
16398 # Keep iterating until we reach the end
16399 #-------------------------------------------------------
16400 while ( $i_begin <= $imax ) {
16401 my $lowest_strength = NO_BREAK;
16402 my $starting_sum = $summed_lengths_to_go[$i_begin];
16405 my $lowest_next_token = '';
16406 my $lowest_next_type = 'b';
16407 my $i_lowest_next_nonblank = -1;
16409 #-------------------------------------------------------
16410 # BEGINNING of inner loop to find the best next breakpoint
16411 #-------------------------------------------------------
16412 for ( $i_test = $i_begin ; $i_test <= $imax ; $i_test++ ) {
16413 my $type = $types_to_go[$i_test];
16414 my $token = $tokens_to_go[$i_test];
16415 my $next_type = $types_to_go[ $i_test + 1 ];
16416 my $next_token = $tokens_to_go[ $i_test + 1 ];
16417 my $i_next_nonblank = $inext_to_go[$i_test];
16418 my $next_nonblank_type = $types_to_go[$i_next_nonblank];
16419 my $next_nonblank_token = $tokens_to_go[$i_next_nonblank];
16420 my $next_nonblank_block_type = $block_type_to_go[$i_next_nonblank];
16421 my $strength = $bond_strength_to_go[$i_test];
16422 my $maximum_line_length = maximum_line_length($i_begin);
16424 # use old breaks as a tie-breaker. For example to
16425 # prevent blinkers with -pbp in this code:
16428 ## qw/ARG OUTPUT PROTO CONSTRUCTOR RETURNS DESC PARAMS SEEALSO EXAMPLE/}
16431 # At the same time try to prevent a leading * in this code
16432 # with the default formatting:
16435 ## factorial( $a + $b - 1 ) / factorial( $a - 1 ) / factorial( $b - 1 )
16436 ## * ( $x**( $a - 1 ) )
16437 ## * ( ( 1 - $x )**( $b - 1 ) );
16439 # reduce strength a bit to break ties at an old breakpoint ...
16441 $old_breakpoint_to_go[$i_test]
16443 # which is a 'good' breakpoint, meaning ...
16444 # we don't want to break before it
16445 && !$want_break_before{$type}
16447 # and either we want to break before the next token
16448 # or the next token is not short (i.e. not a '*', '/' etc.)
16449 && $i_next_nonblank <= $imax
16450 && ( $want_break_before{$next_nonblank_type}
16451 || $token_lengths_to_go[$i_next_nonblank] > 2
16452 || $next_nonblank_type =~ /^[\,\(\[\{L]$/ )
16455 $strength -= $tiny_bias;
16458 # otherwise increase strength a bit if this token would be at the
16459 # maximum line length. This is necessary to avoid blinking
16460 # in the above example when the -iob flag is added.
16464 $summed_lengths_to_go[ $i_test + 1 ] -
16466 if ( $len >= $maximum_line_length ) {
16467 $strength += $tiny_bias;
16471 my $must_break = 0;
16473 # Force an immediate break at certain operators
16474 # with lower level than the start of the line,
16475 # unless we've already seen a better break.
16477 ##############################################
16478 # Note on an issue with a preceding ?
16479 ##############################################
16480 # We don't include a ? in the above list, but there may
16481 # be a break at a previous ? if the line is long.
16482 # Because of this we do not want to force a break if
16483 # there is a previous ? on this line. For now the best way
16484 # to do this is to not break if we have seen a lower strength
16485 # point, which is probably a ?.
16487 # Example of unwanted breaks we are avoiding at a '.' following a ?
16488 # from pod2html using perltidy -gnu:
16490 # ? "\n<A NAME=\""
16492 # . "\">\n$text</A>\n"
16493 # : "\n$type$pod2.html\#" . $value . "\">$text<\/A>\n";
16496 $next_nonblank_type =~ /^(\.|\&\&|\|\|)$/
16497 || ( $next_nonblank_type eq 'k'
16498 && $next_nonblank_token =~ /^(and|or)$/ )
16500 && ( $nesting_depth_to_go[$i_begin] >
16501 $nesting_depth_to_go[$i_next_nonblank] )
16502 && ( $strength <= $lowest_strength )
16505 set_forced_breakpoint($i_next_nonblank);
16510 # Try to put a break where requested by scan_list
16511 $forced_breakpoint_to_go[$i_test]
16513 # break between ) { in a continued line so that the '{' can
16515 # See similar logic in scan_list which catches instances
16516 # where a line is just something like ') {'. We have to
16517 # be careful because the corresponding block keyword might
16518 # not be on the first line, such as 'for' here:
16522 # for $x ( 1, 2 ) { local $_ = "b"; s/(.*)/+$1/ }
16528 && ( $token eq ')' )
16529 && ( $next_nonblank_type eq '{' )
16530 && ($next_nonblank_block_type)
16531 && ( $next_nonblank_block_type ne $tokens_to_go[$i_begin] )
16533 # RT #104427: Dont break before opening sub brace because
16534 # sub block breaks handled at higher level, unless
16535 # it looks like the preceeding list is long and broken
16537 $next_nonblank_block_type =~ /^sub\b/
16538 && ( $nesting_depth_to_go[$i_begin] ==
16539 $nesting_depth_to_go[$i_next_nonblank] )
16542 && !$rOpts->{'opening-brace-always-on-right'}
16545 # There is an implied forced break at a terminal opening brace
16546 || ( ( $type eq '{' ) && ( $i_test == $imax ) )
16550 # Forced breakpoints must sometimes be overridden, for example
16551 # because of a side comment causing a NO_BREAK. It is easier
16552 # to catch this here than when they are set.
16553 if ( $strength < NO_BREAK - 1 ) {
16554 $strength = $lowest_strength - $tiny_bias;
16559 # quit if a break here would put a good terminal token on
16560 # the next line and we already have a possible break
16563 && ( $next_nonblank_type =~ /^[\;\,]$/ )
16567 $summed_lengths_to_go[ $i_next_nonblank + 1 ] -
16569 ) > $maximum_line_length
16573 last if ( $i_lowest >= 0 );
16576 # Avoid a break which would strand a single punctuation
16577 # token. For example, we do not want to strand a leading
16578 # '.' which is followed by a long quoted string.
16579 # But note that we do want to do this with -extrude (l=1)
16580 # so please test any changes to this code on -extrude.
16583 && ( $i_test == $i_begin )
16584 && ( $i_test < $imax )
16585 && ( $token eq $type )
16589 $summed_lengths_to_go[ $i_test + 1 ] -
16591 ) < $maximum_line_length
16595 $i_test = min( $imax, $inext_to_go[$i_test] );
16599 if ( ( $strength <= $lowest_strength ) && ( $strength < NO_BREAK ) )
16602 # break at previous best break if it would have produced
16603 # a leading alignment of certain common tokens, and it
16604 # is different from the latest candidate break
16606 if ($leading_alignment_type);
16608 # Force at least one breakpoint if old code had good
16609 # break It is only called if a breakpoint is required or
16610 # desired. This will probably need some adjustments
16611 # over time. A goal is to try to be sure that, if a new
16612 # side comment is introduced into formatted text, then
16613 # the same breakpoints will occur. scbreak.t
16616 $i_test == $imax # we are at the end
16617 && !$forced_breakpoint_count #
16618 && $saw_good_break # old line had good break
16619 && $type =~ /^[#;\{]$/ # and this line ends in
16620 # ';' or side comment
16621 && $i_last_break < 0 # and we haven't made a break
16622 && $i_lowest >= 0 # and we saw a possible break
16623 && $i_lowest < $imax - 1 # (but not just before this ;)
16624 && $strength - $lowest_strength < 0.5 * WEAK # and it's good
16627 # Do not skip past an important break point in a short final
16628 # segment. For example, without this check we would miss the
16629 # break at the final / in the following code:
16632 # ( $tau * $mass_pellet * $q_0 *
16633 # ( 1. - exp( -$t_stop / $tau ) ) -
16634 # 4. * $pi * $factor * $k_ice *
16635 # ( $t_melt - $t_ice ) *
16638 # ( $rho_ice * $Qs * $pi * $r_pellet**2 );
16640 if ( $line_count > 2
16641 && $i_lowest < $i_test
16642 && $i_test > $imax - 2
16643 && $nesting_depth_to_go[$i_begin] >
16644 $nesting_depth_to_go[$i_lowest]
16645 && $lowest_strength < $last_break_strength - .5 * WEAK )
16647 # Make this break for math operators for now
16648 my $ir = $inext_to_go[$i_lowest];
16649 my $il = $iprev_to_go[$ir];
16651 if ( $types_to_go[$il] =~ /^[\/\*\+\-\%]$/
16652 || $types_to_go[$ir] =~ /^[\/\*\+\-\%]$/ );
16655 # Update the minimum bond strength location
16656 $lowest_strength = $strength;
16657 $i_lowest = $i_test;
16658 $lowest_next_token = $next_nonblank_token;
16659 $lowest_next_type = $next_nonblank_type;
16660 $i_lowest_next_nonblank = $i_next_nonblank;
16661 last if $must_break;
16663 # set flags to remember if a break here will produce a
16664 # leading alignment of certain common tokens
16665 if ( $line_count > 0
16667 && ( $lowest_strength - $last_break_strength <= $max_bias )
16670 my $i_last_end = $iprev_to_go[$i_begin];
16671 my $tok_beg = $tokens_to_go[$i_begin];
16672 my $type_beg = $types_to_go[$i_begin];
16675 # check for leading alignment of certain tokens
16677 $tok_beg eq $next_nonblank_token
16678 && $is_chain_operator{$tok_beg}
16679 && ( $type_beg eq 'k'
16680 || $type_beg eq $tok_beg )
16681 && $nesting_depth_to_go[$i_begin] >=
16682 $nesting_depth_to_go[$i_next_nonblank]
16685 || ( $tokens_to_go[$i_last_end] eq $token
16686 && $is_chain_operator{$token}
16687 && ( $type eq 'k' || $type eq $token )
16688 && $nesting_depth_to_go[$i_last_end] >=
16689 $nesting_depth_to_go[$i_test] )
16692 $leading_alignment_token = $next_nonblank_token;
16693 $leading_alignment_type = $next_nonblank_type;
16698 my $too_long = ( $i_test >= $imax );
16699 if ( !$too_long ) {
16702 $summed_lengths_to_go[ $i_test + 2 ] -
16704 $too_long = $next_length > $maximum_line_length;
16706 # To prevent blinkers we will avoid leaving a token exactly at
16707 # the line length limit unless it is the last token or one of
16708 # several "good" types.
16710 # The following code was a blinker with -pbp before this
16712 ## $last_nonblank_token eq '('
16713 ## && $is_indirect_object_taker{ $paren_type
16714 ## [$paren_depth] }
16715 # The issue causing the problem is that if the
16716 # term [$paren_depth] gets broken across a line then
16717 # the whitespace routine doesn't see both opening and closing
16718 # brackets and will format like '[ $paren_depth ]'. This
16719 # leads to an oscillation in length depending if we break
16720 # before the closing bracket or not.
16722 && $i_test + 1 < $imax
16723 && $next_nonblank_type !~ /^[,\}\]\)R]$/ )
16725 $too_long = $next_length >= $maximum_line_length;
16729 FORMATTER_DEBUG_FLAG_BREAK
16732 my $rtok = $next_nonblank_token ? $next_nonblank_token : "";
16733 my $i_testp2 = $i_test + 2;
16734 if ( $i_testp2 > $max_index_to_go + 1 ) {
16735 $i_testp2 = $max_index_to_go + 1;
16737 if ( length($ltok) > 6 ) { $ltok = substr( $ltok, 0, 8 ) }
16738 if ( length($rtok) > 6 ) { $rtok = substr( $rtok, 0, 8 ) }
16740 "BREAK: i=$i_test imax=$imax $types_to_go[$i_test] $next_nonblank_type sp=($leading_spaces) lnext= $summed_lengths_to_go[$i_testp2] 2long=$too_long str=$strength $ltok $rtok\n";
16743 # allow one extra terminal token after exceeding line length
16744 # if it would strand this token.
16745 if ( $rOpts_fuzzy_line_length
16747 && $i_lowest == $i_test
16748 && $token_lengths_to_go[$i_test] > 1
16749 && $next_nonblank_type =~ /^[\;\,]$/ )
16756 ( $i_test == $imax ) # we're done if no more tokens,
16758 ( $i_lowest >= 0 ) # or no more space and we have a break
16764 #-------------------------------------------------------
16765 # END of inner loop to find the best next breakpoint
16766 # Now decide exactly where to put the breakpoint
16767 #-------------------------------------------------------
16769 # it's always ok to break at imax if no other break was found
16770 if ( $i_lowest < 0 ) { $i_lowest = $imax }
16772 # semi-final index calculation
16773 my $i_next_nonblank = $inext_to_go[$i_lowest];
16774 my $next_nonblank_type = $types_to_go[$i_next_nonblank];
16775 my $next_nonblank_token = $tokens_to_go[$i_next_nonblank];
16777 #-------------------------------------------------------
16778 # ?/: rule 1 : if a break here will separate a '?' on this
16779 # line from its closing ':', then break at the '?' instead.
16780 #-------------------------------------------------------
16781 foreach my $i ( $i_begin + 1 .. $i_lowest - 1 ) {
16782 next unless ( $tokens_to_go[$i] eq '?' );
16784 # do not break if probable sequence of ?/: statements
16785 next if ($is_colon_chain);
16787 # do not break if statement is broken by side comment
16790 $tokens_to_go[$max_index_to_go] eq '#'
16791 && terminal_type( \@types_to_go, \@block_type_to_go, 0,
16792 $max_index_to_go ) !~ /^[\;\}]$/
16795 # no break needed if matching : is also on the line
16797 if ( $mate_index_to_go[$i] >= 0
16798 && $mate_index_to_go[$i] <= $i_next_nonblank );
16801 if ( $want_break_before{'?'} ) { $i_lowest-- }
16805 #-------------------------------------------------------
16806 # END of inner loop to find the best next breakpoint:
16807 # Break the line after the token with index i=$i_lowest
16808 #-------------------------------------------------------
16810 # final index calculation
16811 $i_next_nonblank = $inext_to_go[$i_lowest];
16812 $next_nonblank_type = $types_to_go[$i_next_nonblank];
16813 $next_nonblank_token = $tokens_to_go[$i_next_nonblank];
16815 FORMATTER_DEBUG_FLAG_BREAK
16817 "BREAK: best is i = $i_lowest strength = $lowest_strength\n";
16819 #-------------------------------------------------------
16820 # ?/: rule 2 : if we break at a '?', then break at its ':'
16822 # Note: this rule is also in sub scan_list to handle a break
16823 # at the start and end of a line (in case breaks are dictated
16824 # by side comments).
16825 #-------------------------------------------------------
16826 if ( $next_nonblank_type eq '?' ) {
16827 set_closing_breakpoint($i_next_nonblank);
16829 elsif ( $types_to_go[$i_lowest] eq '?' ) {
16830 set_closing_breakpoint($i_lowest);
16833 #-------------------------------------------------------
16834 # ?/: rule 3 : if we break at a ':' then we save
16835 # its location for further work below. We may need to go
16836 # back and break at its '?'.
16837 #-------------------------------------------------------
16838 if ( $next_nonblank_type eq ':' ) {
16839 push @i_colon_breaks, $i_next_nonblank;
16841 elsif ( $types_to_go[$i_lowest] eq ':' ) {
16842 push @i_colon_breaks, $i_lowest;
16845 # here we should set breaks for all '?'/':' pairs which are
16846 # separated by this line
16850 # save this line segment, after trimming blanks at the ends
16852 ( $types_to_go[$i_begin] eq 'b' ) ? $i_begin + 1 : $i_begin );
16854 ( $types_to_go[$i_lowest] eq 'b' ) ? $i_lowest - 1 : $i_lowest );
16856 # set a forced breakpoint at a container opening, if necessary, to
16857 # signal a break at a closing container. Excepting '(' for now.
16858 if ( $tokens_to_go[$i_lowest] =~ /^[\{\[]$/
16859 && !$forced_breakpoint_to_go[$i_lowest] )
16861 set_closing_breakpoint($i_lowest);
16864 # get ready to go again
16865 $i_begin = $i_lowest + 1;
16866 $last_break_strength = $lowest_strength;
16867 $i_last_break = $i_lowest;
16868 $leading_alignment_token = "";
16869 $leading_alignment_type = "";
16870 $lowest_next_token = '';
16871 $lowest_next_type = 'b';
16873 if ( ( $i_begin <= $imax ) && ( $types_to_go[$i_begin] eq 'b' ) ) {
16877 # update indentation size
16878 if ( $i_begin <= $imax ) {
16879 $leading_spaces = leading_spaces_to_go($i_begin);
16883 #-------------------------------------------------------
16884 # END of main loop to set continuation breakpoints
16885 # Now go back and make any necessary corrections
16886 #-------------------------------------------------------
16888 #-------------------------------------------------------
16889 # ?/: rule 4 -- if we broke at a ':', then break at
16890 # corresponding '?' unless this is a chain of ?: expressions
16891 #-------------------------------------------------------
16892 if (@i_colon_breaks) {
16894 # using a simple method for deciding if we are in a ?/: chain --
16895 # this is a chain if it has multiple ?/: pairs all in order;
16897 # Note that if line starts in a ':' we count that above as a break
16898 my $is_chain = ( $colons_in_order && @i_colon_breaks > 1 );
16900 unless ($is_chain) {
16901 my @insert_list = ();
16902 foreach (@i_colon_breaks) {
16903 my $i_question = $mate_index_to_go[$_];
16904 if ( $i_question >= 0 ) {
16905 if ( $want_break_before{'?'} ) {
16906 $i_question = $iprev_to_go[$i_question];
16909 if ( $i_question >= 0 ) {
16910 push @insert_list, $i_question;
16913 insert_additional_breaks( \@insert_list, \@i_first, \@i_last );
16917 return ( \@i_first, \@i_last, $colon_count );
16920 sub insert_additional_breaks {
16922 # this routine will add line breaks at requested locations after
16923 # sub set_continuation_breaks has made preliminary breaks.
16925 my ( $ri_break_list, $ri_first, $ri_last ) = @_;
16928 my $line_number = 0;
16929 foreach my $i_break_left ( sort { $a <=> $b } @{$ri_break_list} ) {
16931 $i_f = $ri_first->[$line_number];
16932 $i_l = $ri_last->[$line_number];
16933 while ( $i_break_left >= $i_l ) {
16936 # shouldn't happen unless caller passes bad indexes
16937 if ( $line_number >= @{$ri_last} ) {
16939 "Non-fatal program bug: couldn't set break at $i_break_left\n"
16941 report_definite_bug();
16944 $i_f = $ri_first->[$line_number];
16945 $i_l = $ri_last->[$line_number];
16948 # Do not leave a blank at the end of a line; back up if necessary
16949 if ( $types_to_go[$i_break_left] eq 'b' ) { $i_break_left-- }
16951 my $i_break_right = $inext_to_go[$i_break_left];
16952 if ( $i_break_left >= $i_f
16953 && $i_break_left < $i_l
16954 && $i_break_right > $i_f
16955 && $i_break_right <= $i_l )
16957 splice( @{$ri_first}, $line_number, 1, ( $i_f, $i_break_right ) );
16958 splice( @{$ri_last}, $line_number, 1, ( $i_break_left, $i_l ) );
16964 sub set_closing_breakpoint {
16966 # set a breakpoint at a matching closing token
16967 # at present, this is only used to break at a ':' which matches a '?'
16968 my $i_break = shift;
16970 if ( $mate_index_to_go[$i_break] >= 0 ) {
16972 # CAUTION: infinite recursion possible here:
16973 # set_closing_breakpoint calls set_forced_breakpoint, and
16974 # set_forced_breakpoint call set_closing_breakpoint
16975 # ( test files attrib.t, BasicLyx.pm.html).
16976 # Don't reduce the '2' in the statement below
16977 if ( $mate_index_to_go[$i_break] > $i_break + 2 ) {
16979 # break before } ] and ), but sub set_forced_breakpoint will decide
16980 # to break before or after a ? and :
16981 my $inc = ( $tokens_to_go[$i_break] eq '?' ) ? 0 : 1;
16982 set_forced_breakpoint( $mate_index_to_go[$i_break] - $inc );
16986 my $type_sequence = $type_sequence_to_go[$i_break];
16987 if ($type_sequence) {
16988 my $closing_token = $matching_token{ $tokens_to_go[$i_break] };
16989 $postponed_breakpoint{$type_sequence} = 1;
16995 sub compare_indentation_levels {
16997 # check to see if output line tabbing agrees with input line
16998 # this can be very useful for debugging a script which has an extra
17000 my ( $guessed_indentation_level, $structural_indentation_level ) = @_;
17001 if ( $guessed_indentation_level ne $structural_indentation_level ) {
17002 $last_tabbing_disagreement = $input_line_number;
17004 if ($in_tabbing_disagreement) {
17007 $tabbing_disagreement_count++;
17009 if ( $tabbing_disagreement_count <= MAX_NAG_MESSAGES ) {
17010 write_logfile_entry(
17011 "Start indentation disagreement: input=$guessed_indentation_level; output=$structural_indentation_level\n"
17014 $in_tabbing_disagreement = $input_line_number;
17015 $first_tabbing_disagreement = $in_tabbing_disagreement
17016 unless ($first_tabbing_disagreement);
17021 if ($in_tabbing_disagreement) {
17023 if ( $tabbing_disagreement_count <= MAX_NAG_MESSAGES ) {
17024 write_logfile_entry(
17025 "End indentation disagreement from input line $in_tabbing_disagreement\n"
17028 if ( $tabbing_disagreement_count == MAX_NAG_MESSAGES ) {
17029 write_logfile_entry(
17030 "No further tabbing disagreements will be noted\n");
17033 $in_tabbing_disagreement = 0;